ICU-12526 integrate Unicode 9 beta

X-SVN-Rev: 38753
This commit is contained in:
Markus Scherer 2016-05-19 22:48:18 +00:00
parent 8cf118d150
commit 5e69db5c2f
73 changed files with 40214 additions and 19249 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
/*
********************************************************************************
* Copyright (C) 1996-2014, International Business Machines
* Copyright (C) 1996-2016, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************
*
@ -434,7 +434,7 @@ u_getNumericValue(UChar32 c) {
}
return numValue;
} else if(ntv<UPROPS_NTV_RESERVED_START) {
} else if(ntv<UPROPS_NTV_FRACTION20_START) {
/* sexagesimal (base 60) integer */
int32_t numValue=(ntv>>2)-0xbf;
int32_t exp=(ntv&3)+1;
@ -458,6 +458,12 @@ u_getNumericValue(UChar32 c) {
}
return numValue;
} else if(ntv<UPROPS_NTV_RESERVED_START) {
// fraction-20 e.g. 3/80
int32_t frac20=ntv-UPROPS_NTV_FRACTION20_START; // 0..0x17
int32_t numerator=2*(frac20&3)+1;
int32_t denominator=20<<(frac20>>2);
return (double)numerator/denominator;
} else {
/* reserved */
return U_NO_NUMERIC_VALUE;

File diff suppressed because it is too large Load diff

View file

@ -39,7 +39,7 @@ U_CDECL_BEGIN
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
#define U_UNICODE_VERSION "8.0"
#define U_UNICODE_VERSION "9.0"
/**
* \file
@ -1572,8 +1572,33 @@ enum UBlockCode {
/** @stable ICU 56 */
UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/
/* New blocks in Unicode 9.0 */
/** @stable ICU 58 */
UBLOCK_ADLAM = 263, /*[1E900]*/
/** @stable ICU 58 */
UBLOCK_BHAIKSUKI = 264, /*[11C00]*/
/** @stable ICU 58 */
UBLOCK_CYRILLIC_EXTENDED_C = 265, /*[1C80]*/
/** @stable ICU 58 */
UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, /*[1E000]*/
/** @stable ICU 58 */
UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267, /*[16FE0]*/
/** @stable ICU 58 */
UBLOCK_MARCHEN = 268, /*[11C70]*/
/** @stable ICU 58 */
UBLOCK_MONGOLIAN_SUPPLEMENT = 269, /*[11660]*/
/** @stable ICU 58 */
UBLOCK_NEWA = 270, /*[11400]*/
/** @stable ICU 58 */
UBLOCK_OSAGE = 271, /*[104B0]*/
/** @stable ICU 58 */
UBLOCK_TANGUT = 272, /*[17000]*/
/** @stable ICU 58 */
UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/
/** @stable ICU 2.0 */
UBLOCK_COUNT = 263,
UBLOCK_COUNT = 274,
/** @stable ICU 2.0 */
UBLOCK_INVALID_CODE=-1
@ -1810,6 +1835,9 @@ typedef enum UJoiningGroup {
U_JG_MANICHAEAN_YODH, /**< @stable ICU 54 */
U_JG_MANICHAEAN_ZAYIN, /**< @stable ICU 54 */
U_JG_STRAIGHT_WAW, /**< @stable ICU 54 */
U_JG_AFRICAN_FEH, /**< @stable ICU 58 */
U_JG_AFRICAN_NOON, /**< @stable ICU 58 */
U_JG_AFRICAN_QAF, /**< @stable ICU 58 */
U_JG_COUNT
} UJoiningGroup;
@ -1836,10 +1864,23 @@ typedef enum UGraphemeClusterBreak {
U_GCB_LVT = 7, /*[LVT]*/
U_GCB_T = 8, /*[T]*/
U_GCB_V = 9, /*[V]*/
/** @stable ICU 4.0 */
U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
/** @stable ICU 4.0 */
U_GCB_PREPEND = 11, /*[PP]*/
/** @stable ICU 50 */
U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
U_GCB_COUNT = 13
/** @stable ICU 58 */
U_GCB_E_BASE = 13, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
/** @stable ICU 58 */
U_GCB_E_BASE_GAZ = 14, /*[EBG]*/
/** @stable ICU 58 */
U_GCB_E_MODIFIER = 15, /*[EM]*/
/** @stable ICU 58 */
U_GCB_GLUE_AFTER_ZWJ = 16, /*[GAZ]*/
/** @stable ICU 58 */
U_GCB_ZWJ = 17, /*[ZWJ]*/
U_GCB_COUNT = 18
} UGraphemeClusterBreak;
/**
@ -1864,16 +1905,35 @@ typedef enum UWordBreakValues {
U_WB_MIDNUM = 5, /*[MN]*/
U_WB_NUMERIC = 6, /*[NU]*/
U_WB_EXTENDNUMLET = 7, /*[EX]*/
/** @stable ICU 4.0 */
U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
/** @stable ICU 4.0 */
U_WB_EXTEND = 9, /*[Extend]*/
/** @stable ICU 4.0 */
U_WB_LF = 10, /*[LF]*/
/** @stable ICU 4.0 */
U_WB_MIDNUMLET =11, /*[MB]*/
/** @stable ICU 4.0 */
U_WB_NEWLINE =12, /*[NL]*/
/** @stable ICU 50 */
U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
/** @stable ICU 52 */
U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
/** @stable ICU 52 */
U_WB_SINGLE_QUOTE = 15, /*[SQ]*/
/** @stable ICU 52 */
U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/
U_WB_COUNT = 17
/** @stable ICU 58 */
U_WB_E_BASE = 17, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
/** @stable ICU 58 */
U_WB_E_BASE_GAZ = 18, /*[EBG]*/
/** @stable ICU 58 */
U_WB_E_MODIFIER = 19, /*[EM]*/
/** @stable ICU 58 */
U_WB_GLUE_AFTER_ZWJ = 20, /*[GAZ]*/
/** @stable ICU 58 */
U_WB_ZWJ = 21, /*[ZWJ]*/
U_WB_COUNT = 22
} UWordBreakValues;
/**
@ -1951,18 +2011,35 @@ typedef enum ULineBreak {
U_LB_SPACE = 26, /*[SP]*/
U_LB_BREAK_SYMBOLS = 27, /*[SY]*/
U_LB_ZWSPACE = 28, /*[ZW]*/
/** @stable ICU 2.6 */
U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
/** @stable ICU 2.6 */
U_LB_WORD_JOINER = 30, /*[WJ]*/
/** @stable ICU 3.4 */
U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
/** @stable ICU 3.4 */
U_LB_H3 = 32, /*[H3]*/
/** @stable ICU 3.4 */
U_LB_JL = 33, /*[JL]*/
/** @stable ICU 3.4 */
U_LB_JT = 34, /*[JT]*/
/** @stable ICU 3.4 */
U_LB_JV = 35, /*[JV]*/
/** @stable ICU 4.4 */
U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
/** @stable ICU 49 */
U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
/** @stable ICU 49 */
U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
/** @stable ICU 50 */
U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
U_LB_COUNT = 40
/** @stable ICU 58 */
U_LB_E_BASE = 40, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
/** @stable ICU 58 */
U_LB_E_MODIFIER = 41, /*[EM]*/
/** @stable ICU 58 */
U_LB_ZWJ = 42, /*[ZWJ]*/
U_LB_COUNT = 43
} ULineBreak;
/**

View file

@ -86,8 +86,15 @@ enum {
* ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
*/
UPROPS_NTV_BASE60_START=0x300,
/**
* Fraction-20 values:
* frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
* numerator: num = 2*(frac20&3)+1
* denominator: den = 20<<(frac20>>2)
*/
UPROPS_NTV_FRACTION20_START=UPROPS_NTV_BASE60_START+36, // 0x300+9*4=0x324
/** No numeric value (yet). */
UPROPS_NTV_RESERVED_START=UPROPS_NTV_BASE60_START+36, /* 0x300+9*4=0x324 */
UPROPS_NTV_RESERVED_START=UPROPS_NTV_FRACTION20_START+24, // 0x324+6*4=0x34c
UPROPS_NTV_MAX_SMALL_INT=UPROPS_NTV_FRACTION_START-UPROPS_NTV_NUMERIC_START-1
};

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2013-2015, International Business Machines
* Copyright (C) 2013-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uscript_props.cpp
@ -72,7 +72,7 @@ const int32_t SCRIPT_PROPS[] = {
0x1826 | ASPIRATIONAL, // Mong
0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
0x168F | EXCLUSION, // Ogam
0x10308 | EXCLUSION, // Ital
0x10300 | EXCLUSION, // Ital
0x0B15 | RECOMMENDED, // Orya
0x16A0 | EXCLUSION, // Runr
0x0D85 | RECOMMENDED, // Sinh
@ -102,7 +102,7 @@ const int32_t SCRIPT_PROPS[] = {
0x10A00 | EXCLUSION | RTL, // Khar
0xA800 | LIMITED_USE, // Sylo
0x1980 | LIMITED_USE | LB_LETTERS, // Talu
0x2D5E | ASPIRATIONAL, // Tfng
0x2D30 | ASPIRATIONAL, // Tfng
0x103A0 | EXCLUSION, // Xpeo
0x1B05 | LIMITED_USE, // Bali
0x1BC0 | LIMITED_USE, // Batk
@ -129,7 +129,7 @@ const int32_t SCRIPT_PROPS[] = {
0x0840 | LIMITED_USE | RTL, // Mand
0,
0x10980 | EXCLUSION | RTL, // Mero
0x07D8 | LIMITED_USE | RTL, // Nkoo
0x07CA | LIMITED_USE | RTL, // Nkoo
0x10C00 | EXCLUSION | RTL, // Orkh
0x1036B | EXCLUSION, // Perm
0xA840 | EXCLUSION, // Phag
@ -146,7 +146,7 @@ const int32_t SCRIPT_PROPS[] = {
0x12000 | EXCLUSION, // Xsux
0,
0xFDD0 | UNKNOWN, // Zzzz
0x102B7 | EXCLUSION, // Cari
0x102A0 | EXCLUSION, // Cari
0x304B | RECOMMENDED | LB_LETTERS, // Jpan
0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
0x10280 | EXCLUSION, // Lyci
@ -163,7 +163,7 @@ const int32_t SCRIPT_PROPS[] = {
0x11103 | LIMITED_USE, // Cakm
0xAC00 | RECOMMENDED, // Kore
0x11083 | EXCLUSION, // Kthi
0x10AC1 | EXCLUSION | RTL, // Mani
0x10AD8 | EXCLUSION | RTL, // Mani
0x10B60 | EXCLUSION | RTL, // Phli
0x10B8F | EXCLUSION | RTL, // Phlp
0,
@ -173,7 +173,7 @@ const int32_t SCRIPT_PROPS[] = {
0,
0,
0xA6A0 | LIMITED_USE, // Bamu
0xA4E8 | LIMITED_USE, // Lisu
0xA4D0 | LIMITED_USE, // Lisu
0,
0x10A60 | EXCLUSION | RTL, // Sarb
0x16AE6 | EXCLUSION, // Bass
@ -196,7 +196,7 @@ const int32_t SCRIPT_PROPS[] = {
0x11183 | EXCLUSION, // Shrd
0x110D0 | EXCLUSION, // Sora
0x11680 | EXCLUSION, // Takr
0,
0x18229 | EXCLUSION | LB_LETTERS, // Tang
0,
0x14400 | EXCLUSION, // Hluw
0x11208 | EXCLUSION, // Khoj
@ -209,6 +209,14 @@ const int32_t SCRIPT_PROPS[] = {
0x1128F | EXCLUSION, // Mult
0x11AC0 | EXCLUSION, // Pauc
0x1158E | EXCLUSION, // Sidd
0x1E909 | LIMITED_USE | RTL | CASED, // Adlm
0x11C0E | EXCLUSION, // Bhks
0x11C72 | EXCLUSION, // Marc
0x11412 | LIMITED_USE, // Newa
0x104B5 | LIMITED_USE | CASED, // Osge
0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb
0x1112 | RECOMMENDED, // Jamo
0,
// End copy-paste from parsescriptmetadata.py
};

View file

@ -2,11 +2,11 @@
# Copyright (C) 2002-2016, International Business Machines Corporation and others.
# All Rights Reserved.
#
# file: char.txt
# file: char.txt
#
# ICU Character Break Rules, also known as Grapheme Cluster Boundaries
# See Unicode Standard Annex #29.
# These rules are based on UAX #29 Revision 28 (Draft 3) for Unicode Version 9.0
# These rules are based on UAX #29 Revision 28 (Draft 7) for Unicode Version 9.0
#
#
@ -14,30 +14,29 @@
#
$CR = [\p{Grapheme_Cluster_Break = CR}];
$LF = [\p{Grapheme_Cluster_Break = LF}];
$Control = [[\p{Grapheme_Cluster_Break = Control}]-[:Block=Tags:]];
# TODO: Restore if the Prepend set becomes non-empty again: $Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
$Extend = [[\p{Grapheme_Cluster_Break = Extend}][:Block=Tags:]];
$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
$Control = [[\p{Grapheme_Cluster_Break = Control}]];
$Extend = [[\p{Grapheme_Cluster_Break = Extend}]];
$ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}];
$Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
$Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
#
# Korean Syllable Definitions
#
$L = [\p{Grapheme_Cluster_Break = L}];
$V = [\p{Grapheme_Cluster_Break = V}];
$T = [\p{Grapheme_Cluster_Break = T}];
$L = [\p{Grapheme_Cluster_Break = L}];
$V = [\p{Grapheme_Cluster_Break = V}];
$T = [\p{Grapheme_Cluster_Break = T}];
$LV = [\p{Grapheme_Cluster_Break = LV}];
$LVT = [\p{Grapheme_Cluster_Break = LVT}];
$LV = [\p{Grapheme_Cluster_Break = LV}];
$LVT = [\p{Grapheme_Cluster_Break = LVT}];
# Emoji defintions scraped from http://www.unicode.org/Public/emoji/2.0//emoji-data.txt
# Emoji defintions
$E_Base = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$E_Modifier = [\U0001F3FB-\U0001F3FF];
$ZWJ = [\u200D];
$GAZ = [\U0001F466-\U0001F469\U0001F48B\U0001F5E8\u2764];
$E_Base = [\p{Grapheme_Cluster_Break = EB}];
$E_Modifier = [\p{Grapheme_Cluster_Break = EM}];
$GAZ = [\p{Grapheme_Cluster_Break = GAZ}];
$E_Base_GAZ = [\p{Grapheme_Cluster_Break = EBG}];
## -------------------------------------------------
!!chain;
@ -53,21 +52,23 @@ $L ($L | $V | $LV | $LVT);
# GB 8. Keep pairs of regional indicators together
# Note that hard break '/' rule triggers only if there are three or more initial RIs,
^$Regional_Indicator $Regional_Indicator / $Regional_Indicator;
^$Regional_Indicator $Regional_Indicator;
^$Prepend* $Regional_Indicator $Regional_Indicator / $Regional_Indicator;
^$Prepend* $Regional_Indicator $Regional_Indicator;
# GB 9
[^$Control $CR $LF] ($Extend | $ZWJ);
# GB 9a (only for extended grapheme clusters)
[^$Control $CR $LF] $SpacingMark;
# GB 9b Restore if the Prepend set becomes non-empty again: $Prepend [^$Control $CR $LF];
# GB9c Emoji proposal
($E_Base | $GAZ) $E_Modifier;
# GB 9b
$Prepend [^$Control $CR $LF];
# GB 9d Don't break between ZWJ and Glue_After_Zwj
$ZWJ $GAZ;
# GB 10 Do not break within emoji modifier sequences or emoji zwj sequences.
($E_Base | $E_Base_GAZ) $E_Modifier;
# GB 11
$ZWJ ($GAZ | $E_Base_GAZ);
## -------------------------------------------------
@ -79,20 +80,25 @@ $T ($LVT | $T);
# GB 8. Going backwards, we must scan through any number of regional indicators as pairs.
#
$Regional_Indicator $Regional_Indicator / ($Regional_Indicator $Regional_Indicator)* [{eof}[^$Regional_Indicator]];
[{bof} $Extend $ZWJ $SpacingMark] $Regional_Indicator $Regional_Indicator / ($Regional_Indicator $Regional_Indicator)+ [{eof}[^$Regional_Indicator]];
[{bof} $Extend $ZWJ $SpacingMark] $Regional_Indicator / ($Regional_Indicator $Regional_Indicator)+ [{eof}[^$Regional_Indicator]];
$Regional_Indicator $Regional_Indicator;
$Regional_Indicator $Prepend;
# GB 9
($Extend | $ZWJ) [^$Control $CR $LF]; #note that this will chain into Regional_Indicator when needed.
# GB 9a
$SpacingMark [^$Control $CR $LF];
# GB 9b Restore if the Prepend set becomes non-empty again: [^$Control $CR $LF] $Prepend;
# GB 9c
$E_Modifier ($E_Base | $GAZ);
# GB 9b
[^$Control $CR $LF] $Prepend;
# GB 9d Don't break between ZWJ and Glue_After_Zwj
$GAZ $ZWJ;
# GB 10
$E_Modifier ($E_Base | $E_Base_GAZ);
# GB 11 Don't break between ZWJ and Glue_After_ZWJ
($GAZ | $E_Base_GAZ) $ZWJ;
## -------------------------------------------------

View file

@ -24,14 +24,8 @@
!!chain;
# Temporary definitions of Emoji Base and Emoji Modifiers, until properties are available.
$EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$EM = [\U0001F3FB-\U0001F3FF];
$AI = [:LineBreak = Ambiguous:];
$AL = [[:LineBreak = Alphabetic:] - [$EM\u2764]];
$AL = [:LineBreak = Alphabetic:];
$BA = [:LineBreak = Break_After:];
$BB = [:LineBreak = Break_Before:];
$BK = [:LineBreak = Mandatory_Break:];
@ -39,16 +33,18 @@ $B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [:LineBreak = Close_Punctuation:];
$CM = [[:LineBreak = Combining_Mark:] \u200d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EX = [:LineBreak = Exclamation:];
$GL = [:LineBreak = Glue:];
$HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
$ID = [[:LineBreak = Ideographic:][\u2764] - $EB];
$ID = [:LineBreak = Ideographic:];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
@ -56,6 +52,7 @@ $JV = [:LineBreak = JV:];
$JT = [:LineBreak = JT:];
$LF = [:LineBreak = Line_Feed:];
$NL = [:LineBreak = Next_Line:];
# NS includes CJ for CSS strict line breaking.
$NS = [[:LineBreak = Nonstarter:] $CJ];
$NU = [:LineBreak = Numeric:];
$OP = [:LineBreak = Open_Punctuation:];
@ -70,23 +67,27 @@ $SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [\u200d];
$ZWJ = [:LineBreak = ZWJ:];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
# limited to LineBreak=Complex_Context (SA).
$dictionary = [:LineBreak = Complex_Context:];
$dictionary = [$SA];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (South East Asian: Thai, Lao, Khmer)
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
$ALPlus = [$AL $AI $SA $SG $XX];
$ALPlus = [$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
@ -140,7 +141,7 @@ $CAN_CM $CM* [$SP $ZW];
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
# LB 8a ZWJ x ID Emoji proposal.
# LB 8a ZWJ x (ID | EB | EM) Emoji ZWJ sequences.
#
$ZWJ ($ID | $EB | $EM);
@ -287,7 +288,7 @@ $PR $CM* ($ID | $EB | $EM);
#
# LB 25 Numbers.
#
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
($CM* ($CL | $CP))? ($CM* ($PR | $PO))?;
# LB 26 Do not break a Korean syllable

View file

@ -29,14 +29,8 @@
!!chain;
# Temporary definitions of Emoji Base and Emoji Modifiers, until properties are available.
$EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$EM = [\U0001F3FB-\U0001F3FF];
$AI = [:LineBreak = Ambiguous:];
$AL = [[:LineBreak = Alphabetic:] - [$EM\u2764]];
$AL = [:LineBreak = Alphabetic:];
$BA = [:LineBreak = Break_After:];
$HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
$BB = [:LineBreak = Break_Before:];
@ -45,16 +39,18 @@ $B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [:LineBreak = Close_Punctuation:];
$CM = [[:LineBreak = Combining_Mark:] \u200d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EX = [:LineBreak = Exclamation:];
$GL = [:LineBreak = Glue:];
$HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
$ID = [[:LineBreak = Ideographic:][\u2764] - $EB];
$ID = [:LineBreak = Ideographic:];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
@ -62,6 +58,7 @@ $JV = [:LineBreak = JV:];
$JT = [:LineBreak = JT:];
$LF = [:LineBreak = Line_Feed:];
$NL = [:LineBreak = Next_Line:];
# NS includes CJ for CSS strict line breaking.
$NS = [[:LineBreak = Nonstarter:] $CJ];
$NU = [:LineBreak = Numeric:];
$OP = [:LineBreak = Open_Punctuation:];
@ -76,23 +73,27 @@ $SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [\u200d];
$ZWJ = [:LineBreak = ZWJ:];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
# limited to LineBreak=Complex_Context (SA).
$dictionary = [:LineBreak = Complex_Context:];
$dictionary = [$SA];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (South East Asian: Thai, Lao, Khmer)
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
$ALPlus = [$AL $AI $SA $SG $XX];
$ALPlus = [$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
@ -146,7 +147,7 @@ $CAN_CM $CM* [$SP $ZW];
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
# LB 8a ZWJ x ID Emoji proposal.
# LB 8a ZWJ x (ID | EB | EM) Emoji ZWJ sequences.
#
$ZWJ ($ID | $EB | $EM);
@ -296,7 +297,7 @@ $PR $CM* ($ID | $EB | $EM);
#
# LB 25 Numbers.
#
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
($CM* ($CL | $CP))? ($CM* ($PR | $PO))?;
# LB 26 Do not break a Korean syllable

View file

@ -18,7 +18,7 @@
# not because the older behavior is desirable.
#
# This tailors the line break behavior to correspond to CSS
# line-break=loose (BCP47 -u-lb-loose) as defined for languages other than
# line-break=loose (BCP47 -u-lb-loose) as defined for languages other than
# Chinese & Japanese.
# It sets characters of class CJ to behave like ID.
# In addition, it allows breaks:
@ -31,14 +31,8 @@
!!chain;
# Temporary definitions of Emoji Base and Emoji Modifiers, until properties are available.
$EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$EM = [\U0001F3FB-\U0001F3FF];
$AI = [:LineBreak = Ambiguous:];
$AL = [[:LineBreak = Alphabetic:] - [$EM\u2764]];
$AL = [:LineBreak = Alphabetic:];
$BA = [:LineBreak = Break_After:];
$BB = [:LineBreak = Break_Before:];
$BK = [:LineBreak = Mandatory_Break:];
@ -46,16 +40,19 @@ $B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [:LineBreak = Close_Punctuation:];
$CM = [[:LineBreak = Combining_Mark:] \u200d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EX = [:LineBreak = Exclamation:];
$GL = [:LineBreak = Glue:];
$HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
$ID = [[:LineBreak = Ideographic:]$CJ[\u2764] - $EB];
# CSS Loose tailoring: CJ resolves to ID
$ID = [[:LineBreak = Ideographic:] $CJ];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
@ -78,23 +75,27 @@ $SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [\u200d];
$ZWJ = [:LineBreak = ZWJ:];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
# limited to LineBreak=Complex_Context (SA).
$dictionary = [:LineBreak = Complex_Context:];
$dictionary = [$SA];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (South East Asian: Thai, Lao, Khmer)
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
$ALPlus = [$AL $AI $SA $SG $XX];
$ALPlus = [$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
@ -148,7 +149,7 @@ $CAN_CM $CM* [$SP $ZW];
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
# LB 8a ZWJ x ID Emoji proposal.
# LB 8a ZWJ x (ID | EB | EM) Emoji ZWJ sequences.
#
$ZWJ ($ID | $EB | $EM);
@ -298,7 +299,7 @@ $PR $CM* ($ID | $EB | $EM);
#
# LB 25 Numbers.
#
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
($CM* ($CL | $CP))? ($CM* ($PR | $PO))?;
# LB 26 Do not break a Korean syllable

View file

@ -38,14 +38,8 @@
!!chain;
# Temporary definitions of Emoji Base and Emoji Modifiers, until properties are available.
$EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$EM = [\U0001F3FB-\U0001F3FF];
$AI = [:LineBreak = Ambiguous:];
$AL = [[:LineBreak = Alphabetic:] - [$EM\u2764]];
$AL = [:LineBreak = Alphabetic:];
$BAX = [\u2010 \u2013];
$BA = [[:LineBreak = Break_After:] - $BAX];
$BB = [:LineBreak = Break_Before:];
@ -54,9 +48,11 @@ $B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [:LineBreak = Close_Punctuation:];
$CM = [[:LineBreak = Combining_Mark:] \u200d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EXX = [\uFF01 \uFF1F];
$EX = [[:LineBreak = Exclamation:] - $EXX];
$GL = [:LineBreak = Glue:];
@ -64,7 +60,8 @@ $HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
$ID = [[:LineBreak = Ideographic:] $CJ [\u2764] - $EB];
# CSS Loose tailoring: CJ resolves to ID
$ID = [[:LineBreak = Ideographic:] $CJ];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
@ -89,23 +86,27 @@ $SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [\u200d];
$ZWJ = [:LineBreak = ZWJ:];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
# limited to LineBreak=Complex_Context (SA).
$dictionary = [:LineBreak = Complex_Context:];
$dictionary = [$SA];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (South East Asian: Thai, Lao, Khmer)
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
$ALPlus = [$AL $AI $SA $SG $XX];
$ALPlus = [$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
@ -159,7 +160,7 @@ $CAN_CM $CM* [$SP $ZW];
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
# LB 8a ZWJ x ID Emoji proposal.
# LB 8a ZWJ x (ID | EB | EM) Emoji ZWJ sequences.
#
$ZWJ ($ID | $EB | $EM);
@ -312,7 +313,7 @@ $PR $CM* ($ID | $EB | $EM);
# LB 25 Numbers.
#
# Here do not include $PRX at the beginning or $POX at the end
(($PR | $PO | $POX) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
(($PR | $PO | $POX) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
($CM* ($CL | $CP))? ($CM* ($PR | $PRX | $PO))?;
# LB 26 Do not break a Korean syllable

View file

@ -29,14 +29,8 @@
!!chain;
# Temporary definitions of Emoji Base and Emoji Modifiers, until properties are available.
$EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$EM = [\U0001F3FB-\U0001F3FF];
$AI = [:LineBreak = Ambiguous:];
$AL = [[:LineBreak = Alphabetic:] - [$EM\u2764]];
$AL = [:LineBreak = Alphabetic:];
$BA = [:LineBreak = Break_After:];
$HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
$BB = [:LineBreak = Break_Before:];
@ -45,16 +39,19 @@ $B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [:LineBreak = Close_Punctuation:];
$CM = [[:LineBreak = Combining_Mark:] \u200d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EX = [:LineBreak = Exclamation:];
$GL = [:LineBreak = Glue:];
$HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
$ID = [[:LineBreak = Ideographic:]$CJ[\u2764] - $EB];
# CSS Loose tailoring: CJ resolves to ID
$ID = [[:LineBreak = Ideographic:] $CJ];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
@ -77,23 +74,27 @@ $SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [\u200d];
$ZWJ = [:LineBreak = ZWJ:];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
# limited to LineBreak=Complex_Context (SA).
$dictionary = [:LineBreak = Complex_Context:];
$dictionary = [$SA];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (South East Asian: Thai, Lao, Khmer)
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
$ALPlus = [$AL $AI $SA $SG $XX];
$ALPlus = [$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
@ -147,7 +148,7 @@ $CAN_CM $CM* [$SP $ZW];
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
# LB 8a ZWJ x ID Emoji proposal.
# LB 8a ZWJ x (ID | EB | EM) Emoji ZWJ sequences.
#
$ZWJ ($ID | $EB | $EM);

View file

@ -18,7 +18,7 @@
# not because the older behavior is desirable.
#
# This tailors the line break behavior to correspond to CSS
# line-break=normal (BCP47 -u-lb-normal) as defined for languages other than
# line-break=normal (BCP47 -u-lb-normal) as defined for languages other than
# Chinese & Japanese.
# It sets characters of class CJ to behave like ID.
@ -28,14 +28,8 @@
!!chain;
# Temporary definitions of Emoji Base and Emoji Modifiers, until properties are available.
$EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$EM = [\U0001F3FB-\U0001F3FF];
$AI = [:LineBreak = Ambiguous:];
$AL = [[:LineBreak = Alphabetic:] - [$EM\u2764]];
$AL = [:LineBreak = Alphabetic:];
$BA = [:LineBreak = Break_After:];
$BB = [:LineBreak = Break_Before:];
$BK = [:LineBreak = Mandatory_Break:];
@ -43,16 +37,19 @@ $B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [:LineBreak = Close_Punctuation:];
$CM = [[:LineBreak = Combining_Mark:] \u200d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EX = [:LineBreak = Exclamation:];
$GL = [:LineBreak = Glue:];
$HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
$ID = [[:LineBreak = Ideographic:] $CJ [\u2764] - $EB];
# CSS Normal tailoring: CJ resolves to ID
$ID = [[:LineBreak = Ideographic:] $CJ];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
@ -74,23 +71,27 @@ $SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [\u200d];
$ZWJ = [:LineBreak = ZWJ:];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
# limited to LineBreak=Complex_Context (SA).
$dictionary = [:LineBreak = Complex_Context:];
$dictionary = [$SA];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (South East Asian: Thai, Lao, Khmer)
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
$ALPlus = [$AL $AI $SA $SG $XX];
$ALPlus = [$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
@ -144,7 +145,7 @@ $CAN_CM $CM* [$SP $ZW];
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
# LB 8a ZWJ x ID Emoji proposal.
# LB 8a ZWJ x (ID | EB | EM) Emoji ZWJ sequences.
#
$ZWJ ($ID | $EB | $EM);
@ -291,7 +292,7 @@ $PR $CM* ($ID | $EB | $EM);
#
# LB 25 Numbers.
#
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
($CM* ($CL | $CP))? ($CM* ($PR | $PO))?;
# LB 26 Do not break a Korean syllable

View file

@ -29,14 +29,8 @@
!!chain;
# Temporary definitions of Emoji Base and Emoji Modifiers, until properties are available.
$EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$EM = [\U0001F3FB-\U0001F3FF];
$AI = [:LineBreak = Ambiguous:];
$AL = [[:LineBreak = Alphabetic:] - [$EM\u2764]];
$AL = [:LineBreak = Alphabetic:];
$BAX = [\u2010 \u2013];
$BA = [[:LineBreak = Break_After:] - $BAX];
$BB = [:LineBreak = Break_Before:];
@ -45,16 +39,19 @@ $B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [:LineBreak = Close_Punctuation:];
$CM = [[:LineBreak = Combining_Mark:] \u200d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EX = [:LineBreak = Exclamation:];
$GL = [:LineBreak = Glue:];
$HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
$ID = [[:LineBreak = Ideographic:] $CJ [\u2764] - $EB];
# CSS Normal tailoring: CJ resolves to ID
$ID = [[:LineBreak = Ideographic:] $CJ];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
@ -77,23 +74,27 @@ $SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [\u200d];
$ZWJ = [:LineBreak = ZWJ:];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
# limited to LineBreak=Complex_Context (SA).
$dictionary = [:LineBreak = Complex_Context:];
$dictionary = [$SA];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (South East Asian: Thai, Lao, Khmer)
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
$ALPlus = [$AL $AI $SA $SG $XX];
$ALPlus = [$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
@ -147,7 +148,7 @@ $CAN_CM $CM* [$SP $ZW];
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
# LB 8a ZWJ x ID Emoji proposal.
# LB 8a ZWJ x (ID | EB | EM) Emoji ZWJ sequences.
#
$ZWJ ($ID | $EB | $EM);
@ -297,7 +298,7 @@ $PR $CM* ($ID | $EB | $EM);
#
# LB 25 Numbers.
#
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
($CM* ($CL | $CP))? ($CM* ($PR | $PO))?;
# LB 26 Do not break a Korean syllable

View file

@ -19,7 +19,7 @@
# not because the older behavior is desirable.
#
# This tailors the line break behavior for Finnish, and to correspond to CSS
# line-break=normal (BCP47 -u-lb-normal) as defined for languages other than
# line-break=normal (BCP47 -u-lb-normal) as defined for languages other than
# Chinese & Japanese.
# It sets characters of class CJ to behave like ID.
@ -29,14 +29,8 @@
!!chain;
# Temporary definitions of Emoji Base and Emoji Modifiers, until properties are available.
$EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$EM = [\U0001F3FB-\U0001F3FF];
$AI = [:LineBreak = Ambiguous:];
$AL = [[:LineBreak = Alphabetic:] - [$EM\u2764]];
$AL = [:LineBreak = Alphabetic:];
$BA = [:LineBreak = Break_After:];
$HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
$BB = [:LineBreak = Break_Before:];
@ -45,16 +39,19 @@ $B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [:LineBreak = Close_Punctuation:];
$CM = [[:LineBreak = Combining_Mark:] \u200d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EX = [:LineBreak = Exclamation:];
$GL = [:LineBreak = Glue:];
$HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
$ID = [[:LineBreak = Ideographic:] $CJ [\u2764] - $EB];
# CSS Normal tailoring: CJ resolves to ID
$ID = [[:LineBreak = Ideographic:] $CJ];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
@ -76,23 +73,27 @@ $SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [\u200d];
$ZWJ = [:LineBreak = ZWJ:];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
# limited to LineBreak=Complex_Context (SA).
$dictionary = [:LineBreak = Complex_Context:];
$dictionary = [$SA];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (South East Asian: Thai, Lao, Khmer)
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
$ALPlus = [$AL $AI $SA $SG $XX];
$ALPlus = [$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
@ -146,7 +147,7 @@ $CAN_CM $CM* [$SP $ZW];
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
# LB 8a ZWJ x ID Emoji proposal.
# LB 8a ZWJ x (ID | EB | EM) Emoji ZWJ sequences.
#
$ZWJ ($ID | $EB | $EM);
@ -296,7 +297,7 @@ $PR $CM* ($ID | $EB | $EM);
#
# LB 25 Numbers.
#
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? $NU ($CM* ($NU | $SY | $IS))*
($CM* ($CL | $CP))? ($CM* ($PR | $PO))?;
# LB 26 Do not break a Korean syllable

View file

@ -6,8 +6,7 @@
#
# ICU Word Break Rules
# See Unicode Standard Annex #29.
# These rules are based on UAX #29 Revision 27 for Unicode Version 8.0
# with additions from L2/16-011R3 for Emoji sequences.
# These rules are based on UAX #29 Revision 28 (draft 7) for Unicode Version 9.0
#
# Note: Updates to word.txt will usually need to be merged into
# word_POSIX.txt also.
@ -25,17 +24,13 @@
# Character Class Definitions.
#
$E_Base = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$E_Modifier = [\U0001F3FB-\U0001F3FF];
$ZWJ = [\u200D];
$GAZ = [\U0001F466-\U0001F469\U0001F48B\U0001F5E8\u2764];
$CR = [\p{Word_Break = CR}];
$LF = [\p{Word_Break = LF}];
$Newline = [\p{Word_Break = Newline} ];
$Extend = [[\p{Word_Break = Extend}][:Block=Tags:]];
$Extend = [\p{Word_Break = Extend}];
$ZWJ = [\p{Word_Break = ZWJ}];
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
$Format = [[\p{Word_Break = Format}] - [:Block=Tags:]];
$Format = [\p{Word_Break = Format}];
$Katakana = [\p{Word_Break = Katakana}];
$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
$ALetter = [\p{Word_Break = ALetter}];
@ -46,6 +41,10 @@ $MidLetter = [\p{Word_Break = MidLetter}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
$E_Base = [\p{Word_Break = EB}];
$E_Modifier = [\p{Word_Break = EM}];
$GAZ = [\p{Word_Break = GAZ}];
$EBG = [\p{Word_Break = EBG}];
$Han = [:Han:];
$Hiragana = [:Hiragana:];
@ -99,7 +98,7 @@ $CR $LF;
# Rule 3c ZWJ x GAZ. Preceeds WB4, so no intervening Extend chars allowed.
#
$ZWJ $GAZ;
$ZWJ ($GAZ | $EBG);
# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning
@ -171,7 +170,12 @@ $ExtendNumLetEx $Hebrew_Letter {200}; # (13b)
$ExtendNumLetEx $NumericEx {100}; # (13b)
$ExtendNumLetEx $KatakanaEx {400}; # (13b)
# rule 13c
# rule 14
# Do not break within emoji modifier sequences
($E_Base | $EBG) ($Format | $Extend | $ZWJ)* $E_Modifier;
# rules 15 - 17
# Pairs of Regional Indicators stay together.
# With rule chaining disabled by ^, this rule will match exactly two of them.
# No other rule begins with a Regional_Indicator, so chaining cannot extend the match.
@ -182,11 +186,6 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b)
$HangulSyllable $HangulSyllable {200};
$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
# rule 13d
# E_Base x E_Modifier
#
($E_Base | $GAZ) ($Format | $Extend | $ZWJ)* $E_Modifier;
## -------------------------------------------------
@ -210,7 +209,7 @@ $LF $CR;
# Rule 3c ZWJ x GAZ. Preceeds WB4, so no intervening Extend chars allowed.
#
$GAZ $ZWJ;
($GAZ | $EBG) $ZWJ;
# rule 4
($Format | $Extend | $ZWJ)* [^$CR $LF $Newline]?;
@ -254,26 +253,27 @@ $BackKatakanaEx $BackKatakanaEx;
$BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx;
# rule 13c
# special handling for CJK characters: chain for later dictionary segmentation
$HangulSyllable $HangulSyllable;
$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
# rule 14
$E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $EBG);
# rule 15 - 17
# Pairs of Regional Indicators stay together.
^$BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
^$BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
$GAZ $ZWJ $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($GAZ | $EBG) $ZWJ $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
$GAZ $ZWJ $BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($GAZ | $EBG) $ZWJ $BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
# special handling for CJK characters: chain for later dictionary segmentation
$HangulSyllable $HangulSyllable;
$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
# rule 13d
$E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $GAZ);
## -------------------------------------------------

View file

@ -1,13 +1,12 @@
#
# Copyright (C) 2002-2016, International Business Machines Corporation
# Copyright (C) 2002-2016, International Business Machines Corporation
# and others. All Rights Reserved.
#
# file: word_POSIX.txt
#
# ICU Word Break Rules, POSIX locale.
# See Unicode Standard Annex #29.
# These rules are based on UAX #29 Revision 27 for Unicode Version 8.0
# with additions from L2/16-011R3 for Emoji sequences.
# These rules are based on UAX #29 Revision 28 (draft 7) for Unicode Version 9.0
#
# Note: Updates to word.txt will usually need to be merged into
# word_POSIX.txt also.
@ -25,17 +24,13 @@
# Character Class Definitions.
#
$E_Base = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
$E_Modifier = [\U0001F3FB-\U0001F3FF];
$ZWJ = [\u200D];
$GAZ = [\U0001F466-\U0001F469\U0001F48B\U0001F5E8\u2764];
$CR = [\p{Word_Break = CR}];
$LF = [\p{Word_Break = LF}];
$Newline = [\p{Word_Break = Newline} ];
$Extend = [[\p{Word_Break = Extend}][:Block=Tags:]];
$Extend = [\p{Word_Break = Extend}];
$ZWJ = [\p{Word_Break = ZWJ}];
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
$Format = [[\p{Word_Break = Format}] - [:Block=Tags:]];
$Format = [\p{Word_Break = Format}];
$Katakana = [\p{Word_Break = Katakana}];
$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
$ALetter = [\p{Word_Break = ALetter}];
@ -46,6 +41,10 @@ $MidLetter = [\p{Word_Break = MidLetter} - [\:]];
$MidNum = [\p{Word_Break = MidNum} [.]];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
$E_Base = [\p{Word_Break = EB}];
$E_Modifier = [\p{Word_Break = EM}];
$GAZ = [\p{Word_Break = GAZ}];
$EBG = [\p{Word_Break = EBG}];
$Han = [:Han:];
$Hiragana = [:Hiragana:];
@ -56,7 +55,7 @@ $Hiragana = [:Hiragana:];
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
$Control = [\p{Grapheme_Cluster_Break = Control}];
$Control = [\p{Grapheme_Cluster_Break = Control}];
$HangulSyllable = [\uac00-\ud7a3];
$ComplexContext = [:LineBreak = Complex_Context:];
$KanaKanji = [$Han $Hiragana $Katakana];
@ -68,7 +67,7 @@ $ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
#
# Rules 4 Ignore Format and Extend characters,
# Rules 4 Ignore Format and Extend characters,
# except when they appear at the beginning of a region of text.
#
# TODO: check if handling of katakana in dictionary makes rules incorrect/void
@ -99,7 +98,7 @@ $CR $LF;
# Rule 3c ZWJ x GAZ. Preceeds WB4, so no intervening Extend chars allowed.
#
$ZWJ $GAZ;
$ZWJ ($GAZ | $EBG);
# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning
@ -148,7 +147,7 @@ $NumericEx $NumericEx {100};
$NumericEx ($ALetterEx | $Hebrew_LetterEx) {200};
# rule 11 and 12
# rule 11 and 12
$NumericEx ($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx {100};
@ -171,7 +170,12 @@ $ExtendNumLetEx $Hebrew_Letter {200}; # (13b)
$ExtendNumLetEx $NumericEx {100}; # (13b)
$ExtendNumLetEx $KatakanaEx {400}; # (13b)
# rule 13c
# rule 14
# Do not break within emoji modifier sequences
($E_Base | $EBG) ($Format | $Extend | $ZWJ)* $E_Modifier;
# rules 15 - 17
# Pairs of Regional Indicators stay together.
# With rule chaining disabled by ^, this rule will match exactly two of them.
# No other rule begins with a Regional_Indicator, so chaining cannot extend the match.
@ -180,12 +184,7 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b)
# special handling for CJK characters: chain for later dictionary segmentation
$HangulSyllable $HangulSyllable {200};
$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
# rule 13d
# E_Base x E_Modifier
#
($E_Base | $GAZ) ($Format | $Extend | $ZWJ)* $E_Modifier;
$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
## -------------------------------------------------
@ -210,7 +209,7 @@ $LF $CR;
# Rule 3c ZWJ x GAZ. Preceeds WB4, so no intervening Extend chars allowed.
#
$GAZ $ZWJ;
($GAZ | $EBG) $ZWJ;
# rule 4
($Format | $Extend | $ZWJ)* [^$CR $LF $Newline]?;
@ -252,27 +251,28 @@ $BackKatakanaEx $BackKatakanaEx;
# rules 13 a/b
#
$BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx;
# rule 13c
^$BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
^$BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
$GAZ $ZWJ $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
$GAZ $ZWJ $BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx;
# special handling for CJK characters: chain for later dictionary segmentation
$HangulSyllable $HangulSyllable;
$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
# rule 13d
# rule 14
$E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $GAZ);
$E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $EBG);
# rule 15 - 17
# Pairs of Regional Indicators stay together.
^$BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
^$BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
($GAZ | $EBG) $ZWJ $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
($GAZ | $EBG) $ZWJ $BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)*
($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -13,7 +13,7 @@
U_ICUDATA_NAME=icudt58
##############################################################################
U_ICUDATA_ENDIAN_SUFFIX=l
UNICODE_VERSION=8.0
UNICODE_VERSION=9.0
ICU_LIB_TARGET=$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll
# ICUMAKE

View file

@ -1,10 +1,11 @@
# CaseFolding-8.0.0.txt
# Date: 2015-01-13, 18:16:36 GMT [MD]
# CaseFolding-9.0.0.txt
# Date: 2016-03-02, 18:54:54 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Case Folding Properties
#
@ -593,6 +594,15 @@
13FB; C; 13F3; # CHEROKEE SMALL LETTER YU
13FC; C; 13F4; # CHEROKEE SMALL LETTER YV
13FD; C; 13F5; # CHEROKEE SMALL LETTER MV
1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE
1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE
1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O
1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES
1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE
1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE
1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN
1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT
1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK
1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
@ -1163,6 +1173,7 @@ A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK
A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E
A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G
A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT
A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I
A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K
A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T
A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL
@ -1327,6 +1338,42 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
10426; C; 1044E; # DESERET CAPITAL LETTER OI
10427; C; 1044F; # DESERET CAPITAL LETTER EW
104B0; C; 104D8; # OSAGE CAPITAL LETTER A
104B1; C; 104D9; # OSAGE CAPITAL LETTER AI
104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN
104B3; C; 104DB; # OSAGE CAPITAL LETTER AH
104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA
104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA
104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA
104B7; C; 104DF; # OSAGE CAPITAL LETTER E
104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN
104B9; C; 104E1; # OSAGE CAPITAL LETTER HA
104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA
104BB; C; 104E3; # OSAGE CAPITAL LETTER I
104BC; C; 104E4; # OSAGE CAPITAL LETTER KA
104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA
104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA
104BF; C; 104E7; # OSAGE CAPITAL LETTER LA
104C0; C; 104E8; # OSAGE CAPITAL LETTER MA
104C1; C; 104E9; # OSAGE CAPITAL LETTER NA
104C2; C; 104EA; # OSAGE CAPITAL LETTER O
104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN
104C4; C; 104EC; # OSAGE CAPITAL LETTER PA
104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA
104C6; C; 104EE; # OSAGE CAPITAL LETTER SA
104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA
104C8; C; 104F0; # OSAGE CAPITAL LETTER TA
104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA
104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA
104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA
104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA
104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA
104CE; C; 104F6; # OSAGE CAPITAL LETTER U
104CF; C; 104F7; # OSAGE CAPITAL LETTER WA
104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA
104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA
104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA
104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA
10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A
10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA
10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB
@ -1410,5 +1457,39 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU
118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII
118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO
1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM
1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM
1E904; C; 1E926; # ADLAM CAPITAL LETTER BA
1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE
1E906; C; 1E928; # ADLAM CAPITAL LETTER PE
1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE
1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA
1E909; C; 1E92B; # ADLAM CAPITAL LETTER E
1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA
1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I
1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O
1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA
1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE
1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW
1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN
1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF
1E912; C; 1E934; # ADLAM CAPITAL LETTER YA
1E913; C; 1E935; # ADLAM CAPITAL LETTER U
1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM
1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI
1E916; C; 1E938; # ADLAM CAPITAL LETTER HA
1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF
1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA
1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA
1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU
1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA
1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA
1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA
1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE
1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL
1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO
1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
#
# EOF

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,11 @@
# DerivedNormalizationProps-8.0.0.txt
# Date: 2015-02-13, 13:30:23 GMT [MD]
# DerivedNormalizationProps-9.0.0.txt
# Date: 2016-03-02, 18:54:59 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -1679,12 +1680,12 @@ FFED..FFEE ; NFKD_QC; N
1F16A..1F16B ; NFKD_QC; N
1F190 ; NFKD_QC; N
1F200..1F202 ; NFKD_QC; N
1F210..1F23A ; NFKD_QC; N
1F210..1F23B ; NFKD_QC; N
1F240..1F248 ; NFKD_QC; N
1F250..1F251 ; NFKD_QC; N
2F800..2FA1D ; NFKD_QC; N
# Total code points: 16893
# Total code points: 16894
# ================================================
@ -2082,12 +2083,12 @@ FFED..FFEE ; NFKC_QC; N
1F16A..1F16B ; NFKC_QC; N
1F190 ; NFKC_QC; N
1F200..1F202 ; NFKC_QC; N
1F210..1F23A ; NFKC_QC; N
1F210..1F23B ; NFKC_QC; N
1F240..1F248 ; NFKC_QC; N
1F250..1F251 ; NFKC_QC; N
2F800..2FA1D ; NFKC_QC; N
# Total code points: 4793
# Total code points: 4794
# ================================================
@ -3513,6 +3514,14 @@ FFE3 ; Expands_On_NFKC
17B4..17B5 ; NFKC_CF;
180B..180D ; NFKC_CF;
180E ; NFKC_CF;
1C80 ; NFKC_CF; 0432
1C81 ; NFKC_CF; 0434
1C82 ; NFKC_CF; 043E
1C83 ; NFKC_CF; 0441
1C84..1C85 ; NFKC_CF; 0442
1C86 ; NFKC_CF; 044A
1C87 ; NFKC_CF; 0463
1C88 ; NFKC_CF; A64B
1D2C ; NFKC_CF; 0061
1D2D ; NFKC_CF; 00E6
1D2E ; NFKC_CF; 0062
@ -5263,6 +5272,7 @@ A7AA ; NFKC_CF; 0266
A7AB ; NFKC_CF; 025C
A7AC ; NFKC_CF; 0261
A7AD ; NFKC_CF; 026C
A7AE ; NFKC_CF; 026A
A7B0 ; NFKC_CF; 029E
A7B1 ; NFKC_CF; 0287
A7B2 ; NFKC_CF; 029D
@ -6731,6 +6741,42 @@ FFF0..FFF8 ; NFKC_CF;
10425 ; NFKC_CF; 1044D
10426 ; NFKC_CF; 1044E
10427 ; NFKC_CF; 1044F
104B0 ; NFKC_CF; 104D8
104B1 ; NFKC_CF; 104D9
104B2 ; NFKC_CF; 104DA
104B3 ; NFKC_CF; 104DB
104B4 ; NFKC_CF; 104DC
104B5 ; NFKC_CF; 104DD
104B6 ; NFKC_CF; 104DE
104B7 ; NFKC_CF; 104DF
104B8 ; NFKC_CF; 104E0
104B9 ; NFKC_CF; 104E1
104BA ; NFKC_CF; 104E2
104BB ; NFKC_CF; 104E3
104BC ; NFKC_CF; 104E4
104BD ; NFKC_CF; 104E5
104BE ; NFKC_CF; 104E6
104BF ; NFKC_CF; 104E7
104C0 ; NFKC_CF; 104E8
104C1 ; NFKC_CF; 104E9
104C2 ; NFKC_CF; 104EA
104C3 ; NFKC_CF; 104EB
104C4 ; NFKC_CF; 104EC
104C5 ; NFKC_CF; 104ED
104C6 ; NFKC_CF; 104EE
104C7 ; NFKC_CF; 104EF
104C8 ; NFKC_CF; 104F0
104C9 ; NFKC_CF; 104F1
104CA ; NFKC_CF; 104F2
104CB ; NFKC_CF; 104F3
104CC ; NFKC_CF; 104F4
104CD ; NFKC_CF; 104F5
104CE ; NFKC_CF; 104F6
104CF ; NFKC_CF; 104F7
104D0 ; NFKC_CF; 104F8
104D1 ; NFKC_CF; 104F9
104D2 ; NFKC_CF; 104FA
104D3 ; NFKC_CF; 104FB
10C80 ; NFKC_CF; 10CC0
10C81 ; NFKC_CF; 10CC1
10C82 ; NFKC_CF; 10CC2
@ -7819,6 +7865,40 @@ FFF0..FFF8 ; NFKC_CF;
1D7FD ; NFKC_CF; 0037
1D7FE ; NFKC_CF; 0038
1D7FF ; NFKC_CF; 0039
1E900 ; NFKC_CF; 1E922
1E901 ; NFKC_CF; 1E923
1E902 ; NFKC_CF; 1E924
1E903 ; NFKC_CF; 1E925
1E904 ; NFKC_CF; 1E926
1E905 ; NFKC_CF; 1E927
1E906 ; NFKC_CF; 1E928
1E907 ; NFKC_CF; 1E929
1E908 ; NFKC_CF; 1E92A
1E909 ; NFKC_CF; 1E92B
1E90A ; NFKC_CF; 1E92C
1E90B ; NFKC_CF; 1E92D
1E90C ; NFKC_CF; 1E92E
1E90D ; NFKC_CF; 1E92F
1E90E ; NFKC_CF; 1E930
1E90F ; NFKC_CF; 1E931
1E910 ; NFKC_CF; 1E932
1E911 ; NFKC_CF; 1E933
1E912 ; NFKC_CF; 1E934
1E913 ; NFKC_CF; 1E935
1E914 ; NFKC_CF; 1E936
1E915 ; NFKC_CF; 1E937
1E916 ; NFKC_CF; 1E938
1E917 ; NFKC_CF; 1E939
1E918 ; NFKC_CF; 1E93A
1E919 ; NFKC_CF; 1E93B
1E91A ; NFKC_CF; 1E93C
1E91B ; NFKC_CF; 1E93D
1E91C ; NFKC_CF; 1E93E
1E91D ; NFKC_CF; 1E93F
1E91E ; NFKC_CF; 1E940
1E91F ; NFKC_CF; 1E941
1E920 ; NFKC_CF; 1E942
1E921 ; NFKC_CF; 1E943
1EE00 ; NFKC_CF; 0627
1EE01 ; NFKC_CF; 0628
1EE02 ; NFKC_CF; 062C
@ -8083,6 +8163,7 @@ FFF0..FFF8 ; NFKC_CF;
1F238 ; NFKC_CF; 7533
1F239 ; NFKC_CF; 5272
1F23A ; NFKC_CF; 55B6
1F23B ; NFKC_CF; 914D
1F240 ; NFKC_CF; 3014 672C 3015
1F241 ; NFKC_CF; 3014 4E09 3015
1F242 ; NFKC_CF; 3014 4E8C 3015
@ -8634,7 +8715,7 @@ E0080..E00FF ; NFKC_CF;
E0100..E01EF ; NFKC_CF;
E01F0..E0FFF ; NFKC_CF;
# Total code points: 10146
# Total code points: 10227
# ================================================
@ -8972,6 +9053,7 @@ E01F0..E0FFF ; NFKC_CF;
17B4..17B5 ; Changes_When_NFKC_Casefolded
180B..180D ; Changes_When_NFKC_Casefolded
180E ; Changes_When_NFKC_Casefolded
1C80..1C88 ; Changes_When_NFKC_Casefolded
1D2C..1D2E ; Changes_When_NFKC_Casefolded
1D30..1D3A ; Changes_When_NFKC_Casefolded
1D3C..1D4D ; Changes_When_NFKC_Casefolded
@ -9389,7 +9471,7 @@ A7A2 ; Changes_When_NFKC_Casefolded
A7A4 ; Changes_When_NFKC_Casefolded
A7A6 ; Changes_When_NFKC_Casefolded
A7A8 ; Changes_When_NFKC_Casefolded
A7AA..A7AD ; Changes_When_NFKC_Casefolded
A7AA..A7AE ; Changes_When_NFKC_Casefolded
A7B0..A7B4 ; Changes_When_NFKC_Casefolded
A7B6 ; Changes_When_NFKC_Casefolded
A7F8..A7F9 ; Changes_When_NFKC_Casefolded
@ -9519,6 +9601,7 @@ FFE9..FFEC ; Changes_When_NFKC_Casefolded
FFED..FFEE ; Changes_When_NFKC_Casefolded
FFF0..FFF8 ; Changes_When_NFKC_Casefolded
10400..10427 ; Changes_When_NFKC_Casefolded
104B0..104D3 ; Changes_When_NFKC_Casefolded
10C80..10CB2 ; Changes_When_NFKC_Casefolded
118A0..118BF ; Changes_When_NFKC_Casefolded
1BCA0..1BCA3 ; Changes_When_NFKC_Casefolded
@ -9566,6 +9649,7 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded
1D7C3 ; Changes_When_NFKC_Casefolded
1D7C4..1D7CB ; Changes_When_NFKC_Casefolded
1D7CE..1D7FF ; Changes_When_NFKC_Casefolded
1E900..1E921 ; Changes_When_NFKC_Casefolded
1EE00..1EE03 ; Changes_When_NFKC_Casefolded
1EE05..1EE1F ; Changes_When_NFKC_Casefolded
1EE21..1EE22 ; Changes_When_NFKC_Casefolded
@ -9605,7 +9689,7 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded
1F16A..1F16B ; Changes_When_NFKC_Casefolded
1F190 ; Changes_When_NFKC_Casefolded
1F200..1F202 ; Changes_When_NFKC_Casefolded
1F210..1F23A ; Changes_When_NFKC_Casefolded
1F210..1F23B ; Changes_When_NFKC_Casefolded
1F240..1F248 ; Changes_When_NFKC_Casefolded
1F250..1F251 ; Changes_When_NFKC_Casefolded
2F800..2FA1D ; Changes_When_NFKC_Casefolded
@ -9617,6 +9701,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded
E0100..E01EF ; Changes_When_NFKC_Casefolded
E01F0..E0FFF ; Changes_When_NFKC_Casefolded
# Total code points: 10146
# Total code points: 10227
# EOF

File diff suppressed because it is too large Load diff

View file

@ -1,12 +1,14 @@
# NormalizationCorrections-8.0.0.txt
# Date: 2015-03-07, 01:30:00 GMT [KW, LI]
# NormalizationCorrections-9.0.0.txt
# Date: 2016-01-21, 22:00:00 GMT [KW, LI]
# © 2016 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# The normalization stability policy of the Unicode Consortium
# ordinarily precludes any change to the decomposition
# for any character, once established in a relevant version

View file

@ -1,10 +1,11 @@
# NormalizationTest-8.0.0.txt
# Date: 2015-02-13, 13:30:27 GMT [MD]
# NormalizationTest-9.0.0.txt
# Date: 2016-04-04, 11:41:55 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Normalization Test Suite
# Format:
@ -63,6 +64,8 @@
0045 0300 0304;00C8 0304;0045 0300 0304;00C8 0304;0045 0300 0304;
05B8 05B9 05B1 0591 05C3 05B0 05AC 059F;05B1 05B8 05B9 0591 05C3 05B0 05AC 059F;05B1 05B8 05B9 0591 05C3 05B0 05AC 059F;05B1 05B8 05B9 0591 05C3 05B0 05AC 059F;05B1 05B8 05B9 0591 05C3 05B0 05AC 059F;
0592 05B7 05BC 05A5 05B0 05C0 05C4 05AD;05B0 05B7 05BC 05A5 0592 05C0 05AD 05C4;05B0 05B7 05BC 05A5 0592 05C0 05AD 05C4;05B0 05B7 05BC 05A5 0592 05C0 05AD 05C4;05B0 05B7 05BC 05A5 0592 05C0 05AD 05C4;
1100 AC00 11A8;1100 AC01;1100 1100 1161 11A8;1100 AC01;1100 1100 1161 11A8;
1100 AC00 11A8 11A8;1100 AC01 11A8;1100 1100 1161 11A8 11A8;1100 AC01 11A8;1100 1100 1161 11A8 11A8;
#
@Part1 # Character by character test
# All characters not explicitly occurring in c1 of Part 1 have identical NFC, D, KC, KD forms.
@ -16407,6 +16410,7 @@ FFEE;FFEE;FFEE;25CB;25CB;
1F238;1F238;1F238;7533;7533;
1F239;1F239;1F239;5272;5272;
1F23A;1F23A;1F23A;55B6;55B6;
1F23B;1F23B;1F23B;914D;914D;
1F240;1F240;1F240;3014 672C 3015;3014 672C 3015;
1F241;1F241;1F241;3014 4E09 3015;3014 4E09 3015;
1F242;1F242;1F242;3014 4E8C 3015;3014 4E8C 3015;
@ -17523,6 +17527,34 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 085A 059A 0316 302A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;
0061 059A 0316 302A 085B 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;
0061 085B 059A 0316 302A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;
0061 0315 0300 05AE 08D4 0062;00E0 05AE 08D4 0315 0062;0061 05AE 0300 08D4 0315 0062;00E0 05AE 08D4 0315 0062;0061 05AE 0300 08D4 0315 0062;
0061 08D4 0315 0300 05AE 0062;0061 05AE 08D4 0300 0315 0062;0061 05AE 08D4 0300 0315 0062;0061 05AE 08D4 0300 0315 0062;0061 05AE 08D4 0300 0315 0062;
0061 0315 0300 05AE 08D5 0062;00E0 05AE 08D5 0315 0062;0061 05AE 0300 08D5 0315 0062;00E0 05AE 08D5 0315 0062;0061 05AE 0300 08D5 0315 0062;
0061 08D5 0315 0300 05AE 0062;0061 05AE 08D5 0300 0315 0062;0061 05AE 08D5 0300 0315 0062;0061 05AE 08D5 0300 0315 0062;0061 05AE 08D5 0300 0315 0062;
0061 0315 0300 05AE 08D6 0062;00E0 05AE 08D6 0315 0062;0061 05AE 0300 08D6 0315 0062;00E0 05AE 08D6 0315 0062;0061 05AE 0300 08D6 0315 0062;
0061 08D6 0315 0300 05AE 0062;0061 05AE 08D6 0300 0315 0062;0061 05AE 08D6 0300 0315 0062;0061 05AE 08D6 0300 0315 0062;0061 05AE 08D6 0300 0315 0062;
0061 0315 0300 05AE 08D7 0062;00E0 05AE 08D7 0315 0062;0061 05AE 0300 08D7 0315 0062;00E0 05AE 08D7 0315 0062;0061 05AE 0300 08D7 0315 0062;
0061 08D7 0315 0300 05AE 0062;0061 05AE 08D7 0300 0315 0062;0061 05AE 08D7 0300 0315 0062;0061 05AE 08D7 0300 0315 0062;0061 05AE 08D7 0300 0315 0062;
0061 0315 0300 05AE 08D8 0062;00E0 05AE 08D8 0315 0062;0061 05AE 0300 08D8 0315 0062;00E0 05AE 08D8 0315 0062;0061 05AE 0300 08D8 0315 0062;
0061 08D8 0315 0300 05AE 0062;0061 05AE 08D8 0300 0315 0062;0061 05AE 08D8 0300 0315 0062;0061 05AE 08D8 0300 0315 0062;0061 05AE 08D8 0300 0315 0062;
0061 0315 0300 05AE 08D9 0062;00E0 05AE 08D9 0315 0062;0061 05AE 0300 08D9 0315 0062;00E0 05AE 08D9 0315 0062;0061 05AE 0300 08D9 0315 0062;
0061 08D9 0315 0300 05AE 0062;0061 05AE 08D9 0300 0315 0062;0061 05AE 08D9 0300 0315 0062;0061 05AE 08D9 0300 0315 0062;0061 05AE 08D9 0300 0315 0062;
0061 0315 0300 05AE 08DA 0062;00E0 05AE 08DA 0315 0062;0061 05AE 0300 08DA 0315 0062;00E0 05AE 08DA 0315 0062;0061 05AE 0300 08DA 0315 0062;
0061 08DA 0315 0300 05AE 0062;0061 05AE 08DA 0300 0315 0062;0061 05AE 08DA 0300 0315 0062;0061 05AE 08DA 0300 0315 0062;0061 05AE 08DA 0300 0315 0062;
0061 0315 0300 05AE 08DB 0062;00E0 05AE 08DB 0315 0062;0061 05AE 0300 08DB 0315 0062;00E0 05AE 08DB 0315 0062;0061 05AE 0300 08DB 0315 0062;
0061 08DB 0315 0300 05AE 0062;0061 05AE 08DB 0300 0315 0062;0061 05AE 08DB 0300 0315 0062;0061 05AE 08DB 0300 0315 0062;0061 05AE 08DB 0300 0315 0062;
0061 0315 0300 05AE 08DC 0062;00E0 05AE 08DC 0315 0062;0061 05AE 0300 08DC 0315 0062;00E0 05AE 08DC 0315 0062;0061 05AE 0300 08DC 0315 0062;
0061 08DC 0315 0300 05AE 0062;0061 05AE 08DC 0300 0315 0062;0061 05AE 08DC 0300 0315 0062;0061 05AE 08DC 0300 0315 0062;0061 05AE 08DC 0300 0315 0062;
0061 0315 0300 05AE 08DD 0062;00E0 05AE 08DD 0315 0062;0061 05AE 0300 08DD 0315 0062;00E0 05AE 08DD 0315 0062;0061 05AE 0300 08DD 0315 0062;
0061 08DD 0315 0300 05AE 0062;0061 05AE 08DD 0300 0315 0062;0061 05AE 08DD 0300 0315 0062;0061 05AE 08DD 0300 0315 0062;0061 05AE 08DD 0300 0315 0062;
0061 0315 0300 05AE 08DE 0062;00E0 05AE 08DE 0315 0062;0061 05AE 0300 08DE 0315 0062;00E0 05AE 08DE 0315 0062;0061 05AE 0300 08DE 0315 0062;
0061 08DE 0315 0300 05AE 0062;0061 05AE 08DE 0300 0315 0062;0061 05AE 08DE 0300 0315 0062;0061 05AE 08DE 0300 0315 0062;0061 05AE 08DE 0300 0315 0062;
0061 0315 0300 05AE 08DF 0062;00E0 05AE 08DF 0315 0062;0061 05AE 0300 08DF 0315 0062;00E0 05AE 08DF 0315 0062;0061 05AE 0300 08DF 0315 0062;
0061 08DF 0315 0300 05AE 0062;0061 05AE 08DF 0300 0315 0062;0061 05AE 08DF 0300 0315 0062;0061 05AE 08DF 0300 0315 0062;0061 05AE 08DF 0300 0315 0062;
0061 0315 0300 05AE 08E0 0062;00E0 05AE 08E0 0315 0062;0061 05AE 0300 08E0 0315 0062;00E0 05AE 08E0 0315 0062;0061 05AE 0300 08E0 0315 0062;
0061 08E0 0315 0300 05AE 0062;0061 05AE 08E0 0300 0315 0062;0061 05AE 08E0 0300 0315 0062;0061 05AE 08E0 0300 0315 0062;0061 05AE 08E0 0300 0315 0062;
0061 0315 0300 05AE 08E1 0062;00E0 05AE 08E1 0315 0062;0061 05AE 0300 08E1 0315 0062;00E0 05AE 08E1 0315 0062;0061 05AE 0300 08E1 0315 0062;
0061 08E1 0315 0300 05AE 0062;0061 05AE 08E1 0300 0315 0062;0061 05AE 08E1 0300 0315 0062;0061 05AE 08E1 0300 0315 0062;0061 05AE 08E1 0300 0315 0062;
0061 059A 0316 302A 08E3 0062;0061 302A 0316 08E3 059A 0062;0061 302A 0316 08E3 059A 0062;0061 302A 0316 08E3 059A 0062;0061 302A 0316 08E3 059A 0062;
0061 08E3 059A 0316 302A 0062;0061 302A 08E3 0316 059A 0062;0061 302A 08E3 0316 059A 0062;0061 302A 08E3 0316 059A 0062;0061 302A 08E3 0316 059A 0062;
0061 0315 0300 05AE 08E4 0062;00E0 05AE 08E4 0315 0062;0061 05AE 0300 08E4 0315 0062;00E0 05AE 08E4 0315 0062;0061 05AE 0300 08E4 0315 0062;
@ -17967,6 +17999,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 1DF4 0315 0300 05AE 0062;0061 05AE 1DF4 0300 0315 0062;0061 05AE 1DF4 0300 0315 0062;0061 05AE 1DF4 0300 0315 0062;0061 05AE 1DF4 0300 0315 0062;
0061 0315 0300 05AE 1DF5 0062;00E0 05AE 1DF5 0315 0062;0061 05AE 0300 1DF5 0315 0062;00E0 05AE 1DF5 0315 0062;0061 05AE 0300 1DF5 0315 0062;
0061 1DF5 0315 0300 05AE 0062;0061 05AE 1DF5 0300 0315 0062;0061 05AE 1DF5 0300 0315 0062;0061 05AE 1DF5 0300 0315 0062;0061 05AE 1DF5 0300 0315 0062;
0061 0315 0300 05AE 1DFB 0062;00E0 05AE 1DFB 0315 0062;0061 05AE 0300 1DFB 0315 0062;00E0 05AE 1DFB 0315 0062;0061 05AE 0300 1DFB 0315 0062;
0061 1DFB 0315 0300 05AE 0062;0061 05AE 1DFB 0300 0315 0062;0061 05AE 1DFB 0300 0315 0062;0061 05AE 1DFB 0300 0315 0062;0061 05AE 1DFB 0300 0315 0062;
0061 035D 035C 0315 1DFC 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;
0061 1DFC 035D 035C 0315 0062;0061 0315 1DFC 035C 035D 0062;0061 0315 1DFC 035C 035D 0062;0061 0315 1DFC 035C 035D 0062;0061 0315 1DFC 035C 035D 0062;
0061 059A 0316 302A 1DFD 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;
@ -18343,6 +18377,10 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 11373 0315 0300 05AE 0062;0061 05AE 11373 0300 0315 0062;0061 05AE 11373 0300 0315 0062;0061 05AE 11373 0300 0315 0062;0061 05AE 11373 0300 0315 0062;
0061 0315 0300 05AE 11374 0062;00E0 05AE 11374 0315 0062;0061 05AE 0300 11374 0315 0062;00E0 05AE 11374 0315 0062;0061 05AE 0300 11374 0315 0062;
0061 11374 0315 0300 05AE 0062;0061 05AE 11374 0300 0315 0062;0061 05AE 11374 0300 0315 0062;0061 05AE 11374 0300 0315 0062;0061 05AE 11374 0300 0315 0062;
0061 05B0 094D 3099 11442 0062;0061 3099 094D 11442 05B0 0062;0061 3099 094D 11442 05B0 0062;0061 3099 094D 11442 05B0 0062;0061 3099 094D 11442 05B0 0062;
0061 11442 05B0 094D 3099 0062;0061 3099 11442 094D 05B0 0062;0061 3099 11442 094D 05B0 0062;0061 3099 11442 094D 05B0 0062;0061 3099 11442 094D 05B0 0062;
0061 3099 093C 0334 11446 0062;0061 0334 093C 11446 3099 0062;0061 0334 093C 11446 3099 0062;0061 0334 093C 11446 3099 0062;0061 0334 093C 11446 3099 0062;
0061 11446 3099 093C 0334 0062;0061 0334 11446 093C 3099 0062;0061 0334 11446 093C 3099 0062;0061 0334 11446 093C 3099 0062;0061 0334 11446 093C 3099 0062;
0061 05B0 094D 3099 114C2 0062;0061 3099 094D 114C2 05B0 0062;0061 3099 094D 114C2 05B0 0062;0061 3099 094D 114C2 05B0 0062;0061 3099 094D 114C2 05B0 0062;
0061 114C2 05B0 094D 3099 0062;0061 3099 114C2 094D 05B0 0062;0061 3099 114C2 094D 05B0 0062;0061 3099 114C2 094D 05B0 0062;0061 3099 114C2 094D 05B0 0062;
0061 3099 093C 0334 114C3 0062;0061 0334 093C 114C3 3099 0062;0061 0334 093C 114C3 3099 0062;0061 0334 093C 114C3 3099 0062;0061 0334 093C 114C3 3099 0062;
@ -18359,6 +18397,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 116B7 3099 093C 0334 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062;
0061 05B0 094D 3099 1172B 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062;
0061 1172B 05B0 094D 3099 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062;
0061 05B0 094D 3099 11C3F 0062;0061 3099 094D 11C3F 05B0 0062;0061 3099 094D 11C3F 05B0 0062;0061 3099 094D 11C3F 05B0 0062;0061 3099 094D 11C3F 05B0 0062;
0061 11C3F 05B0 094D 3099 0062;0061 3099 11C3F 094D 05B0 0062;0061 3099 11C3F 094D 05B0 0062;0061 3099 11C3F 094D 05B0 0062;0061 3099 11C3F 094D 05B0 0062;
0061 093C 0334 16AF0 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062;
0061 16AF0 093C 0334 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062;
0061 093C 0334 16AF1 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062;
@ -18451,6 +18491,82 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 1D243 0315 0300 05AE 0062;0061 05AE 1D243 0300 0315 0062;0061 05AE 1D243 0300 0315 0062;0061 05AE 1D243 0300 0315 0062;0061 05AE 1D243 0300 0315 0062;
0061 0315 0300 05AE 1D244 0062;00E0 05AE 1D244 0315 0062;0061 05AE 0300 1D244 0315 0062;00E0 05AE 1D244 0315 0062;0061 05AE 0300 1D244 0315 0062;
0061 1D244 0315 0300 05AE 0062;0061 05AE 1D244 0300 0315 0062;0061 05AE 1D244 0300 0315 0062;0061 05AE 1D244 0300 0315 0062;0061 05AE 1D244 0300 0315 0062;
0061 0315 0300 05AE 1E000 0062;00E0 05AE 1E000 0315 0062;0061 05AE 0300 1E000 0315 0062;00E0 05AE 1E000 0315 0062;0061 05AE 0300 1E000 0315 0062;
0061 1E000 0315 0300 05AE 0062;0061 05AE 1E000 0300 0315 0062;0061 05AE 1E000 0300 0315 0062;0061 05AE 1E000 0300 0315 0062;0061 05AE 1E000 0300 0315 0062;
0061 0315 0300 05AE 1E001 0062;00E0 05AE 1E001 0315 0062;0061 05AE 0300 1E001 0315 0062;00E0 05AE 1E001 0315 0062;0061 05AE 0300 1E001 0315 0062;
0061 1E001 0315 0300 05AE 0062;0061 05AE 1E001 0300 0315 0062;0061 05AE 1E001 0300 0315 0062;0061 05AE 1E001 0300 0315 0062;0061 05AE 1E001 0300 0315 0062;
0061 0315 0300 05AE 1E002 0062;00E0 05AE 1E002 0315 0062;0061 05AE 0300 1E002 0315 0062;00E0 05AE 1E002 0315 0062;0061 05AE 0300 1E002 0315 0062;
0061 1E002 0315 0300 05AE 0062;0061 05AE 1E002 0300 0315 0062;0061 05AE 1E002 0300 0315 0062;0061 05AE 1E002 0300 0315 0062;0061 05AE 1E002 0300 0315 0062;
0061 0315 0300 05AE 1E003 0062;00E0 05AE 1E003 0315 0062;0061 05AE 0300 1E003 0315 0062;00E0 05AE 1E003 0315 0062;0061 05AE 0300 1E003 0315 0062;
0061 1E003 0315 0300 05AE 0062;0061 05AE 1E003 0300 0315 0062;0061 05AE 1E003 0300 0315 0062;0061 05AE 1E003 0300 0315 0062;0061 05AE 1E003 0300 0315 0062;
0061 0315 0300 05AE 1E004 0062;00E0 05AE 1E004 0315 0062;0061 05AE 0300 1E004 0315 0062;00E0 05AE 1E004 0315 0062;0061 05AE 0300 1E004 0315 0062;
0061 1E004 0315 0300 05AE 0062;0061 05AE 1E004 0300 0315 0062;0061 05AE 1E004 0300 0315 0062;0061 05AE 1E004 0300 0315 0062;0061 05AE 1E004 0300 0315 0062;
0061 0315 0300 05AE 1E005 0062;00E0 05AE 1E005 0315 0062;0061 05AE 0300 1E005 0315 0062;00E0 05AE 1E005 0315 0062;0061 05AE 0300 1E005 0315 0062;
0061 1E005 0315 0300 05AE 0062;0061 05AE 1E005 0300 0315 0062;0061 05AE 1E005 0300 0315 0062;0061 05AE 1E005 0300 0315 0062;0061 05AE 1E005 0300 0315 0062;
0061 0315 0300 05AE 1E006 0062;00E0 05AE 1E006 0315 0062;0061 05AE 0300 1E006 0315 0062;00E0 05AE 1E006 0315 0062;0061 05AE 0300 1E006 0315 0062;
0061 1E006 0315 0300 05AE 0062;0061 05AE 1E006 0300 0315 0062;0061 05AE 1E006 0300 0315 0062;0061 05AE 1E006 0300 0315 0062;0061 05AE 1E006 0300 0315 0062;
0061 0315 0300 05AE 1E008 0062;00E0 05AE 1E008 0315 0062;0061 05AE 0300 1E008 0315 0062;00E0 05AE 1E008 0315 0062;0061 05AE 0300 1E008 0315 0062;
0061 1E008 0315 0300 05AE 0062;0061 05AE 1E008 0300 0315 0062;0061 05AE 1E008 0300 0315 0062;0061 05AE 1E008 0300 0315 0062;0061 05AE 1E008 0300 0315 0062;
0061 0315 0300 05AE 1E009 0062;00E0 05AE 1E009 0315 0062;0061 05AE 0300 1E009 0315 0062;00E0 05AE 1E009 0315 0062;0061 05AE 0300 1E009 0315 0062;
0061 1E009 0315 0300 05AE 0062;0061 05AE 1E009 0300 0315 0062;0061 05AE 1E009 0300 0315 0062;0061 05AE 1E009 0300 0315 0062;0061 05AE 1E009 0300 0315 0062;
0061 0315 0300 05AE 1E00A 0062;00E0 05AE 1E00A 0315 0062;0061 05AE 0300 1E00A 0315 0062;00E0 05AE 1E00A 0315 0062;0061 05AE 0300 1E00A 0315 0062;
0061 1E00A 0315 0300 05AE 0062;0061 05AE 1E00A 0300 0315 0062;0061 05AE 1E00A 0300 0315 0062;0061 05AE 1E00A 0300 0315 0062;0061 05AE 1E00A 0300 0315 0062;
0061 0315 0300 05AE 1E00B 0062;00E0 05AE 1E00B 0315 0062;0061 05AE 0300 1E00B 0315 0062;00E0 05AE 1E00B 0315 0062;0061 05AE 0300 1E00B 0315 0062;
0061 1E00B 0315 0300 05AE 0062;0061 05AE 1E00B 0300 0315 0062;0061 05AE 1E00B 0300 0315 0062;0061 05AE 1E00B 0300 0315 0062;0061 05AE 1E00B 0300 0315 0062;
0061 0315 0300 05AE 1E00C 0062;00E0 05AE 1E00C 0315 0062;0061 05AE 0300 1E00C 0315 0062;00E0 05AE 1E00C 0315 0062;0061 05AE 0300 1E00C 0315 0062;
0061 1E00C 0315 0300 05AE 0062;0061 05AE 1E00C 0300 0315 0062;0061 05AE 1E00C 0300 0315 0062;0061 05AE 1E00C 0300 0315 0062;0061 05AE 1E00C 0300 0315 0062;
0061 0315 0300 05AE 1E00D 0062;00E0 05AE 1E00D 0315 0062;0061 05AE 0300 1E00D 0315 0062;00E0 05AE 1E00D 0315 0062;0061 05AE 0300 1E00D 0315 0062;
0061 1E00D 0315 0300 05AE 0062;0061 05AE 1E00D 0300 0315 0062;0061 05AE 1E00D 0300 0315 0062;0061 05AE 1E00D 0300 0315 0062;0061 05AE 1E00D 0300 0315 0062;
0061 0315 0300 05AE 1E00E 0062;00E0 05AE 1E00E 0315 0062;0061 05AE 0300 1E00E 0315 0062;00E0 05AE 1E00E 0315 0062;0061 05AE 0300 1E00E 0315 0062;
0061 1E00E 0315 0300 05AE 0062;0061 05AE 1E00E 0300 0315 0062;0061 05AE 1E00E 0300 0315 0062;0061 05AE 1E00E 0300 0315 0062;0061 05AE 1E00E 0300 0315 0062;
0061 0315 0300 05AE 1E00F 0062;00E0 05AE 1E00F 0315 0062;0061 05AE 0300 1E00F 0315 0062;00E0 05AE 1E00F 0315 0062;0061 05AE 0300 1E00F 0315 0062;
0061 1E00F 0315 0300 05AE 0062;0061 05AE 1E00F 0300 0315 0062;0061 05AE 1E00F 0300 0315 0062;0061 05AE 1E00F 0300 0315 0062;0061 05AE 1E00F 0300 0315 0062;
0061 0315 0300 05AE 1E010 0062;00E0 05AE 1E010 0315 0062;0061 05AE 0300 1E010 0315 0062;00E0 05AE 1E010 0315 0062;0061 05AE 0300 1E010 0315 0062;
0061 1E010 0315 0300 05AE 0062;0061 05AE 1E010 0300 0315 0062;0061 05AE 1E010 0300 0315 0062;0061 05AE 1E010 0300 0315 0062;0061 05AE 1E010 0300 0315 0062;
0061 0315 0300 05AE 1E011 0062;00E0 05AE 1E011 0315 0062;0061 05AE 0300 1E011 0315 0062;00E0 05AE 1E011 0315 0062;0061 05AE 0300 1E011 0315 0062;
0061 1E011 0315 0300 05AE 0062;0061 05AE 1E011 0300 0315 0062;0061 05AE 1E011 0300 0315 0062;0061 05AE 1E011 0300 0315 0062;0061 05AE 1E011 0300 0315 0062;
0061 0315 0300 05AE 1E012 0062;00E0 05AE 1E012 0315 0062;0061 05AE 0300 1E012 0315 0062;00E0 05AE 1E012 0315 0062;0061 05AE 0300 1E012 0315 0062;
0061 1E012 0315 0300 05AE 0062;0061 05AE 1E012 0300 0315 0062;0061 05AE 1E012 0300 0315 0062;0061 05AE 1E012 0300 0315 0062;0061 05AE 1E012 0300 0315 0062;
0061 0315 0300 05AE 1E013 0062;00E0 05AE 1E013 0315 0062;0061 05AE 0300 1E013 0315 0062;00E0 05AE 1E013 0315 0062;0061 05AE 0300 1E013 0315 0062;
0061 1E013 0315 0300 05AE 0062;0061 05AE 1E013 0300 0315 0062;0061 05AE 1E013 0300 0315 0062;0061 05AE 1E013 0300 0315 0062;0061 05AE 1E013 0300 0315 0062;
0061 0315 0300 05AE 1E014 0062;00E0 05AE 1E014 0315 0062;0061 05AE 0300 1E014 0315 0062;00E0 05AE 1E014 0315 0062;0061 05AE 0300 1E014 0315 0062;
0061 1E014 0315 0300 05AE 0062;0061 05AE 1E014 0300 0315 0062;0061 05AE 1E014 0300 0315 0062;0061 05AE 1E014 0300 0315 0062;0061 05AE 1E014 0300 0315 0062;
0061 0315 0300 05AE 1E015 0062;00E0 05AE 1E015 0315 0062;0061 05AE 0300 1E015 0315 0062;00E0 05AE 1E015 0315 0062;0061 05AE 0300 1E015 0315 0062;
0061 1E015 0315 0300 05AE 0062;0061 05AE 1E015 0300 0315 0062;0061 05AE 1E015 0300 0315 0062;0061 05AE 1E015 0300 0315 0062;0061 05AE 1E015 0300 0315 0062;
0061 0315 0300 05AE 1E016 0062;00E0 05AE 1E016 0315 0062;0061 05AE 0300 1E016 0315 0062;00E0 05AE 1E016 0315 0062;0061 05AE 0300 1E016 0315 0062;
0061 1E016 0315 0300 05AE 0062;0061 05AE 1E016 0300 0315 0062;0061 05AE 1E016 0300 0315 0062;0061 05AE 1E016 0300 0315 0062;0061 05AE 1E016 0300 0315 0062;
0061 0315 0300 05AE 1E017 0062;00E0 05AE 1E017 0315 0062;0061 05AE 0300 1E017 0315 0062;00E0 05AE 1E017 0315 0062;0061 05AE 0300 1E017 0315 0062;
0061 1E017 0315 0300 05AE 0062;0061 05AE 1E017 0300 0315 0062;0061 05AE 1E017 0300 0315 0062;0061 05AE 1E017 0300 0315 0062;0061 05AE 1E017 0300 0315 0062;
0061 0315 0300 05AE 1E018 0062;00E0 05AE 1E018 0315 0062;0061 05AE 0300 1E018 0315 0062;00E0 05AE 1E018 0315 0062;0061 05AE 0300 1E018 0315 0062;
0061 1E018 0315 0300 05AE 0062;0061 05AE 1E018 0300 0315 0062;0061 05AE 1E018 0300 0315 0062;0061 05AE 1E018 0300 0315 0062;0061 05AE 1E018 0300 0315 0062;
0061 0315 0300 05AE 1E01B 0062;00E0 05AE 1E01B 0315 0062;0061 05AE 0300 1E01B 0315 0062;00E0 05AE 1E01B 0315 0062;0061 05AE 0300 1E01B 0315 0062;
0061 1E01B 0315 0300 05AE 0062;0061 05AE 1E01B 0300 0315 0062;0061 05AE 1E01B 0300 0315 0062;0061 05AE 1E01B 0300 0315 0062;0061 05AE 1E01B 0300 0315 0062;
0061 0315 0300 05AE 1E01C 0062;00E0 05AE 1E01C 0315 0062;0061 05AE 0300 1E01C 0315 0062;00E0 05AE 1E01C 0315 0062;0061 05AE 0300 1E01C 0315 0062;
0061 1E01C 0315 0300 05AE 0062;0061 05AE 1E01C 0300 0315 0062;0061 05AE 1E01C 0300 0315 0062;0061 05AE 1E01C 0300 0315 0062;0061 05AE 1E01C 0300 0315 0062;
0061 0315 0300 05AE 1E01D 0062;00E0 05AE 1E01D 0315 0062;0061 05AE 0300 1E01D 0315 0062;00E0 05AE 1E01D 0315 0062;0061 05AE 0300 1E01D 0315 0062;
0061 1E01D 0315 0300 05AE 0062;0061 05AE 1E01D 0300 0315 0062;0061 05AE 1E01D 0300 0315 0062;0061 05AE 1E01D 0300 0315 0062;0061 05AE 1E01D 0300 0315 0062;
0061 0315 0300 05AE 1E01E 0062;00E0 05AE 1E01E 0315 0062;0061 05AE 0300 1E01E 0315 0062;00E0 05AE 1E01E 0315 0062;0061 05AE 0300 1E01E 0315 0062;
0061 1E01E 0315 0300 05AE 0062;0061 05AE 1E01E 0300 0315 0062;0061 05AE 1E01E 0300 0315 0062;0061 05AE 1E01E 0300 0315 0062;0061 05AE 1E01E 0300 0315 0062;
0061 0315 0300 05AE 1E01F 0062;00E0 05AE 1E01F 0315 0062;0061 05AE 0300 1E01F 0315 0062;00E0 05AE 1E01F 0315 0062;0061 05AE 0300 1E01F 0315 0062;
0061 1E01F 0315 0300 05AE 0062;0061 05AE 1E01F 0300 0315 0062;0061 05AE 1E01F 0300 0315 0062;0061 05AE 1E01F 0300 0315 0062;0061 05AE 1E01F 0300 0315 0062;
0061 0315 0300 05AE 1E020 0062;00E0 05AE 1E020 0315 0062;0061 05AE 0300 1E020 0315 0062;00E0 05AE 1E020 0315 0062;0061 05AE 0300 1E020 0315 0062;
0061 1E020 0315 0300 05AE 0062;0061 05AE 1E020 0300 0315 0062;0061 05AE 1E020 0300 0315 0062;0061 05AE 1E020 0300 0315 0062;0061 05AE 1E020 0300 0315 0062;
0061 0315 0300 05AE 1E021 0062;00E0 05AE 1E021 0315 0062;0061 05AE 0300 1E021 0315 0062;00E0 05AE 1E021 0315 0062;0061 05AE 0300 1E021 0315 0062;
0061 1E021 0315 0300 05AE 0062;0061 05AE 1E021 0300 0315 0062;0061 05AE 1E021 0300 0315 0062;0061 05AE 1E021 0300 0315 0062;0061 05AE 1E021 0300 0315 0062;
0061 0315 0300 05AE 1E023 0062;00E0 05AE 1E023 0315 0062;0061 05AE 0300 1E023 0315 0062;00E0 05AE 1E023 0315 0062;0061 05AE 0300 1E023 0315 0062;
0061 1E023 0315 0300 05AE 0062;0061 05AE 1E023 0300 0315 0062;0061 05AE 1E023 0300 0315 0062;0061 05AE 1E023 0300 0315 0062;0061 05AE 1E023 0300 0315 0062;
0061 0315 0300 05AE 1E024 0062;00E0 05AE 1E024 0315 0062;0061 05AE 0300 1E024 0315 0062;00E0 05AE 1E024 0315 0062;0061 05AE 0300 1E024 0315 0062;
0061 1E024 0315 0300 05AE 0062;0061 05AE 1E024 0300 0315 0062;0061 05AE 1E024 0300 0315 0062;0061 05AE 1E024 0300 0315 0062;0061 05AE 1E024 0300 0315 0062;
0061 0315 0300 05AE 1E026 0062;00E0 05AE 1E026 0315 0062;0061 05AE 0300 1E026 0315 0062;00E0 05AE 1E026 0315 0062;0061 05AE 0300 1E026 0315 0062;
0061 1E026 0315 0300 05AE 0062;0061 05AE 1E026 0300 0315 0062;0061 05AE 1E026 0300 0315 0062;0061 05AE 1E026 0300 0315 0062;0061 05AE 1E026 0300 0315 0062;
0061 0315 0300 05AE 1E027 0062;00E0 05AE 1E027 0315 0062;0061 05AE 0300 1E027 0315 0062;00E0 05AE 1E027 0315 0062;0061 05AE 0300 1E027 0315 0062;
0061 1E027 0315 0300 05AE 0062;0061 05AE 1E027 0300 0315 0062;0061 05AE 1E027 0300 0315 0062;0061 05AE 1E027 0300 0315 0062;0061 05AE 1E027 0300 0315 0062;
0061 0315 0300 05AE 1E028 0062;00E0 05AE 1E028 0315 0062;0061 05AE 0300 1E028 0315 0062;00E0 05AE 1E028 0315 0062;0061 05AE 0300 1E028 0315 0062;
0061 1E028 0315 0300 05AE 0062;0061 05AE 1E028 0300 0315 0062;0061 05AE 1E028 0300 0315 0062;0061 05AE 1E028 0300 0315 0062;0061 05AE 1E028 0300 0315 0062;
0061 0315 0300 05AE 1E029 0062;00E0 05AE 1E029 0315 0062;0061 05AE 0300 1E029 0315 0062;00E0 05AE 1E029 0315 0062;0061 05AE 0300 1E029 0315 0062;
0061 1E029 0315 0300 05AE 0062;0061 05AE 1E029 0300 0315 0062;0061 05AE 1E029 0300 0315 0062;0061 05AE 1E029 0300 0315 0062;0061 05AE 1E029 0300 0315 0062;
0061 0315 0300 05AE 1E02A 0062;00E0 05AE 1E02A 0315 0062;0061 05AE 0300 1E02A 0315 0062;00E0 05AE 1E02A 0315 0062;0061 05AE 0300 1E02A 0315 0062;
0061 1E02A 0315 0300 05AE 0062;0061 05AE 1E02A 0300 0315 0062;0061 05AE 1E02A 0300 0315 0062;0061 05AE 1E02A 0300 0315 0062;0061 05AE 1E02A 0300 0315 0062;
0061 059A 0316 302A 1E8D0 0062;0061 302A 0316 1E8D0 059A 0062;0061 302A 0316 1E8D0 059A 0062;0061 302A 0316 1E8D0 059A 0062;0061 302A 0316 1E8D0 059A 0062;
0061 1E8D0 059A 0316 302A 0062;0061 302A 1E8D0 0316 059A 0062;0061 302A 1E8D0 0316 059A 0062;0061 302A 1E8D0 0316 059A 0062;0061 302A 1E8D0 0316 059A 0062;
0061 059A 0316 302A 1E8D1 0062;0061 302A 0316 1E8D1 059A 0062;0061 302A 0316 1E8D1 059A 0062;0061 302A 0316 1E8D1 059A 0062;0061 302A 0316 1E8D1 059A 0062;
@ -18465,6 +18581,20 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 1E8D5 059A 0316 302A 0062;0061 302A 1E8D5 0316 059A 0062;0061 302A 1E8D5 0316 059A 0062;0061 302A 1E8D5 0316 059A 0062;0061 302A 1E8D5 0316 059A 0062;
0061 059A 0316 302A 1E8D6 0062;0061 302A 0316 1E8D6 059A 0062;0061 302A 0316 1E8D6 059A 0062;0061 302A 0316 1E8D6 059A 0062;0061 302A 0316 1E8D6 059A 0062;
0061 1E8D6 059A 0316 302A 0062;0061 302A 1E8D6 0316 059A 0062;0061 302A 1E8D6 0316 059A 0062;0061 302A 1E8D6 0316 059A 0062;0061 302A 1E8D6 0316 059A 0062;
0061 0315 0300 05AE 1E944 0062;00E0 05AE 1E944 0315 0062;0061 05AE 0300 1E944 0315 0062;00E0 05AE 1E944 0315 0062;0061 05AE 0300 1E944 0315 0062;
0061 1E944 0315 0300 05AE 0062;0061 05AE 1E944 0300 0315 0062;0061 05AE 1E944 0300 0315 0062;0061 05AE 1E944 0300 0315 0062;0061 05AE 1E944 0300 0315 0062;
0061 0315 0300 05AE 1E945 0062;00E0 05AE 1E945 0315 0062;0061 05AE 0300 1E945 0315 0062;00E0 05AE 1E945 0315 0062;0061 05AE 0300 1E945 0315 0062;
0061 1E945 0315 0300 05AE 0062;0061 05AE 1E945 0300 0315 0062;0061 05AE 1E945 0300 0315 0062;0061 05AE 1E945 0300 0315 0062;0061 05AE 1E945 0300 0315 0062;
0061 0315 0300 05AE 1E946 0062;00E0 05AE 1E946 0315 0062;0061 05AE 0300 1E946 0315 0062;00E0 05AE 1E946 0315 0062;0061 05AE 0300 1E946 0315 0062;
0061 1E946 0315 0300 05AE 0062;0061 05AE 1E946 0300 0315 0062;0061 05AE 1E946 0300 0315 0062;0061 05AE 1E946 0300 0315 0062;0061 05AE 1E946 0300 0315 0062;
0061 0315 0300 05AE 1E947 0062;00E0 05AE 1E947 0315 0062;0061 05AE 0300 1E947 0315 0062;00E0 05AE 1E947 0315 0062;0061 05AE 0300 1E947 0315 0062;
0061 1E947 0315 0300 05AE 0062;0061 05AE 1E947 0300 0315 0062;0061 05AE 1E947 0300 0315 0062;0061 05AE 1E947 0300 0315 0062;0061 05AE 1E947 0300 0315 0062;
0061 0315 0300 05AE 1E948 0062;00E0 05AE 1E948 0315 0062;0061 05AE 0300 1E948 0315 0062;00E0 05AE 1E948 0315 0062;0061 05AE 0300 1E948 0315 0062;
0061 1E948 0315 0300 05AE 0062;0061 05AE 1E948 0300 0315 0062;0061 05AE 1E948 0300 0315 0062;0061 05AE 1E948 0300 0315 0062;0061 05AE 1E948 0300 0315 0062;
0061 0315 0300 05AE 1E949 0062;00E0 05AE 1E949 0315 0062;0061 05AE 0300 1E949 0315 0062;00E0 05AE 1E949 0315 0062;0061 05AE 0300 1E949 0315 0062;
0061 1E949 0315 0300 05AE 0062;0061 05AE 1E949 0300 0315 0062;0061 05AE 1E949 0300 0315 0062;0061 05AE 1E949 0300 0315 0062;0061 05AE 1E949 0300 0315 0062;
0061 3099 093C 0334 1E94A 0062;0061 0334 093C 1E94A 3099 0062;0061 0334 093C 1E94A 3099 0062;0061 0334 093C 1E94A 3099 0062;0061 0334 093C 1E94A 3099 0062;
0061 1E94A 3099 093C 0334 0062;0061 0334 1E94A 093C 3099 0062;0061 0334 1E94A 093C 3099 0062;0061 0334 1E94A 093C 3099 0062;0061 0334 1E94A 093C 3099 0062;
#
@Part3 # PRI #29 Test
#

View file

@ -1,10 +1,11 @@
# SpecialCasing-8.0.0.txt
# Date: 2014-12-16, 23:08:04 GMT [MD]
# SpecialCasing-9.0.0.txt
# Date: 2016-03-02, 18:55:13 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Special Casing
#

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -38,6 +38,365 @@ For new script codes see http://www.unicode.org/iso15924/codechanges.html
---------------------------------------------------------------------------- ***
Unicode 9.0 update for ICU 58
* Command-line environment setup
ICU_ROOT=~/svn.icu/trunk
ICU_SRC_DIR=$ICU_ROOT/src
ICUDT=icudt58b
export LD_LIBRARY_PATH=$ICU_ROOT/dbg/lib
SRC_DATA_IN=$ICU_SRC_DIR/source/data/in
UNIDATA=$ICU_SRC_DIR/source/data/unidata
http://www.unicode.org/review/pri323/ -- beta review
http://www.unicode.org/reports/uax-proposed-updates.html
http://www.unicode.org/versions/beta-9.0.0.html
http://www.unicode.org/versions/Unicode9.0.0/
http://www.unicode.org/reports/tr44/tr44-17.html
*** ICU Trac
- ticket:12526: integrate Unicode 9
- C++ ^/icu/branches/markus/uni90
- Java ^/icu4j/branches/markus/uni90
*** CLDR Trac
- cldrbug 9414: UCA 9
- ^/branches/markus/uni90 at r11518 from trunk at r11517
- TODO cldrbug 8109: Unicode 8.0 script metadata
- TODO cldrbug 8418: Updated segmentation for Unicode 8.0
*** Unicode version numbers
- makedata.mak
- uchar.h
- com.ibm.icu.util.VersionInfo
- com.ibm.icu.dev.test.lang.UCharacterTest.VERSION_
- Run ICU4C "configure" _after_ updating the Unicode version number in uchar.h
so that the makefiles see the new version number.
*** data files & enums & parser code
* file preparation
- download UCD & IDNA files
- make sure that the Unicode data folder passed into preparseucd.py
includes a copy of the latest IdnaMappingTable.txt (can be in some subfolder)
- only for manual diffs: remove version suffixes from the file names
~/unidata/uni70/20140403$ ../../desuffixucd.py .
(see https://sites.google.com/site/unicodetools/inputdata)
- only for manual diffs: extract Unihan.zip to "here" (.../ucd/Unihan/*.txt), delete Unihan.zip
- ~/svn.icutools/trunk/src/unicode$ py/preparseucd.py ~/unidata/uni90/20160503 $ICU_SRC_DIR ~/svn.icutools/trunk/src
- This writes files (especially ppucd.txt) to the ICU4C unidata and testdata subfolders.
- also: from http://unicode.org/Public/security/9.0.0/ download new confusables.txt
and copy to $UNIDATA
cp ~/unidata/uni90/20160503/security/confusables.txt $UNIDATA
* preparseucd.py changes
- remove or add new Unicode scripts from/to the
only-in-ISO-15924 list according to the error messages:
ValueError: remove ['Tang'] from _scripts_only_in_iso15924
ValueError: sc = Hanb (uchar.h USCRIPT_HAN_WITH_BOPOMOFO) not in the UCD
ValueError: sc = Jamo (uchar.h USCRIPT_JAMO) not in the UCD
ValueError: sc = Zsye (uchar.h USCRIPT_SYMBOLS_EMOJI) not in the UCD
-> fix expectedLong names in cucdapi.c/TestUScriptCodeAPI()
and in com.ibm.icu.dev.test.lang.TestUScript.java
- DerivedNumericValues.txt new numeric values
0D58 ; 0.00625 ; ; 1/160 # No MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH
0D59 ; 0.025 ; ; 1/40 # No MALAYALAM FRACTION ONE FORTIETH
0D5A ; 0.0375 ; ; 3/80 # No MALAYALAM FRACTION THREE EIGHTIETHS
0D5B ; 0.05 ; ; 1/20 # No MALAYALAM FRACTION ONE TWENTIETH
0D5D ; 0.15 ; ; 3/20 # No MALAYALAM FRACTION THREE TWENTIETHS
-> change uprops.h, corepropsbuilder.cpp/encodeNumericValue(),
uchar.c, UCharacterProperty.java
to support a new series of values
- adjust preparseucd.py for Tangut algorithmic names
in ppucd.txt:
algnamesrange;17000..187EC;han;CJK UNIFIED IDEOGRAPH-
->
algnamesrange;17000..187EC;han;TANGUT IDEOGRAPH-
- avoid block-compressing most String/Miscellaneous property values,
triggered by genprops not coping with a multi-code point Case_Folding on
block;1C80..1C8F;...;Cased;cf=0442;CWCF;...
keep block-compressing empty-string mappings NFKC_CF="" for tags and variation selectors
* PropertyAliases.txt changes
- 1 new property PCM=Prepended_Concatenation_Mark
Ignore: Only useful for layout engines.
Ok to list in ppucd.txt.
* PropertyValueAliases.txt new property values
blk; Adlam ; Adlam
blk; Bhaiksuki ; Bhaiksuki
blk; Cyrillic_Ext_C ; Cyrillic_Extended_C
blk; Glagolitic_Sup ; Glagolitic_Supplement
blk; Ideographic_Symbols ; Ideographic_Symbols_And_Punctuation
blk; Marchen ; Marchen
blk; Mongolian_Sup ; Mongolian_Supplement
blk; Newa ; Newa
blk; Osage ; Osage
blk; Tangut ; Tangut
blk; Tangut_Components ; Tangut_Components
-> add to uchar.h
use long property names for enum constants
-> add to UCharacter.UnicodeBlock IDs
Eclipse find UBLOCK_([^ ]+) = ([0-9]+), (/.+)
replace public static final int \1_ID = \2; \3
-> add to UCharacter.UnicodeBlock objects
Eclipse find UBLOCK_([^ ]+) = [0-9]+, (/.+)
replace public static final UnicodeBlock \1 = new UnicodeBlock("\1", \1_ID); \2
GCB; EB ; E_Base
GCB; EBG ; E_Base_GAZ
GCB; EM ; E_Modifier
GCB; GAZ ; Glue_After_Zwj
GCB; ZWJ ; ZWJ
-> uchar.h & UCharacter.GraphemeClusterBreak
jg ; African_Feh ; African_Feh
jg ; African_Noon ; African_Noon
jg ; African_Qaf ; African_Qaf
-> uchar.h & UCharacter.JoiningGroup
lb ; EB ; E_Base
lb ; EM ; E_Modifier
lb ; ZWJ ; ZWJ
-> uchar.h & UCharacter.LineBreak
sc ; Adlm ; Adlam
sc ; Bhks ; Bhaiksuki
sc ; Marc ; Marchen
sc ; Newa ; Newa
sc ; Osge ; Osage
sc ; Tang ; Tangut
-> all of them had been added already to uscript.h & com.ibm.icu.lang.UScript
WB ; EB ; E_Base
WB ; EBG ; E_Base_GAZ
WB ; EM ; E_Modifier
WB ; GAZ ; Glue_After_Zwj
WB ; ZWJ ; ZWJ
-> uchar.h & UCharacter.WordBreak
* update Script metadata: SCRIPT_PROPS[] in uscript_props.cpp & UScript.ScriptMetadata
(not strictly necessary for NOT_ENCODED scripts)
~/svn.icutools/trunk/src/unicode$ py/parsescriptmetadata.py $ICU_SRC_DIR/source/common/unicode/uscript.h ~/svn.cldr/trunk/common/properties/scriptMetadata.txt
* generate normalization data files
cd $ICU_ROOT/dbg
bin/gennorm2 -o $ICU_SRC_DIR/source/common/norm2_nfc_data.h -s $UNIDATA/norm2 nfc.txt --csource
bin/gennorm2 -o $SRC_DATA_IN/nfc.nrm -s $UNIDATA/norm2 nfc.txt
bin/gennorm2 -o $SRC_DATA_IN/nfkc.nrm -s $UNIDATA/norm2 nfc.txt nfkc.txt
bin/gennorm2 -o $SRC_DATA_IN/nfkc_cf.nrm -s $UNIDATA/norm2 nfc.txt nfkc.txt nfkc_cf.txt
bin/gennorm2 -o $SRC_DATA_IN/uts46.nrm -s $UNIDATA/norm2 nfc.txt uts46.txt
* build ICU (make install)
so that the tools build can pick up the new definitions from the installed header files.
$ICU_ROOT/dbg$ echo;echo;make -j5 install > out.txt 2>&1 ; tail -n 30 out.txt
* build Unicode tools using CMake+make
~/svn.icutools/trunk/src/unicode/c/icudefs.txt:
# Location (--prefix) of where ICU was installed.
set(ICU_INST_DIR /home/mscherer/svn.icu/trunk/inst)
# Location of the ICU source tree.
set(ICU_SRC_DIR /home/mscherer/svn.icu/trunk/src)
~/svn.icutools/trunk/dbg/unicode/c$
cmake ../../../src/unicode/c
make
* generate core properties data files
~/svn.icutools/trunk/dbg/unicode/c$
genprops/genprops $ICU_SRC_DIR
genuca/genuca --hanOrder implicit $ICU_SRC_DIR
genuca/genuca --hanOrder radical-stroke $ICU_SRC_DIR
- rebuild ICU (make install) & tools
* update uts46test.cpp and UTS46Test.java if there are new characters that are equivalent to
sequences with non-LDH ASCII (that is, their decompositions contain '=' or similar)
- grep IdnaMappingTable.txt or uts46.txt for "disallowed_STD3_valid" on non-ASCII characters
- Unicode 6.0..9.0: U+2260, U+226E, U+226F
- nothing new in 9.0, no test file to update
* run & fix ICU4C tests
- Andy handles RBBI & spoof check test failures
* collation: CLDR collation root, UCA DUCET
- UCA DUCET goes into Mark's Unicode tools, see
https://sites.google.com/site/unicodetools/home#TOC-UCA
- CLDR root data files are checked into (CLDR UCA branch)/common/uca/
cp (UCA generated)/CollationAuxiliary/* ~/svn.cldr/trunk/common/uca/
- cd (CLDR UCA branch)/common/uca/
- update source/data/unidata/FractionalUCA.txt with FractionalUCA_SHORT.txt
cp FractionalUCA_SHORT.txt $ICU_SRC_DIR/source/data/unidata/FractionalUCA.txt
- update source/data/unidata/UCARules.txt with UCA_Rules_SHORT.txt
cp $ICU_SRC_DIR/source/data/unidata/UCARules.txt /tmp/UCARules-old.txt
(note removing the underscore before "Rules")
cp UCA_Rules_SHORT.txt $ICU_SRC_DIR/source/data/unidata/UCARules.txt
- restore TODO diffs in UCARules.txt
meld /tmp/UCARules-old.txt $ICU_SRC_DIR/source/data/unidata/UCARules.txt
- update (ICU4C)/source/test/testdata/CollationTest_*.txt
and (ICU4J)/main/tests/collate/src/com/ibm/icu/dev/data/CollationTest_*.txt
from the CLDR root files (..._CLDR_..._SHORT.txt)
cp CollationTest_CLDR_NON_IGNORABLE_SHORT.txt $ICU_SRC_DIR/source/test/testdata/CollationTest_NON_IGNORABLE_SHORT.txt
cp CollationTest_CLDR_SHIFTED_SHORT.txt $ICU_SRC_DIR/source/test/testdata/CollationTest_SHIFTED_SHORT.txt
cp $ICU_SRC_DIR/source/test/testdata/CollationTest_*.txt ~/svn.icu4j/trunk/src/main/tests/collate/src/com/ibm/icu/dev/data
- if CLDR common/uca/unihan-index.txt changes, then update
CLDR common/collation/root.xml <collation type="private-unihan">
and regenerate (or update in parallel) $ICU_SRC_DIR/source/data/coll/root.txt
- run genuca, see command line above;
deal with
Error: Unknown script for first-primary sample character U+104B5 on line 32599 of /home/mscherer/svn.icu/trunk/src/source/data/unidata/FractionalUCA.txt:
FDD1 104B5; [75 B8 02, 05, 05] # Osage first primary (compressible)
(add the character to genuca.cpp sampleCharsToScripts[])
+ look up the USCRIPT_ code for the new sample characters
(should be obvious from the comment in the error output)
+ *add* mappings to sampleCharsToScripts[], do not replace them
(in case the script sample characters flip-flop)
+ insert new scripts in DUCET script order, see the top_byte table
at the beginning of FractionalUCA.txt
- rebuild ICU4C
* Unihan collators
- run Unicode Tools
org.unicode.draft.GenerateUnihanCollators
with VM arguments
-DSVN_WORKSPACE=/home/mscherer/svn.unitools/trunk
-DOTHER_WORKSPACE=/home/mscherer/svn.unitools
-DUCD_DIR=/home/mscherer/svn.unitools/trunk/data
-DCLDR_DIR=/home/mscherer/svn.cldr/trunk
-DUVERSION=9.0.0
-ea
- run Unicode Tools
org.unicode.draft.GenerateUnihanCollatorFiles
with the same arguments
- check CLDR diffs
cd ~/svn.cldr/trunk
meld common/collation/zh.xml ../Generated/cldr/han/replace/zh.xml
meld common/transforms/Han-Latin.xml ../Generated/cldr/han/replace/Han-Latin.xml
- copy to CLDR
cd ~/svn.cldr/trunk
cp ../Generated/cldr/han/replace/zh.xml common/collation/zh.xml
cp ../Generated/cldr/han/replace/Han-Latin.xml common/transforms/Han-Latin.xml
- commit to CLDR
- generate ICU zh collation data: run CLDR
org.unicode.cldr.icu.NewLdml2IcuConverter
with program arguments
-t collation
-s /home/mscherer/svn.cldr/trunk/common/collation
-m /home/mscherer/svn.cldr/trunk/common/supplemental
-d /home/mscherer/svn.icu/trunk/src/source/data/coll
-p /home/mscherer/svn.icu/trunk/src/source/data/xml/collation
zh
and VM arguments
-DCLDR_DIR=/home/mscherer/svn.cldr/trunk
- rebuild ICU4C
* run & fix ICU4C tests, now with new CLDR collation root data
- run all tests with the collation test data *_SHORT.txt or the full files
(the full ones have comments, useful for debugging)
- note on intltest: if collate/UCAConformanceTest fails, then
utility/MultithreadTest/TestCollators will fail as well;
fix the conformance test before looking into the multi-thread test
* update Java data files
- refresh just the UCD/UCA-related/derived files, just to be safe
- see (ICU4C)/source/data/icu4j-readme.txt
- mkdir /tmp/icu4j
- ~/svn.icu/trunk/dbg$ make ICU4J_ROOT=/tmp/icu4j icu4j-data-install
output:
...
Unicode .icu files built to ./out/build/icudt57l
echo timestamp > uni-core-data
mkdir -p ./out/icu4j/com/ibm/icu/impl/data/icudt57b
mkdir -p ./out/icu4j/tzdata/com/ibm/icu/impl/data/icudt57b
echo pnames.icu uprops.icu ucase.icu ubidi.icu nfc.nrm > ./out/icu4j/add.txt
LD_LIBRARY_PATH=../lib:../stubdata:../tools/ctestfw:$LD_LIBRARY_PATH ../bin/icupkg ./out/tmp/icudt57l.dat ./out/icu4j/icudt57b.dat -a ./out/icu4j/add.txt -s ./out/build/icudt57l -x '*' -tb -d ./out/icu4j/com/ibm/icu/impl/data/icudt57b
mv ./out/icu4j/"com/ibm/icu/impl/data/icudt57b/zoneinfo64.res" ./out/icu4j/"com/ibm/icu/impl/data/icudt57b/metaZones.res" ./out/icu4j/"com/ibm/icu/impl/data/icudt57b/timezoneTypes.res" ./out/icu4j/"com/ibm/icu/impl/data/icudt57b/windowsZones.res" "./out/icu4j/tzdata/com/ibm/icu/impl/data/icudt57b"
jar cf ./out/icu4j/icudata.jar -C ./out/icu4j com/ibm/icu/impl/data/icudt57b/
mkdir -p /tmp/icu4j/main/shared/data
cp ./out/icu4j/icudata.jar /tmp/icu4j/main/shared/data
jar cf ./out/icu4j/icutzdata.jar -C ./out/icu4j/tzdata com/ibm/icu/impl/data/icudt57b/
mkdir -p /tmp/icu4j/main/shared/data
cp ./out/icu4j/icutzdata.jar /tmp/icu4j/main/shared/data
make[1]: Leaving directory `/home/mscherer/svn.icu/trunk/dbg/data'
- copy the big-endian Unicode data files to another location,
separate from the other data files,
and then refresh ICU4J
cd ~/svn.icu/trunk/dbg/data/out/icu4j
mkdir -p /tmp/icu4j/com/ibm/icu/impl/data/$ICUDT/coll
mkdir -p /tmp/icu4j/com/ibm/icu/impl/data/$ICUDT/brkitr
cp com/ibm/icu/impl/data/$ICUDT/confusables.cfu /tmp/icu4j/com/ibm/icu/impl/data/$ICUDT
cp com/ibm/icu/impl/data/$ICUDT/*.icu /tmp/icu4j/com/ibm/icu/impl/data/$ICUDT
rm /tmp/icu4j/com/ibm/icu/impl/data/$ICUDT/cnvalias.icu
cp com/ibm/icu/impl/data/$ICUDT/*.nrm /tmp/icu4j/com/ibm/icu/impl/data/$ICUDT
cp com/ibm/icu/impl/data/$ICUDT/coll/* /tmp/icu4j/com/ibm/icu/impl/data/$ICUDT/coll
cp com/ibm/icu/impl/data/$ICUDT/brkitr/* /tmp/icu4j/com/ibm/icu/impl/data/$ICUDT/brkitr
jar uvf ~/svn.icu4j/trunk/src/main/shared/data/icudata.jar -C /tmp/icu4j com/ibm/icu/impl/data/$ICUDT
* When refreshing all of ICU4J data from ICU4C
- ~/svn.icu/trunk/dbg$ make ICU4J_ROOT=/tmp/icu4j icu4j-data-install
- cp /tmp/icu4j/main/shared/data/icudata.jar ~/svn.icu4j/trunk/src/main/shared/data
or
- ~/svn.icu/trunk/dbg$ make ICU4J_ROOT=~/svn.icu4j/trunk/src icu4j-data-install
* update CollationFCD.java
+ copy & paste the initializers of lcccIndex[] etc. from
ICU4C/source/i18n/collationfcd.cpp to
ICU4J/main/classes/collate/src/com/ibm/icu/impl/coll/CollationFCD.java
* refresh Java test .txt files
- copy new .txt files into ICU4J's main/tests/core/src/com/ibm/icu/dev/data/unicode
cd $ICU_SRC_DIR/source/data/unidata
cp confusables.txt confusablesWholeScript.txt NormalizationCorrections.txt NormalizationTest.txt SpecialCasing.txt UnicodeData.txt ~/svn.icu4j/trunk/src/main/tests/core/src/com/ibm/icu/dev/data/unicode
cd ../../test/testdata
cp BidiCharacterTest.txt BidiTest.txt ~/svn.icu4j/trunk/src/main/tests/core/src/com/ibm/icu/dev/data/unicode
cp ~/unidata/uni90/20160503/ucd/CompositionExclusions.txt ~/svn.icu4j/trunk/src/main/tests/core/src/com/ibm/icu/dev/data/unicode
* run & fix ICU4J tests
*** LayoutEngine script information
* Run icu4j-tools: com.ibm.icu.dev.tool.layout.ScriptNameBuilder.
This generates LEScripts.h, LELanguages.h, ScriptAndLanguageTags.h and ScriptAndLanguageTags.cpp
in the working directory.
(It also generates ScriptRunData.cpp, which is no longer needed.)
It also reads and regenerates tools/misc/src/com/ibm/icu/dev/tool/layout/ScriptAndLanguages
(a plain text file)
which maps ICU versions to the numbers of script/language constants
that were added then.
(This mapping is probably obsolete since we do not print "@stable ICU xy" any more.)
The generated files have a current copyright date and "@deprecated" statement.
* Review changes, fix Java tool if necessary, and copy to ICU4C
cd ~/svn.icu4j/trunk/src
meld $ICU_SRC_DIR/source/layout tools/misc/src/com/ibm/icu/dev/tool/layout
cp tools/misc/src/com/ibm/icu/dev/tool/layout/*.h $ICU_SRC_DIR/source/layout
cp tools/misc/src/com/ibm/icu/dev/tool/layout/ScriptAndLanguageTags.cpp $ICU_SRC_DIR/source/layout
*** API additions
- send notice to icu-design about new born-@stable API (enum constants etc.)
*** merge the Unicode update branches back onto the trunk
- do not merge the icudata.jar and testdata.jar,
instead rebuild them from merged & tested ICU4C
- make sure that changes to Unicode tools & ICU tools are checked in
http://www.unicode.org/utility/trac/log/trunk/unicodetools
http://bugs.icu-project.org/trac/log/tools/trunk
---------------------------------------------------------------------------- ***
New script codes early in ICU 58: http://bugs.icu-project.org/trac/ticket/11764
Adding

View file

@ -1,10 +1,13 @@
# Recommended confusable mapping for IDN
# File: confusables.txt
# Version: 8.0.0
# Generated: 2015-05-17, 03:09:04 GMT
# Checkin: $Revision: 1.32 $
# confusables.txt
# Date: 2016-04-05, 11:47:12 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# For documentation and usage, see http://www.unicode.org/reports/tr39/
# Unicode Security Mechanisms for UTR #39
# Version: 9.0.0
#
# For documentation and usage, see http://www.unicode.org/reports/tr39
#
05AD ; 0596 ; MA # ( ֭ → ֖ ) HEBREW ACCENT DEHI → HEBREW ACCENT TIPEHA #
@ -57,6 +60,7 @@ A67C ; 0306 ; MA # ( ꙼ → ̆ ) COMBINING CYRILLIC KAVYKA → COMBINING BREVE
0311 ; 0302 ; MA # ( ̑ → ̂ ) COMBINING INVERTED BREVE → COMBINING CIRCUMFLEX ACCENT #
065B ; 0302 ; MA # ( ٛ → ̂ ) ARABIC VOWEL SIGN INVERTED SMALL V ABOVE → COMBINING CIRCUMFLEX ACCENT #
07EE ; 0302 ; MA # ( ߮ → ̂ ) NKO COMBINING LONG DESCENDING TONE → COMBINING CIRCUMFLEX ACCENT #
A6F0 ; 0302 ; MA # ( ꛰ → ̂ ) BAMUM COMBINING MARK KOQNDON → COMBINING CIRCUMFLEX ACCENT #
05AF ; 030A ; MA # ( ֯ → ̊ ) HEBREW MARK MASORA CIRCLE → COMBINING RING ABOVE #
06DF ; 030A ; MA # ( ۟ → ̊ ) ARABIC SMALL HIGH ROUNDED ZERO → COMBINING RING ABOVE # →ْ→
@ -66,6 +70,7 @@ A67C ; 0306 ; MA # ( ꙼ → ̆ ) COMBINING CYRILLIC KAVYKA → COMBINING BREVE
0B82 ; 030A ; MA # ( ஂ → ̊ ) TAMIL SIGN ANUSVARA → COMBINING RING ABOVE #
1036 ; 030A ; MA # ( ံ → ̊ ) MYANMAR SIGN ANUSVARA → COMBINING RING ABOVE #
17C6 ; 030A ; MA # ( ំ → ̊ ) KHMER SIGN NIKAHIT → COMBINING RING ABOVE #
11300 ; 030A ; MA # ( 𑌀 → ̊ ) GRANTHA SIGN COMBINING ANUSVARA ABOVE → COMBINING RING ABOVE # →ஂ→
0E4D ; 030A ; MA # ( ํ → ̊ ) THAI CHARACTER NIKHAHIT → COMBINING RING ABOVE #
0ECD ; 030A ; MA # ( ໍ → ̊ ) LAO NIGGAHITA → COMBINING RING ABOVE #
0366 ; 030A ; MA # ( ͦ → ̊ ) COMBINING LATIN SMALL LETTER O → COMBINING RING ABOVE #
@ -105,6 +110,7 @@ A67C ; 0306 ; MA # ( ꙼ → ̆ ) COMBINING CYRILLIC KAVYKA → COMBINING BREVE
0305 ; 0304 ; MA # ( ̅ → ̄ ) COMBINING OVERLINE → COMBINING MACRON #
0659 ; 0304 ; MA # ( ٙ → ̄ ) ARABIC ZWARAKAY → COMBINING MACRON #
07EB ; 0304 ; MA # ( ߫ → ̄ ) NKO COMBINING SHORT HIGH TONE → COMBINING MACRON #
A6F1 ; 0304 ; MA # ( ꛱ → ̄ ) BAMUM COMBINING MARK TUKWENTIS → COMBINING MACRON #
1CDA ; 030E ; MA # ( ᳚ → ̎ ) VEDIC TONE DOUBLE SVARITA → COMBINING DOUBLE VERTICAL LINE ABOVE #
@ -147,6 +153,7 @@ A67C ; 0306 ; MA # ( ꙼ → ̆ ) COMBINING CYRILLIC KAVYKA → COMBINING BREVE
0A3C ; 0323 ; MA # ( ਼ → ̣ ) GURMUKHI SIGN NUKTA → COMBINING DOT BELOW #
0ABC ; 0323 ; MA # ( ઼ → ̣ ) GUJARATI SIGN NUKTA → COMBINING DOT BELOW #
0B3C ; 0323 ; MA # ( ଼ → ̣ ) ORIYA SIGN NUKTA → COMBINING DOT BELOW #
111CA ; 0323 ; MA # ( 𑇊 → ̣ ) SHARADA SIGN NUKTA → COMBINING DOT BELOW # →़→
114C3 ; 0323 ; MA # ( 𑓃 → ̣ ) TIRHUTA SIGN NUKTA → COMBINING DOT BELOW # →়→
10A3A ; 0323 ; MA # ( 𐨺 → ̣ ) KHAROSHTHI SIGN DOT BELOW → COMBINING DOT BELOW #
@ -216,6 +223,8 @@ FC63 ; FE7C 0670 ; MA #* ( ‎ﱣ‎ → ‎ﹼٰ‎ ) ARABIC LIGATURE SHADDA WI
0ECB ; 0E4B ; MA # ( ໋ → ๋ ) LAO TONE MAI CATAWA → THAI CHARACTER MAI CHATTAWA #
A66F ; 20E9 ; MA # ( ꙯ → ⃩ ) COMBINING CYRILLIC VZMET → COMBINING WIDE BRIDGE ABOVE #
2028 ; 0020 ; MA #* ( → ) LINE SEPARATOR → SPACE #
2029 ; 0020 ; MA #* ( → ) PARAGRAPH SEPARATOR → SPACE #
1680 ; 0020 ; MA #* ( → ) OGHAM SPACE MARK → SPACE #
@ -311,6 +320,7 @@ FF01 ; 0021 ; MA #* ( → ! ) FULLWIDTH EXCLAMATION MARK → EXCLAMATION MAR
0241 ; 003F ; MA # ( Ɂ → ? ) LATIN CAPITAL LETTER GLOTTAL STOP → QUESTION MARK # →ʔ→
097D ; 003F ; MA # ( → ? ) DEVANAGARI LETTER GLOTTAL STOP → QUESTION MARK #
13AE ; 003F ; MA # ( → ? ) CHEROKEE LETTER HE → QUESTION MARK # →Ɂ→→ʔ→
A6EB ; 003F ; MA # ( → ? ) BAMUM LETTER NTUU → QUESTION MARK # →ʔ→
2048 ; 003F 0021 ; MA #* ( ⁈ → ?! ) QUESTION EXCLAMATION MARK → QUESTION MARK, EXCLAMATION MARK #
@ -335,6 +345,8 @@ A4FA ; 002E 002E ; MA # ( ꓺ → .. ) LISU LETTER TONE MYA CYA → FULL STOP, F
2026 ; 002E 002E 002E ; MA #* ( … → ... ) HORIZONTAL ELLIPSIS → FULL STOP, FULL STOP, FULL STOP #
A6F4 ; A6F3 A6F3 ; MA #* ( ꛴ → ꛳꛳ ) BAMUM COLON → BAMUM FULL STOP, BAMUM FULL STOP #
30FB ; 00B7 ; MA #* ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→
FF65 ; 00B7 ; MA #* ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→
16EB ; 00B7 ; MA #* ( ᛫ → · ) RUNIC SINGLE PUNCTUATION → MIDDLE DOT #
@ -345,8 +357,8 @@ FF65 ; 00B7 ; MA #* ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT
2027 ; 00B7 ; MA #* ( ‧ → · ) HYPHENATION POINT → MIDDLE DOT #
2219 ; 00B7 ; MA #* ( ∙ → · ) BULLET OPERATOR → MIDDLE DOT #
22C5 ; 00B7 ; MA #* ( ⋅ → · ) DOT OPERATOR → MIDDLE DOT #
1427 ; 00B7 ; MA # ( ᐧ → · ) CANADIAN SYLLABICS FINAL MIDDLE DOT → MIDDLE DOT #
A78F ; 00B7 ; MA # ( ꞏ → · ) LATIN LETTER SINOLOGICAL DOT → MIDDLE DOT #
1427 ; 00B7 ; MA # ( ᐧ → · ) CANADIAN SYLLABICS FINAL MIDDLE DOT → MIDDLE DOT #
22EF ; 00B7 00B7 00B7 ; MA #* ( ⋯ → ··· ) MIDLINE HORIZONTAL ELLIPSIS → MIDDLE DOT, MIDDLE DOT, MIDDLE DOT #
2D48 ; 00B7 00B7 00B7 ; MA # ( ⵈ → ··· ) TIFINAGH LETTER TUAREG YAQ → MIDDLE DOT, MIDDLE DOT, MIDDLE DOT # →⋯→
@ -514,6 +526,8 @@ A78F ; 00B7 ; MA # ( ꞏ → · ) LATIN LETTER SINOLOGICAL DOT → MIDDLE DOT #
18C2 ; 00B7 18C0 ; MA # ( ᣂ → ·ᣀ ) CANADIAN SYLLABICS SHWOY → MIDDLE DOT, CANADIAN SYLLABICS SHOY # →ᐧᣀ→
A830 ; 0964 ; MA #* ( ꠰ → । ) NORTH INDIC FRACTION ONE QUARTER → DEVANAGARI DANDA #
0965 ; 0964 0964 ; MA #* ( ॥ → ।। ) DEVANAGARI DOUBLE DANDA → DEVANAGARI DANDA, DEVANAGARI DANDA #
1C3C ; 1C3B 1C3B ; MA #* ( ᰼ → ᰻᰻ ) LEPCHA PUNCTUATION NYET THYOOM TA-ROL → LEPCHA PUNCTUATION TA-ROL, LEPCHA PUNCTUATION TA-ROL #
@ -606,7 +620,7 @@ FD3E ; 0028 ; MA #* ( → ( ) ORNATE LEFT PARENTHESIS → LEFT PARENTHESIS #
2E28 ; 0028 0028 ; MA #* ( ⸨ → (( ) LEFT DOUBLE PARENTHESIS → LEFT PARENTHESIS, LEFT PARENTHESIS #
3220 ; 0028 30FC 0029 ; MA #* ( ㈠ → (ー) ) PARENTHESIZED IDEOGRAPH ONE → LEFT PARENTHESIS, KATAKANA-HIRAGANA PROLONGED SOUND MARK, RIGHT PARENTHESIS # →(一)→
3220 ; 0028 30FC 0029 ; MA #* ( ㈠ → (ー) ) PARENTHESIZED IDEOGRAPH ONE → LEFT PARENTHESIS, KATAKANA-HIRAGANA PROLONGED SOUND MARK, RIGHT PARENTHESIS # →(一)→→(⼀)→
2475 ; 0028 0032 0029 ; MA #* ( ⑵ → (2) ) PARENTHESIZED DIGIT TWO → LEFT PARENTHESIS, DIGIT TWO, RIGHT PARENTHESIS #
@ -919,6 +933,9 @@ FD3F ; 0029 ; MA #* ( ﴿ → ) ) ORNATE RIGHT PARENTHESIS → RIGHT PARENTHESIS
27E8 ; 276C ; MA #* ( ⟨ → ❬ ) MATHEMATICAL LEFT ANGLE BRACKET → MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT # →〈→
2329 ; 276C ; MA #* ( 〈 → ❬ ) LEFT-POINTING ANGLE BRACKET → MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT # →〈→
3008 ; 276C ; MA #* ( 〈 → ❬ ) LEFT ANGLE BRACKET → MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT #
31DB ; 276C ; MA #* ( ㇛ → ❬ ) CJK STROKE PD → MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT # →⟨→→〈→
304F ; 276C ; MA # ( く → ❬ ) HIRAGANA LETTER KU → MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT # →㇛→→⟨→→〈→
21FE8 ; 276C ; MA # ( 𡿨 → ❬ ) CJK UNIFIED IDEOGRAPH-21FE8 → MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT # →㇛→→⟨→→〈→
27E9 ; 276D ; MA #* ( ⟩ → ❭ ) MATHEMATICAL RIGHT ANGLE BRACKET → MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT # →〉→
232A ; 276D ; MA #* ( 〉 → ❭ ) RIGHT-POINTING ANGLE BRACKET → MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT # →〉→
@ -940,9 +957,11 @@ FF3E ; FE3F ; MA #* ( → ︿ ) FULLWIDTH CIRCUMFLEX ACCENT → PRESENTATION
2571 ; 002F ; MA #* ( → / ) BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT → SOLIDUS #
27CB ; 002F ; MA #* ( → / ) MATHEMATICAL RISING DIAGONAL → SOLIDUS #
29F8 ; 002F ; MA #* ( → / ) BIG SOLIDUS → SOLIDUS #
1D23A ; 002F ; MA #* ( 𝈺 → / ) GREEK INSTRUMENTAL NOTATION SYMBOL-47 → SOLIDUS #
31D3 ; 002F ; MA #* ( → / ) CJK STROKE SP → SOLIDUS # →⼃→
3033 ; 002F ; MA # ( → / ) VERTICAL KANA REPEAT MARK UPPER HALF → SOLIDUS #
2CC6 ; 002F ; MA # ( → / ) COPTIC CAPITAL LETTER OLD COPTIC ESH → SOLIDUS #
30CE ; 002F ; MA # ( → / ) KATAKANA LETTER NO → SOLIDUS # →⼃→
4E3F ; 002F ; MA # ( 丿 → / ) CJK UNIFIED IDEOGRAPH-4E3F → SOLIDUS # →⼃→
2F03 ; 002F ; MA #* ( → / ) KANGXI RADICAL SLASH → SOLIDUS #
@ -958,6 +977,8 @@ FE68 ; 005C ; MA #* ( → \ ) SMALL REVERSE SOLIDUS → REVERSE SOLIDUS #
27CD ; 005C ; MA #* ( → \ ) MATHEMATICAL FALLING DIAGONAL → REVERSE SOLIDUS #
29F5 ; 005C ; MA #* ( → \ ) REVERSE SOLIDUS OPERATOR → REVERSE SOLIDUS #
29F9 ; 005C ; MA #* ( → \ ) BIG REVERSE SOLIDUS → REVERSE SOLIDUS #
1D20F ; 005C ; MA #* ( 𝈏 → \ ) GREEK VOCAL NOTATION SYMBOL-16 → REVERSE SOLIDUS #
1D23B ; 005C ; MA #* ( 𝈻 → \ ) GREEK INSTRUMENTAL NOTATION SYMBOL-48 → REVERSE SOLIDUS # →𝈏→
31D4 ; 005C ; MA #* ( → \ ) CJK STROKE D → REVERSE SOLIDUS # →⼂→
4E36 ; 005C ; MA # ( → \ ) CJK UNIFIED IDEOGRAPH-4E36 → REVERSE SOLIDUS # →⼂→
2F02 ; 005C ; MA #* ( → \ ) KANGXI RADICAL DOT → REVERSE SOLIDUS #
@ -974,6 +995,8 @@ A778 ; 0026 ; MA # ( → & ) LATIN SMALL LETTER UM → AMPERSAND #
111C7 ; 0970 ; MA #* ( 𑇇 → ॰ ) SHARADA ABBREVIATION SIGN → DEVANAGARI ABBREVIATION SIGN #
26AC ; 0970 ; MA #* ( ⚬ → ॰ ) MEDIUM SMALL WHITE CIRCLE → DEVANAGARI ABBREVIATION SIGN #
111DB ; A8FC ; MA #* ( 𑇛 → ꣼ ) SHARADA SIGN SIDDHAM → DEVANAGARI SIGN SIDDHAM #
17D9 ; 0E4F ; MA #* ( ៙ → ๏ ) KHMER SIGN PHNAEK MUAN → THAI CHARACTER FONGMAN #
17D5 ; 0E5A ; MA #* ( ៕ → ๚ ) KHMER SIGN BARIYOOSAN → THAI CHARACTER ANGKHANKHU #
@ -1001,6 +1024,8 @@ FFE3 ; 02C9 ; MA #* (  ̄ → ˉ ) FULLWIDTH MACRON → MODIFIER LETTER MACRON #
044A ; 02C9 0062 ; MA # ( ъ → ˉb ) CYRILLIC SMALL LETTER HARD SIGN → MODIFIER LETTER MACRON, LATIN SMALL LETTER B # →¯b→
A651 ; 02C9 0062 0069 ; MA # ( ꙑ → ˉbi ) CYRILLIC SMALL LETTER YERU WITH BACK YER → MODIFIER LETTER MACRON, LATIN SMALL LETTER B, LATIN SMALL LETTER I # →ъı→
0375 ; 02CF ; MA #* ( ͵ → ˏ ) GREEK LOWER NUMERAL SIGN → MODIFIER LETTER LOW ACUTE ACCENT #
02FB ; 02EA ; MA #* ( ˻ → ˪ ) MODIFIER LETTER BEGIN LOW TONE → MODIFIER LETTER YIN DEPARTING TONE MARK #
@ -1016,9 +1041,9 @@ A714 ; 02EB ; MA #* ( ꜔ → ˫ ) MODIFIER LETTER MID LEFT-STEM TONE BAR → MO
25CB ; 00B0 ; MA #* ( ○ → ° ) WHITE CIRCLE → DEGREE SIGN # →◦→→∘→
25E6 ; 00B0 ; MA #* ( ◦ → ° ) WHITE BULLET → DEGREE SIGN # →∘→
235C ; 00B0 0332 ; MA #* ( ⍜ → °̲ ) APL FUNCTIONAL SYMBOL CIRCLE UNDERBAR → DEGREE SIGN, COMBINING LOW LINE # →○̲→→∘̲→
235C ; 00B0 0332 ; MA #* ( ⍜ → °̲ ) APL FUNCTIONAL SYMBOL CIRCLE UNDERBAR → DEGREE SIGN, COMBINING LOW LINE # →○̲→
2364 ; 00B0 0308 ; MA #* ( ⍤ → °̈ ) APL FUNCTIONAL SYMBOL JOT DIAERESIS → DEGREE SIGN, COMBINING DIAERESIS # →◦̈→→∘̈→
2364 ; 00B0 0308 ; MA #* ( ⍤ → °̈ ) APL FUNCTIONAL SYMBOL JOT DIAERESIS → DEGREE SIGN, COMBINING DIAERESIS # →◦̈→
2103 ; 00B0 0043 ; MA #* ( ℃ → °C ) DEGREE CELSIUS → DEGREE SIGN, LATIN CAPITAL LETTER C #
@ -1040,6 +1065,8 @@ A714 ; 02EB ; MA #* ( ꜔ → ˫ ) MODIFIER LETTER MID LEFT-STEM TONE BAR → MO
24C5 ; 2117 ; MA #* ( Ⓟ → ℗ ) CIRCLED LATIN CAPITAL LETTER P → SOUND RECORDING COPYRIGHT #
1D21B ; 2144 ; MA #* ( 𝈛 → ⅄ ) GREEK VOCAL NOTATION SYMBOL-53 → TURNED SANS-SERIF CAPITAL Y #
2BEC ; 219E ; MA #* ( ⯬ → ↞ ) LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS → LEFTWARDS TWO HEADED ARROW #
2BED ; 219F ; MA #* ( ⯭ → ↟ ) UPWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS → UPWARDS TWO HEADED ARROW #
@ -1105,6 +1132,7 @@ A714 ; 02EB ; MA #* ( ꜔ → ˫ ) MODIFIER LETTER MID LEFT-STEM TONE BAR → MO
2039 ; 003C ; MA #* ( → < ) SINGLE LEFT-POINTING ANGLE QUOTATION MARK → LESS-THAN SIGN #
276E ; 003C ; MA #* ( → < ) HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT → LESS-THAN SIGN # →‹→
02C2 ; 003C ; MA #* ( ˂ → < ) MODIFIER LETTER LEFT ARROWHEAD → LESS-THAN SIGN #
1D236 ; 003C ; MA #* ( 𝈶 → < ) GREEK INSTRUMENTAL NOTATION SYMBOL-40 → LESS-THAN SIGN #
1438 ; 003C ; MA # ( → < ) CANADIAN SYLLABICS PA → LESS-THAN SIGN #
16B2 ; 003C ; MA # ( → < ) RUNIC LETTER KAUNA → LESS-THAN SIGN #
@ -1142,6 +1170,7 @@ A4FF ; 003D ; MA #* ( → = ) LISU PUNCTUATION FULL STOP → EQUALS SIGN #
203A ; 003E ; MA #* ( → > ) SINGLE RIGHT-POINTING ANGLE QUOTATION MARK → GREATER-THAN SIGN #
276F ; 003E ; MA #* ( → > ) HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT → GREATER-THAN SIGN # →›→
02C3 ; 003E ; MA #* ( ˃ → > ) MODIFIER LETTER RIGHT ARROWHEAD → GREATER-THAN SIGN #
1D237 ; 003E ; MA #* ( 𝈷 → > ) GREEK INSTRUMENTAL NOTATION SYMBOL-42 → GREATER-THAN SIGN #
1433 ; 003E ; MA # ( → > ) CANADIAN SYLLABICS PO → GREATER-THAN SIGN #
1441 ; 003E 00B7 ; MA # ( ᑁ → >· ) CANADIAN SYLLABICS WEST-CREE PWO → GREATER-THAN SIGN, MIDDLE DOT # →ᐳᐧ→
@ -1160,8 +1189,8 @@ A4FF ; 003D ; MA #* ( → = ) LISU PUNCTUATION FULL STOP → EQUALS SIGN #
2368 ; 007E 0308 ; MA #* ( ⍨ → ~̈ ) APL FUNCTIONAL SYMBOL TILDE DIAERESIS → TILDE, COMBINING DIAERESIS #
2E1E ; 007E 0307 ; MA #* ( ⸞ → ~̇ ) TILDE WITH DOT ABOVE → TILDE, COMBINING DOT ABOVE # →⩪→→∼̇→→⁓̇→
2A6A ; 007E 0307 ; MA #* ( ⩪ → ~̇ ) TILDE OPERATOR WITH DOT ABOVE → TILDE, COMBINING DOT ABOVE # →∼̇→→⁓̇→
2E1E ; 007E 0307 ; MA #* ( ⸞ → ~̇ ) TILDE WITH DOT ABOVE → TILDE, COMBINING DOT ABOVE # →⩪→→∼̇→
2A6A ; 007E 0307 ; MA #* ( ⩪ → ~̇ ) TILDE OPERATOR WITH DOT ABOVE → TILDE, COMBINING DOT ABOVE # →∼̇→
2E1F ; 007E 0323 ; MA #* ( ⸟ → ~̣ ) TILDE WITH DOT BELOW → TILDE, COMBINING DOT BELOW #
@ -1190,6 +1219,10 @@ A4FF ; 003D ; MA #* ( → = ) LISU PUNCTUATION FULL STOP → EQUALS SIGN #
2A04 ; 228E ; MA #* ( ⨄ → ⊎ ) N-ARY UNION OPERATOR WITH PLUS → MULTISET UNION #
1D238 ; 228F ; MA #* ( 𝈸 → ⊏ ) GREEK INSTRUMENTAL NOTATION SYMBOL-43 → SQUARE IMAGE OF #
1D239 ; 2290 ; MA #* ( 𝈹 → ⊐ ) GREEK INSTRUMENTAL NOTATION SYMBOL-45 → SQUARE ORIGINAL OF #
2A05 ; 2293 ; MA #* ( ⨅ → ⊓ ) N-ARY SQUARE INTERSECTION OPERATOR → SQUARE CAP #
2A06 ; 2294 ; MA #* ( ⨆ → ⊔ ) N-ARY SQUARE UNION OPERATOR → SQUARE CUP #
@ -1281,6 +1314,7 @@ FFED ; 25AA ; MA #* ( ■ → ▪ ) HALFWIDTH BLACK SQUARE → BLACK SMALL SQUAR
1F319 ; 263D ; MA #* ( 🌙 → ☽ ) CRESCENT MOON → FIRST QUARTER MOON #
1F318 ; 263E ; MA #* ( 🌘 → ☾ ) WANING CRESCENT MOON SYMBOL → LAST QUARTER MOON #
23FE ; 263E ; MA #* ( ⏾ → ☾ ) POWER SLEEP SYMBOL → LAST QUARTER MOON #
29D9 ; 299A ; MA #* ( ⧙ → ⦚ ) RIGHT WIGGLY FENCE → VERTICAL ZIGZAG LINE #
@ -1355,6 +1389,7 @@ A75A ; 0032 ; MA # ( → 2 ) LATIN CAPITAL LETTER R ROTUNDA → DIGIT TWO #
03E8 ; 0032 ; MA # ( Ϩ → 2 ) COPTIC CAPITAL LETTER HORI → DIGIT TWO # →Ƨ→
A644 ; 0032 ; MA # ( → 2 ) CYRILLIC CAPITAL LETTER REVERSED DZE → DIGIT TWO # →Ƨ→
14BF ; 0032 ; MA # ( → 2 ) CANADIAN SYLLABICS SAYISI M → DIGIT TWO #
A6EF ; 0032 ; MA # ( → 2 ) BAMUM LETTER KOGHOM → DIGIT TWO # →Ƨ→
A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DIGIT TWO #
06F2 ; 0662 ; MA # ( ۲ → ‎٢‎ ) EXTENDED ARABIC-INDIC DIGIT TWO → ARABIC-INDIC DIGIT TWO #
@ -1415,6 +1450,7 @@ A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DI
335A ; 0032 70B9 ; MA #* ( ㍚ → 2点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWO → DIGIT TWO, CJK UNIFIED IDEOGRAPH-70B9 #
1D206 ; 0033 ; MA #* ( 𝈆 → 3 ) GREEK VOCAL NOTATION SYMBOL-7 → DIGIT THREE #
1D7D1 ; 0033 ; MA # ( 𝟑 → 3 ) MATHEMATICAL BOLD DIGIT THREE → DIGIT THREE #
1D7DB ; 0033 ; MA # ( 𝟛 → 3 ) MATHEMATICAL DOUBLE-STRUCK DIGIT THREE → DIGIT THREE #
1D7E5 ; 0033 ; MA # ( 𝟥 → 3 ) MATHEMATICAL SANS-SERIF DIGIT THREE → DIGIT THREE #
@ -1524,6 +1560,7 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
335E ; 0036 70B9 ; MA #* ( ㍞ → 6点 ) IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIX → DIGIT SIX, CJK UNIFIED IDEOGRAPH-70B9 #
1D212 ; 0037 ; MA #* ( 𝈒 → 7 ) GREEK VOCAL NOTATION SYMBOL-19 → DIGIT SEVEN #
1D7D5 ; 0037 ; MA # ( 𝟕 → 7 ) MATHEMATICAL BOLD DIGIT SEVEN → DIGIT SEVEN #
1D7DF ; 0037 ; MA # ( 𝟟 → 7 ) MATHEMATICAL DOUBLE-STRUCK DIGIT SEVEN → DIGIT SEVEN #
1D7E9 ; 0037 ; MA # ( 𝟩 → 7 ) MATHEMATICAL SANS-SERIF DIGIT SEVEN → DIGIT SEVEN #
@ -1573,6 +1610,7 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
0A67 ; 0039 ; MA # ( → 9 ) GURMUKHI DIGIT ONE → DIGIT NINE #
0B68 ; 0039 ; MA # ( → 9 ) ORIYA DIGIT TWO → DIGIT NINE #
09ED ; 0039 ; MA # ( → 9 ) BENGALI DIGIT SEVEN → DIGIT NINE #
0D6D ; 0039 ; MA # ( → 9 ) MALAYALAM DIGIT SEVEN → DIGIT NINE #
1D7D7 ; 0039 ; MA # ( 𝟗 → 9 ) MATHEMATICAL BOLD DIGIT NINE → DIGIT NINE #
1D7E1 ; 0039 ; MA # ( 𝟡 → 9 ) MATHEMATICAL DOUBLE-STRUCK DIGIT NINE → DIGIT NINE #
1D7EB ; 0039 ; MA # ( 𝟫 → 9 ) MATHEMATICAL SANS-SERIF DIGIT NINE → DIGIT NINE #
@ -1700,6 +1738,7 @@ A73D ; 0061 0079 ; MA # ( ꜽ → ay ) LATIN SMALL LETTER AY → LATIN SMALL LET
A73C ; 0041 0059 ; MA # ( Ꜽ → AY ) LATIN CAPITAL LETTER AY → LATIN CAPITAL LETTER A, LATIN CAPITAL LETTER Y #
2200 ; 2C6F ; MA #* ( ∀ → Ɐ ) FOR ALL → LATIN CAPITAL LETTER TURNED A #
1D217 ; 2C6F ; MA #* ( 𝈗 → Ɐ ) GREEK VOCAL NOTATION SYMBOL-24 → LATIN CAPITAL LETTER TURNED A #
15C4 ; 2C6F ; MA # ( ᗄ → Ɐ ) CANADIAN SYLLABICS CARRIER GHU → LATIN CAPITAL LETTER TURNED A # →∀→
A4EF ; 2C6F ; MA # ( ꓯ → Ɐ ) LISU LETTER AE → LATIN CAPITAL LETTER TURNED A #
@ -1737,6 +1776,7 @@ FF22 ; 0042 ; MA # ( → B ) FULLWIDTH LATIN CAPITAL LETTER B → LATIN CAPI
1D609 ; 0042 ; MA # ( 𝘉 → B ) MATHEMATICAL SANS-SERIF ITALIC CAPITAL B → LATIN CAPITAL LETTER B #
1D63D ; 0042 ; MA # ( 𝘽 → B ) MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL B → LATIN CAPITAL LETTER B #
1D671 ; 0042 ; MA # ( 𝙱 → B ) MATHEMATICAL MONOSPACE CAPITAL B → LATIN CAPITAL LETTER B #
A7B4 ; 0042 ; MA # ( → B ) LATIN CAPITAL LETTER BETA → LATIN CAPITAL LETTER B #
0392 ; 0042 ; MA # ( Β → B ) GREEK CAPITAL LETTER BETA → LATIN CAPITAL LETTER B #
1D6A9 ; 0042 ; MA # ( 𝚩 → B ) MATHEMATICAL BOLD CAPITAL BETA → LATIN CAPITAL LETTER B # →Β→
1D6E3 ; 0042 ; MA # ( 𝛣 → B ) MATHEMATICAL ITALIC CAPITAL BETA → LATIN CAPITAL LETTER B # →Β→
@ -1750,7 +1790,6 @@ A4D0 ; 0042 ; MA # ( → B ) LISU LETTER BA → LATIN CAPITAL LETTER B #
10282 ; 0042 ; MA # ( 𐊂 → B ) LYCIAN LETTER B → LATIN CAPITAL LETTER B #
102A1 ; 0042 ; MA # ( 𐊡 → B ) CARIAN LETTER P2 → LATIN CAPITAL LETTER B #
10301 ; 0042 ; MA # ( 𐌁 → B ) OLD ITALIC LETTER BE → LATIN CAPITAL LETTER B #
A7B4 ; 0042 ; MA # ( → B ) LATIN CAPITAL LETTER BETA → LATIN CAPITAL LETTER B #
0253 ; 0062 0314 ; MA # ( ɓ → b̔ ) LATIN SMALL LETTER B WITH HOOK → LATIN SMALL LETTER B, COMBINING REVERSED COMMA ABOVE #
@ -2021,8 +2060,13 @@ A4F1 ; 018E ; MA # ( ꓱ → Ǝ ) LISU LETTER EU → LATIN CAPITAL LETTER REVERS
1D14 ; 01DD 006F ; MA # ( ᴔ → ǝo ) LATIN SMALL LETTER TURNED OE → LATIN SMALL LETTER TURNED E, LATIN SMALL LETTER O # →əo→
AB41 ; 01DD 006F 0338 ; MA # ( ꭁ → ǝo̸ ) LATIN SMALL LETTER TURNED OE WITH STROKE → LATIN SMALL LETTER TURNED E, LATIN SMALL LETTER O, COMBINING LONG SOLIDUS OVERLAY # →ǝø→
AB42 ; 01DD 006F 0335 ; MA # ( ꭂ → ǝo̵ ) LATIN SMALL LETTER TURNED OE WITH HORIZONTAL STROKE → LATIN SMALL LETTER TURNED E, LATIN SMALL LETTER O, COMBINING SHORT STROKE OVERLAY # →ǝɵ→
04D8 ; 018F ; MA # ( Ә → Ə ) CYRILLIC CAPITAL LETTER SCHWA → LATIN CAPITAL LETTER SCHWA #
1D221 ; 0190 ; MA #* ( 𝈡 → Ɛ ) GREEK INSTRUMENTAL NOTATION SYMBOL-7 → LATIN CAPITAL LETTER OPEN E #
2107 ; 0190 ; MA # ( ℇ → Ɛ ) EULER CONSTANT → LATIN CAPITAL LETTER OPEN E #
0510 ; 0190 ; MA # ( Ԑ → Ɛ ) CYRILLIC CAPITAL LETTER REVERSED ZE → LATIN CAPITAL LETTER OPEN E #
13CB ; 0190 ; MA # ( Ꮛ → Ɛ ) CHEROKEE LETTER QUV → LATIN CAPITAL LETTER OPEN E #
@ -2035,9 +2079,9 @@ A4F1 ; 018E ; MA # ( ꓱ → Ǝ ) LISU LETTER EU → LATIN CAPITAL LETTER REVERS
0499 ; 025C 0326 ; MA # ( ҙ → ɜ̦ ) CYRILLIC SMALL LETTER ZE WITH DESCENDER → LATIN SMALL LETTER REVERSED OPEN E, COMBINING COMMA BELOW # →з̡→
A79D ; 025E ; MA # ( ꞝ → ɞ ) LATIN SMALL LETTER VOLAPUK OE → LATIN SMALL LETTER CLOSED REVERSED OPEN E #
10442 ; 025E ; MA # ( 𐑂 → ɞ ) DESERET SMALL LETTER VEE → LATIN SMALL LETTER CLOSED REVERSED OPEN E #
A79D ; 029A ; MA # ( ꞝ → ʚ ) LATIN SMALL LETTER VOLAPUK OE → LATIN SMALL LETTER CLOSED OPEN E #
1042A ; 029A ; MA # ( 𐐪 → ʚ ) DESERET SMALL LETTER LONG A → LATIN SMALL LETTER CLOSED OPEN E #
1D41F ; 0066 ; MA # ( 𝐟 → f ) MATHEMATICAL BOLD SMALL F → LATIN SMALL LETTER F #
@ -2059,6 +2103,7 @@ A799 ; 0066 ; MA # ( → f ) LATIN SMALL LETTER F WITH STROKE → LATIN SMAL
1E9D ; 0066 ; MA # ( → f ) LATIN SMALL LETTER LONG S WITH HIGH STROKE → LATIN SMALL LETTER F #
0584 ; 0066 ; MA # ( ք → f ) ARMENIAN SMALL LETTER KEH → LATIN SMALL LETTER F #
1D213 ; 0046 ; MA #* ( 𝈓 → F ) GREEK VOCAL NOTATION SYMBOL-20 → LATIN CAPITAL LETTER F # →Ϝ→
2131 ; 0046 ; MA # ( → F ) SCRIPT CAPITAL F → LATIN CAPITAL LETTER F #
1D405 ; 0046 ; MA # ( 𝐅 → F ) MATHEMATICAL BOLD CAPITAL F → LATIN CAPITAL LETTER F #
1D439 ; 0046 ; MA # ( 𝐹 → F ) MATHEMATICAL ITALIC CAPITAL F → LATIN CAPITAL LETTER F #
@ -2106,6 +2151,7 @@ FB02 ; 0066 006C ; MA # ( fl → fl ) LATIN SMALL LIGATURE FL → LATIN SMALL L
15B5 ; 2132 ; MA # ( ᖵ → Ⅎ ) CANADIAN SYLLABICS BLACKFOOT WI → TURNED CAPITAL F #
A4DE ; 2132 ; MA # ( ꓞ → Ⅎ ) LISU LETTER TSHA → TURNED CAPITAL F #
1D230 ; A7FB ; MA #* ( 𝈰 → ꟻ ) GREEK INSTRUMENTAL NOTATION SYMBOL-30 → LATIN EPIGRAPHIC LETTER REVERSED F #
15B7 ; A7FB ; MA # ( ᖷ → ꟻ ) CANADIAN SYLLABICS BLACKFOOT WA → LATIN EPIGRAPHIC LETTER REVERSED F #
FF47 ; 0067 ; MA # ( → g ) FULLWIDTH LATIN SMALL LETTER G → LATIN SMALL LETTER G # →ɡ→
@ -2238,6 +2284,7 @@ A695 ; 0068 0314 ; MA # ( ꚕ → h̔ ) CYRILLIC SMALL LETTER HWE → LATIN SMAL
0370 ; 2C75 ; MA # ( Ͱ → Ⱶ ) GREEK CAPITAL LETTER HETA → LATIN CAPITAL LETTER HALF H # →Ꮀ→
13A8 ; 2C75 ; MA # ( Ꭸ → Ⱶ ) CHEROKEE LETTER GE → LATIN CAPITAL LETTER HALF H # →Ͱ→→Ꮀ→
13B0 ; 2C75 ; MA # ( Ꮀ → Ⱶ ) CHEROKEE LETTER HO → LATIN CAPITAL LETTER HALF H #
A6B1 ; 2C75 ; MA # ( ꚱ → Ⱶ ) BAMUM LETTER NDAA → LATIN CAPITAL LETTER HALF H # →Ͱ→→Ꮀ→
A795 ; A727 ; MA # ( ꞕ → ꜧ ) LATIN SMALL LETTER H WITH PALATAL HOOK → LATIN SMALL LETTER HENG #
@ -2332,12 +2379,12 @@ FF2A ; 004A ; MA # ( → J ) FULLWIDTH LATIN CAPITAL LETTER J → LATIN CAPI
1D611 ; 004A ; MA # ( 𝘑 → J ) MATHEMATICAL SANS-SERIF ITALIC CAPITAL J → LATIN CAPITAL LETTER J #
1D645 ; 004A ; MA # ( 𝙅 → J ) MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL J → LATIN CAPITAL LETTER J #
1D679 ; 004A ; MA # ( 𝙹 → J ) MATHEMATICAL MONOSPACE CAPITAL J → LATIN CAPITAL LETTER J #
A7B2 ; 004A ; MA # ( → J ) LATIN CAPITAL LETTER J WITH CROSSED-TAIL → LATIN CAPITAL LETTER J #
037F ; 004A ; MA # ( Ϳ → J ) GREEK CAPITAL LETTER YOT → LATIN CAPITAL LETTER J #
0408 ; 004A ; MA # ( Ј → J ) CYRILLIC CAPITAL LETTER JE → LATIN CAPITAL LETTER J #
13AB ; 004A ; MA # ( → J ) CHEROKEE LETTER GU → LATIN CAPITAL LETTER J #
148D ; 004A ; MA # ( → J ) CANADIAN SYLLABICS CO → LATIN CAPITAL LETTER J #
A4D9 ; 004A ; MA # ( → J ) LISU LETTER JA → LATIN CAPITAL LETTER J #
A7B2 ; 004A ; MA # ( → J ) LATIN CAPITAL LETTER J WITH CROSSED-TAIL → LATIN CAPITAL LETTER J #
0249 ; 006A 0335 ; MA # ( ɉ → j̵ ) LATIN SMALL LETTER J WITH STROKE → LATIN SMALL LETTER J, COMBINING SHORT STROKE OVERLAY #
@ -2491,7 +2538,9 @@ FE8D ; 006C ; MA # ( → l ) ARABIC LETTER ALEF ISOLATED FORM → LATI
A4F2 ; 006C ; MA # ( → l ) LISU LETTER I → LATIN SMALL LETTER L # →I→
1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→
10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→
23FD ; 006C ; MA #* ( → l ) POWER ON SYMBOL → LATIN SMALL LETTER L # →I→
1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L #
216C ; 004C ; MA # ( → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L #
2112 ; 004C ; MA # ( → L ) SCRIPT CAPITAL L → LATIN CAPITAL LETTER L #
1D40B ; 004C ; MA # ( 𝐋 → L ) MATHEMATICAL BOLD CAPITAL L → LATIN CAPITAL LETTER L #
@ -2848,6 +2897,7 @@ FBA9 ; 006F ; MA # ( → o ) ARABIC LETTER HEH GOAL MEDIAL FORM → LA
FBA7 ; 006F ; MA # ( → o ) ARABIC LETTER HEH GOAL FINAL FORM → LATIN SMALL LETTER O # →‎ہ‎→→‎ه‎→
FBA6 ; 006F ; MA # ( → o ) ARABIC LETTER HEH GOAL ISOLATED FORM → LATIN SMALL LETTER O # →‎ه‎→
06D5 ; 006F ; MA # ( ە → o ) ARABIC LETTER AE → LATIN SMALL LETTER O # →‎ه‎→
0D20 ; 006F ; MA # ( → o ) MALAYALAM LETTER TTHA → LATIN SMALL LETTER O #
101D ; 006F ; MA # ( → o ) MYANMAR LETTER WA → LATIN SMALL LETTER O #
118C8 ; 006F ; MA # ( 𑣈 → o ) WARANG CITI SMALL LETTER E → LATIN SMALL LETTER O #
118D7 ; 006F ; MA # ( 𑣗 → o ) WARANG CITI SMALL LETTER BU → LATIN SMALL LETTER O #
@ -2889,8 +2939,8 @@ FF2F ; 004F ; MA # ( → O ) FULLWIDTH LATIN CAPITAL LETTER O → LATIN CAPI
041E ; 004F ; MA # ( О → O ) CYRILLIC CAPITAL LETTER O → LATIN CAPITAL LETTER O #
0555 ; 004F ; MA # ( Օ → O ) ARMENIAN CAPITAL LETTER OH → LATIN CAPITAL LETTER O #
2D54 ; 004F ; MA # ( → O ) TIFINAGH LETTER YAR → LATIN CAPITAL LETTER O #
12D0 ; 004F ; MA # ( → O ) ETHIOPIC SYLLABLE PHARYNGEAL A → LATIN CAPITAL LETTER O # →Օ→
0B20 ; 004F ; MA # ( → O ) ORIYA LETTER TTHA → LATIN CAPITAL LETTER O # →→→0→
0D20 ; 004F ; MA # ( → O ) MALAYALAM LETTER TTHA → LATIN CAPITAL LETTER O #
A4F3 ; 004F ; MA # ( → O ) LISU LETTER O → LATIN CAPITAL LETTER O #
118B5 ; 004F ; MA # ( 𑢵 → O ) WARANG CITI CAPITAL LETTER AT → LATIN CAPITAL LETTER O #
10292 ; 004F ; MA # ( 𐊒 → O ) LYCIAN LETTER U → LATIN CAPITAL LETTER O #
@ -2923,6 +2973,7 @@ A74B ; 006F 0335 ; MA # ( ꝋ → o̵ ) LATIN SMALL LETTER O WITH LONG STROKE OV
2296 ; 004F 0335 ; MA #* ( ⊖ → O̵ ) CIRCLED MINUS → LATIN CAPITAL LETTER O, COMBINING SHORT STROKE OVERLAY # →θ→→Ꮎ→
229D ; 004F 0335 ; MA #* ( ⊝ → O̵ ) CIRCLED DASH → LATIN CAPITAL LETTER O, COMBINING SHORT STROKE OVERLAY # →⊖→→θ→→Ꮎ→
236C ; 004F 0335 ; MA #* ( ⍬ → O̵ ) APL FUNCTIONAL SYMBOL ZILDE → LATIN CAPITAL LETTER O, COMBINING SHORT STROKE OVERLAY # →θ→→Ꮎ→
1D21A ; 004F 0335 ; MA #* ( 𝈚 → O̵ ) GREEK VOCAL NOTATION SYMBOL-52 → LATIN CAPITAL LETTER O, COMBINING SHORT STROKE OVERLAY # →Ꝋ→→O̶→
1F714 ; 004F 0335 ; MA #* ( 🜔 → O̵ ) ALCHEMICAL SYMBOL FOR SALT → LATIN CAPITAL LETTER O, COMBINING SHORT STROKE OVERLAY # →Ɵ→→O̶→
019F ; 004F 0335 ; MA # ( Ɵ → O̵ ) LATIN CAPITAL LETTER O WITH MIDDLE TILDE → LATIN CAPITAL LETTER O, COMBINING SHORT STROKE OVERLAY # →O̶→
A74A ; 004F 0335 ; MA # ( Ꝋ → O̵ ) LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY → LATIN CAPITAL LETTER O, COMBINING SHORT STROKE OVERLAY # →O̶→
@ -3003,7 +3054,7 @@ FD94 ; 006F 0645 0645 ; MA # ( ‎ﶔ‎ → oمم ) ARABIC LIGATURE HEH W
FC53 ; 006F 0649 ; MA # ( ‎ﱓ‎ → ) ARABIC LIGATURE HEH WITH ALEF MAKSURA ISOLATED FORM → LATIN SMALL LETTER O, ARABIC LETTER ALEF MAKSURA # →‎هى‎→
FC54 ; 006F 0649 ; MA # ( ‎ﱔ‎ → ) ARABIC LIGATURE HEH WITH YEH ISOLATED FORM → LATIN SMALL LETTER O, ARABIC LETTER ALEF MAKSURA # →‎هي‎→
0D5F ; 006F 0D30 006F ; MA # ( ൟ → oരo ) MALAYALAM LETTER ARCHAIC II → LATIN SMALL LETTER O, MALAYALAM LETTER RA, LATIN SMALL LETTER O # →൦ര൦
0D5F ; 006F 0D30 006F ; MA # ( ൟ → oരo ) MALAYALAM LETTER ARCHAIC II → LATIN SMALL LETTER O, MALAYALAM LETTER RA, LATIN SMALL LETTER O # →ംരം
1010 ; 006F 102C ; MA # ( တ → oာ ) MYANMAR LETTER TA → LATIN SMALL LETTER O, MYANMAR VOWEL SIGN AA # →ဝာ→
@ -3019,6 +3070,8 @@ FC54 ; 006F 0649 ; MA # ( ‎ﱔ‎ → ) ARABIC LIGATURE HEH WITH YEH
A4DB ; 0186 ; MA # ( ꓛ → Ɔ ) LISU LETTER CHA → LATIN CAPITAL LETTER OPEN O #
10423 ; 0186 ; MA # ( 𐐣 → Ɔ ) DESERET CAPITAL LETTER EM → LATIN CAPITAL LETTER OPEN O #
AB3F ; 0254 0338 ; MA # ( ꬿ → ɔ̸ ) LATIN SMALL LETTER OPEN O WITH STROKE → LATIN SMALL LETTER OPEN O, COMBINING LONG SOLIDUS OVERLAY #
1043F ; 0277 ; MA # ( 𐐿 → ɷ ) DESERET SMALL LETTER KAY → LATIN SMALL LETTER CLOSED OMEGA #
2374 ; 0070 ; MA #* ( → p ) APL FUNCTIONAL SYMBOL RHO → LATIN SMALL LETTER P # →ρ→
@ -3160,6 +3213,7 @@ AB48 ; 0072 ; MA # ( → r ) LATIN SMALL LETTER DOUBLE R → LATIN SMALL LET
2C85 ; 0072 ; MA # ( → r ) COPTIC SMALL LETTER GAMMA → LATIN SMALL LETTER R # →г→
0433 ; 0072 ; MA # ( г → r ) CYRILLIC SMALL LETTER GHE → LATIN SMALL LETTER R #
1D216 ; 0052 ; MA #* ( 𝈖 → R ) GREEK VOCAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER R #
211B ; 0052 ; MA # ( → R ) SCRIPT CAPITAL R → LATIN CAPITAL LETTER R #
211C ; 0052 ; MA # ( → R ) BLACK-LETTER CAPITAL R → LATIN CAPITAL LETTER R #
211D ; 0052 ; MA # ( → R ) DOUBLE-STRUCK CAPITAL R → LATIN CAPITAL LETTER R #
@ -3273,6 +3327,7 @@ A4E2 ; 0053 ; MA # ( → S ) LISU LETTER SA → LATIN CAPITAL LETTER S #
1D74 ; 0073 0334 ; MA # ( ᵴ → s̴ ) LATIN SMALL LETTER S WITH MIDDLE TILDE → LATIN SMALL LETTER S, COMBINING TILDE OVERLAY #
A7B5 ; 00DF ; MA # ( ꞵ → ß ) LATIN SMALL LETTER BETA → LATIN SMALL LETTER SHARP S # →β→
03B2 ; 00DF ; MA # ( β → ß ) GREEK SMALL LETTER BETA → LATIN SMALL LETTER SHARP S #
03D0 ; 00DF ; MA # ( ϐ → ß ) GREEK BETA SYMBOL → LATIN SMALL LETTER SHARP S # →β→
1D6C3 ; 00DF ; MA # ( 𝛃 → ß ) MATHEMATICAL BOLD SMALL BETA → LATIN SMALL LETTER SHARP S # →β→
@ -3281,7 +3336,6 @@ A4E2 ; 0053 ; MA # ( → S ) LISU LETTER SA → LATIN CAPITAL LETTER S #
1D771 ; 00DF ; MA # ( 𝝱 → ß ) MATHEMATICAL SANS-SERIF BOLD SMALL BETA → LATIN SMALL LETTER SHARP S # →β→
1D7AB ; 00DF ; MA # ( 𝞫 → ß ) MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL BETA → LATIN SMALL LETTER SHARP S # →β→
13F0 ; 00DF ; MA # ( Ᏸ → ß ) CHEROKEE LETTER YE → LATIN SMALL LETTER SHARP S # →β→
A7B5 ; 00DF ; MA # ( ꞵ → ß ) LATIN SMALL LETTER BETA → LATIN SMALL LETTER SHARP S # →β→
1F75C ; 0073 0073 0073 ; MA #* ( 🝜 → sss ) ALCHEMICAL SYMBOL FOR STRATUM SUPER STRATUM → LATIN SMALL LETTER S, LATIN SMALL LETTER S, LATIN SMALL LETTER S #
@ -3446,6 +3500,7 @@ AB52 ; 0075 ; MA # ( → u ) LATIN SMALL LETTER U WITH LEFT HOOK → LATIN S
1D650 ; 0055 ; MA # ( 𝙐 → U ) MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL U → LATIN CAPITAL LETTER U #
1D684 ; 0055 ; MA # ( 𝚄 → U ) MATHEMATICAL MONOSPACE CAPITAL U → LATIN CAPITAL LETTER U #
054D ; 0055 ; MA # ( Ս → U ) ARMENIAN CAPITAL LETTER SEH → LATIN CAPITAL LETTER U #
1200 ; 0055 ; MA # ( → U ) ETHIOPIC SYLLABLE HA → LATIN CAPITAL LETTER U # →Ս→
144C ; 0055 ; MA # ( → U ) CANADIAN SYLLABICS TE → LATIN CAPITAL LETTER U #
A4F4 ; 0055 ; MA # ( → U ) LISU LETTER U → LATIN CAPITAL LETTER U #
118B8 ; 0055 ; MA # ( 𑢸 → U ) WARANG CITI CAPITAL LETTER PU → LATIN CAPITAL LETTER U #
@ -3466,6 +3521,7 @@ A4F4 ; 0055 ; MA # ( → U ) LISU LETTER U → LATIN CAPITAL LETTER U #
1D6B ; 0075 0065 ; MA # ( ᵫ → ue ) LATIN SMALL LETTER UE → LATIN SMALL LETTER U, LATIN SMALL LETTER E #
057A ; 0270 ; MA # ( պ → ɰ ) ARMENIAN SMALL LETTER PEH → LATIN SMALL LETTER TURNED M WITH LONG LEG #
1223 ; 0270 ; MA # ( ሣ → ɰ ) ETHIOPIC SYLLABLE SZAA → LATIN SMALL LETTER TURNED M WITH LONG LEG # →պ→
2127 ; 01B1 ; MA #* ( ℧ → Ʊ ) INVERTED OHM SIGN → LATIN CAPITAL LETTER UPSILON #
162E ; 01B1 ; MA # ( ᘮ → Ʊ ) CANADIAN SYLLABICS CARRIER LHU → LATIN CAPITAL LETTER UPSILON # →℧→
@ -3501,6 +3557,7 @@ FF56 ; 0076 ; MA # ( → v ) FULLWIDTH LATIN SMALL LETTER V → LATIN SMALL
05D8 ; 0076 ; MA # ( ט → v ) HEBREW LETTER TET → LATIN SMALL LETTER V #
118C0 ; 0076 ; MA # ( 𑣀 → v ) WARANG CITI SMALL LETTER NGAA → LATIN SMALL LETTER V #
1D20D ; 0056 ; MA #* ( 𝈍 → V ) GREEK VOCAL NOTATION SYMBOL-14 → LATIN CAPITAL LETTER V #
0667 ; 0056 ; MA # ( ٧ → V ) ARABIC-INDIC DIGIT SEVEN → LATIN CAPITAL LETTER V #
06F7 ; 0056 ; MA # ( ۷ → V ) EXTENDED ARABIC-INDIC DIGIT SEVEN → LATIN CAPITAL LETTER V # →‎٧‎→
2164 ; 0056 ; MA # ( → V ) ROMAN NUMERAL FIVE → LATIN CAPITAL LETTER V #
@ -3521,6 +3578,7 @@ FF56 ; 0076 ; MA # ( → v ) FULLWIDTH LATIN SMALL LETTER V → LATIN SMALL
2D38 ; 0056 ; MA # ( → V ) TIFINAGH LETTER YADH → LATIN CAPITAL LETTER V #
13D9 ; 0056 ; MA # ( → V ) CHEROKEE LETTER DO → LATIN CAPITAL LETTER V #
142F ; 0056 ; MA # ( → V ) CANADIAN SYLLABICS PE → LATIN CAPITAL LETTER V #
A6DF ; 0056 ; MA # ( → V ) BAMUM LETTER KO → LATIN CAPITAL LETTER V #
A4E6 ; 0056 ; MA # ( → V ) LISU LETTER HA → LATIN CAPITAL LETTER V #
118A0 ; 0056 ; MA # ( 𑢠 → V ) WARANG CITI CAPITAL LETTER NGAA → LATIN CAPITAL LETTER V #
1051D ; 0056 ; MA # ( 𐔝 → V ) ELBASAN LETTER TE → LATIN CAPITAL LETTER V #
@ -3565,9 +3623,9 @@ A4E6 ; 0056 ; MA # ( → V ) LISU LETTER HA → LATIN CAPITAL LETTER V #
1170E ; 0076 0076 ; MA # ( 𑜎 → vv ) AHOM LETTER LA → LATIN SMALL LETTER V, LATIN SMALL LETTER V # →w→
1170F ; 0076 0076 ; MA # ( 𑜏 → vv ) AHOM LETTER SA → LATIN SMALL LETTER V, LATIN SMALL LETTER V # →w→
114C5 ; 0076 0076 0307 ; MA # ( 𑓅 → vv̇ ) TIRHUTA GVANG → LATIN SMALL LETTER V, LATIN SMALL LETTER V, COMBINING DOT ABOVE # →ẇ
047D ; 0076 0076 0486 0487 ; MA # ( ѽ → vv҆҇ ) CYRILLIC SMALL LETTER OMEGA WITH TITLO → LATIN SMALL LETTER V, LATIN SMALL LETTER V, COMBINING CYRILLIC PSILI PNEUMATA, COMBINING CYRILLIC POKRYTIE # →ѡ҆҇
047D ; 0076 0076 0483 ; MA # ( ѽ → vv҃ ) CYRILLIC SMALL LETTER OMEGA WITH TITLO → LATIN SMALL LETTER V, LATIN SMALL LETTER V, COMBINING CYRILLIC TITLO # →ѡ҃
114C5 ; 0076 0076 0307 ; MA # ( 𑓅 → vv̇ ) TIRHUTA GVANG → LATIN SMALL LETTER V, LATIN SMALL LETTER V, COMBINING DOT ABOVE # →ẇ
A761 ; 0076 0079 ; MA # ( ꝡ → vy ) LATIN SMALL LETTER VY → LATIN SMALL LETTER V, LATIN SMALL LETTER Y #
@ -3584,6 +3642,7 @@ A761 ; 0076 0079 ; MA # ( ꝡ → vy ) LATIN SMALL LETTER VY → LATIN SMALL LET
041B ; 0245 ; MA # ( Л → Ʌ ) CYRILLIC CAPITAL LETTER EL → LATIN CAPITAL LETTER TURNED V # →Λ→
2D37 ; 0245 ; MA # ( ⴷ → Ʌ ) TIFINAGH LETTER YAD → LATIN CAPITAL LETTER TURNED V #
1431 ; 0245 ; MA # ( ᐱ → Ʌ ) CANADIAN SYLLABICS PI → LATIN CAPITAL LETTER TURNED V #
A6CE ; 0245 ; MA # ( ꛎ → Ʌ ) BAMUM LETTER MI → LATIN CAPITAL LETTER TURNED V # →Λ→
A4E5 ; 0245 ; MA # ( ꓥ → Ʌ ) LISU LETTER NGA → LATIN CAPITAL LETTER TURNED V #
1028D ; 0245 ; MA # ( 𐊍 → Ʌ ) LYCIAN LETTER L → LATIN CAPITAL LETTER TURNED V # →Λ→
@ -3656,6 +3715,7 @@ FF38 ; 0058 ; MA # ( → X ) FULLWIDTH LATIN CAPITAL LETTER X → LATIN CAPI
1D61F ; 0058 ; MA # ( 𝘟 → X ) MATHEMATICAL SANS-SERIF ITALIC CAPITAL X → LATIN CAPITAL LETTER X #
1D653 ; 0058 ; MA # ( 𝙓 → X ) MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL X → LATIN CAPITAL LETTER X #
1D687 ; 0058 ; MA # ( 𝚇 → X ) MATHEMATICAL MONOSPACE CAPITAL X → LATIN CAPITAL LETTER X #
A7B3 ; 0058 ; MA # ( → X ) LATIN CAPITAL LETTER CHI → LATIN CAPITAL LETTER X #
03A7 ; 0058 ; MA # ( Χ → X ) GREEK CAPITAL LETTER CHI → LATIN CAPITAL LETTER X #
1D6BE ; 0058 ; MA # ( 𝚾 → X ) MATHEMATICAL BOLD CAPITAL CHI → LATIN CAPITAL LETTER X # →Χ→
1D6F8 ; 0058 ; MA # ( 𝛸 → X ) MATHEMATICAL ITALIC CAPITAL CHI → LATIN CAPITAL LETTER X # →Χ→
@ -3671,7 +3731,6 @@ A4EB ; 0058 ; MA # ( → X ) LISU LETTER SHA → LATIN CAPITAL LETTER X #
102B4 ; 0058 ; MA # ( 𐊴 → X ) CARIAN LETTER X → LATIN CAPITAL LETTER X #
10317 ; 0058 ; MA # ( 𐌗 → X ) OLD ITALIC LETTER EKS → LATIN CAPITAL LETTER X #
10527 ; 0058 ; MA # ( 𐔧 → X ) ELBASAN LETTER KHE → LATIN CAPITAL LETTER X #
A7B3 ; 0058 ; MA # ( → X ) LATIN CAPITAL LETTER CHI → LATIN CAPITAL LETTER X #
2A30 ; 0078 0307 ; MA #* ( ⨰ → ẋ ) MULTIPLICATION SIGN WITH DOT ABOVE → LATIN SMALL LETTER X, COMBINING DOT ABOVE # →×̇→
@ -3819,6 +3878,8 @@ A4DC ; 005A ; MA # ( → Z ) LISU LETTER DZA → LATIN CAPITAL LETTER Z #
03F7 ; 00DE ; MA # ( Ϸ → Þ ) GREEK CAPITAL LETTER SHO → LATIN CAPITAL LETTER THORN #
2079 ; A770 ; MA #* ( ⁹ → ꝰ ) SUPERSCRIPT NINE → MODIFIER LETTER US #
1D24 ; 01A8 ; MA # ( ᴤ → ƨ ) LATIN LETTER VOICED LARYNGEAL SPIRANT → LATIN SMALL LETTER TONE TWO #
03E9 ; 01A8 ; MA # ( ϩ → ƨ ) COPTIC SMALL LETTER HORI → LATIN SMALL LETTER TONE TWO #
A645 ; 01A8 ; MA # ( ꙅ → ƨ ) CYRILLIC SMALL LETTER REVERSED DZE → LATIN SMALL LETTER TONE TWO #
@ -3829,6 +3890,8 @@ A645 ; 01A8 ; MA # ( ꙅ → ƨ ) CYRILLIC SMALL LETTER REVERSED DZE → LATIN S
02E4 ; 02C1 ; MA # ( ˤ → ˁ ) MODIFIER LETTER SMALL REVERSED GLOTTAL STOP → MODIFIER LETTER REVERSED GLOTTAL STOP #
A6CD ; 02A1 ; MA # ( ꛍ → ʡ ) BAMUM LETTER LU → LATIN LETTER GLOTTAL STOP WITH STROKE #
2299 ; 0298 ; MA #* ( ⊙ → ʘ ) CIRCLED DOT OPERATOR → LATIN LETTER BILABIAL CLICK #
2609 ; 0298 ; MA #* ( ☉ → ʘ ) SUN → LATIN LETTER BILABIAL CLICK # →⊙→
2A00 ; 0298 ; MA #* ( ⨀ → ʘ ) N-ARY CIRCLED DOT OPERATOR → LATIN LETTER BILABIAL CLICK # →⊙→
@ -3917,6 +3980,7 @@ A668 ; 0298 ; MA # ( Ꙩ → ʘ ) CYRILLIC CAPITAL LETTER MONOCULAR O → LATIN
1D79F ; 03A0 ; MA # ( 𝞟 → Π ) MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL PI → GREEK CAPITAL LETTER PI #
2CA0 ; 03A0 ; MA # ( Ⲡ → Π ) COPTIC CAPITAL LETTER PI → GREEK CAPITAL LETTER PI #
041F ; 03A0 ; MA # ( П → Π ) CYRILLIC CAPITAL LETTER PE → GREEK CAPITAL LETTER PI #
A6DB ; 03A0 ; MA # ( ꛛ → Π ) BAMUM LETTER NA → GREEK CAPITAL LETTER PI #
102AD ; 03D8 ; MA # ( 𐊭 → Ϙ ) CARIAN LETTER T → GREEK LETTER ARCHAIC KOPPA #
10312 ; 03D8 ; MA # ( 𐌒 → Ϙ ) OLD ITALIC LETTER KU → GREEK LETTER ARCHAIC KOPPA #
@ -3936,6 +4000,7 @@ A668 ; 0298 ; MA # ( Ꙩ → ʘ ) CYRILLIC CAPITAL LETTER MONOCULAR O → LATIN
2CAA ; 03A6 ; MA # ( Ⲫ → Φ ) COPTIC CAPITAL LETTER FI → GREEK CAPITAL LETTER PHI #
0424 ; 03A6 ; MA # ( Ф → Φ ) CYRILLIC CAPITAL LETTER EF → GREEK CAPITAL LETTER PHI #
0553 ; 03A6 ; MA # ( Փ → Φ ) ARMENIAN CAPITAL LETTER PIWR → GREEK CAPITAL LETTER PHI #
1240 ; 03A6 ; MA # ( ቀ → Φ ) ETHIOPIC SYLLABLE QA → GREEK CAPITAL LETTER PHI # →Փ→
16F0 ; 03A6 ; MA # ( ᛰ → Φ ) RUNIC BELGTHOR SYMBOL → GREEK CAPITAL LETTER PHI #
102B3 ; 03A6 ; MA # ( 𐊳 → Φ ) CARIAN LETTER NN → GREEK CAPITAL LETTER PHI #
@ -3966,6 +4031,7 @@ AB55 ; 03C7 ; MA # ( ꭕ → χ ) LATIN SMALL LETTER CHI WITH LOW LEFT SERIF →
102B5 ; 03A8 ; MA # ( 𐊵 → Ψ ) CARIAN LETTER N → GREEK CAPITAL LETTER PSI #
2375 ; 03C9 ; MA #* ( ⍵ → ω ) APL FUNCTIONAL SYMBOL OMEGA → GREEK SMALL LETTER OMEGA #
A7B7 ; 03C9 ; MA # ( ꞷ → ω ) LATIN SMALL LETTER OMEGA → GREEK SMALL LETTER OMEGA #
1D6DA ; 03C9 ; MA # ( 𝛚 → ω ) MATHEMATICAL BOLD SMALL OMEGA → GREEK SMALL LETTER OMEGA #
1D714 ; 03C9 ; MA # ( 𝜔 → ω ) MATHEMATICAL ITALIC SMALL OMEGA → GREEK SMALL LETTER OMEGA #
1D74E ; 03C9 ; MA # ( 𝝎 → ω ) MATHEMATICAL BOLD ITALIC SMALL OMEGA → GREEK SMALL LETTER OMEGA #
@ -3973,7 +4039,6 @@ AB55 ; 03C7 ; MA # ( ꭕ → χ ) LATIN SMALL LETTER CHI WITH LOW LEFT SERIF →
1D7C2 ; 03C9 ; MA # ( 𝟂 → ω ) MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA → GREEK SMALL LETTER OMEGA #
2CB1 ; 03C9 ; MA # ( ⲱ → ω ) COPTIC SMALL LETTER OOU → GREEK SMALL LETTER OMEGA #
A64D ; 03C9 ; MA # ( ꙍ → ω ) CYRILLIC SMALL LETTER BROAD OMEGA → GREEK SMALL LETTER OMEGA # →ꞷ→
A7B7 ; 03C9 ; MA # ( ꞷ → ω ) LATIN SMALL LETTER OMEGA → GREEK SMALL LETTER OMEGA #
2126 ; 03A9 ; MA # ( Ω → Ω ) OHM SIGN → GREEK CAPITAL LETTER OMEGA #
1D6C0 ; 03A9 ; MA # ( 𝛀 → Ω ) MATHEMATICAL BOLD CAPITAL OMEGA → GREEK CAPITAL LETTER OMEGA #
@ -3997,7 +4062,9 @@ A7B7 ; 03C9 ; MA # ( ꞷ → ω ) LATIN SMALL LETTER OMEGA → GREEK SMALL LETTE
0496 ; 0416 0329 ; MA # ( Җ → Ж̩ ) CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER → CYRILLIC CAPITAL LETTER ZHE, COMBINING VERTICAL LINE BELOW #
1D20B ; 0418 ; MA #* ( 𝈋 → И ) GREEK VOCAL NOTATION SYMBOL-12 → CYRILLIC CAPITAL LETTER I # →Ͷ→
0376 ; 0418 ; MA # ( Ͷ → И ) GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA → CYRILLIC CAPITAL LETTER I #
A6A1 ; 0418 ; MA # ( ꚡ → И ) BAMUM LETTER KA → CYRILLIC CAPITAL LETTER I # →Ͷ→
10425 ; 0418 ; MA # ( 𐐥 → И ) DESERET CAPITAL LETTER ENG → CYRILLIC CAPITAL LETTER I #
0419 ; 040D ; MA # ( Й → Ѝ ) CYRILLIC CAPITAL LETTER SHORT I → CYRILLIC CAPITAL LETTER I WITH GRAVE #
@ -4014,10 +4081,13 @@ A7B7 ; 03C9 ; MA # ( ꞷ → ω ) LATIN SMALL LETTER OMEGA → GREEK SMALL LETTE
AB60 ; 0459 ; MA # ( ꭠ → љ ) LATIN SMALL LETTER SAKHA YAT → CYRILLIC SMALL LETTER LJE #
1D202 ; 04FE ; MA #* ( 𝈂 → Ӿ ) GREEK VOCAL NOTATION SYMBOL-3 → CYRILLIC CAPITAL LETTER HA WITH STROKE #
1D222 ; 0460 ; MA #* ( 𝈢 → Ѡ ) GREEK INSTRUMENTAL NOTATION SYMBOL-8 → CYRILLIC CAPITAL LETTER OMEGA #
13C7 ; 0460 ; MA # ( Ꮗ → Ѡ ) CHEROKEE LETTER QUE → CYRILLIC CAPITAL LETTER OMEGA #
15EF ; 0460 ; MA # ( ᗯ → Ѡ ) CANADIAN SYLLABICS CARRIER GU → CYRILLIC CAPITAL LETTER OMEGA #
047C ; 0460 0483 ; MA # ( Ѽ → Ѡ҃ ) CYRILLIC CAPITAL LETTER OMEGA WITH TITLO → CYRILLIC CAPITAL LETTER OMEGA, COMBINING CYRILLIC TITLO #
047C ; 0460 0486 0487 ; MA # ( Ѽ → Ѡ҆҇ ) CYRILLIC CAPITAL LETTER OMEGA WITH TITLO → CYRILLIC CAPITAL LETTER OMEGA, COMBINING CYRILLIC PSILI PNEUMATA, COMBINING CYRILLIC POKRYTIE #
18ED ; 0460 00B7 ; MA # ( ᣭ → Ѡ· ) CANADIAN SYLLABICS CARRIER GWU → CYRILLIC CAPITAL LETTER OMEGA, MIDDLE DOT # →ᗯᐧ→
@ -4033,10 +4103,18 @@ A7B6 ; A64C ; MA # ( Ꞷ → Ꙍ ) LATIN CAPITAL LETTER OMEGA → CYRILLIC CAPIT
2CBC ; 0428 ; MA # ( Ⲽ → Ш ) COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI → CYRILLIC CAPITAL LETTER SHA #
A650 ; 042A 006C ; MA # ( Ꙑ → Ъl ) CYRILLIC CAPITAL LETTER YERU WITH BACK YER → CYRILLIC CAPITAL LETTER HARD SIGN, LATIN SMALL LETTER L # →ЪІ→
2108 ; 042D ; MA #* ( ℈ → Э ) SCRUPLE → CYRILLIC CAPITAL LETTER E #
1F701 ; A658 ; MA #* ( 🜁 → Ꙙ ) ALCHEMICAL SYMBOL FOR AIR → CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS #
A992 ; 2C3F ; MA # ( ꦒ → ⰿ ) JAVANESE LETTER GA → GLAGOLITIC SMALL LETTER MYSLITE #
0587 ; 0565 0582 ; MA # ( և → եւ ) ARMENIAN SMALL LIGATURE ECH YIWN → ARMENIAN SMALL LETTER ECH, ARMENIAN SMALL LETTER YIWN #
1294 ; 0571 ; MA # ( ኔ → ձ ) ETHIOPIC SYLLABLE NEE → ARMENIAN SMALL LETTER JA #
FB14 ; 0574 0565 ; MA # ( ﬔ → մե ) ARMENIAN SMALL LIGATURE MEN ECH → ARMENIAN SMALL LETTER MEN, ARMENIAN SMALL LETTER ECH #
FB15 ; 0574 056B ; MA # ( ﬕ → մի ) ARMENIAN SMALL LIGATURE MEN INI → ARMENIAN SMALL LETTER MEN, ARMENIAN SMALL LETTER INI #
@ -4047,6 +4125,8 @@ FB13 ; 0574 0576 ; MA # ( ﬓ → մն ) ARMENIAN SMALL LIGATURE MEN NOW → ARM
2229 ; 0548 ; MA #* ( ∩ → Ո ) INTERSECTION → ARMENIAN CAPITAL LETTER VO # →ᑎ→
22C2 ; 0548 ; MA #* ( ⋂ → Ո ) N-ARY INTERSECTION → ARMENIAN CAPITAL LETTER VO # →∩→→ᑎ→
1D245 ; 0548 ; MA #* ( 𝉅 → Ո ) GREEK MUSICAL LEIMMA → ARMENIAN CAPITAL LETTER VO # →∩→→ᑎ→
1260 ; 0548 ; MA # ( በ → Ո ) ETHIOPIC SYLLABLE BA → ARMENIAN CAPITAL LETTER VO #
144E ; 0548 ; MA # ( ᑎ → Ո ) CANADIAN SYLLABICS TI → ARMENIAN CAPITAL LETTER VO #
A4F5 ; 0548 ; MA # ( ꓵ → Ո ) LISU LETTER UE → ARMENIAN CAPITAL LETTER VO # →∩→→ᑎ→
@ -5316,6 +5396,14 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL
205D ; 2D57 ; MA #* ( ⁝ → ⵗ ) TRICOLON → TIFINAGH LETTER TUAREG YAGH #
22EE ; 2D57 ; MA #* ( ⋮ → ⵗ ) VERTICAL ELLIPSIS → TIFINAGH LETTER TUAREG YAGH # →︙→→⁝→
0544 ; 1206 ; MA # ( Մ → ሆ ) ARMENIAN CAPITAL LETTER MEN → ETHIOPIC SYLLABLE HO #
054C ; 1261 ; MA # ( Ռ → ቡ ) ARMENIAN CAPITAL LETTER RA → ETHIOPIC SYLLABLE BU #
053B ; 12AE ; MA # ( Ի → ኮ ) ARMENIAN CAPITAL LETTER INI → ETHIOPIC SYLLABLE KO #
054A ; 1323 ; MA # ( Պ → ጣ ) ARMENIAN CAPITAL LETTER PEH → ETHIOPIC SYLLABLE THAA #
0906 ; 0905 093E ; MA # ( आ → अा ) DEVANAGARI LETTER AA → DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA #
0912 ; 0905 093E 0946 ; MA # ( ऒ → अाॆ ) DEVANAGARI LETTER SHORT O → DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA, DEVANAGARI VOWEL SIGN SHORT E # →अॊ→→आॆ→
@ -5340,6 +5428,8 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL
111DC ; A8FB ; MA # ( 𑇜 → ꣻ ) SHARADA HEADSTROKE → DEVANAGARI HEADSTROKE #
111CB ; 093A ; MA # ( 𑇋 → ऺ ) SHARADA VOWEL MODIFIER MARK → DEVANAGARI VOWEL SIGN OE #
0AC1 ; 0941 ; MA # ( ુ → ु ) GUJARATI VOWEL SIGN U → DEVANAGARI VOWEL SIGN U #
0AC2 ; 0942 ; MA # ( ૂ → ू ) GUJARATI VOWEL SIGN UU → DEVANAGARI VOWEL SIGN UU #
@ -5562,18 +5652,25 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL
0D6B ; 0D26 0D4D 0D30 ; MA # ( ൫ → ദ്ര ) MALAYALAM DIGIT FIVE → MALAYALAM LETTER DA, MALAYALAM SIGN VIRAMA, MALAYALAM LETTER RA #
0D79 ; 0D28 0D41 ; MA #* ( ൹ → നു ) MALAYALAM DATE MARK → MALAYALAM LETTER NA, MALAYALAM VOWEL SIGN U #
0D0C ; 0D28 0D41 ; MA # ( ഌ → നു ) MALAYALAM LETTER VOCALIC L → MALAYALAM LETTER NA, MALAYALAM VOWEL SIGN U #
0D19 ; 0D28 0D41 ; MA # ( ങ → നു ) MALAYALAM LETTER NGA → MALAYALAM LETTER NA, MALAYALAM VOWEL SIGN U # →ഌ→
0D6F ; 0D28 0D4D ; MA # ( ൯ → ന് ) MALAYALAM DIGIT NINE → MALAYALAM LETTER NA, MALAYALAM SIGN VIRAMA #
0D7B ; 0D28 0D4D ; MA # ( ൻ → ന് ) MALAYALAM LETTER CHILLU N → MALAYALAM LETTER NA, MALAYALAM SIGN VIRAMA # →൯→
0D8C ; 0D28 0D4D 0D28 ; MA # ( ඌ → ന്ന ) SINHALA LETTER UUYANNA → MALAYALAM LETTER NA, MALAYALAM SIGN VIRAMA, MALAYALAM LETTER NA #
0D6C ; 0D28 0D4D 0D28 ; MA # ( ൬ → ന്ന ) MALAYALAM DIGIT SIX → MALAYALAM LETTER NA, MALAYALAM SIGN VIRAMA, MALAYALAM LETTER NA #
0D5A ; 0D28 0D4D 0D2E ; MA #* ( ൚ → ന്മ ) MALAYALAM FRACTION THREE EIGHTIETHS → MALAYALAM LETTER NA, MALAYALAM SIGN VIRAMA, MALAYALAM LETTER MA #
0D31 ; 0D30 ; MA # ( റ → ര ) MALAYALAM LETTER RRA → MALAYALAM LETTER RA #
0D6A ; 0D30 0D4D ; MA # ( ൪ → ര് ) MALAYALAM DIGIT FOUR → MALAYALAM LETTER RA, MALAYALAM SIGN VIRAMA #
0D7C ; 0D30 0D4D ; MA # ( ർ → ര് ) MALAYALAM LETTER CHILLU RR → MALAYALAM LETTER RA, MALAYALAM SIGN VIRAMA # →൪→
0D6E ; 0D35 0D4D ; MA # ( ൮ → വ് ) MALAYALAM DIGIT EIGHT → MALAYALAM LETTER VA, MALAYALAM SIGN VIRAMA #
0D6E ; 0D35 0D4D 0D30 ; MA # ( ൮ → വ്ര ) MALAYALAM DIGIT EIGHT → MALAYALAM LETTER VA, MALAYALAM SIGN VIRAMA, MALAYALAM LETTER RA #
0D76 ; 0D39 0D4D 0D2E ; MA #* ( ൶ → ഹ്മ ) MALAYALAM FRACTION ONE SIXTEENTH → MALAYALAM LETTER HA, MALAYALAM SIGN VIRAMA, MALAYALAM LETTER MA #
0D42 ; 0D41 ; MA # ( ൂ → ു ) MALAYALAM VOWEL SIGN UU → MALAYALAM VOWEL SIGN U #
0D43 ; 0D41 ; MA # ( ൃ → ു ) MALAYALAM VOWEL SIGN VOCALIC R → MALAYALAM VOWEL SIGN U # →ൂ→
@ -5591,6 +5688,10 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL
115DB ; 11584 ; MA # ( 𑗛 → 𑖄 ) SIDDHAM LETTER ALTERNATE U → SIDDHAM LETTER U #
115DC ; 115B2 ; MA # ( 𑗜 → 𑖲 ) SIDDHAM VOWEL SIGN ALTERNATE U → SIDDHAM VOWEL SIGN U #
115DD ; 115B3 ; MA # ( 𑗝 → 𑖳 ) SIDDHAM VOWEL SIGN ALTERNATE UU → SIDDHAM VOWEL SIGN UU #
0E03 ; 0E02 ; MA # ( ฃ → ข ) THAI CHARACTER KHO KHUAT → THAI CHARACTER KHO KHAI #
0E14 ; 0E04 ; MA # ( ด → ค ) THAI CHARACTER DO DEK → THAI CHARACTER KHO KHWAI #
@ -5689,6 +5790,8 @@ AA56 ; AA23 ; MA # ( ꩖ → ꨣ ) CHAM DIGIT SIX → CHAM LETTER RA #
1B58 ; 1B28 ; MA # ( ᭘ → ᬨ ) BALINESE DIGIT EIGHT → BALINESE LETTER PA KAPAL #
A9A3 ; A99D ; MA # ( ꦣ → ꦝ ) JAVANESE LETTER DA MAHAPRANA → JAVANESE LETTER DDA #
1896 ; 185C ; MA # ( ᢖ → ᡜ ) MONGOLIAN LETTER ALI GALI ZA → MONGOLIAN LETTER TODO DZA #
1855 ; 1835 ; MA # ( ᡕ → ᠵ ) MONGOLIAN LETTER TODO YA → MONGOLIAN LETTER JA #
@ -5965,11 +6068,15 @@ A4ED ; 1660 ; MA # ( ꓭ → ᙠ ) LISU LETTER GHA → CANADIAN SYLLABICS CARRIE
02E2 ; 18F5 ; MA # ( ˢ → ᣵ ) MODIFIER LETTER SMALL S → CANADIAN SYLLABICS CARRIER DENTAL S #
A6B0 ; 16B9 ; MA # ( ꚰ → ᚹ ) BAMUM LETTER TAA → RUNIC LETTER WUNJO WYNN W #
16E1 ; 16BC ; MA # ( ᛡ → ᚼ ) RUNIC LETTER IOR → RUNIC LETTER LONG-BRANCH-HAGALL H #
237F ; 16BD ; MA #* ( ⍿ → ᚽ ) VERTICAL LINE WITH MIDDLE DOT → RUNIC LETTER SHORT-TWIG-HAGALL H # →ᛂ→
16C2 ; 16BD ; MA # ( ᛂ → ᚽ ) RUNIC LETTER E → RUNIC LETTER SHORT-TWIG-HAGALL H #
1D23F ; 16CB ; MA #* ( 𝈿 → ᛋ ) GREEK INSTRUMENTAL NOTATION SYMBOL-52 → RUNIC LETTER SIGEL LONG-BRANCH-SOL S #
2191 ; 16CF ; MA #* ( ↑ → ᛏ ) UPWARDS ARROW → RUNIC LETTER TIWAZ TIR TYR T #
21BF ; 16D0 ; MA #* ( ↿ → ᛐ ) UPWARDS HARPOON WITH BARB LEFTWARDS → RUNIC LETTER SHORT-TWIG-TYR T #
@ -6671,6 +6778,7 @@ A4A7 ; A458 ; MA #* ( ꒧ → ꑘ ) YI RADICAL NYOP → YI SYLLABLE NYOP #
22A5 ; A4D5 ; MA #* ( ⊥ → ꓕ ) UP TACK → LISU LETTER THA #
27C2 ; A4D5 ; MA #* ( ⟂ → ꓕ ) PERPENDICULAR → LISU LETTER THA # →⊥→
1D21C ; A4D5 ; MA #* ( 𝈜 → ꓕ ) GREEK VOCAL NOTATION SYMBOL-54 → LISU LETTER THA # →Ʇ→
A7B1 ; A4D5 ; MA # ( Ʇ → ꓕ ) LATIN CAPITAL LETTER TURNED T → LISU LETTER THA #
A79E ; A4E4 ; MA # ( Ꞟ → ꓤ ) LATIN CAPITAL LETTER VOLAPUK UE → LISU LETTER ZA #
@ -6678,13 +6786,17 @@ A79E ; A4E4 ; MA # ( Ꞟ → ꓤ ) LATIN CAPITAL LETTER VOLAPUK UE → LISU LETT
2141 ; A4E8 ; MA #* ( ⅁ → ꓨ ) TURNED SANS-SERIF CAPITAL G → LISU LETTER HHA #
2142 ; A4F6 ; MA #* ( ⅂ → ꓶ ) TURNED SANS-SERIF CAPITAL L → LISU LETTER UH #
1D215 ; A4F6 ; MA #* ( 𝈕 → ꓶ ) GREEK VOCAL NOTATION SYMBOL-22 → LISU LETTER UH # →⅂→
1D22B ; A4F6 ; MA #* ( 𝈫 → ꓶ ) GREEK INSTRUMENTAL NOTATION SYMBOL-24 → LISU LETTER UH # →𝈕→→⅂→
10411 ; A4F6 ; MA # ( 𐐑 → ꓶ ) DESERET CAPITAL LETTER PEE → LISU LETTER UH # →⅂→
2295 ; 102A8 ; MA #* ( ⊕ → 𐊨 ) CIRCLED PLUS → CARIAN LETTER Q #
2A01 ; 102A8 ; MA #* ( ⨁ → 𐊨 ) N-ARY CIRCLED PLUS OPERATOR → CARIAN LETTER Q # →⊕→
1F728 ; 102A8 ; MA #* ( 🜨 → 𐊨 ) ALCHEMICAL SYMBOL FOR VERDIGRIS → CARIAN LETTER Q # →⊕→
A69A ; 102A8 ; MA # ( Ꚛ → 𐊨 ) CYRILLIC CAPITAL LETTER CROSSED O → CARIAN LETTER Q # →⊕→
25BD ; 102BC ; MA #* ( ▽ → 𐊼 ) WHITE DOWN-POINTING TRIANGLE → CARIAN LETTER K #
1D214 ; 102BC ; MA #* ( 𝈔 → 𐊼 ) GREEK VOCAL NOTATION SYMBOL-21 → CARIAN LETTER K # →▽→
1F704 ; 102BC ; MA #* ( 🜄 → 𐊼 ) ALCHEMICAL SYMBOL FOR WATER → CARIAN LETTER K # →▽→
29D6 ; 102C0 ; MA #* ( ⧖ → 𐋀 ) WHITE HOURGLASS → CARIAN LETTER G #
@ -6766,6 +6878,7 @@ F95E ; 4E39 ; MA # ( 丹 → 丹 ) CJK COMPATIBILITY IDEOGRAPH-F95E → CJK UNIF
2E83 ; 4E5A ; MA #* ( ⺃ → 乚 ) CJK RADICAL SECOND TWO → CJK UNIFIED IDEOGRAPH-4E5A #
31D6 ; 4E5B ; MA #* ( ㇖ → 乛 ) CJK STROKE HG → CJK UNIFIED IDEOGRAPH-4E5B #
2E82 ; 4E5B ; MA #* ( ⺂ → 乛 ) CJK RADICAL SECOND ONE → CJK UNIFIED IDEOGRAPH-4E5B # →㇖→
2EF2 ; 4E80 ; MA #* ( ⻲ → 亀 ) CJK RADICAL J-SIMPLIFIED TURTLE → CJK UNIFIED IDEOGRAPH-4E80 #
@ -6776,6 +6889,7 @@ F91B ; 4E82 ; MA # ( 亂 → 亂 ) CJK COMPATIBILITY IDEOGRAPH-F91B → CJK UNIF
F9BA ; 4E86 ; MA # ( 了 → 了 ) CJK COMPATIBILITY IDEOGRAPH-F9BA → CJK UNIFIED IDEOGRAPH-4E86 #
30CB ; 4E8C ; MA # ( ニ → 二 ) KATAKANA LETTER NI → CJK UNIFIED IDEOGRAPH-4E8C #
2F06 ; 4E8C ; MA #* ( ⼆ → 二 ) KANGXI RADICAL TWO → CJK UNIFIED IDEOGRAPH-4E8C #
2F803 ; 20122 ; MA # ( 𠄢 → 𠄢 ) CJK COMPATIBILITY IDEOGRAPH-2F803 → CJK UNIFIED IDEOGRAPH-20122 #
@ -6786,6 +6900,7 @@ F977 ; 4EAE ; MA # ( 亮 → 亮 ) CJK COMPATIBILITY IDEOGRAPH-F977 → CJK UNIF
2F08 ; 4EBA ; MA #* ( ⼈ → 人 ) KANGXI RADICAL MAN → CJK UNIFIED IDEOGRAPH-4EBA #
30A4 ; 4EBB ; MA # ( イ → 亻 ) KATAKANA LETTER I → CJK UNIFIED IDEOGRAPH-4EBB # →⺅→
2E85 ; 4EBB ; MA #* ( ⺅ → 亻 ) CJK RADICAL PERSON → CJK UNIFIED IDEOGRAPH-4EBB #
F9FD ; 4EC0 ; MA # ( 什 → 什 ) CJK COMPATIBILITY IDEOGRAPH-F9FD → CJK UNIFIED IDEOGRAPH-4EC0 #
@ -6851,6 +6966,7 @@ FA72 ; 5168 ; MA # ( 全 → 全 ) CJK COMPATIBILITY IDEOGRAPH-FA72 → CJK UNIF
F978 ; 5169 ; MA # ( 兩 → 兩 ) CJK COMPATIBILITY IDEOGRAPH-F978 → CJK UNIFIED IDEOGRAPH-5169 #
30CF ; 516B ; MA # ( ハ → 八 ) KATAKANA LETTER HA → CJK UNIFIED IDEOGRAPH-516B #
2F0B ; 516B ; MA #* ( ⼋ → 八 ) KANGXI RADICAL EIGHT → CJK UNIFIED IDEOGRAPH-516B #
F9D1 ; 516D ; MA # ( 六 → 六 ) CJK COMPATIBILITY IDEOGRAPH-F9D1 → CJK UNIFIED IDEOGRAPH-516D #
@ -6938,6 +7054,7 @@ F9C7 ; 5289 ; MA # ( 劉 → 劉 ) CJK COMPATIBILITY IDEOGRAPH-F9C7 → CJK UNIF
2F9D9 ; 20804 ; MA # ( 𠠄 → 𠠄 ) CJK COMPATIBILITY IDEOGRAPH-2F9D9 → CJK UNIFIED IDEOGRAPH-20804 #
30AB ; 529B ; MA # ( カ → 力 ) KATAKANA LETTER KA → CJK UNIFIED IDEOGRAPH-529B # →⼒→
F98A ; 529B ; MA # ( 力 → 力 ) CJK COMPATIBILITY IDEOGRAPH-F98A → CJK UNIFIED IDEOGRAPH-529B #
2F12 ; 529B ; MA #* ( ⼒ → 力 ) KANGXI RADICAL POWER → CJK UNIFIED IDEOGRAPH-529B #
@ -6991,11 +7108,16 @@ F9EB ; 533F ; MA # ( 匿 → 匿 ) CJK COMPATIBILITY IDEOGRAPH-F9EB → CJK UNIF
2F82C ; 5349 ; MA # ( 卉 → 卉 ) CJK COMPATIBILITY IDEOGRAPH-2F82C → CJK UNIFIED IDEOGRAPH-5349 #
0FD6 ; 534D ; MA #* ( ࿖ → 卍 ) LEFT-FACING SVASTI SIGN → CJK UNIFIED IDEOGRAPH-534D #
0FD5 ; 5350 ; MA #* ( ࿕ → 卐 ) RIGHT-FACING SVASTI SIGN → CJK UNIFIED IDEOGRAPH-5350 #
FA35 ; 5351 ; MA # ( 卑 → 卑 ) CJK COMPATIBILITY IDEOGRAPH-FA35 → CJK UNIFIED IDEOGRAPH-5351 #
2F82D ; 5351 ; MA # ( 卑 → 卑 ) CJK COMPATIBILITY IDEOGRAPH-2F82D → CJK UNIFIED IDEOGRAPH-5351 #
2F82E ; 535A ; MA # ( 博 → 博 ) CJK COMPATIBILITY IDEOGRAPH-2F82E → CJK UNIFIED IDEOGRAPH-535A #
30C8 ; 535C ; MA # ( ト → 卜 ) KATAKANA LETTER TO → CJK UNIFIED IDEOGRAPH-535C # →⼘→
2F18 ; 535C ; MA #* ( ⼘ → 卜 ) KANGXI RADICAL DIVINATION → CJK UNIFIED IDEOGRAPH-535C #
2F19 ; 5369 ; MA #* ( ⼙ → 卩 ) KANGXI RADICAL SEAL → CJK UNIFIED IDEOGRAPH-5369 #
@ -7028,7 +7150,10 @@ F96B ; 53C3 ; MA # ( 參 → 參 ) CJK COMPATIBILITY IDEOGRAPH-F96B → CJK UNIF
2F838 ; 20B63 ; MA # ( 𠭣 → 𠭣 ) CJK COMPATIBILITY IDEOGRAPH-2F838 → CJK UNIFIED IDEOGRAPH-20B63 #
30ED ; 53E3 ; MA # ( ロ → 口 ) KATAKANA LETTER RO → CJK UNIFIED IDEOGRAPH-53E3 # →⼞→→⼝→
2F1D ; 53E3 ; MA #* ( ⼝ → 口 ) KANGXI RADICAL MOUTH → CJK UNIFIED IDEOGRAPH-53E3 #
56D7 ; 53E3 ; MA # ( 囗 → 口 ) CJK UNIFIED IDEOGRAPH-56D7 → CJK UNIFIED IDEOGRAPH-53E3 # →⼞→→⼝→
2F1E ; 53E3 ; MA #* ( ⼞ → 口 ) KANGXI RADICAL ENCLOSURE → CJK UNIFIED IDEOGRAPH-53E3 # →⼝→
F906 ; 53E5 ; MA # ( 句 → 句 ) CJK COMPATIBILITY IDEOGRAPH-F906 → CJK UNIFIED IDEOGRAPH-53E5 #
@ -7099,8 +7224,6 @@ FA37 ; 5606 ; MA # ( 嘆 → 嘆 ) CJK COMPATIBILITY IDEOGRAPH-FA37 → CJK UNIF
FA38 ; 5668 ; MA # ( 器 → 器 ) CJK COMPATIBILITY IDEOGRAPH-FA38 → CJK UNIFIED IDEOGRAPH-5668 #
2F1E ; 56D7 ; MA #* ( ⼞ → 囗 ) KANGXI RADICAL ENCLOSURE → CJK UNIFIED IDEOGRAPH-56D7 #
F9A9 ; 56F9 ; MA # ( 囹 → 囹 ) CJK COMPATIBILITY IDEOGRAPH-F9A9 → CJK UNIFIED IDEOGRAPH-56F9 #
2F84B ; 5716 ; MA # ( 圖 → 圖 ) CJK COMPATIBILITY IDEOGRAPH-2F84B → CJK UNIFIED IDEOGRAPH-5716 #
@ -7108,6 +7231,8 @@ F9A9 ; 56F9 ; MA # ( 囹 → 囹 ) CJK COMPATIBILITY IDEOGRAPH-F9A9 → CJK UNIF
2F84D ; 5717 ; MA # ( 圗 → 圗 ) CJK COMPATIBILITY IDEOGRAPH-2F84D → CJK UNIFIED IDEOGRAPH-5717 #
2F1F ; 571F ; MA #* ( ⼟ → 土 ) KANGXI RADICAL EARTH → CJK UNIFIED IDEOGRAPH-571F #
58EB ; 571F ; MA # ( 士 → 土 ) CJK UNIFIED IDEOGRAPH-58EB → CJK UNIFIED IDEOGRAPH-571F # →⼠→→⼟→
2F20 ; 571F ; MA #* ( ⼠ → 土 ) KANGXI RADICAL SCHOLAR → CJK UNIFIED IDEOGRAPH-571F # →⼟→
2F855 ; 578B ; MA # ( 型 → 型 ) CJK COMPATIBILITY IDEOGRAPH-2F855 → CJK UNIFIED IDEOGRAPH-578B #
@ -7144,8 +7269,6 @@ F942 ; 58DF ; MA # ( 壟 → 壟 ) CJK COMPATIBILITY IDEOGRAPH-F942 → CJK UNIF
2F859 ; 214E4 ; MA # ( 𡓤 → 𡓤 ) CJK COMPATIBILITY IDEOGRAPH-2F859 → CJK UNIFIED IDEOGRAPH-214E4 #
2F20 ; 58EB ; MA #* ( ⼠ → 士 ) KANGXI RADICAL SCHOLAR → CJK UNIFIED IDEOGRAPH-58EB #
2F851 ; 58EE ; MA # ( 壮 → 壮 ) CJK COMPATIBILITY IDEOGRAPH-2F851 → CJK UNIFIED IDEOGRAPH-58EE #
2F85A ; 58F2 ; MA # ( 売 → 売 ) CJK COMPATIBILITY IDEOGRAPH-2F85A → CJK UNIFIED IDEOGRAPH-58F2 #
@ -7158,6 +7281,7 @@ F942 ; 58DF ; MA # ( 壟 → 壟 ) CJK COMPATIBILITY IDEOGRAPH-F942 → CJK UNIF
2F22 ; 590A ; MA #* ( ⼢ → 夊 ) KANGXI RADICAL GO SLOWLY → CJK UNIFIED IDEOGRAPH-590A #
30BF ; 5915 ; MA # ( タ → 夕 ) KATAKANA LETTER TA → CJK UNIFIED IDEOGRAPH-5915 # →⼣→
2F23 ; 5915 ; MA #* ( ⼣ → 夕 ) KANGXI RADICAL EVENING → CJK UNIFIED IDEOGRAPH-5915 #
2F85D ; 591A ; MA # ( 多 → 多 ) CJK COMPATIBILITY IDEOGRAPH-2F85D → CJK UNIFIED IDEOGRAPH-591A #
@ -7296,6 +7420,7 @@ F9AB ; 5DBA ; MA # ( 嶺 → 嶺 ) CJK COMPATIBILITY IDEOGRAPH-F9AB → CJK UNIF
2F882 ; 5DE2 ; MA # ( 巢 → 巢 ) CJK COMPATIBILITY IDEOGRAPH-2F882 → CJK UNIFIED IDEOGRAPH-5DE2 #
30A8 ; 5DE5 ; MA # ( エ → 工 ) KATAKANA LETTER E → CJK UNIFIED IDEOGRAPH-5DE5 # →⼯→
2F2F ; 5DE5 ; MA #* ( ⼯ → 工 ) KANGXI RADICAL WORK → CJK UNIFIED IDEOGRAPH-5DE5 #
2F30 ; 5DF1 ; MA #* ( ⼰ → 己 ) KANGXI RADICAL ONESELF → CJK UNIFIED IDEOGRAPH-5DF1 #
@ -9268,7 +9393,19 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF
2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 #
111DB ; A8FC ; MA #* ( 𑇛 → ꣼ ) SHARADA SIGN SIDDHAM → DEVANAGARI SIGN SIDDHAM #
23FC ; 23FB ; MA #* ( ⏼ → ⏻ ) POWER ON-OFF SYMBOL → POWER SYMBOL #
# total: 6069
11413 ; 11434 11442 11412 ; MA # ( 𑐓 → 𑐴𑑂𑐒 ) NEWA LETTER NGHA → NEWA LETTER HA, NEWA SIGN VIRAMA, NEWA LETTER NGA #
11419 ; 11434 11442 11418 ; MA # ( 𑐙 → 𑐴𑑂𑐘 ) NEWA LETTER NYHA → NEWA LETTER HA, NEWA SIGN VIRAMA, NEWA LETTER NYA #
11424 ; 11434 11442 11423 ; MA # ( 𑐤 → 𑐴𑑂𑐣 ) NEWA LETTER NHA → NEWA LETTER HA, NEWA SIGN VIRAMA, NEWA LETTER NA #
1142A ; 11434 11442 11429 ; MA # ( 𑐪 → 𑐴𑑂𑐩 ) NEWA LETTER MHA → NEWA LETTER HA, NEWA SIGN VIRAMA, NEWA LETTER MA #
1142D ; 11434 11442 1142C ; MA # ( 𑐭 → 𑐴𑑂𑐬 ) NEWA LETTER RHA → NEWA LETTER HA, NEWA SIGN VIRAMA, NEWA LETTER RA #
1142F ; 11434 11442 1142E ; MA # ( 𑐯 → 𑐴𑑂𑐮 ) NEWA LETTER LHA → NEWA LETTER HA, NEWA SIGN VIRAMA, NEWA LETTER LA #
# total: 6167

View file

@ -7,7 +7,7 @@
#
# Complete data for Unicode NFC normalization.
* Unicode 8.0.0
* Unicode 9.0.0
# Canonical_Combining_Class (ccc) values
0300..0314:230
@ -129,6 +129,7 @@
0825..0827:230
0829..082D:230
0859..085B:220
08D4..08E1:230
08E3:220
08E4..08E5:230
08E6:220
@ -233,6 +234,7 @@
1DCF:220
1DD0:202
1DD1..1DF5:230
1DFB:230
1DFC:233
1DFD:220
1DFE:230
@ -310,6 +312,8 @@ FE2E..FE2F:230
1134D:9
11366..1136C:230
11370..11374:230
11442:9
11446:7
114C2:9
114C3:7
115BF:9
@ -318,6 +322,7 @@ FE2E..FE2F:230
116B6:9
116B7:7
1172B:9
11C3F:9
16AF0..16AF4:1
16B30..16B36:230
1BC9E:1
@ -330,7 +335,14 @@ FE2E..FE2F:230
1D18A..1D18B:220
1D1AA..1D1AD:230
1D242..1D244:230
1E000..1E006:230
1E008..1E018:230
1E01B..1E021:230
1E023..1E024:230
1E026..1E02A:230
1E8D0..1E8D6:220
1E944..1E949:230
1E94A:7
# Canonical decomposition mappings
00C0=0041 0300

View file

@ -11,7 +11,7 @@
# to NFKC one-way mappings.
# Use this file as the second gennorm2 input file after nfc.txt.
* Unicode 8.0.0
* Unicode 9.0.0
00A0>0020
00A8>0020 0308
@ -3675,6 +3675,7 @@ FFEE>25CB
1F238>7533
1F239>5272
1F23A>55B6
1F23B>914D
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015

View file

@ -12,7 +12,7 @@
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
* Unicode 8.0.0
* Unicode 9.0.0
0041>0061
0042>0062
@ -640,6 +640,14 @@
13FD>13F5
17B4..17B5>
180B..180E>
1C80>0432
1C81>0434
1C82>043E
1C83>0441
1C84..1C85>0442
1C86>044A
1C87>0463
1C88>A64B
1D2C>0061
1D2D>00E6
1D2E>0062
@ -2388,6 +2396,7 @@ A7AA>0266
A7AB>025C
A7AC>0261
A7AD>026C
A7AE>026A
A7B0>029E
A7B1>0287
A7B2>029D
@ -3856,6 +3865,42 @@ FFF0..FFF8>
10425>1044D
10426>1044E
10427>1044F
104B0>104D8
104B1>104D9
104B2>104DA
104B3>104DB
104B4>104DC
104B5>104DD
104B6>104DE
104B7>104DF
104B8>104E0
104B9>104E1
104BA>104E2
104BB>104E3
104BC>104E4
104BD>104E5
104BE>104E6
104BF>104E7
104C0>104E8
104C1>104E9
104C2>104EA
104C3>104EB
104C4>104EC
104C5>104ED
104C6>104EE
104C7>104EF
104C8>104F0
104C9>104F1
104CA>104F2
104CB>104F3
104CC>104F4
104CD>104F5
104CE>104F6
104CF>104F7
104D0>104F8
104D1>104F9
104D2>104FA
104D3>104FB
10C80>10CC0
10C81>10CC1
10C82>10CC2
@ -4944,6 +4989,40 @@ FFF0..FFF8>
1D7FD>0037
1D7FE>0038
1D7FF>0039
1E900>1E922
1E901>1E923
1E902>1E924
1E903>1E925
1E904>1E926
1E905>1E927
1E906>1E928
1E907>1E929
1E908>1E92A
1E909>1E92B
1E90A>1E92C
1E90B>1E92D
1E90C>1E92E
1E90D>1E92F
1E90E>1E930
1E90F>1E931
1E910>1E932
1E911>1E933
1E912>1E934
1E913>1E935
1E914>1E936
1E915>1E937
1E916>1E938
1E917>1E939
1E918>1E93A
1E919>1E93B
1E91A>1E93C
1E91B>1E93D
1E91C>1E93E
1E91D>1E93F
1E91E>1E940
1E91F>1E941
1E920>1E942
1E921>1E943
1EE00>0627
1EE01>0628
1EE02>062C
@ -5208,6 +5287,7 @@ FFF0..FFF8>
1F238>7533
1F239>5272
1F23A>55B6
1F23B>914D
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015

View file

@ -1,10 +1,11 @@
# Original file:
# IdnaMappingTable-8.0.0.txt
# Date: 2015-05-02 12:42:14 GMT [MD]
# IdnaMappingTable-9.0.0.txt
# Date: 2016-03-11, 09:01:52 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode IDNA Compatible Preprocessing (UTS #46)
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr46/
# ================================================
@ -986,7 +987,11 @@
# 08A2..08AC valid # 6.1 ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH
# 08AD..08B2 valid # 7.0 ARABIC LETTER LOW ALEF..ARABIC LETTER ZAIN WITH INVERTED V ABOVE
# 08B3..08B4 valid # 8.0 ARABIC LETTER AIN WITH THREE DOTS BELOW..ARABIC LETTER KAF WITH DOT BELOW
08B5..08E2 >FFFD # NA <reserved-08B5>..<reserved-08E2>
08B5 >FFFD # NA <reserved-08B5>
# 08B6..08BD valid # 9.0 ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
08BE..08D3 >FFFD # NA <reserved-08BE>..<reserved-08D3>
# 08D4..08E1 valid # 9.0 ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA
08E2 >FFFD # 9.0 ARABIC DISPUTED END OF AYAH
# 08E3 valid # 8.0 ARABIC TURNED DAMMA BELOW
# 08E4..08FE valid # 6.1 ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT
# 08FF valid # 7.0 ARABIC MARK SIDEWAYS NOON GHUNNA
@ -1232,7 +1237,7 @@
# 0C66..0C6F valid # 1.1 TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0C70..0C77 >FFFD # NA <reserved-0C70>..<reserved-0C77>
# 0C78..0C7F valid # 5.1 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU SIGN TUUMU
0C80 >FFFD # NA <reserved-0C80>
# 0C80 valid # 9.0 KANNADA SIGN SPACING CANDRABINDU
# 0C81 valid # 7.0 KANNADA SIGN CANDRABINDU
# 0C82..0C83 valid # 1.1 KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
0C84 >FFFD # NA <reserved-0C84>
@ -1284,16 +1289,18 @@
0D49 >FFFD # NA <reserved-0D49>
# 0D4A..0D4D valid # 1.1 MALAYALAM VOWEL SIGN O..MALAYALAM SIGN VIRAMA
# 0D4E valid # 6.0 MALAYALAM LETTER DOT REPH
0D4F..0D56 >FFFD # NA <reserved-0D4F>..<reserved-0D56>
# 0D4F valid # 9.0 MALAYALAM SIGN PARA
0D50..0D53 >FFFD # NA <reserved-0D50>..<reserved-0D53>
# 0D54..0D56 valid # 9.0 MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
# 0D57 valid # 1.1 MALAYALAM AU LENGTH MARK
0D58..0D5E >FFFD # NA <reserved-0D58>..<reserved-0D5E>
# 0D58..0D5E valid # 9.0 MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
# 0D5F valid # 8.0 MALAYALAM LETTER ARCHAIC II
# 0D60..0D61 valid # 1.1 MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
# 0D62..0D63 valid # 5.1 MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D64..0D65 >FFFD # NA <reserved-0D64>..<reserved-0D65>
# 0D66..0D6F valid # 1.1 MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
# 0D70..0D75 valid # 5.1 MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
0D76..0D78 >FFFD # NA <reserved-0D76>..<reserved-0D78>
# 0D76..0D78 valid # 9.0 MALAYALAM FRACTION ONE SIXTEENTH..MALAYALAM FRACTION THREE SIXTEENTHS
# 0D79 valid # 5.1 MALAYALAM DATE MARK
# 0D7A..0D7F valid # 5.1 MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
0D80..0D81 >FFFD # NA <reserved-0D80>..<reserved-0D81>
@ -1672,7 +1679,15 @@
1C4A..1C4C >FFFD # NA <reserved-1C4A>..<reserved-1C4C>
# 1C4D..1C7D valid # 5.1 LEPCHA LETTER TTA..OL CHIKI AHAD
# 1C7E..1C7F valid # 5.1 OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
1C80..1CBF >FFFD # NA <reserved-1C80>..<reserved-1CBF>
1C80 >0432 # 9.0 CYRILLIC SMALL LETTER ROUNDED VE
1C81 >0434 # 9.0 CYRILLIC SMALL LETTER LONG-LEGGED DE
1C82 >043E # 9.0 CYRILLIC SMALL LETTER NARROW O
1C83 >0441 # 9.0 CYRILLIC SMALL LETTER WIDE ES
1C84..1C85 >0442 # 9.0 CYRILLIC SMALL LETTER TALL TE..CYRILLIC SMALL LETTER THREE-LEGGED TE
1C86 >044A # 9.0 CYRILLIC SMALL LETTER TALL HARD SIGN
1C87 >0463 # 9.0 CYRILLIC SMALL LETTER TALL YAT
1C88 >A64B # 9.0 CYRILLIC SMALL LETTER UNBLENDED UK
1C89..1CBF >FFFD # NA <reserved-1C89>..<reserved-1CBF>
# 1CC0..1CC7 valid # 6.1 SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
1CC8..1CCF >FFFD # NA <reserved-1CC8>..<reserved-1CCF>
# 1CD0..1CD2 valid # 5.2 VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
@ -1791,7 +1806,8 @@
# 1DC4..1DCA valid # 5.0 COMBINING MACRON-ACUTE..COMBINING LATIN SMALL LETTER R BELOW
# 1DCB..1DE6 valid # 5.1 COMBINING BREVE-MACRON..COMBINING LATIN SMALL LETTER Z
# 1DE7..1DF5 valid # 7.0 COMBINING LATIN SMALL LETTER ALPHA..COMBINING UP TACK ABOVE
1DF6..1DFB >FFFD # NA <reserved-1DF6>..<reserved-1DFB>
1DF6..1DFA >FFFD # NA <reserved-1DF6>..<reserved-1DFA>
# 1DFB valid # 9.0 COMBINING DELETION MARK
# 1DFC valid # 6.0 COMBINING DOUBLE INVERTED BREVE BELOW
# 1DFD valid # 5.2 COMBINING ALMOST EQUAL TO BELOW
# 1DFE..1DFF valid # 5.0 COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
@ -2497,7 +2513,8 @@
# 23E8 valid # 5.2 DECIMAL EXPONENT SYMBOL
# 23E9..23F3 valid # 6.0 BLACK RIGHT-POINTING DOUBLE TRIANGLE..HOURGLASS WITH FLOWING SAND
# 23F4..23FA valid # 7.0 BLACK MEDIUM LEFT-POINTING TRIANGLE..BLACK CIRCLE FOR RECORD
23FB..23FF >FFFD # NA <reserved-23FB>..<reserved-23FF>
# 23FB..23FE valid # 9.0 POWER SYMBOL..POWER SLEEP SYMBOL
23FF >FFFD # NA <reserved-23FF>
# 2400..2424 valid # 1.1 SYMBOL FOR NULL..SYMBOL FOR NEWLINE
# 2425..2426 valid # 3.0 SYMBOL FOR DELETE FORM TWO..SYMBOL FOR SUBSTITUTE FORM TWO
2427..243F >FFFD # NA <reserved-2427>..<reserved-243F>
@ -2949,7 +2966,8 @@
# 2E31 valid # 5.2 WORD SEPARATOR MIDDLE DOT
# 2E32..2E3B valid # 6.1 TURNED COMMA..THREE-EM DASH
# 2E3C..2E42 valid # 7.0 STENOGRAPHIC FULL STOP..DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E43..2E7F >FFFD # NA <reserved-2E43>..<reserved-2E7F>
# 2E43..2E44 valid # 9.0 DASH WITH LEFT UPTURN..DOUBLE SUSPENSION MARK
2E45..2E7F >FFFD # NA <reserved-2E45>..<reserved-2E7F>
# 2E80..2E99 valid # 3.0 CJK RADICAL REPEAT..CJK RADICAL RAP
2E9A >FFFD # NA <reserved-2E9A>
# 2E9B..2E9E valid # 3.0 CJK RADICAL CHOKE..CJK RADICAL DEATH
@ -4078,7 +4096,8 @@ A7AA >0266 # 6.1 LATIN CAPITAL LETTER H WITH HOOK
A7AB >025C # 7.0 LATIN CAPITAL LETTER REVERSED OPEN E
A7AC >0261 # 7.0 LATIN CAPITAL LETTER SCRIPT G
A7AD >026C # 7.0 LATIN CAPITAL LETTER L WITH BELT
A7AE..A7AF >FFFD # NA <reserved-A7AE>..<reserved-A7AF>
A7AE >026A # 9.0 LATIN CAPITAL LETTER SMALL CAPITAL I
A7AF >FFFD # NA <reserved-A7AF>
A7B0 >029E # 7.0 LATIN CAPITAL LETTER TURNED K
A7B1 >0287 # 7.0 LATIN CAPITAL LETTER TURNED T
A7B2 >029D # 8.0 LATIN CAPITAL LETTER J WITH CROSSED-TAIL
@ -4102,7 +4121,8 @@ A83A..A83F >FFFD # NA <reserved-A83A>..<reserved-A83F>
# A874..A877 valid # 5.0 PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
A878..A87F >FFFD # NA <reserved-A878>..<reserved-A87F>
# A880..A8C4 valid # 5.1 SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VIRAMA
A8C5..A8CD >FFFD # NA <reserved-A8C5>..<reserved-A8CD>
# A8C5 valid # 9.0 SAURASHTRA SIGN CANDRABINDU
A8C6..A8CD >FFFD # NA <reserved-A8C6>..<reserved-A8CD>
# A8CE..A8CF valid # 5.1 SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
# A8D0..A8D9 valid # 5.1 SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
A8DA..A8DF >FFFD # NA <reserved-A8DA>..<reserved-A8DF>
@ -5665,7 +5685,8 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 10137..1013Fvalid # 4.0 AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
# 10140..1018Avalid # 4.1 GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ZERO SIGN
# 1018B..1018Cvalid # 7.0 GREEK ONE QUARTER SIGN..GREEK SINUSOID SIGN
1018D..1018F >FFFD # NA <reserved-1018D>..<reserved-1018F>
# 1018D..1018Evalid # 9.0 GREEK INDICTION SIGN..NOMISMA SIGN
1018F >FFFD # NA <reserved-1018F>
# 10190..1019Bvalid # 5.1 ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
1019C..1019F >FFFD # NA <reserved-1019C>..<reserved-1019F>
# 101A0 valid # 7.0 GREEK SYMBOL TAU RHO
@ -5743,7 +5764,46 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 1044E..1049Dvalid # 4.0 DESERET SMALL LETTER OI..OSMANYA LETTER OO
1049E..1049F >FFFD # NA <reserved-1049E>..<reserved-1049F>
# 104A0..104A9valid # 4.0 OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
104AA..104FF >FFFD # NA <reserved-104AA>..<reserved-104FF>
104AA..104AF >FFFD # NA <reserved-104AA>..<reserved-104AF>
104B0 >104D8 # 9.0 OSAGE CAPITAL LETTER A
104B1 >104D9 # 9.0 OSAGE CAPITAL LETTER AI
104B2 >104DA # 9.0 OSAGE CAPITAL LETTER AIN
104B3 >104DB # 9.0 OSAGE CAPITAL LETTER AH
104B4 >104DC # 9.0 OSAGE CAPITAL LETTER BRA
104B5 >104DD # 9.0 OSAGE CAPITAL LETTER CHA
104B6 >104DE # 9.0 OSAGE CAPITAL LETTER EHCHA
104B7 >104DF # 9.0 OSAGE CAPITAL LETTER E
104B8 >104E0 # 9.0 OSAGE CAPITAL LETTER EIN
104B9 >104E1 # 9.0 OSAGE CAPITAL LETTER HA
104BA >104E2 # 9.0 OSAGE CAPITAL LETTER HYA
104BB >104E3 # 9.0 OSAGE CAPITAL LETTER I
104BC >104E4 # 9.0 OSAGE CAPITAL LETTER KA
104BD >104E5 # 9.0 OSAGE CAPITAL LETTER EHKA
104BE >104E6 # 9.0 OSAGE CAPITAL LETTER KYA
104BF >104E7 # 9.0 OSAGE CAPITAL LETTER LA
104C0 >104E8 # 9.0 OSAGE CAPITAL LETTER MA
104C1 >104E9 # 9.0 OSAGE CAPITAL LETTER NA
104C2 >104EA # 9.0 OSAGE CAPITAL LETTER O
104C3 >104EB # 9.0 OSAGE CAPITAL LETTER OIN
104C4 >104EC # 9.0 OSAGE CAPITAL LETTER PA
104C5 >104ED # 9.0 OSAGE CAPITAL LETTER EHPA
104C6 >104EE # 9.0 OSAGE CAPITAL LETTER SA
104C7 >104EF # 9.0 OSAGE CAPITAL LETTER SHA
104C8 >104F0 # 9.0 OSAGE CAPITAL LETTER TA
104C9 >104F1 # 9.0 OSAGE CAPITAL LETTER EHTA
104CA >104F2 # 9.0 OSAGE CAPITAL LETTER TSA
104CB >104F3 # 9.0 OSAGE CAPITAL LETTER EHTSA
104CC >104F4 # 9.0 OSAGE CAPITAL LETTER TSHA
104CD >104F5 # 9.0 OSAGE CAPITAL LETTER DHA
104CE >104F6 # 9.0 OSAGE CAPITAL LETTER U
104CF >104F7 # 9.0 OSAGE CAPITAL LETTER WA
104D0 >104F8 # 9.0 OSAGE CAPITAL LETTER KHA
104D1 >104F9 # 9.0 OSAGE CAPITAL LETTER GHA
104D2 >104FA # 9.0 OSAGE CAPITAL LETTER ZA
104D3 >104FB # 9.0 OSAGE CAPITAL LETTER ZHA
104D4..104D7 >FFFD # NA <reserved-104D4>..<reserved-104D7>
# 104D8..104FBvalid # 9.0 OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
104FC..104FF >FFFD # NA <reserved-104FC>..<reserved-104FF>
# 10500..10527valid # 7.0 ELBASAN LETTER A..ELBASAN LETTER KHE
10528..1052F >FFFD # NA <reserved-10528>..<reserved-1052F>
# 10530..10563valid # 7.0 CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
@ -5943,7 +6003,8 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
11212 >FFFD # NA <reserved-11212>
# 11213..11237valid # 7.0 KHOJKI LETTER NYA..KHOJKI SIGN SHADDA
# 11238..1123Dvalid # 7.0 KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
1123E..1127F >FFFD # NA <reserved-1123E>..<reserved-1127F>
# 1123E valid # 9.0 KHOJKI SIGN SUKUN
1123F..1127F >FFFD # NA <reserved-1123F>..<reserved-1127F>
# 11280..11286valid # 8.0 MULTANI LETTER A..MULTANI LETTER GA
11287 >FFFD # NA <reserved-11287>
# 11288 valid # 8.0 MULTANI LETTER GHA
@ -5989,7 +6050,15 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 11366..1136Cvalid # 7.0 COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
1136D..1136F >FFFD # NA <reserved-1136D>..<reserved-1136F>
# 11370..11374valid # 7.0 COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
11375..1147F >FFFD # NA <reserved-11375>..<reserved-1147F>
11375..113FF >FFFD # NA <reserved-11375>..<reserved-113FF>
# 11400..1144Avalid # 9.0 NEWA LETTER A..NEWA SIDDHI
# 1144B..1144Fvalid # 9.0 NEWA DANDA..NEWA ABBREVIATION SIGN
# 11450..11459valid # 9.0 NEWA DIGIT ZERO..NEWA DIGIT NINE
1145A >FFFD # NA <reserved-1145A>
# 1145B valid # 9.0 NEWA PLACEHOLDER MARK
1145C >FFFD # NA <reserved-1145C>
# 1145D valid # 9.0 NEWA INSERTION SIGN
1145E..1147F >FFFD # NA <reserved-1145E>..<reserved-1147F>
# 11480..114C5valid # 7.0 TIRHUTA ANJI..TIRHUTA GVANG
# 114C6 valid # 7.0 TIRHUTA ABBREVIATION SIGN
# 114C7 valid # 7.0 TIRHUTA OM
@ -6008,7 +6077,9 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 11644 valid # 7.0 MODI SIGN HUVA
11645..1164F >FFFD # NA <reserved-11645>..<reserved-1164F>
# 11650..11659valid # 7.0 MODI DIGIT ZERO..MODI DIGIT NINE
1165A..1167F >FFFD # NA <reserved-1165A>..<reserved-1167F>
1165A..1165F >FFFD # NA <reserved-1165A>..<reserved-1165F>
# 11660..1166Cvalid # 9.0 MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
1166D..1167F >FFFD # NA <reserved-1166D>..<reserved-1167F>
# 11680..116B7valid # 6.1 TAKRI LETTER A..TAKRI SIGN NUKTA
116B8..116BF >FFFD # NA <reserved-116B8>..<reserved-116BF>
# 116C0..116C9valid # 6.1 TAKRI DIGIT ZERO..TAKRI DIGIT NINE
@ -6058,7 +6129,24 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 118FF valid # 7.0 WARANG CITI OM
11900..11ABF >FFFD # NA <reserved-11900>..<reserved-11ABF>
# 11AC0..11AF8valid # 7.0 PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
11AF9..11FFF >FFFD # NA <reserved-11AF9>..<reserved-11FFF>
11AF9..11BFF >FFFD # NA <reserved-11AF9>..<reserved-11BFF>
# 11C00..11C08valid # 9.0 BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C09 >FFFD # NA <reserved-11C09>
# 11C0A..11C36valid # 9.0 BHAIKSUKI LETTER E..BHAIKSUKI VOWEL SIGN VOCALIC L
11C37 >FFFD # NA <reserved-11C37>
# 11C38..11C40valid # 9.0 BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN AVAGRAHA
# 11C41..11C45valid # 9.0 BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2
11C46..11C4F >FFFD # NA <reserved-11C46>..<reserved-11C4F>
# 11C50..11C59valid # 9.0 BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
# 11C5A..11C6Cvalid # 9.0 BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK
11C6D..11C6F >FFFD # NA <reserved-11C6D>..<reserved-11C6F>
# 11C70..11C71valid # 9.0 MARCHEN HEAD MARK..MARCHEN MARK SHAD
# 11C72..11C8Fvalid # 9.0 MARCHEN LETTER KA..MARCHEN LETTER A
11C90..11C91 >FFFD # NA <reserved-11C90>..<reserved-11C91>
# 11C92..11CA7valid # 9.0 MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
11CA8 >FFFD # NA <reserved-11CA8>
# 11CA9..11CB6valid # 9.0 MARCHEN SUBJOINED LETTER YA..MARCHEN SIGN CANDRABINDU
11CB7..11FFF >FFFD # NA <reserved-11CB7>..<reserved-11FFF>
# 12000..1236Evalid # 5.0 CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
# 1236F..12398valid # 7.0 CUNEIFORM SIGN KAP ELAMITE..CUNEIFORM SIGN UM TIMES ME
# 12399 valid # 8.0 CUNEIFORM SIGN U U
@ -6106,7 +6194,13 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 16F50..16F7Evalid # 6.1 MIAO LETTER NASALIZATION..MIAO VOWEL SIGN NG
16F7F..16F8E >FFFD # NA <reserved-16F7F>..<reserved-16F8E>
# 16F8F..16F9Fvalid # 6.1 MIAO TONE RIGHT..MIAO LETTER REFORMED TONE-8
16FA0..1AFFF >FFFD # NA <reserved-16FA0>..<reserved-1AFFF>
16FA0..16FDF >FFFD # NA <reserved-16FA0>..<reserved-16FDF>
# 16FE0 valid # 9.0 TANGUT ITERATION MARK
16FE1..16FFF >FFFD # NA <reserved-16FE1>..<reserved-16FFF>
# 17000..187ECvalid # 9.0 TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC
187ED..187FF >FFFD # NA <reserved-187ED>..<reserved-187FF>
# 18800..18AF2valid # 9.0 TANGUT COMPONENT-001..TANGUT COMPONENT-755
18AF3..1AFFF >FFFD # NA <reserved-18AF3>..<reserved-1AFFF>
# 1B000..1B001valid # 6.0 KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE
1B002..1BBFF >FFFD # NA <reserved-1B002>..<reserved-1BBFF>
# 1BC00..1BC6Avalid # 7.0 DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
@ -7176,12 +7270,62 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 1DA9B..1DA9Fvalid # 8.0 SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
1DAA0 >FFFD # NA <reserved-1DAA0>
# 1DAA1..1DAAFvalid # 8.0 SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
1DAB0..1E7FF >FFFD # NA <reserved-1DAB0>..<reserved-1E7FF>
1DAB0..1DFFF >FFFD # NA <reserved-1DAB0>..<reserved-1DFFF>
# 1E000..1E006valid # 9.0 COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
1E007 >FFFD # NA <reserved-1E007>
# 1E008..1E018valid # 9.0 COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
1E019..1E01A >FFFD # NA <reserved-1E019>..<reserved-1E01A>
# 1E01B..1E021valid # 9.0 COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
1E022 >FFFD # NA <reserved-1E022>
# 1E023..1E024valid # 9.0 COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
1E025 >FFFD # NA <reserved-1E025>
# 1E026..1E02Avalid # 9.0 COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
1E02B..1E7FF >FFFD # NA <reserved-1E02B>..<reserved-1E7FF>
# 1E800..1E8C4valid # 7.0 MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON
1E8C5..1E8C6 >FFFD # NA <reserved-1E8C5>..<reserved-1E8C6>
# 1E8C7..1E8CFvalid # 7.0 MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE
# 1E8D0..1E8D6valid # 7.0 MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
1E8D7..1EDFF >FFFD # NA <reserved-1E8D7>..<reserved-1EDFF>
1E8D7..1E8FF >FFFD # NA <reserved-1E8D7>..<reserved-1E8FF>
1E900 >1E922 # 9.0 ADLAM CAPITAL LETTER ALIF
1E901 >1E923 # 9.0 ADLAM CAPITAL LETTER DAALI
1E902 >1E924 # 9.0 ADLAM CAPITAL LETTER LAAM
1E903 >1E925 # 9.0 ADLAM CAPITAL LETTER MIIM
1E904 >1E926 # 9.0 ADLAM CAPITAL LETTER BA
1E905 >1E927 # 9.0 ADLAM CAPITAL LETTER SINNYIIYHE
1E906 >1E928 # 9.0 ADLAM CAPITAL LETTER PE
1E907 >1E929 # 9.0 ADLAM CAPITAL LETTER BHE
1E908 >1E92A # 9.0 ADLAM CAPITAL LETTER RA
1E909 >1E92B # 9.0 ADLAM CAPITAL LETTER E
1E90A >1E92C # 9.0 ADLAM CAPITAL LETTER FA
1E90B >1E92D # 9.0 ADLAM CAPITAL LETTER I
1E90C >1E92E # 9.0 ADLAM CAPITAL LETTER O
1E90D >1E92F # 9.0 ADLAM CAPITAL LETTER DHA
1E90E >1E930 # 9.0 ADLAM CAPITAL LETTER YHE
1E90F >1E931 # 9.0 ADLAM CAPITAL LETTER WAW
1E910 >1E932 # 9.0 ADLAM CAPITAL LETTER NUN
1E911 >1E933 # 9.0 ADLAM CAPITAL LETTER KAF
1E912 >1E934 # 9.0 ADLAM CAPITAL LETTER YA
1E913 >1E935 # 9.0 ADLAM CAPITAL LETTER U
1E914 >1E936 # 9.0 ADLAM CAPITAL LETTER JIIM
1E915 >1E937 # 9.0 ADLAM CAPITAL LETTER CHI
1E916 >1E938 # 9.0 ADLAM CAPITAL LETTER HA
1E917 >1E939 # 9.0 ADLAM CAPITAL LETTER QAAF
1E918 >1E93A # 9.0 ADLAM CAPITAL LETTER GA
1E919 >1E93B # 9.0 ADLAM CAPITAL LETTER NYA
1E91A >1E93C # 9.0 ADLAM CAPITAL LETTER TU
1E91B >1E93D # 9.0 ADLAM CAPITAL LETTER NHA
1E91C >1E93E # 9.0 ADLAM CAPITAL LETTER VA
1E91D >1E93F # 9.0 ADLAM CAPITAL LETTER KHA
1E91E >1E940 # 9.0 ADLAM CAPITAL LETTER GBE
1E91F >1E941 # 9.0 ADLAM CAPITAL LETTER ZAL
1E920 >1E942 # 9.0 ADLAM CAPITAL LETTER KPO
1E921 >1E943 # 9.0 ADLAM CAPITAL LETTER SHA
# 1E922..1E94Avalid # 9.0 ADLAM SMALL LETTER ALIF..ADLAM NUKTA
1E94B..1E94F >FFFD # NA <reserved-1E94B>..<reserved-1E94F>
# 1E950..1E959valid # 9.0 ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1E95A..1E95D >FFFD # NA <reserved-1E95A>..<reserved-1E95D>
# 1E95E..1E95Fvalid # 9.0 ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
1E960..1EDFF >FFFD # NA <reserved-1E960>..<reserved-1EDFF>
1EE00 >0627 # 6.1 ARABIC MATHEMATICAL ALEF
1EE01 >0628 # 6.1 ARABIC MATHEMATICAL BEH
1EE02 >062C # 6.1 ARABIC MATHEMATICAL JEEM
@ -7468,7 +7612,8 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 1F18E..1F18Fvalid # 6.0 NEGATIVE SQUARED AB..NEGATIVE SQUARED WC
1F190 >0064 006A # 5.2 SQUARE DJ
# 1F191..1F19Avalid # 6.0 SQUARED CL..SQUARED VS
1F19B..1F1E5 >FFFD # NA <reserved-1F19B>..<reserved-1F1E5>
# 1F19B..1F1ACvalid # 9.0 SQUARED THREE D..SQUARED VOD
1F1AD..1F1E5 >FFFD # NA <reserved-1F1AD>..<reserved-1F1E5>
# 1F1E6..1F1FFvalid # 6.0 REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
1F200 >307B 304B # 5.2 SQUARE HIRAGANA HOKA
1F201 >30B3 30B3 # 6.0 SQUARED KATAKANA KOKO
@ -7517,7 +7662,8 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
1F238 >7533 # 6.0 SQUARED CJK UNIFIED IDEOGRAPH-7533
1F239 >5272 # 6.0 SQUARED CJK UNIFIED IDEOGRAPH-5272
1F23A >55B6 # 6.0 SQUARED CJK UNIFIED IDEOGRAPH-55B6
1F23B..1F23F >FFFD # NA <reserved-1F23B>..<reserved-1F23F>
1F23B >914D # 9.0 SQUARED CJK UNIFIED IDEOGRAPH-914D
1F23C..1F23F >FFFD # NA <reserved-1F23C>..<reserved-1F23F>
1F240 >3014 672C 3015 #5.2 TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C
1F241 >3014 4E09 3015 #5.2 TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E09
1F242 >3014 4E8C 3015 #5.2 TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E8C
@ -7566,9 +7712,9 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 1F54B..1F54Fvalid # 8.0 KAABA..BOWL OF HYGIEIA
# 1F550..1F567valid # 6.0 CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
# 1F568..1F579valid # 7.0 RIGHT SPEAKER..JOYSTICK
1F57A >FFFD # NA <reserved-1F57A>
# 1F57A valid # 9.0 MAN DANCING
# 1F57B..1F5A3valid # 7.0 LEFT HAND TELEPHONE RECEIVER..BLACK DOWN POINTING BACKHAND INDEX
1F5A4 >FFFD # NA <reserved-1F5A4>
# 1F5A4 valid # 9.0 BLACK HEART
# 1F5A5..1F5FAvalid # 7.0 DESKTOP COMPUTER..WORLD MAP
# 1F5FB..1F5FFvalid # 6.0 MOUNT FUJI..MOYAI
# 1F600 valid # 6.1 GRINNING FACE
@ -7600,11 +7746,13 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 1F680..1F6C5valid # 6.0 ROCKET..LEFT LUGGAGE
# 1F6C6..1F6CFvalid # 7.0 TRIANGLE WITH ROUNDED CORNERS..BED
# 1F6D0 valid # 8.0 PLACE OF WORSHIP
1F6D1..1F6DF >FFFD # NA <reserved-1F6D1>..<reserved-1F6DF>
# 1F6D1..1F6D2valid # 9.0 OCTAGONAL SIGN..SHOPPING TROLLEY
1F6D3..1F6DF >FFFD # NA <reserved-1F6D3>..<reserved-1F6DF>
# 1F6E0..1F6ECvalid # 7.0 HAMMER AND WRENCH..AIRPLANE ARRIVING
1F6ED..1F6EF >FFFD # NA <reserved-1F6ED>..<reserved-1F6EF>
# 1F6F0..1F6F3valid # 7.0 SATELLITE..PASSENGER SHIP
1F6F4..1F6FF >FFFD # NA <reserved-1F6F4>..<reserved-1F6FF>
# 1F6F4..1F6F6valid # 9.0 SCOOTER..CANOE
1F6F7..1F6FF >FFFD # NA <reserved-1F6F7>..<reserved-1F6FF>
# 1F700..1F773valid # 6.0 ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
1F774..1F77F >FFFD # NA <reserved-1F774>..<reserved-1F77F>
# 1F780..1F7D4valid # 7.0 BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
@ -7620,9 +7768,21 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
# 1F890..1F8ADvalid # 7.0 LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F8AE..1F90F >FFFD # NA <reserved-1F8AE>..<reserved-1F90F>
# 1F910..1F918valid # 8.0 ZIPPER-MOUTH FACE..SIGN OF THE HORNS
1F919..1F97F >FFFD # NA <reserved-1F919>..<reserved-1F97F>
# 1F919..1F91Evalid # 9.0 CALL ME HAND..HAND WITH INDEX AND MIDDLE FINGERS CROSSED
1F91F >FFFD # NA <reserved-1F91F>
# 1F920..1F927valid # 9.0 FACE WITH COWBOY HAT..SNEEZING FACE
1F928..1F92F >FFFD # NA <reserved-1F928>..<reserved-1F92F>
# 1F930 valid # 9.0 PREGNANT WOMAN
1F931..1F932 >FFFD # NA <reserved-1F931>..<reserved-1F932>
# 1F933..1F93Evalid # 9.0 SELFIE..HANDBALL
1F93F >FFFD # NA <reserved-1F93F>
# 1F940..1F94Bvalid # 9.0 WILTED FLOWER..MARTIAL ARTS UNIFORM
1F94C..1F94F >FFFD # NA <reserved-1F94C>..<reserved-1F94F>
# 1F950..1F95Evalid # 9.0 CROISSANT..PANCAKES
1F95F..1F97F >FFFD # NA <reserved-1F95F>..<reserved-1F97F>
# 1F980..1F984valid # 8.0 CRAB..UNICORN FACE
1F985..1F9BF >FFFD # NA <reserved-1F985>..<reserved-1F9BF>
# 1F985..1F991valid # 9.0 EAGLE..SQUID
1F992..1F9BF >FFFD # NA <reserved-1F992>..<reserved-1F9BF>
# 1F9C0 valid # 8.0 CHEESE WEDGE
1F9C1..1FFFD >FFFD # NA <reserved-1F9C1>..<reserved-1FFFD>
1FFFE..1FFFF >FFFD # 2.0 <noncharacter-1FFFE>..<noncharacter-1FFFF>

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 1999-2015, International Business Machines
* Copyright (C) 1999-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: collationfcd.cpp
@ -20,27 +20,27 @@ const uint8_t CollationFCD::lcccIndex[2048]={
0,0,0,0,0,0,0,0,1,1,2,3,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,5,6,7,0,
8,0,9,0xa,0,0,0xb,0xc,0xd,0xe,0xf,0,0,0,0,0x10,
0x11,0x12,0x13,0,0,0,0,0x14,0,0x15,0x16,0,0,0x15,0x17,0,
0,0x15,0x17,0,0,0x15,0x17,0,0,0x15,0x17,0,0,0,0x17,0,
0,0,0x18,0,0,0x15,0x17,0,0,0,0x17,0,0,0,0x19,0,
0,0x1a,0x1b,0,0,0x1c,0x1b,0,0x1c,0x1d,0,0x1e,0x1f,0,0x20,0,
0,0x21,0,0,0x17,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x22,0,0,0,0,0,
0x11,0x12,0x13,0,0,0,0x14,0x15,0,0x16,0x17,0,0,0x16,0x18,0,
0,0x16,0x18,0,0,0x16,0x18,0,0,0x16,0x18,0,0,0,0x18,0,
0,0,0x19,0,0,0x16,0x18,0,0,0,0x18,0,0,0,0x1a,0,
0,0x1b,0x1c,0,0,0x1d,0x1c,0,0x1d,0x1e,0,0x1f,0x20,0,0x21,0,
0,0x22,0,0,0x18,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x23,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x23,0x23,0,0,0,0,0x24,0,
0,0,0,0,0,0x25,0,0,0,0x13,0,0,0,0,0,0,
0x26,0,0,0x27,0,0x28,0,0,0,0x23,0x29,0x10,0,0x2a,0,0x2b,
0,0x2c,0,0,0,0,0x2d,0x2e,0,0,0,0,0,0,1,0x2f,
0,0,0,0,0,0,0,0,0x24,0x24,0,0,0,0,0x25,0,
0,0,0,0,0,0x26,0,0,0,0x13,0,0,0,0,0,0,
0x27,0,0,0x28,0,0x29,0,0,0,0x24,0x2a,0x10,0,0x2b,0,0x2c,
0,0x2d,0,0,0,0,0x2e,0x2f,0,0,0,0,0,0,1,0x30,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x30,0x31,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x31,0x32,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x32,0,0,0,0x33,0,0,0,1,
0,0,0,0,0,0,0,0x33,0,0,0,0x34,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x34,0,0,0x35,0,0,0,0,0,0,0,0,0,0,0,
0,0x35,0,0,0x36,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -99,9 +99,9 @@ const uint8_t CollationFCD::lcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x36,0x37,0,0,0x38,0,0,0,0,0,0,0,0,
0x20,0,0,0,0,0,0x29,0x39,0,0x3a,0x3b,0,0,0x3b,0x3c,0,
0,0,0,0,0,0x3d,0x3e,0x3f,0,0,0,0,0,0,0,0x17,
0,0,0,0x37,0x38,0,0,0x39,0,0,0,0,0,0,0,0,
0x21,0,0,0,0,0,0x2a,0x3a,0,0x3b,0x3c,0,0,0x3c,0x3d,0,
0,0,0,0,0,0x3e,0x3f,0x40,0,0,0,0,0,0,0,0x18,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -124,7 +124,7 @@ const uint8_t CollationFCD::lcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x40,0x41,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x41,0x42,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@ -141,17 +141,17 @@ const uint8_t CollationFCD::lcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x42,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x43,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
const uint32_t CollationFCD::lcccBits[67]={
const uint32_t CollationFCD::lcccBits[68]={
0,0xffffffff,0xffff7fff,0xffff,0xf8,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0xfffff800,0x10000,0x9fc00000,0x3d9f,0x20000,0xffff0000,0x7ff,
0xff800,0xfbc00000,0x3eef,0xe000000,0xfffffff8,0x10000000,0x1e2000,0x2000,0x602000,0x400,0x7000000,0xf00,0x3000000,0x2a00000,0x3c3e0000,0xdf,
0x40,0x6800000,0xe0000000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0x3fff0000,0x10,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,0xf03fffff,
0x1fff0000,0x1ffe2,0x38000,0x80000000,0xfc00,0x6000000,0x3ff08000,0xc0000000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,
0x4000035,0x4108000,0x40000000
0xff800,0xfbc00000,0x3eef,0xe000000,0xfff00000,0xfffffffb,0x10000000,0x1e2000,0x2000,0x602000,0x400,0x7000000,0xf00,0x3000000,0x2a00000,0x3c3e0000,
0xdf,0x40,0x6800000,0xe0000000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0x3fff0000,0x10,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,
0xf83fffff,0x1fff0000,0x1ffe2,0x38000,0x80000000,0xfc00,0x6000000,0x3ff08000,0xc0000000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,2,
0x400000,0x40000b5,0x5108000,0x40000000
};
const uint8_t CollationFCD::tcccIndex[2048]={
@ -159,27 +159,27 @@ const uint8_t CollationFCD::tcccIndex[2048]={
0xb,0xc,0,0,0,0,0,0,1,1,0xd,0xe,0xf,0x10,0x11,0,
0x12,0x13,0x14,0x15,0x16,0,0x17,0x18,0,0,0,0,0x19,0x1a,0x1b,0,
0x1c,0x1d,0x1e,0x1f,0,0,0x20,0x21,0x22,0x23,0x24,0,0,0,0,0x25,
0x26,0x27,0x28,0,0,0,0,0x29,0,0x2a,0x2b,0,0,0x2c,0x2d,0,
0,0x2e,0x2f,0,0,0x2c,0x30,0,0,0x2c,0x31,0,0,0,0x30,0,
0,0,0x32,0,0,0x2c,0x30,0,0,0,0x30,0,0,0,0x33,0,
0,0x34,0x35,0,0,0x36,0x35,0,0x36,0x37,0,0x38,0x39,0,0x3a,0,
0,0x3b,0,0,0x30,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x3c,0,0,0,0,0,
0x26,0x27,0x28,0,0,0,0x29,0x2a,0,0x2b,0x2c,0,0,0x2d,0x2e,0,
0,0x2f,0x30,0,0,0x2d,0x31,0,0,0x2d,0x32,0,0,0,0x31,0,
0,0,0x33,0,0,0x2d,0x31,0,0,0,0x31,0,0,0,0x34,0,
0,0x35,0x36,0,0,0x37,0x36,0,0x37,0x38,0,0x39,0x3a,0,0x3b,0,
0,0x3c,0,0,0x31,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x3d,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3d,0x3d,0,0,0,0,0x3e,0,
0,0,0,0,0,0x3f,0,0,0,0x28,0,0,0,0,0,0,
0x40,0,0,0x41,0,0x42,0,0,0,0x3d,0x43,0x25,0,0x44,0,0x45,
0,0x46,0,0,0,0,0x47,0x48,0,0,0,0,0,0,1,0x49,
1,1,1,1,0x4a,1,1,0x4b,0x4c,1,0x4d,0x4e,1,0x4f,0x50,0x51,
0,0,0,0,0,0,0x52,0x53,0,0x54,0,0,0x55,0x56,0x57,0,
0x58,0x59,0x5a,0x5b,0x5c,0x5d,0,0x5e,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3e,0x3e,0,0,0,0,0x3f,0,
0,0,0,0,0,0x40,0,0,0,0x28,0,0,0,0,0,0,
0x41,0,0,0x42,0,0x43,0,0,0,0x3e,0x44,0x25,0,0x45,0,0x46,
0,0x47,0,0,0,0,0x48,0x49,0,0,0,0,0,0,1,0x4a,
1,1,1,1,0x4b,1,1,0x4c,0x4d,1,0x4e,0x4f,1,0x50,0x51,0x52,
0,0,0,0,0,0,0x53,0x54,0,0x55,0,0,0x56,0x57,0x58,0,
0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0,0x5f,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x2c,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x5f,0,0,0,0x60,0,0,0,1,
0,0,0,0,0,0,0x2d,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x60,0,0,0,0x61,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x61,0x62,0x63,0x64,0x62,0x63,0x65,0,0,0,0,0,0,0,0,
0,0x62,0x63,0x64,0x65,0x63,0x64,0x66,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -238,9 +238,9 @@ const uint8_t CollationFCD::tcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x66,0x67,0,0,0x68,0,0,0,0,0,0,0,0,
0x3a,0,0,0,0,0,0x43,0x69,0,0x6a,0x6b,0,0,0x6b,0x6c,0,
0,0,0,0,0,0x6d,0x6e,0x6f,0,0,0,0,0,0,0,0x30,
0,0,0,0x67,0x68,0,0,0x69,0,0,0,0,0,0,0,0,
0x3b,0,0,0,0,0,0x44,0x6a,0,0x6b,0x6c,0,0,0x6c,0x6d,0,
0,0,0,0,0,0x6e,0x6f,0x70,0,0,0,0,0,0,0,0x31,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -263,7 +263,7 @@ const uint8_t CollationFCD::tcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x70,0x71,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x71,0x72,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -280,20 +280,20 @@ const uint8_t CollationFCD::tcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3c,0x72,0x73,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3d,0x73,0x74,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xe,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
const uint32_t CollationFCD::tcccBits[116]={
const uint32_t CollationFCD::tcccBits[117]={
0,0xffffffff,0x3e7effbf,0xbe7effbf,0xfffcffff,0x7ef1ff3f,0xfff3f1f8,0x7fffff3f,0x18003,0xdfffe000,0xff31ffcf,0xcfffffff,0xfffc0,0xffff7fff,0xffff,0x1d760,
0x1fc00,0x187c00,0x200708b,0x2000000,0x708b0000,0xc00000,0xf8,0xfccf0006,0x33ffcfc,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0x7c,0xfffff800,0x10000,
0x9fc80005,0x3d9f,0x20000,0xffff0000,0x7ff,0xff800,0xfbc00000,0x3eef,0xe000000,0xfffffff8,0x10120200,0xff1e2000,0x10000000,0xb0002000,0x10480000,0x4e002000,
0x2000,0x30002000,0x602100,0x24000400,0x7000000,0xf00,0x3000000,0x2a00000,0x3d7e0000,0xdf,0x40,0x6800000,0xe0000000,0x100000,0x20040000,0x200,
0x1800000,0x9fe00001,0x3fff0000,0x10,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,0xf03fffff,0xbffffff,0x3ffffff,0x3f3fffff,0xaaff3f3f,0x3fffffff,0x1fdfffff,
0xefcfffde,0x1fdc7fff,0x1fff0000,0x1ffe2,0x800,0xc000000,0x4000,0xe000,0x1210,0x50,0x292,0x333e005,0x333,0xf000,0x3c0f,0x38000,
0x80000000,0xfc00,0x55555000,0x36db02a5,0x46100000,0x47900000,0x3ff08000,0xc0000000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,
0x4000035,0x4108000,0x5f7ffc00,0x7fdb
0x9fc80005,0x3d9f,0x20000,0xffff0000,0x7ff,0xff800,0xfbc00000,0x3eef,0xe000000,0xfff00000,0xfffffffb,0x10120200,0xff1e2000,0x10000000,0xb0002000,0x10480000,
0x4e002000,0x2000,0x30002000,0x602100,0x24000400,0x7000000,0xf00,0x3000000,0x2a00000,0x3d7e0000,0xdf,0x40,0x6800000,0xe0000000,0x100000,0x20040000,
0x200,0x1800000,0x9fe00001,0x3fff0000,0x10,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,0xf83fffff,0xbffffff,0x3ffffff,0x3f3fffff,0xaaff3f3f,0x3fffffff,
0x1fdfffff,0xefcfffde,0x1fdc7fff,0x1fff0000,0x1ffe2,0x800,0xc000000,0x4000,0xe000,0x1210,0x50,0x292,0x333e005,0x333,0xf000,0x3c0f,
0x38000,0x80000000,0xfc00,0x55555000,0x36db02a5,0x46100000,0x47900000,0x3ff08000,0xc0000000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,2,
0x400000,0x40000b5,0x5108000,0x5f7ffc00,0x7fdb
};
U_NAMESPACE_END

View file

@ -5,7 +5,7 @@
* WARNING: THIS FILE IS MACHINE GENERATED. DO NOT HAND EDIT IT UNLESS
* YOU REALLY KNOW WHAT YOU'RE DOING.
*
* Generated on: 04/08/2016 02:00:11 PM PDT
* Generated on: 05/10/2016 11:13:24 AM PDT
*/
#ifndef __LELANGUAGES_H

View file

@ -5,7 +5,7 @@
* WARNING: THIS FILE IS MACHINE GENERATED. DO NOT HAND EDIT IT UNLESS
* YOU REALLY KNOW WHAT YOU'RE DOING.
*
* Generated on: 04/08/2016 02:00:11 PM PDT
* Generated on: 05/10/2016 11:13:24 AM PDT
*/
#ifndef __LESCRIPTS_H

View file

@ -5,7 +5,7 @@
* WARNING: THIS FILE IS MACHINE GENERATED. DO NOT HAND EDIT IT UNLESS
* YOU REALLY KNOW WHAT YOU'RE DOING.
*
* Generated on: 04/08/2016 02:00:11 PM PDT
* Generated on: 05/10/2016 11:13:24 AM PDT
*/
#include "LETypes.h"
@ -169,7 +169,7 @@ const LETag OpenTypeLayoutEngine::scriptTags[] = {
shrdScriptTag, /* 'shrd' (SHARADA) */
soraScriptTag, /* 'sora' (SORA_SOMPENG) */
takrScriptTag, /* 'takr' (TAKRI) */
tangScriptTag, /* 'tang' (TANG) */
tangScriptTag, /* 'tang' (TANGUT) */
woleScriptTag, /* 'wole' (WOLE) */
hluwScriptTag, /* 'hluw' (ANATOLIAN_HIEROGLYPHS) */
khojScriptTag, /* 'khoj' (KHOJKI) */

View file

@ -5,7 +5,7 @@
* WARNING: THIS FILE IS MACHINE GENERATED. DO NOT HAND EDIT IT UNLESS
* YOU REALLY KNOW WHAT YOU'RE DOING.
*
* Generated on: 04/08/2016 02:00:11 PM PDT
* Generated on: 05/10/2016 11:13:24 AM PDT
*/
#ifndef __SCRIPTANDLANGUAGES_H
@ -183,7 +183,7 @@ const LETag nshuScriptTag = 0x6E736875; /* 'nshu' (NSHU) */
const LETag shrdScriptTag = 0x73687264; /* 'shrd' (SHARADA) */
const LETag soraScriptTag = 0x736F7261; /* 'sora' (SORA_SOMPENG) */
const LETag takrScriptTag = 0x74616B72; /* 'takr' (TAKRI) */
const LETag tangScriptTag = 0x74616E67; /* 'tang' (TANG) */
const LETag tangScriptTag = 0x74616E67; /* 'tang' (TANGUT) */
const LETag woleScriptTag = 0x776F6C65; /* 'wole' (WOLE) */
const LETag hluwScriptTag = 0x686C7577; /* 'hluw' (ANATOLIAN_HIEROGLYPHS) */
const LETag khojScriptTag = 0x6B686F6A; /* 'khoj' (KHOJKI) */

View file

@ -401,7 +401,7 @@ void TestUScriptCodeAPI(){
"Loma", "Mende_Kikakui", "Meroitic_Cursive",
"Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
/* new in ICU 4.8 */
"Afak", "Jurc", "Mro", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
"Afak", "Jurc", "Mro", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
/* new in ICU 49 */
"Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
/* new in ICU 52 */

View file

@ -1966,6 +1966,7 @@ private:
UnicodeSet *fCRLFSet;
UnicodeSet *fControlSet;
UnicodeSet *fExtendSet;
UnicodeSet *fZWJSet;
UnicodeSet *fRegionalIndicatorSet;
UnicodeSet *fPrependSet;
UnicodeSet *fSpacingSet;
@ -1975,11 +1976,11 @@ private:
UnicodeSet *fLVSet;
UnicodeSet *fLVTSet;
UnicodeSet *fHangulSet;
UnicodeSet *fAnySet;
UnicodeSet *fEmojiModifierSet;
UnicodeSet *fEmojiBaseSet;
UnicodeSet *fZWJSet;
UnicodeSet *fEmojiModifierSet;
UnicodeSet *fGAZSet;
UnicodeSet *fEBGSet; // ***new
UnicodeSet *fAnySet;
const UnicodeString *fText;
};
@ -1991,9 +1992,11 @@ RBBICharMonkey::RBBICharMonkey() {
fText = NULL;
fCRLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\r\\n]"), status);
fControlSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[[\\p{Grapheme_Cluster_Break = Control}]-[:Block=Tags:]]"), status);
fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[[\\p{Grapheme_Cluster_Break = Extend}][:Block=Tags:]]"), status);
fRegionalIndicatorSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Regional_Indicator}]"), status);
fControlSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[[\\p{Grapheme_Cluster_Break = Control}]]"), status);
fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[[\\p{Grapheme_Cluster_Break = Extend}]]"), status);
fZWJSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = ZWJ}]"), status);
fRegionalIndicatorSet =
new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Regional_Indicator}]"), status);
fPrependSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Prepend}]"), status);
fSpacingSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = SpacingMark}]"), status);
fLSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = L}]"), status);
@ -2007,21 +2010,14 @@ RBBICharMonkey::RBBICharMonkey() {
fHangulSet->addAll(*fTSet);
fHangulSet->addAll(*fLVSet);
fHangulSet->addAll(*fLVTSet);
fAnySet = new UnicodeSet(0, 0x10ffff);
fEmojiBaseSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = EB}]"), status);
fEmojiModifierSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = EM}]"), status);
fGAZSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = GAZ}]"), status);
fEBGSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = EBG}]"), status);
fAnySet = new UnicodeSet(0, 0x10ffff);
fEmojiBaseSet = new UnicodeSet(UnicodeString(
"[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443"
"\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483"
"\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647"
"\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]"), status);
fEmojiModifierSet = new UnicodeSet(0x0001F3FB, 0x0001F3FF);
fZWJSet = new UnicodeSet(0x200D, 0x200D);
fGAZSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\U0001F466-\\U0001F469\\U0001F48B\\U0001F5E8\\u2764]"), status);
fSets = new UVector(status);
fSets = new UVector(status);
fSets->addElement(fCRLFSet, status);
fSets->addElement(fControlSet, status);
fSets->addElement(fExtendSet, status);
@ -2036,6 +2032,7 @@ RBBICharMonkey::RBBICharMonkey() {
fSets->addElement(fEmojiModifierSet, status);
fSets->addElement(fZWJSet, status);
fSets->addElement(fGAZSet, status);
fSets->addElement(fEBGSet, status);
if (U_FAILURE(status)) {
deferredStatus = status;
}
@ -2149,7 +2146,7 @@ int32_t RBBICharMonkey::next(int32_t prevPos) {
continue;
}
// Rule (GB9) x Extend
// Rule (GB9) x (Extend | ZWJ)
if (fExtendSet->contains(c2) || fZWJSet->contains(c2)) {
continue;
}
@ -2164,17 +2161,17 @@ int32_t RBBICharMonkey::next(int32_t prevPos) {
continue;
}
// Rule (GB9c) Emoji_Base x Emoji_Modifier
if ((fEmojiBaseSet->contains(c1) || fGAZSet->contains(c1)) && fEmojiModifierSet->contains(c2)) {
// Rule (GB10) (Emoji_Base | EBG) x Emoji_Modifier
if ((fEmojiBaseSet->contains(c1) || fEBGSet->contains(c1)) && fEmojiModifierSet->contains(c2)) {
continue;
}
// Rule (GB9d) ZWJ x Glue_After_Zwj
if (fZWJSet->contains(c1) && fGAZSet->contains(c2)) {
// Rule (GB11) ZWJ x (Glue_After_ZWJ | EBG)
if (fZWJSet->contains(c1) && (fGAZSet->contains(c2) || fEBGSet->contains(c2))) {
continue;
}
// Rule (GB10) Any <break> Any
// Rule (GB999) Any <break> Any
break;
}
@ -2208,6 +2205,7 @@ RBBICharMonkey::~RBBICharMonkey() {
delete fEmojiModifierSet;
delete fZWJSet;
delete fGAZSet;
delete fEBGSet;
}
//------------------------------------------------------------------------------------------
@ -2233,8 +2231,6 @@ private:
UnicodeSet *fKatakanaSet;
UnicodeSet *fHebrew_LetterSet;
UnicodeSet *fALetterSet;
// TODO(jungshik): Do we still need this change?
// UnicodeSet *fALetterSet; // matches ALetterPlus in word.txt
UnicodeSet *fSingle_QuoteSet;
UnicodeSet *fDouble_QuoteSet;
UnicodeSet *fMidNumLetSet;
@ -2245,10 +2241,11 @@ private:
UnicodeSet *fOtherSet;
UnicodeSet *fExtendSet;
UnicodeSet *fExtendNumLetSet;
UnicodeSet *fDictionaryCjkSet;
UnicodeSet *fDictionarySet;
UnicodeSet *fEBaseSet;
UnicodeSet *fEBGSet;
UnicodeSet *fEModifierSet;
UnicodeSet *fZWSSet;
UnicodeSet *fZWJSet;
UnicodeSet *fGAZSet;
const UnicodeString *fText;
@ -2261,48 +2258,34 @@ RBBIWordMonkey::RBBIWordMonkey()
fSets = new UVector(status);
fCRSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"), status);
fLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"), status);
fNewlineSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"), status);
fDictionaryCjkSet= new UnicodeSet("[[\\uac00-\\ud7a3][:Han:][:Hiragana:][:Katakana:]]", status);
// Exclude Hangul syllables from ALetterSet during testing.
// Leave CJK dictionary characters out from the monkey tests!
#if 0
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}"
"[\\p{Line_Break = Complex_Context}"
"-\\p{Grapheme_Cluster_Break = Extend}"
"-\\p{Grapheme_Cluster_Break = Control}"
"]]",
status);
#endif
fRegionalIndicatorSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Regional_Indicator}]"), status);
fCRSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"), status);
fLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"), status);
fNewlineSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"), status);
fKatakanaSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Katakana}]"), status);
fRegionalIndicatorSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Regional_Indicator}]"), status);
fHebrew_LetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Hebrew_Letter}]"), status);
fALetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ALetter}]"), status);
fALetterSet->removeAll(*fDictionaryCjkSet);
fSingle_QuoteSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Single_Quote}]"), status);
fDouble_QuoteSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Double_Quote}]"), status);
fMidNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNumLet}]"), status);
fMidLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidLetter}]"), status);
fMidNumSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNum}]"), status);
// TODO: this set used to contain [\\uff10-\\uff19] (fullwidth digits), but this breaks the test
// we should figure out why
fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Numeric}]"), status);
fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"), status);
fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status);
fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"), status);
fEBaseSet = new UnicodeSet(UnicodeString(
"[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443"
"\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483"
"\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647"
"\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]"), status);
fEBaseSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = EB}]"), status);
fEBGSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = EBG}]"), status);
fEModifierSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = EM}]"), status);
fZWJSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ZWJ}]"), status);
fGAZSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = GAZ}]"), status);
fEModifierSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\U0001F3FB-\\U0001F3FF]"), status);
fZWSSet = new UnicodeSet((UChar32)0x200D, (UChar32)0x200D);;
fGAZSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\U0001F466-\\U0001F469\\U0001F48B\\U0001F5E8\\u2764]"), status);
fExtendSet->removeAll(*fZWSSet);
fDictionarySet = new UnicodeSet(UNICODE_STRING_SIMPLE("[[\\uac00-\\ud7a3][:Han:][:Hiragana:]]"), status);
fDictionarySet->addAll(*fKatakanaSet);
fDictionarySet->addAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status));
fALetterSet->removeAll(*fDictionarySet);
fOtherSet = new UnicodeSet();
if(U_FAILURE(status)) {
@ -2327,13 +2310,13 @@ RBBIWordMonkey::RBBIWordMonkey()
fOtherSet->removeAll(*fExtendSet);
fOtherSet->removeAll(*fRegionalIndicatorSet);
fOtherSet->removeAll(*fEBaseSet);
fOtherSet->removeAll(*fEBGSet);
fOtherSet->removeAll(*fEModifierSet);
fOtherSet->removeAll(*fZWSSet);
fOtherSet->removeAll(*fZWJSet);
fOtherSet->removeAll(*fGAZSet);
// Inhibit dictionary characters from being tested at all.
fOtherSet->removeAll(*fDictionaryCjkSet);
fOtherSet->removeAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status));
fOtherSet->removeAll(*fDictionarySet);
fSets->addElement(fCRSet, status);
fSets->addElement(fLFSet, status);
@ -2343,7 +2326,9 @@ RBBIWordMonkey::RBBIWordMonkey()
fSets->addElement(fALetterSet, status);
fSets->addElement(fSingle_QuoteSet, status);
fSets->addElement(fDouble_QuoteSet, status);
//fSets->addElement(fKatakanaSet, status); //TODO: work out how to test katakana
//fSets->addElement(fKatakanaSet, status); // Omit Katakana from fSets, which omits Katakana characters
// from the test data. They are all in the dictionary set,
// which this (old, to be retired) monkey test cannot handle.
fSets->addElement(fMidLetterSet, status);
fSets->addElement(fMidNumLetSet, status);
fSets->addElement(fMidNumSet, status);
@ -2354,8 +2339,9 @@ RBBIWordMonkey::RBBIWordMonkey()
fSets->addElement(fExtendNumLetSet, status);
fSets->addElement(fEBaseSet, status);
fSets->addElement(fEBGSet, status);
fSets->addElement(fEModifierSet, status);
fSets->addElement(fZWSSet, status);
fSets->addElement(fZWJSet, status);
fSets->addElement(fGAZSet, status);
if (U_FAILURE(status)) {
@ -2406,7 +2392,7 @@ int32_t RBBIWordMonkey::next(int32_t prevPos) {
break;
};
}
while (fFormatSet->contains(c3) || fExtendSet->contains(c3) || fZWSSet->contains(c3));
while (fFormatSet->contains(c3) || fExtendSet->contains(c3) || fZWJSet->contains(c3));
if (p1 == p2) {
@ -2435,12 +2421,12 @@ int32_t RBBIWordMonkey::next(int32_t prevPos) {
break;
};
// Rule (3c) ZWJ x GAZ (Glue after ZWJ).
// Rule (3c) ZWJ x (Glue_after_ZWJ | EBG).
// Not ignoring extend chars, so peek into input text to
// get the potential ZWJ, the character immediately preceding c2.
// Sloppy UChar32 indexing: p2-1 may reference trail half
// but char32At will get the full code point.
if (fZWSSet->contains(fText->char32At(p2-1)) && fGAZSet->contains(c2)) {
if (fZWJSet->contains(fText->char32At(p2-1)) && (fGAZSet->contains(c2) || fEBGSet->contains(c2))) {
continue;
}
@ -2513,6 +2499,8 @@ int32_t RBBIWordMonkey::next(int32_t prevPos) {
}
// Rule (13) Katakana x Katakana
// Note: matches UAX 29 rules, but doesn't come into play for ICU because
// all Katakana are handled by the dictionary breaker.
if (fKatakanaSet->contains(c1) &&
fKatakanaSet->contains(c2)) {
continue;
@ -2532,7 +2520,12 @@ int32_t RBBIWordMonkey::next(int32_t prevPos) {
continue;
}
// Rule 13c
// WB 14 (E_Base | EBG) x E_Modifier
if ((fEBaseSet->contains(c1) || fEBGSet->contains(c1)) && fEModifierSet->contains(c2)) {
continue;
}
// Rule 15 - 17 Group pairs of Regional Indicators.
if (fRegionalIndicatorSet->contains(c0) && fRegionalIndicatorSet->contains(c1)) {
break;
}
@ -2540,12 +2533,7 @@ int32_t RBBIWordMonkey::next(int32_t prevPos) {
continue;
}
// Rule 13d
if ((fEBaseSet->contains(c1) || fGAZSet->contains(c1)) && fEModifierSet->contains(c2)) {
continue;
}
// Rule 14. Break found here.
// Rule 999. Break found here.
break;
}
@ -2577,11 +2565,12 @@ RBBIWordMonkey::~RBBIWordMonkey() {
delete fExtendSet;
delete fExtendNumLetSet;
delete fRegionalIndicatorSet;
delete fDictionaryCjkSet;
delete fDictionarySet;
delete fOtherSet;
delete fEBaseSet;
delete fEBGSet;
delete fEModifierSet;
delete fZWSSet;
delete fZWJSet;
delete fGAZSet;
}
@ -3034,13 +3023,9 @@ RBBILineMonkey::RBBILineMonkey() :
fRI = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=RI}]"), status);
fSG = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\ud800-\\udfff]"), status);
fXX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=XX}]"), status);
fEB = new UnicodeSet(UnicodeString(
"[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443"
"\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483"
"\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647"
"\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]"), status);
fEM = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\U0001F3FB-\\U0001F3FF]"), status);
fZJ = new UnicodeSet((UChar32)0x200D, (UChar32)0x200D);
fEB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=EB}]"), status);
fEM = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=EM}]"), status);
fZJ = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ZWJ}]"), status);
if (U_FAILURE(status)) {
deferredStatus = status;
@ -3055,11 +3040,8 @@ RBBILineMonkey::RBBILineMonkey() :
fID->addAll(*fEB); // Emoji Base and Emoji Modifier behave as ID.
fID->addAll(*fEM);
fAL->removeAll(*fEM);
fAL->remove((UChar32)0x2764); // Emoji Proposal: move u2764 from Al to Id
fID->add((UChar32)0x2764);
fCM->addAll(*fZJ); // ZWJ behaves as a CM.
fSets->addElement(fBK, status);
fSets->addElement(fCR, status);
@ -3104,12 +3086,12 @@ RBBILineMonkey::RBBILineMonkey() :
fSets->addElement(fZJ, status);
const char *rules =
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"
"((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?"
"\\p{Line_Break=NU}\\p{Line_Break=CM}*"
"((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*"
"((\\p{Line_Break=CL}|\\p{Line_Break=CP})\\p{Line_Break=CM}*)?"
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?";
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})(\\p{Line_Break=CM}|\\u200d)*)?"
"((\\p{Line_Break=OP}|\\p{Line_Break=HY})(\\p{Line_Break=CM}|\\u200d)*)?"
"\\p{Line_Break=NU}(\\p{Line_Break=CM}|\\u200d)*"
"((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})(\\p{Line_Break=CM}|\\u200d)*)*"
"((\\p{Line_Break=CL}|\\p{Line_Break=CP})(\\p{Line_Break=CM}|\\u200d)*)?"
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})(\\p{Line_Break=CM}|\\u200d)*)?";
fNumberMatcher = new RegexMatcher(
UnicodeString(rules, -1, US_INV), 0, status);

View file

@ -418,13 +418,17 @@ namespace {
*/
UScriptCode getCharScript(UScriptCode script) {
switch(script) {
case USCRIPT_HAN_WITH_BOPOMOFO:
case USCRIPT_SIMPLIFIED_HAN:
case USCRIPT_TRADITIONAL_HAN:
return USCRIPT_HAN;
case USCRIPT_JAPANESE:
return USCRIPT_HIRAGANA;
case USCRIPT_JAMO:
case USCRIPT_KOREAN:
return USCRIPT_HANGUL;
case USCRIPT_SYMBOLS_EMOJI:
return USCRIPT_SYMBOLS;
default:
return script;
}
@ -441,7 +445,7 @@ void UnicodeTest::TestScriptMetadata() {
for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) {
UScriptCode sc = (UScriptCode)sci;
// Run the test with -v to see which script has failures:
// .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 3 FAIL
// .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 6 FAIL
logln(uscript_getShortName(sc));
UScriptUsage usage = uscript_getUsage(sc);
UnicodeString sample = uscript_getSampleUnicodeString(sc);

View file

@ -1,10 +1,11 @@
# BidiTest-8.0.0.txt
# Date: 2014-12-16, 23:07:28 GMT [MD]
# BidiTest-9.0.0.txt
# Date: 2016-03-02, 18:54:52 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
#
# This file provides a conformance test for UBA (Unicode Bidi Algorithm) implementations.
# It is designed to be reasonably compact, and yet provide a thorough test of all cases up to

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,18 +1,18 @@
# GraphemeBreakTest-8.0.0.txt
# Date: 2015-02-13, 13:47:15 GMT [MD]
# Hand patched for Emoji breaking proposal L2/16-011R3.
# GraphemeBreakTest-9.0.0.txt
# Date: 2016-03-03, 12:43:52 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Grapheme Break Test
#
# Format:
# <string> (# <comment>)?
# <string> contains hex Unicode code points, with
# ÷ wherever there is a break opportunity, and
# <string> (# <comment>)?
# <string> contains hex Unicode code points, with
# ÷ wherever there is a break opportunity, and
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
@ -31,6 +31,8 @@
÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0020 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0020 × 0308 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -43,8 +45,18 @@
÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0020 ÷ 261D ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0020 × 0308 ÷ 261D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0020 ÷ 1F3FB ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0020 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0020 ÷ 2764 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0020 × 0308 ÷ 2764 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0020 ÷ 1F466 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0020 × 0308 ÷ 1F466 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0020 ÷ D800 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
@ -59,6 +71,8 @@
÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000D ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -71,8 +85,18 @@
÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000D ÷ 261D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 261D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 000D ÷ 1F3FB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 000D ÷ 2764 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 000D ÷ 1F466 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] BOY (EBG) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000D ÷ D800 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3]
@ -87,6 +111,8 @@
÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000A ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -99,8 +125,18 @@
÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000A ÷ 261D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 261D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 000A ÷ 1F3FB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 000A ÷ 2764 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 000A ÷ 1F466 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] BOY (EBG) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000A ÷ D800 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3]
@ -115,6 +151,8 @@
÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0001 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -127,8 +165,18 @@
÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0001 ÷ 261D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 261D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0001 ÷ 1F3FB ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0001 ÷ 2764 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0001 ÷ 1F466 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] BOY (EBG) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0001 ÷ D800 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3]
@ -143,6 +191,8 @@
÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0300 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0300 × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -155,12 +205,62 @@
÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0300 ÷ 261D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0300 × 0308 ÷ 261D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0300 ÷ 1F3FB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0300 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0300 ÷ 2764 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0300 × 0308 ÷ 2764 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0300 ÷ 1F466 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0300 × 0308 ÷ 1F466 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0300 ÷ D800 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0300 × 0308 ÷ D800 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0600 × 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] SPACE (Other) ÷ [0.3]
÷ 0600 × 0308 ÷ 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0600 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0600 × 0308 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0600 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0600 × 0308 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0600 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0600 × 0308 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0600 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0600 × 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0600 × 0308 ÷ 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0600 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0600 × 0308 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0600 × 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0600 × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0600 × 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0600 × 0308 ÷ 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0600 × 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0600 × 0308 ÷ 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0600 × AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0600 × 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0600 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0600 × 261D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0600 × 0308 ÷ 261D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0600 × 1F3FB ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0600 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0600 × 2764 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0600 × 0308 ÷ 2764 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0600 × 1F466 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] BOY (EBG) ÷ [0.3]
÷ 0600 × 0308 ÷ 1F466 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3]
÷ 0600 × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0600 ÷ D800 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0600 × 0308 ÷ D800 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@ -171,6 +271,8 @@
÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0903 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0903 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -183,8 +285,18 @@
÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0903 ÷ 261D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0903 × 0308 ÷ 261D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0903 ÷ 1F3FB ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0903 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0903 ÷ 2764 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0903 × 0308 ÷ 2764 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0903 ÷ 1F466 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0903 × 0308 ÷ 1F466 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0903 ÷ D800 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
@ -199,6 +311,8 @@
÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1100 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1100 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -211,8 +325,18 @@
÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1100 ÷ 261D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1100 × 0308 ÷ 261D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1100 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1100 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1100 ÷ 2764 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 1100 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 1100 ÷ 1F466 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1100 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1100 ÷ D800 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
@ -227,6 +351,8 @@
÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1160 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1160 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -239,8 +365,18 @@
÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1160 ÷ 261D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1160 × 0308 ÷ 261D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1160 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1160 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1160 ÷ 2764 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 1160 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 1160 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1160 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1160 ÷ D800 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
@ -255,6 +391,8 @@
÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 11A8 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -267,8 +405,18 @@
÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 11A8 ÷ 261D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 11A8 × 0308 ÷ 261D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 11A8 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 11A8 ÷ 2764 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 11A8 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 11A8 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 11A8 ÷ D800 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
@ -283,6 +431,8 @@
÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC00 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC00 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -295,8 +445,18 @@
÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC00 ÷ 261D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ AC00 × 0308 ÷ 261D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ AC00 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ AC00 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ AC00 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ AC00 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ AC00 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ AC00 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC00 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
@ -311,6 +471,8 @@
÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC01 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC01 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -323,40 +485,262 @@
÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC01 ÷ 261D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ AC01 × 0308 ÷ 261D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ AC01 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ AC01 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ AC01 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ AC01 × 0308 ÷ 2764 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ AC01 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ AC01 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC01 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ AC01 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F1E6 ÷ D800 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ D800 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1F1E6 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [8.11] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1F1E6 ÷ 261D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 261D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1F1E6 ÷ 1F3FB ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1F1E6 ÷ 2764 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 2764 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 1F1E6 ÷ 1F466 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1F466 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F1E6 ÷ D800 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ D800 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 261D ÷ 0020 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 261D × 0308 ÷ 0020 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 261D ÷ 000D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 261D × 0308 ÷ 000D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 261D ÷ 000A ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 261D × 0308 ÷ 000A ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 261D ÷ 0001 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 261D × 0308 ÷ 0001 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 261D × 0300 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 261D × 0308 × 0300 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 261D ÷ 0600 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 261D × 0308 ÷ 0600 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 261D × 0903 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 261D × 0308 × 0903 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 261D ÷ 1100 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 261D × 0308 ÷ 1100 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 261D ÷ 1160 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 261D × 0308 ÷ 1160 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 261D ÷ 11A8 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 261D × 0308 ÷ 11A8 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 261D ÷ AC00 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 261D × 0308 ÷ AC00 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 261D ÷ AC01 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 261D × 0308 ÷ AC01 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 261D ÷ 1F1E6 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 261D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 261D ÷ 261D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 261D × 0308 ÷ 261D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 261D × 1F3FB ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 261D × 0308 ÷ 1F3FB ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 261D × 200D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 261D × 0308 × 200D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 261D ÷ 2764 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 261D × 0308 ÷ 2764 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 261D ÷ 1F466 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 261D × 0308 ÷ 1F466 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 261D ÷ 0378 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 261D × 0308 ÷ 0378 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 261D ÷ D800 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 261D × 0308 ÷ D800 ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F3FB ÷ 0020 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 0020 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F3FB ÷ 000D ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 000D ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F3FB ÷ 000A ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 000A ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F3FB ÷ 0001 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 0001 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F3FB × 0300 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1F3FB × 0308 × 0300 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1F3FB ÷ 0600 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 0600 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1F3FB × 0903 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F3FB × 0308 × 0903 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F3FB ÷ 1100 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 1100 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F3FB ÷ 1160 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 1160 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F3FB ÷ 11A8 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 11A8 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F3FB ÷ AC00 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F3FB × 0308 ÷ AC00 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F3FB ÷ AC01 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F3FB × 0308 ÷ AC01 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F3FB ÷ 1F1E6 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1F3FB ÷ 261D ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 261D ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1F3FB ÷ 1F3FB ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 1F3FB ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1F3FB × 200D ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1F3FB × 0308 × 200D ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1F3FB ÷ 2764 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 2764 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 1F3FB ÷ 1F466 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 1F466 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F3FB ÷ 0378 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F3FB × 0308 ÷ 0378 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F3FB ÷ D800 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F3FB × 0308 ÷ D800 ÷ # ÷ [0.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 200D ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 200D × 0308 ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 200D × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 200D ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 200D × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 200D ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 200D ÷ 261D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 200D × 0308 ÷ 261D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 200D ÷ 1F3FB ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 200D × 0308 ÷ 1F3FB ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 200D × 2764 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.4] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 200D × 0308 ÷ 2764 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 200D × 1F466 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.4] BOY (EBG) ÷ [0.3]
÷ 200D × 0308 ÷ 1F466 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 200D ÷ D800 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 200D × 0308 ÷ D800 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 2764 ÷ 0020 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 2764 × 0308 ÷ 0020 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 2764 ÷ 000D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 2764 × 0308 ÷ 000D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 2764 ÷ 000A ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 2764 × 0308 ÷ 000A ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 2764 ÷ 0001 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 2764 × 0308 ÷ 0001 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 2764 × 0300 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 2764 × 0308 × 0300 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 2764 ÷ 0600 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 2764 × 0308 ÷ 0600 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 2764 × 0903 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 2764 × 0308 × 0903 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 2764 ÷ 1100 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 2764 × 0308 ÷ 1100 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 2764 ÷ 1160 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 2764 × 0308 ÷ 1160 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 2764 ÷ 11A8 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 2764 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 2764 ÷ AC00 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 2764 × 0308 ÷ AC00 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 2764 ÷ AC01 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 2764 × 0308 ÷ AC01 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 2764 ÷ 1F1E6 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 2764 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 2764 ÷ 261D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 2764 × 0308 ÷ 261D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 2764 ÷ 1F3FB ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 2764 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 2764 × 200D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 2764 × 0308 × 200D ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 2764 ÷ 2764 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 2764 × 0308 ÷ 2764 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 2764 ÷ 1F466 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 2764 × 0308 ÷ 1F466 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 2764 ÷ 0378 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 2764 × 0308 ÷ 0378 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 2764 ÷ D800 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 2764 × 0308 ÷ D800 ÷ # ÷ [0.2] HEAVY BLACK HEART (Glue_After_Zwj) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F466 ÷ 0020 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F466 × 0308 ÷ 0020 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F466 ÷ 000D ÷ # ÷ [0.2] BOY (EBG) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F466 × 0308 ÷ 000D ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F466 ÷ 000A ÷ # ÷ [0.2] BOY (EBG) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F466 × 0308 ÷ 000A ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F466 ÷ 0001 ÷ # ÷ [0.2] BOY (EBG) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F466 × 0308 ÷ 0001 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F466 × 0300 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1F466 × 0308 × 0300 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1F466 ÷ 0600 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1F466 × 0308 ÷ 0600 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1F466 × 0903 ÷ # ÷ [0.2] BOY (EBG) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F466 × 0308 × 0903 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F466 ÷ 1100 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F466 × 0308 ÷ 1100 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F466 ÷ 1160 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F466 × 0308 ÷ 1160 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F466 ÷ 11A8 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F466 × 0308 ÷ 11A8 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F466 ÷ AC00 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F466 × 0308 ÷ AC00 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F466 ÷ AC01 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F466 × 0308 ÷ AC01 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F466 ÷ 1F1E6 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1F466 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1F466 ÷ 261D ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1F466 × 0308 ÷ 261D ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1F466 × 1F3FB ÷ # ÷ [0.2] BOY (EBG) × [9.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1F466 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 1F466 × 200D ÷ # ÷ [0.2] BOY (EBG) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1F466 × 0308 × 200D ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 1F466 ÷ 2764 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 1F466 × 0308 ÷ 2764 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 1F466 ÷ 1F466 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F466 × 0308 ÷ 1F466 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 1F466 ÷ 0378 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F466 × 0308 ÷ 0378 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F466 ÷ D800 ÷ # ÷ [0.2] BOY (EBG) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F466 × 0308 ÷ D800 ÷ # ÷ [0.2] BOY (EBG) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@ -367,6 +751,8 @@
÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0378 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0378 × 0308 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -379,8 +765,18 @@
÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0378 ÷ 261D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0378 × 0308 ÷ 261D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 0378 ÷ 1F3FB ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0378 × 0308 ÷ 1F3FB ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0378 ÷ 2764 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0378 × 0308 ÷ 2764 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 0378 ÷ 1F466 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0378 × 0308 ÷ 1F466 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0378 ÷ D800 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
@ -395,6 +791,8 @@
÷ D800 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ D800 ÷ 0300 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ D800 ÷ 0308 × 0300 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ D800 ÷ 0600 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ D800 ÷ 0903 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ D800 ÷ 0308 × 0903 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ D800 ÷ 1100 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
@ -407,23 +805,45 @@
÷ D800 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ D800 ÷ AC01 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ D800 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ D800 ÷ 1F1E6 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ D800 ÷ 1F1E6 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ D800 ÷ 261D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 261D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ D800 ÷ 1F3FB ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 1F3FB ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ D800 ÷ 200D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ D800 ÷ 0308 × 200D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ D800 ÷ 2764 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 2764 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ D800 ÷ 1F466 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] BOY (EBG) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 1F466 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] BOY (EBG) ÷ [0.3]
÷ D800 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ D800 ÷ D800 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3]
÷ D800 ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0061 ÷ 1F1E6 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 1F1F7 × 1F1FA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) ÷ [0.3]
÷ 1F1F7 × 1F1FA ÷ 1F1F8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) ÷ [0.3]
÷ 1F1F7 × 1F1FA ÷ 1F1F8 × 1F1EA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER E (Regional_Indicator) ÷ [0.3]
÷ 1F1F7 × 1F1FA ÷ 200B ÷ 1F1F8 × 1F1EA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) ÷ [5.0] ZERO WIDTH SPACE (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER E (Regional_Indicator) ÷ [0.3]
÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3]
÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3]
÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3]
÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3]
÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [0.3]
÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [0.3]
÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3]
÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [8.11] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [8.13] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [8.12] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [8.13] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [8.12] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [8.12] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [8.12] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [8.13] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [8.12] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 261D × 1F3FB ÷ 261D ÷ # ÷ [0.2] WHITE UP POINTING INDEX (E_Base) × [9.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
÷ 1F466 × 1F3FB ÷ # ÷ [0.2] BOY (EBG) × [9.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 200D × 1F466 × 1F3FB ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.4] BOY (EBG) × [9.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
÷ 200D × 2764 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.4] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
÷ 200D × 1F466 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.4] BOY (EBG) ÷ [0.3]
÷ 1F466 ÷ 1F466 ÷ # ÷ [0.2] BOY (EBG) ÷ [999.0] BOY (EBG) ÷ [0.3]
#
# Lines: 402
# Lines: 822
#
# EOF

View file

@ -1,4 +1,4 @@
# LineBreakTest-9.0.0.txt
# LineBreakTest-9.0.0.txt
# Date: 2016-03-23, 11:24:44 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.

View file

@ -1,10 +1,11 @@
# SentenceBreakTest-8.0.0.txt
# Date: 2015-04-30, 09:40:15 GMT [MD]
# SentenceBreakTest-9.0.0.txt
# Date: 2016-03-03, 12:44:04 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Sentence Break Test
#
@ -470,6 +471,10 @@
÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
÷ 000D × 000A ÷ 0061 × 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) × [12.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
÷ 0020 × 200D × 0646 ÷ # ÷ [0.2] SPACE (Sp) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [12.0] ARABIC LETTER NOON (OLetter) ÷ [0.3]
÷ 0646 × 200D × 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (OLetter) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [12.0] SPACE (Sp) ÷ [0.3]
÷ 0028 × 0022 × 0047 × 006F × 002E × 0022 × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [12.0] QUOTATION MARK (Close) × [12.0] LATIN CAPITAL LETTER G (Upper) × [12.0] LATIN SMALL LETTER O (Lower) × [12.0] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [12.0] LATIN CAPITAL LETTER H (Upper) × [12.0] LATIN SMALL LETTER E (Lower) × [12.0] SPACE (Sp) × [12.0] LATIN SMALL LETTER D (Lower) × [12.0] LATIN SMALL LETTER I (Lower) × [12.0] LATIN SMALL LETTER D (Lower) × [12.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3]
÷ 0028 × 201C × 0047 × 006F × 003F × 201D × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [12.0] LEFT DOUBLE QUOTATION MARK (Close) × [12.0] LATIN CAPITAL LETTER G (Upper) × [12.0] LATIN SMALL LETTER O (Lower) × [12.0] QUESTION MARK (STerm) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [12.0] LATIN CAPITAL LETTER H (Upper) × [12.0] LATIN SMALL LETTER E (Lower) × [12.0] SPACE (Sp) × [12.0] LATIN SMALL LETTER D (Lower) × [12.0] LATIN SMALL LETTER I (Lower) × [12.0] LATIN SMALL LETTER D (Lower) × [12.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3]
÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E × 0020 × 0069 × 0073 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [12.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [12.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [12.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER I (Lower) × [12.0] LATIN SMALL LETTER S (Lower) ÷ [0.3]
@ -518,12 +523,7 @@
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 3002 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.1] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 5B57 × 2060 × 3002 × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [12.0] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 × 0021 × 2060 × 0020 × 2060 × 0020 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] EXCLAMATION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [10.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 1F1E6 × 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Other) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (Other) × [12.0] REGIONAL INDICATOR SYMBOL LETTER C (Other) ÷ [0.3]
÷ 1F1E6 × 200D × 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Other) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (Other) × [12.0] REGIONAL INDICATOR SYMBOL LETTER C (Other) ÷ [0.3]
÷ 1F1E6 × 1F1E7 × 200D × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Other) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (Other) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [12.0] REGIONAL INDICATOR SYMBOL LETTER C (Other) ÷ [0.3]
÷ 0020 × 200D × 0646 ÷ # ÷ [0.2] SPACE (Sp) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [12.0] ARABIC LETTER NOON (OLetter) ÷ [0.3]
÷ 0646 × 200D × 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (OLetter) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [12.0] SPACE (Sp) ÷ [0.3]
#
# Lines: 503
# Lines: 502
#
# EOF

File diff suppressed because it is too large Load diff

View file

@ -12,28 +12,32 @@
type = grapheme; # one of grapheme | word | line | sentence
locale = en;
CR = [\u000d];
LF = [\u000a];
CR = [\p{Grapheme_Cluster_Break = CR}];
LF = [\p{Grapheme_Cluster_Break = LF}];
Control = [[\p{Grapheme_Cluster_Break = Control}]-[:Block=Tags:]];
Extend = [[\p{Grapheme_Cluster_Break = Extend}][:Block=Tags:]];
Control = [[\p{Grapheme_Cluster_Break = Control}]];
Extend = [[\p{Grapheme_Cluster_Break = Extend}]];
ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}];
Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
Prepend = [];
Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
E_Base = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
E_Modifier = [\U0001F3FB-\U0001F3FF];
GAZ = [\U0001F466-\U0001F469\U0001F48B\U0001F5E8\u2764];
ZWJ = [\u200D];
#
# Korean Syllable Definitions
#
L = [\p{Grapheme_Cluster_Break = L}];
V = [\p{Grapheme_Cluster_Break = V}];
T = [\p{Grapheme_Cluster_Break = T}];
L = [\p{Grapheme_Cluster_Break = L}];
V = [\p{Grapheme_Cluster_Break = V}];
T = [\p{Grapheme_Cluster_Break = T}];
LV = [\p{Grapheme_Cluster_Break = LV}];
LVT = [\p{Grapheme_Cluster_Break = LVT}];
# Emoji defintions
E_Base = [\p{Grapheme_Cluster_Break = EB}];
E_Modifier = [\p{Grapheme_Cluster_Break = EM}];
GAZ = [\p{Grapheme_Cluster_Break = GAZ}];
E_Base_GAZ = [\p{Grapheme_Cluster_Break = EBG}];
LV = [\p{Grapheme_Cluster_Break = LV}];
LVT = [\p{Grapheme_Cluster_Break = LVT}];
GB3: CR LF;
GB4: (Control | CR | LF) ÷;
@ -50,11 +54,11 @@ GB8: (LVT | T) T;
GB8a.1: Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
GB8a.2: Regional_Indicator Regional_Indicator;
GB9: . Extend;
GB9: . (Extend | ZWJ);
GB9a: . SpacingMark;
GB9b: Prepend .;
GB9c: (E_Base | GAZ) E_Modifier;
GB9d: ZWJ GAZ;
GB10: (E_Base | E_Base_GAZ) E_Modifier;
GB11: ZWJ (GAZ | E_Base_GAZ);
GB10: . ÷;
GB999: . ÷;

View file

@ -14,7 +14,7 @@ locale = en;
AI = [:LineBreak = Ambiguous:];
AL = [[:LineBreak = Alphabetic:]-[\u2764]];
AL = [:LineBreak = Alphabetic:];
BA = [:LineBreak = Break_After:];
BB = [:LineBreak = Break_Before:];
BK = [:LineBreak = Mandatory_Break:];
@ -25,17 +25,15 @@ CL = [:LineBreak = Close_Punctuation:];
CM = [:LineBreak = Combining_Mark:];
CP = [:LineBreak = Close_Parenthesis:];
CR = [:LineBreak = Carriage_Return:];
EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
EM = [\U0001F3FB-\U0001F3FF];
EB = [:LineBreak = EB:];
EM = [:LineBreak = EM:];
EX = [:LineBreak = Exclamation:];
GL = [:LineBreak = Glue:];
HL = [:LineBreak = Hebrew_Letter:];
HY = [:LineBreak = Hyphen:];
H2 = [:LineBreak = H2:];
H3 = [:LineBreak = H3:];
ID = [[:LineBreak = Ideographic:][\u2764]];
ID = [:LineBreak = Ideographic:];
IN = [:LineBreak = Inseperable:];
IS = [:LineBreak = Infix_Numeric:];
JL = [:LineBreak = JL:];
@ -43,7 +41,7 @@ JV = [:LineBreak = JV:];
JT = [:LineBreak = JT:];
LF = [:LineBreak = Line_Feed:];
NL = [:LineBreak = Next_Line:];
NS = [[:LineBreak = Nonstarter:] CJ];
NS = [[:LineBreak = Nonstarter:] CJ]; # CSS Strict tailoring: CJ resolves to NS.
NU = [:LineBreak = Numeric:];
OP = [:LineBreak = Open_Punctuation:];
PO = [:LineBreak = Postfix_Numeric:];
@ -57,16 +55,15 @@ SY = [:LineBreak = Break_Symbols:];
WJ = [:LineBreak = Word_Joiner:];
XX = [:LineBreak = Unknown:];
ZW = [:LineBreak = ZWSpace:];
ZJ = [\u200D];
ZWJ = [:LineBreak = ZWJ:];
# TODO: adjustment to sets needed only until Unicode properties are updated for Emoji.
ID = [ID - EB];
AL = [AL - EM];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
dictionary = [:LineBreak = Complex_Context:];
# Redfine AL. LB1. TODO: refine according to latest UAX.
AL = [ AL AI SA SG XX ];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
CM = [CM ZWJ];
LB4: BK ÷;
LB5: CR LF;
@ -96,7 +93,7 @@ LB8: ZW ÷;
# LB8a, from Emoji proposal L2/16-011R3
# ZWJ x ID
LB8a: ZJ (ID | EB | EM);
LB8a: ZWJ (ID | EB | EM);
# LB9: X CM -> X
@ -131,11 +128,11 @@ LB19: . CM* QU;
LB19.1: QU CM* [^CM];
# LB 20 Break before and after CB.
# Interaction with LB8a: ZJ x ID is tricky because CM includes ZJ.
# ZJ acts like a CM to the left, combining with CB.
# ZJ acts independently to the right, no break from ID by LB8a.
# Interaction with LB8a: ZWJ x ID is tricky because CM includes ZWJ.
# ZWJ acts like a CM to the left, combining with CB.
# ZWJ acts independently to the right, no break from ID by LB8a.
LB20: . CM* ÷ CB;
LB20.1a: CB CM* ZJ (ID | EB | EM);
LB20.1a: CB CM* ZWJ (ID | EB | EM);
LB20.1b: CB CM* ÷;
# Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
@ -185,7 +182,7 @@ LB30.2: CP CM* (AL | HL | NU);
# LB31 keep pairs of RI together.
LB30a.1: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS];
LB30a.2: RI CM* RI CM* ZJ (ID | EB | EM);
LB30a.2: RI CM* RI CM* ZWJ (ID | EB | EM);
LB30a.3: RI CM* RI CM* ÷;
# LB30b Do not break between Emoji Base and Emoji Modifier
@ -193,5 +190,5 @@ LB30b: EB CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks
LB31.1: . CM* ZJ (ID | EB | EM);
LB31.1: . CM* ZWJ (ID | EB | EM);
LB31.2: . CM* ÷;

View file

@ -9,7 +9,7 @@
# They are expected to change with review and the addition of support for rule tailoring.
#
# This tailors the line break behavior to correspond to CSS
# line-break=loose (BCP47 -u-lb-loose) as defined for languages other than
# line-break=loose (BCP47 -u-lb-loose) as defined for languages other than
# Chinese & Japanese.
# It sets characters of class CJ to behave like ID.
# In addition, it allows breaks:
@ -21,7 +21,7 @@ locale = en@lb=loose;
AI = [:LineBreak = Ambiguous:];
AL = [[:LineBreak = Alphabetic:]-[\u2764]];
AL = [:LineBreak = Alphabetic:];
BA = [:LineBreak = Break_After:];
BB = [:LineBreak = Break_Before:];
BK = [:LineBreak = Mandatory_Break:];
@ -32,17 +32,15 @@ CL = [:LineBreak = Close_Punctuation:];
CM = [:LineBreak = Combining_Mark:];
CP = [:LineBreak = Close_Parenthesis:];
CR = [:LineBreak = Carriage_Return:];
EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
EM = [\U0001F3FB-\U0001F3FF];
EB = [:LineBreak = EB:];
EM = [:LineBreak = EM:];
EX = [:LineBreak = Exclamation:];
GL = [:LineBreak = Glue:];
HL = [:LineBreak = Hebrew_Letter:];
HY = [:LineBreak = Hyphen:];
H2 = [:LineBreak = H2:];
H3 = [:LineBreak = H3:];
ID = [[:LineBreak = Ideographic:] CJ [\u2764]];
ID = [[:LineBreak = Ideographic:] CJ]; # CSS Normal tailoring: CJ resolves to ID
IN = [:LineBreak = Inseperable:];
IS = [:LineBreak = Infix_Numeric:];
JL = [:LineBreak = JL:];
@ -65,16 +63,15 @@ SY = [:LineBreak = Break_Symbols:];
WJ = [:LineBreak = Word_Joiner:];
XX = [:LineBreak = Unknown:];
ZW = [:LineBreak = ZWSpace:];
ZJ = [\u200D];
ZWJ = [:LineBreak = ZWJ:];
# TODO: adjustment to sets needed only until Unicode properties are updated for Emoji.
ID = [ID - EB];
AL = [AL - EM];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
dictionary = [:LineBreak = Complex_Context:];
# Redfine AL. LB1. TODO: refine according to latest UAX.
AL = [ AL AI SA SG XX ];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
CM = [CM ZWJ];
LB4: BK ÷;
LB5: CR LF;
@ -104,7 +101,7 @@ LB8: ZW ÷;
# LB8a, from Emoji proposal L2/16-011R3
# ZWJ x ID
LB8a: ZJ (ID | EB | EM);
LB8a: ZWJ (ID | EB | EM);
# LB9: X CM -> X
@ -139,11 +136,11 @@ LB19: . CM* QU;
LB19.1: QU CM* [^CM];
# LB 20 Break before and after CB.
# Interaction with LB8a: ZJ x ID is tricky because CM includes ZJ.
# ZJ acts like a CM to the left, combining with CB.
# ZJ acts independently to the right, no break from ID by LB8a.
# Interaction with LB8a: ZWJ x ID is tricky because CM includes ZWJ.
# ZWJ acts like a CM to the left, combining with CB.
# ZWJ acts independently to the right, no break from ID by LB8a.
LB20: . CM* ÷ CB;
LB20.1a: CB CM* ZJ (ID | EB | EM);
LB20.1a: CB CM* ZWJ (ID | EB | EM);
LB20.1b: CB CM* ÷;
# Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
@ -193,7 +190,7 @@ LB30.2: CP CM* (AL | HL | NU);
# LB31 keep pairs of RI together.
LB30a.1: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS];
LB30a.2: RI CM* RI CM* ZJ (ID | EB | EM);
LB30a.2: RI CM* RI CM* ZWJ (ID | EB | EM);
LB30a.3: RI CM* RI CM* ÷;
# LB30b Do not break between Emoji Base and Emoji Modifier
@ -201,5 +198,5 @@ LB30b: EB CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks
LB31.1: . CM* ZJ (ID | EB | EM);
LB31.1: . CM* ZWJ (ID | EB | EM);
LB31.2: . CM* ÷;

View file

@ -9,7 +9,7 @@
# They are expected to change with review and the addition of support for rule tailoring.
#
# Line Breaking Rules
# Implement default line breaking as defined by
# Implement default line breaking as defined by
# Unicode Standard Annex #14 Revision 34 for Unicode 8.0
# http://www.unicode.org/reports/tr14/
# tailored as noted in 2nd paragraph below..
@ -34,7 +34,7 @@ locale = ja@lb=loose;
AI = [:LineBreak = Ambiguous:];
AL = [[:LineBreak = Alphabetic:]-[\u2764]];
AL = [[:LineBreak = Alphabetic:]];
BAX = [\u2010 \u2013];
BA = [[:LineBreak = Break_After:] - BAX];
BB = [:LineBreak = Break_Before:];
@ -46,10 +46,8 @@ CL = [:LineBreak = Close_Punctuation:];
CM = [:LineBreak = Combining_Mark:];
CP = [:LineBreak = Close_Parenthesis:];
CR = [:LineBreak = Carriage_Return:];
EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
EM = [\U0001F3FB-\U0001F3FF];
EB = [:LineBreak = EB:];
EM = [:LineBreak = EM:];
EXX = [\uFF01 \uFF1F];
EX = [[:LineBreak = Exclamation:] - EXX];
GL = [:LineBreak = Glue:];
@ -57,7 +55,7 @@ HL = [:LineBreak = Hebrew_Letter:];
HY = [:LineBreak = Hyphen:];
H2 = [:LineBreak = H2:];
H3 = [:LineBreak = H3:];
ID = [[:LineBreak = Ideographic:][\u2764]CJ];
ID = [[:LineBreak = Ideographic:] CJ]; # CSS Loose tailoring: CJ resolves to ID
IN = [:LineBreak = Inseperable:];
IS = [:LineBreak = Infix_Numeric:];
JL = [:LineBreak = JL:];
@ -82,16 +80,15 @@ SY = [:LineBreak = Break_Symbols:];
WJ = [:LineBreak = Word_Joiner:];
XX = [:LineBreak = Unknown:];
ZW = [:LineBreak = ZWSpace:];
ZJ = [\u200D];
ZWJ = [:LineBreak = ZWJ:];
# TODO: adjustment to sets needed only until Unicode properties are updated for Emoji.
ID = [ID - EB];
AL = [AL - EM];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
dictionary = [:LineBreak = Complex_Context:];
# Redfine AL. LB1. TODO: refine according to latest UAX.
AL = [ AL AI SA SG XX ];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
CM = [CM ZWJ];
LB4: BK ÷;
LB5: CR LF;
@ -121,7 +118,7 @@ LB8: ZW ÷;
# LB8a, from Emoji proposal L2/16-011R3
# ZWJ x ID
LB8a: ZJ (ID | EB | EM);
LB8a: ZWJ (ID | EB | EM);
# LB9: X CM -> X
@ -156,11 +153,11 @@ LB19: . CM* QU;
LB19.1: QU CM* [^CM];
# LB 20 Break before and after CB.
# Interaction with LB8a: ZJ x ID is tricky because CM includes ZJ.
# ZJ acts like a CM to the left, combining with CB.
# ZJ acts independently to the right, no break from ID by LB8a.
# Interaction with LB8a: ZWJ x ID is tricky because CM includes ZWJ.
# ZWJ acts like a CM to the left, combining with CB.
# ZWJ acts independently to the right, no break from ID by LB8a.
LB20: . CM* ÷ CB;
LB20.1a: CB CM* ZJ (ID | EB | EM);
LB20.1a: CB CM* ZWJ (ID | EB | EM);
LB20.1b: CB CM* ÷;
# Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
@ -214,7 +211,7 @@ LB30.2: CP CM* (AL | HL | NU);
# LB31 keep pairs of RI together.
LB30a.1: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS];
LB30a.2: RI CM* RI CM* ZJ (ID | EB | EM);
LB30a.2: RI CM* RI CM* ZWJ (ID | EB | EM);
LB30a.3: RI CM* RI CM* ÷;
# LB30b Do not break between Emoji Base and Emoji Modifier
@ -222,5 +219,5 @@ LB30b: EB CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks
LB31.1: . CM* ZJ (ID | EB | EM);
LB31.1: . CM* ZWJ (ID | EB | EM);
LB31.2: . CM* ÷;

View file

@ -9,17 +9,17 @@
# They are expected to change with review and the addition of support for rule tailoring.
#
# Line Breaking Rules
# Implement default line breaking as defined by
# Implement default line breaking as defined by
# Unicode Standard Annex #14 Revision 34 for Unicode 8.0
# http://www.unicode.org/reports/tr14/
# tailored as noted in 2nd paragraph below..
# tailored as noted in 2nd paragraph below.
#
# TODO: Rule LB 8 remains as it was in Unicode 5.2
# This is only because of a limitation of ICU break engine implementation,
# not because the older behavior is desirable.
#
# This tailors the line break behavior to correspond to CSS
# line-break=normal (BCP47 -u-lb-normal) as defined for languages other than
# line-break=normal (BCP47 -u-lb-normal) as defined for languages other than
# Chinese & Japanese.
# It sets characters of class CJ to behave like ID.
@ -28,7 +28,7 @@ type = line;
locale = en@lb=normal;
AI = [:LineBreak = Ambiguous:];
AL = [[:LineBreak = Alphabetic:]-[\u2764]];
AL = [:LineBreak = Alphabetic:];
BA = [:LineBreak = Break_After:];
BB = [:LineBreak = Break_Before:];
BK = [:LineBreak = Mandatory_Break:];
@ -39,17 +39,15 @@ CL = [:LineBreak = Close_Punctuation:];
CM = [:LineBreak = Combining_Mark:];
CP = [:LineBreak = Close_Parenthesis:];
CR = [:LineBreak = Carriage_Return:];
EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
EM = [\U0001F3FB-\U0001F3FF];
EB = [:LineBreak = EB:];
EM = [:LineBreak = EM:];
EX = [:LineBreak = Exclamation:];
GL = [:LineBreak = Glue:];
HL = [:LineBreak = Hebrew_Letter:];
HY = [:LineBreak = Hyphen:];
H2 = [:LineBreak = H2:];
H3 = [:LineBreak = H3:];
ID = [[:LineBreak = Ideographic:] CJ [\u2764]];
ID = [[:LineBreak = Ideographic:] CJ]; # CSS Normal tailoring: CJ resolves to ID
IN = [:LineBreak = Inseperable:];
IS = [:LineBreak = Infix_Numeric:];
JL = [:LineBreak = JL:];
@ -71,16 +69,15 @@ SY = [:LineBreak = Break_Symbols:];
WJ = [:LineBreak = Word_Joiner:];
XX = [:LineBreak = Unknown:];
ZW = [:LineBreak = ZWSpace:];
ZJ = [\u200D];
ZWJ = [:LineBreak = ZWJ:];
# TODO: adjustment to sets needed only until Unicode properties are updated for Emoji.
ID = [ID - EB];
AL = [AL - EM];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
dictionary = [:LineBreak = Complex_Context:];
# Redfine AL. LB1. TODO: refine according to latest UAX.
AL = [ AL AI SA SG XX ];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
CM = [CM ZWJ];
LB4: BK ÷;
LB5: CR LF;
@ -110,7 +107,7 @@ LB8: ZW ÷;
# LB8a, from Emoji proposal L2/16-011R3
# ZWJ x ID
LB8a: ZJ (ID | EB | EM);
LB8a: ZWJ (ID | EB | EM);
# LB9: X CM -> X
@ -145,11 +142,11 @@ LB19: . CM* QU;
LB19.1: QU CM* [^CM];
# LB 20 Break before and after CB.
# Interaction with LB8a: ZJ x ID is tricky because CM includes ZJ.
# ZJ acts like a CM to the left, combining with CB.
# ZJ acts independently to the right, no break from ID by LB8a.
# Interaction with LB8a: ZWJ x ID is tricky because CM includes ZWJ.
# ZWJ acts like a CM to the left, combining with CB.
# ZWJ acts independently to the right, no break from ID by LB8a.
LB20: . CM* ÷ CB;
LB20.1a: CB CM* ZJ (ID | EB | EM);
LB20.1a: CB CM* ZWJ (ID | EB | EM);
LB20.1b: CB CM* ÷;
# Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
@ -199,7 +196,7 @@ LB30.2: CP CM* (AL | HL | NU);
# LB31 keep pairs of RI together.
LB30a.1: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS];
LB30a.2: RI CM* RI CM* ZJ (ID | EB | EM);
LB30a.2: RI CM* RI CM* ZWJ (ID | EB | EM);
LB30a.3: RI CM* RI CM* ÷;
# LB30b Do not break between Emoji Base and Emoji Modifier
@ -207,5 +204,5 @@ LB30b: EB CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks
LB31.1: . CM* ZJ (ID | EB | EM);
LB31.1: . CM* ZWJ (ID | EB | EM);
LB31.2: . CM* ÷;

View file

@ -8,10 +8,10 @@
# They are expected to change with review and the addition of support for rule tailoring.
#
# Line Breaking Rules
# Implement default line breaking as defined by
# Implement default line breaking as defined by
# Unicode Standard Annex #14 Revision 34 for Unicode 8.0
# http://www.unicode.org/reports/tr14/
# tailored as noted in 2nd paragraph below..
# tailored as noted in 2nd paragraph below.
#
# TODO: Rule LB 8 remains as it was in Unicode 5.2
# This is only because of a limitation of ICU break engine implementation,
@ -27,7 +27,7 @@ type = line;
locale = ja@lb=normal;
AI = [:LineBreak = Ambiguous:];
AL = [[:LineBreak = Alphabetic:]-[\u2764]];
AL = [:LineBreak = Alphabetic:];
BAX = [\u2010 \u2013];
BA = [[:LineBreak = Break_After:] - BAX];
BB = [:LineBreak = Break_Before:];
@ -39,17 +39,15 @@ CL = [:LineBreak = Close_Punctuation:];
CM = [:LineBreak = Combining_Mark:];
CP = [:LineBreak = Close_Parenthesis:];
CR = [:LineBreak = Carriage_Return:];
EB = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
EM = [\U0001F3FB-\U0001F3FF];
EB = [:LineBreak = EB:];
EM = [:LineBreak = EM:];
EX = [:LineBreak = Exclamation:];
GL = [:LineBreak = Glue:];
HL = [:LineBreak = Hebrew_Letter:];
HY = [:LineBreak = Hyphen:];
H2 = [:LineBreak = H2:];
H3 = [:LineBreak = H3:];
ID = [[:LineBreak = Ideographic:] CJ [\u2764]];
ID = [[:LineBreak = Ideographic:] CJ]; # CSS Normal tailoring: CJ resolves to ID
IN = [:LineBreak = Inseperable:];
IS = [:LineBreak = Infix_Numeric:];
JL = [:LineBreak = JL:];
@ -72,16 +70,15 @@ SY = [:LineBreak = Break_Symbols:];
WJ = [:LineBreak = Word_Joiner:];
XX = [:LineBreak = Unknown:];
ZW = [:LineBreak = ZWSpace:];
ZJ = [\u200D];
ZWJ = [:LineBreak = ZWJ:];
# TODO: adjustment to sets needed only until Unicode properties are updated for Emoji.
ID = [ID - EB];
AL = [AL - EM];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
dictionary = [:LineBreak = Complex_Context:];
# Redfine AL. LB1. TODO: refine according to latest UAX.
AL = [ AL AI SA SG XX ];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
CM = [CM ZWJ];
LB4: BK ÷;
LB5: CR LF;
@ -114,7 +111,7 @@ LB8: ZW ÷;
# LB8a, from Emoji proposal L2/16-011R3
# ZWJ x ID
LB8a: ZJ (ID | EB | EM);
LB8a: ZWJ (ID | EB | EM);
# LB9: X CM -> X
@ -149,11 +146,11 @@ LB19: . CM* QU;
LB19.1: QU CM* [^CM];
# LB 20 Break before and after CB.
# Interaction with LB8a: ZJ x ID is tricky because CM includes ZJ.
# ZJ acts like a CM to the left, combining with CB.
# ZJ acts independently to the right, no break from ID by LB8a.
# Interaction with LB8a: ZWJ x ID is tricky because CM includes ZWJ.
# ZWJ acts like a CM to the left, combining with CB.
# ZWJ acts independently to the right, no break from ID by LB8a.
LB20: . CM* ÷ CB;
LB20.1a: CB CM* ZJ (ID | EB | EM);
LB20.1a: CB CM* ZWJ (ID | EB | EM);
LB20.1b: CB CM* ÷;
# Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
@ -207,7 +204,7 @@ LB30.2: CP CM* (AL | HL | NU);
# LB31 keep pairs of RI together.
LB30a.1: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS];
LB30a.2: RI CM* RI CM* ZJ (ID | EB | EM);
LB30a.2: RI CM* RI CM* ZWJ (ID | EB | EM);
LB30a.3: RI CM* RI CM* ÷;
# LB30b Do not break between Emoji Base and Emoji Modifier
@ -215,5 +212,5 @@ LB30b: EB CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks
LB31.1: . CM* ZJ (ID | EB | EM);
LB31.1: . CM* ZWJ (ID | EB | EM);
LB31.2: . CM* ÷;

View file

@ -12,17 +12,14 @@
type = word; # one of grapheme | word | line | sentence
locale = en;
E_Base = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
E_Modifier = [\U0001F3FB-\U0001F3FF];
ZWJ = [\u200D];
GAZ = [\U0001F466-\U0001F469\U0001F48B\U0001F5E8\u2764];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
Extend = [[[\p{Word_Break = Extend}][:Block=Tags:]]-ZWJ];
Extend = [\p{Word_Break = Extend}];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [[\p{Word_Break = Format}]-[:Block=Tags:]];
Format = [\p{Word_Break = Format}];
Katakana = [\p{Word_Break = Katakana}];
Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
ALetter = [\p{Word_Break = ALetter}];
@ -33,6 +30,10 @@ MidLetter = [\p{Word_Break = MidLetter}];
MidNum = [\p{Word_Break = MidNum}];
Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
E_Base = [\p{Word_Break = EB}];
E_Modifier = [\p{Word_Break = EM}];
GAZ = [\p{Word_Break = GAZ}];
EBG = [\p{Word_Break = EBG}];
#define dicitionary, with the effect being that those characters don't appear in test data.
@ -63,7 +64,7 @@ WB3: CR LF;
WB3a: (Newline | CR | LF) ÷;
WB3b: . ÷ (Newline | CR | LF); # actually redundant? No other rule combines.
# (but needed with UAX treat-as scheme.)
WB3c: ZWJ GAZ;
WB3c: ZWJ (GAZ | EBG);
WB5: AHLetter ExtFmt* AHLetter;
@ -83,15 +84,15 @@ WB13: Katakana ExtFmt* Katakana;
WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) ExtFmt* ExtendNumLet;
WB13b: ExtendNumLet ExtFmt* (AHLetter | Numeric | Katakana);
# WB rule 13c, pairs of Regional Indicators stay unbroken.
# WB rule 15 - 17, pairs of Regional Indicators stay unbroken.
# Interacts with WB3c.
WB13c.1: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ GAZ;
WB13c.2: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ÷;
WB15: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ (GAZ | EBG);
WB17: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ÷;
WB13d: (E_Base | GAZ) ExtFmt* E_Modifier;
WB14: (E_Base | EBG) ExtFmt* E_Modifier;
# Rule WB 14 Any ÷ Any
# Interacts with WB3c, do not break between ZWJ and GAZ.
WB14.1: . ExtFmt* ZWJ GAZ;
WB14.2: . ExtFmt* ÷;
# Rule WB 999 Any ÷ Any
# Interacts with WB3c, do not break between ZWJ and (GAZ | EBG).
WB999.1: . ExtFmt* ZWJ (GAZ | EBG);
WB999.2: . ExtFmt* ÷;

View file

@ -11,17 +11,14 @@
type = word; # one of grapheme | word | line | sentence
locale = en_US_POSIX;
E_Base = [\u261D\u26F9\u270A-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA-\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918];
E_Modifier = [\U0001F3FB-\U0001F3FF];
ZWJ = [\u200D];
GAZ = [\U0001F466-\U0001F469\U0001F48B\U0001F5E8\u2764];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
Extend = [[[\p{Word_Break = Extend}][:Block=Tags:]]-ZWJ];
Extend = [\p{Word_Break = Extend}];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [[\p{Word_Break = Format}]-[:Block=Tags:]];
Format = [\p{Word_Break = Format}];
Katakana = [\p{Word_Break = Katakana}];
Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
ALetter = [\p{Word_Break = ALetter}];
@ -32,6 +29,10 @@ MidLetter = [\p{Word_Break = MidLetter} - [\:]];
MidNum = [\p{Word_Break = MidNum} [.]];
Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
E_Base = [\p{Word_Break = EB}];
E_Modifier = [\p{Word_Break = EM}];
GAZ = [\p{Word_Break = GAZ}];
EBG = [\p{Word_Break = EBG}];
#define dicitionary, with the effect being that those characters don't appear in test data.
@ -62,7 +63,7 @@ WB3: CR LF;
WB3a: (Newline | CR | LF) ÷;
WB3b: . ÷ (Newline | CR | LF); # actually redundant? No other rule combines.
# (but needed with UAX treat-as scheme.)
WB3c: ZWJ GAZ;
WB3c: ZWJ (GAZ | EBG);
WB5: AHLetter ExtFmt* AHLetter;
@ -82,15 +83,15 @@ WB13: Katakana ExtFmt* Katakana;
WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) ExtFmt* ExtendNumLet;
WB13b: ExtendNumLet ExtFmt* (AHLetter | Numeric | Katakana);
# WB rule 13c, pairs of Regional Indicators stay unbroken.
# WB rule 15 - 17, pairs of Regional Indicators stay unbroken.
# Interacts with WB3c.
WB13c.1: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ GAZ;
WB13c.2: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ÷;
WB15: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ (GAZ | EBG);
WB17: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ÷;
WB13d: (E_Base | GAZ) ExtFmt* E_Modifier;
WB14: (E_Base | EBG) ExtFmt* E_Modifier;
# Rule WB 14 Any ÷ Any
# Interacts with WB3c, do not break between ZWJ and GAZ.
WB14.1: . ExtFmt* ZWJ GAZ;
WB14.2: . ExtFmt* ÷;
# Rule WB 999 Any ÷ Any
# Interacts with WB3c, do not break between ZWJ and (GAZ | EBG).
WB999.1: . ExtFmt* ZWJ (GAZ | EBG);
WB999.2: . ExtFmt* ÷;

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2009-2014, International Business Machines
* Copyright (C) 2009-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -1247,6 +1247,7 @@ Normalizer2DataBuilder::writeCSourceFile(const char *filename) {
exit(U_FILE_ACCESS_ERROR);
return;
}
fputs("#ifdef INCLUDED_FROM_NORMALIZER2_CPP\n\n", f);
char line[100];
sprintf(line, "static const UVersionInfo %s_formatVersion={", dataName.data());
usrc_writeArray(f, line, dataInfo.formatVersion, 8, 4, "};\n");
@ -1287,6 +1288,7 @@ Normalizer2DataBuilder::writeCSourceFile(const char *filename) {
line,
norm16Trie, line2, NULL,
"};\n");
fputs("\n#endif // INCLUDED_FROM_NORMALIZER2_CPP\n", f);
fclose(f);
}