ICU-7084 Unicode 5.2: merge -r 26464:26890 branches/markus/uni52 into trunk, and a little cleanup (C++)

X-SVN-Rev: 26898
This commit is contained in:
Markus Scherer 2009-11-13 19:25:21 +00:00
parent 1be5303b73
commit 66b63f9c48
96 changed files with 577443 additions and 56197 deletions

View file

@ -341,7 +341,7 @@ getDirProps(UBiDi *pBiDi) {
const UChar *text=pBiDi->text;
DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
int32_t i=0, i0, i1, length=pBiDi->originalLength;
int32_t i=0, i1, length=pBiDi->originalLength;
Flags flags=0; /* collect all directionalities in the text */
UChar32 uchar;
DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */
@ -388,17 +388,13 @@ getDirProps(UBiDi *pBiDi) {
* their bit 0 alone yields the intended default
*/
for( /* i=0 above */ ; i<length; ) {
/* i is incremented by UTF_NEXT_CHAR */
i0=i; /* index of first code unit */
UTF_NEXT_CHAR(text, i, length, uchar);
i1=i-1; /* index of last code unit, gets the directional property */
/* i is incremented by U16_NEXT */
U16_NEXT(text, i, length, uchar);
flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
dirProps[i1]=dirProp|paraDir;
if(i1>i0) { /* set previous code units' properties to BN */
dirProps[i-1]=dirProp|paraDir;
if(uchar>0xffff) { /* set the lead surrogate's property to BN */
flags|=DIRPROP_FLAG(BN);
do {
dirProps[--i1]=(DirProp)(BN|paraDir);
} while(i1>i0);
dirProps[i-2]=(DirProp)(BN|paraDir);
}
if(state==LOOKING_FOR_STRONG) {
if(dirProp==L) {
@ -1421,7 +1417,7 @@ resolveImplicitLevels(UBiDi *pBiDi,
levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
processPropertySeq(pBiDi, &levState, sor, start, start);
/* initialize for property state table */
if(dirProps[start]==NSM) {
if(NO_CONTEXT_RTL(dirProps[start])==NSM) {
stateImp = 1 + sor;
} else {
stateImp=0;

File diff suppressed because it is too large Load diff

View file

@ -673,21 +673,20 @@ ucase_getType(const UCaseProps *csp, UChar32 c) {
return UCASE_GET_TYPE(props);
}
/** @return same as ucase_getType(), or <0 if c is case-ignorable */
/** @return same as ucase_getType(), with bit 2 (value 4) additionally set if c is case-ignorable */
U_CAPI int32_t U_EXPORT2
ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&csp->trie, c);
int32_t type=UCASE_GET_TYPE(props);
if(type!=UCASE_NONE) {
return type;
} else if(
c==0x307 ||
(props&(UCASE_EXCEPTION|UCASE_CASE_IGNORABLE))==UCASE_CASE_IGNORABLE
) {
return -1; /* case-ignorable */
} else {
return 0; /* c is neither cased nor case-ignorable */
if(props&UCASE_EXCEPTION) {
const uint16_t *pe=GET_EXCEPTIONS(csp, props);
if(*pe&UCASE_EXC_CASE_IGNORABLE) {
type|=4;
}
} else if(type==UCASE_NONE && (props&UCASE_CASE_IGNORABLE)) {
type|=4;
}
return type;
}
/** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */
@ -889,24 +888,30 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) {
return result;
}
/* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */
/*
* Is followed by
* {case-ignorable}* cased
* ?
* (dir determines looking forward/backward)
* If a character is case-ignorable, it is skipped regardless of whether
* it is also cased or not.
*/
static UBool
isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void *context, int8_t dir) {
UChar32 c;
uint16_t props;
if(iter==NULL) {
return FALSE;
}
for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
props=UTRIE2_GET16(&csp->trie, c);
if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
return TRUE; /* followed by cased letter */
} else if(c==0x307 || (props&(UCASE_EXCEPTION|UCASE_CASE_IGNORABLE))==UCASE_CASE_IGNORABLE) {
int32_t type=ucase_getTypeOrIgnorable(csp, c);
if(type&4) {
/* case-ignorable, continue with the loop */
} else if(type!=UCASE_NONE) {
return TRUE; /* followed by cased letter */
} else {
return FALSE; /* not ignorable */
return FALSE; /* uncased and not case-ignorable */
}
}
@ -1573,6 +1578,8 @@ u_foldCase(UChar32 c, uint32_t options) {
U_CFUNC int32_t U_EXPORT2
ucase_hasBinaryProperty(UChar32 c, UProperty which) {
/* case mapping properties */
const UChar *resultString;
int32_t locCache;
const UCaseProps *csp=GET_CASE_PROPS();
if(csp==NULL) {
return FALSE;
@ -1586,6 +1593,38 @@ ucase_hasBinaryProperty(UChar32 c, UProperty which) {
return ucase_isSoftDotted(csp, c);
case UCHAR_CASE_SENSITIVE:
return ucase_isCaseSensitive(csp, c);
case UCHAR_CASED:
return (UBool)(UCASE_NONE!=ucase_getType(csp, c));
case UCHAR_CASE_IGNORABLE:
return (UBool)(ucase_getTypeOrIgnorable(csp, c)>>2);
/*
* Note: The following Changes_When_Xyz are defined as testing whether
* the NFD form of the input changes when Xyz-case-mapped.
* However, this simpler implementation of these properties,
* ignoring NFD, passes the tests.
* The implementation needs to be changed if the tests start failing.
* When that happens, optimizations should be used to work with the
* per-single-code point ucase_toFullXyz() functions unless
* the NFD form has more than one code point,
* and the property starts set needs to be the union of the
* start sets for normalization and case mappings.
*/
case UCHAR_CHANGES_WHEN_LOWERCASED:
locCache=UCASE_LOC_ROOT;
return (UBool)(ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
case UCHAR_CHANGES_WHEN_UPPERCASED:
locCache=UCASE_LOC_ROOT;
return (UBool)(ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
case UCHAR_CHANGES_WHEN_TITLECASED:
locCache=UCASE_LOC_ROOT;
return (UBool)(ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
/* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
case UCHAR_CHANGES_WHEN_CASEMAPPED:
locCache=UCASE_LOC_ROOT;
return (UBool)(
ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 ||
ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 ||
ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
default:
return FALSE;
}

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2004-2008, International Business Machines
* Copyright (C) 2004-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -349,7 +349,9 @@ enum {
/* each slot is 2 uint16_t instead of 1 */
#define UCASE_EXC_DOUBLE_SLOTS 0x100
/* reserved: exception bits 11..9 */
/* reserved: exception bits 10..9 */
#define UCASE_EXC_CASE_IGNORABLE 0x800
/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */
#define UCASE_EXC_DOT_SHIFT 8

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
/*
********************************************************************************
* Copyright (C) 1996-2008, International Business Machines
* Copyright (C) 1996-2009, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************
*
@ -583,10 +583,11 @@ u_isJavaIDPart(UChar32 c) {
U_CAPI int32_t U_EXPORT2
u_charDigitValue(UChar32 c) {
uint32_t props;
int32_t value;
GET_PROPS(c, props);
if(GET_NUMERIC_TYPE(props)==1) {
return GET_NUMERIC_VALUE(props);
value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START;
if(value<=9) {
return value;
} else {
return -1;
}
@ -594,47 +595,32 @@ u_charDigitValue(UChar32 c) {
U_CAPI double U_EXPORT2
u_getNumericValue(UChar32 c) {
uint32_t props, numericType, numericValue;
uint32_t props;
int32_t ntv;
GET_PROPS(c, props);
numericType=GET_NUMERIC_TYPE(props);
ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props);
if(numericType==0 || numericType>=UPROPS_NT_COUNT) {
if(ntv==UPROPS_NTV_NONE) {
return U_NO_NUMERIC_VALUE;
}
numericValue=GET_NUMERIC_VALUE(props);
if(numericType<U_NT_COUNT) {
/* normal type, the value is stored directly */
return numericValue;
} else if(numericType==UPROPS_NT_FRACTION) {
/* fraction value */
int32_t numerator;
uint32_t denominator;
numerator=(int32_t)numericValue>>UPROPS_FRACTION_NUM_SHIFT;
denominator=(numericValue&UPROPS_FRACTION_DEN_MASK)+UPROPS_FRACTION_DEN_OFFSET;
if(numerator==0) {
numerator=-1;
}
return (double)numerator/(double)denominator;
} else /* numericType==UPROPS_NT_LARGE */ {
/* large value with exponent */
} else if(ntv<UPROPS_NTV_DIGIT_START) {
/* decimal digit */
return ntv-UPROPS_NTV_DECIMAL_START;
} else if(ntv<UPROPS_NTV_NUMERIC_START) {
/* other digit */
return ntv-UPROPS_NTV_DIGIT_START;
} else if(ntv<UPROPS_NTV_FRACTION_START) {
/* small integer */
return ntv-UPROPS_NTV_NUMERIC_START;
} else if(ntv<UPROPS_NTV_LARGE_START) {
/* fraction */
int32_t numerator=(ntv>>4)-12;
int32_t denominator=(ntv&0xf)+1;
return (double)numerator/denominator;
} else if(ntv<UPROPS_NTV_RESERVED_START) {
/* large, single-significant-digit integer */
double numValue;
int32_t mant, exp;
mant=(int32_t)numericValue>>UPROPS_LARGE_MANT_SHIFT;
exp=(int32_t)numericValue&UPROPS_LARGE_EXP_MASK;
if(mant==0) {
mant=1;
exp+=UPROPS_LARGE_EXP_OFFSET_EXTRA;
} else if(mant>9) {
return U_NO_NUMERIC_VALUE; /* reserved mantissa value */
} else {
exp+=UPROPS_LARGE_EXP_OFFSET;
}
int32_t mant=(ntv>>5)-14;
int32_t exp=(ntv&0x1f)+2;
numValue=mant;
/* multiply by 10^exp without math.h */
@ -658,6 +644,9 @@ u_getNumericValue(UChar32 c) {
}
return numValue;
} else {
/* reserved */
return U_NO_NUMERIC_VALUE;
}
}
@ -749,43 +738,6 @@ uprv_getMaxValues(int32_t column) {
#endif
}
/*
* get Hangul Syllable Type
* implemented here so that uchar.c (uhst_addPropertyStarts())
* does not depend on uprops.c (u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE))
*/
U_CFUNC UHangulSyllableType
uchar_getHST(UChar32 c) {
/* purely algorithmic; hardcode known characters, check for assigned new ones */
if(c<JAMO_L_BASE) {
/* U_HST_NOT_APPLICABLE */
} else if(c<=0x11ff) {
/* Jamo range */
if(c<=0x115f) {
/* Jamo L range, HANGUL CHOSEONG ... */
if(c==0x115f || c<=0x1159 || u_charType(c)==U_OTHER_LETTER) {
return U_HST_LEADING_JAMO;
}
} else if(c<=0x11a7) {
/* Jamo V range, HANGUL JUNGSEONG ... */
if(c<=0x11a2 || u_charType(c)==U_OTHER_LETTER) {
return U_HST_VOWEL_JAMO;
}
} else {
/* Jamo T range */
if(c<=0x11f9 || u_charType(c)==U_OTHER_LETTER) {
return U_HST_TRAILING_JAMO;
}
}
} else if((c-=HANGUL_BASE)<0) {
/* U_HST_NOT_APPLICABLE */
} else if(c<HANGUL_COUNT) {
/* Hangul syllable */
return c%JAMO_T_COUNT==0 ? U_HST_LV_SYLLABLE : U_HST_LVT_SYLLABLE;
}
return U_HST_NOT_APPLICABLE;
}
U_CAPI void U_EXPORT2
u_charAge(UChar32 c, UVersionInfo versionArray) {
if(versionArray!=NULL) {
@ -816,71 +768,6 @@ ublock_getCode(UChar32 c) {
/* property starts for UnicodeSet ------------------------------------------- */
/* for Hangul_Syllable_Type */
U_CFUNC void U_EXPORT2
uhst_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
UChar32 c;
int32_t value, value2;
if(U_FAILURE(*pErrorCode)) {
return;
}
#if !UCHAR_HARDCODE_DATA
if(!HAVE_DATA) {
*pErrorCode=dataErrorCode;
return;
}
#endif
/* add code points with hardcoded properties, plus the ones following them */
/*
* Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
* First, we add fixed boundaries for the blocks of Jamos.
* Then we check in loops to see where the current Unicode version
* actually stops assigning such Jamos. We start each loop
* at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
* (These have not changed since Unicode 2.)
*/
sa->add(sa->set, 0x1100);
value=U_HST_LEADING_JAMO;
for(c=0x115a; c<=0x115f; ++c) {
value2=uchar_getHST(c);
if(value!=value2) {
value=value2;
sa->add(sa->set, c);
}
}
sa->add(sa->set, 0x1160);
value=U_HST_VOWEL_JAMO;
for(c=0x11a3; c<=0x11a7; ++c) {
value2=uchar_getHST(c);
if(value!=value2) {
value=value2;
sa->add(sa->set, c);
}
}
sa->add(sa->set, 0x11a8);
value=U_HST_TRAILING_JAMO;
for(c=0x11fa; c<=0x11ff; ++c) {
value2=uchar_getHST(c);
if(value!=value2) {
value=value2;
sa->add(sa->set, c);
}
}
/* Add Hangul type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE. */
for(c=HANGUL_BASE; c<(HANGUL_BASE+HANGUL_COUNT); c+=JAMO_T_COUNT) {
sa->add(sa->set, c);
sa->add(sa->set, c+1);
}
sa->add(sa->set, c);
}
static UBool U_CALLCONV
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
/* add the start code point to the USet */

File diff suppressed because it is too large Load diff

View file

@ -251,8 +251,8 @@ isDataLoaded(UErrorCode *pErrorCode) {
* field can contain ';' as part of its contents.
* In unames.dat, it is marked as token[';']==-1 only if the
* semicolon is used in the data file - which is iff we
* have Unicode 1.0 names or ISO comments.
* So, it will be token[';']==-1 if we store U1.0 names/ISO comments
* have Unicode 1.0 names or ISO comments or aliases.
* So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
* although we know that it will never be part of a name.
*/
static uint16_t
@ -264,32 +264,26 @@ expandName(UCharNames *names,
uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
uint8_t c;
if(nameChoice==U_UNICODE_10_CHAR_NAME || nameChoice==U_ISO_COMMENT) {
if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
/*
* skip the modern name if it is not requested _and_
* if the semicolon byte value is a character, not a token number
*/
if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
while(nameLength>0) {
--nameLength;
if(*name++==';') {
break;
}
}
if(nameChoice==U_ISO_COMMENT) {
/* skip the Unicode 1.0 name as well to get the ISO comment */
int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
do {
while(nameLength>0) {
--nameLength;
if(*name++==';') {
break;
}
}
}
} while(--fieldIndex>0);
} else {
/*
* the semicolon byte value is a token number, therefore
* only modern names are stored in unames.dat and there is no
* such requested Unicode 1.0 name here
* such requested alternate name here
*/
nameLength=0;
}
@ -364,23 +358,26 @@ compareName(UCharNames *names,
uint8_t c;
const char *origOtherName = otherName;
if(nameChoice==U_UNICODE_10_CHAR_NAME) {
if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
/*
* skip the modern name if it is not requested _and_
* if the semicolon byte value is a character, not a token number
*/
if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
while(nameLength>0) {
--nameLength;
if(*name++==';') {
break;
int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
do {
while(nameLength>0) {
--nameLength;
if(*name++==';') {
break;
}
}
}
} while(--fieldIndex>0);
} else {
/*
* the semicolon byte value is a token number, therefore
* only modern names are stored in unames.dat and there is no
* such requested Unicode 1.0 name here
* such requested alternate name here
*/
nameLength=0;
}
@ -865,13 +862,8 @@ getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
char *buffer, uint16_t bufferLength) {
uint16_t bufferPos=0;
/*
* Do not write algorithmic Unicode 1.0 names because
* Unihan names are the same as the modern ones,
* extension A was only introduced with Unicode 3.0, and
* the Hangul syllable block was moved and changed around Unicode 1.1.5.
*/
if(nameChoice==U_UNICODE_10_CHAR_NAME) {
/* Only the normative character name can be algorithmic. */
if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
/* zero-terminate */
if(bufferLength>0) {
*buffer=0;
@ -957,7 +949,7 @@ enumAlgNames(AlgorithmicRange *range,
char buffer[200];
uint16_t length;
if(nameChoice==U_UNICODE_10_CHAR_NAME) {
if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
return TRUE;
}
@ -1095,7 +1087,7 @@ static UChar32
findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
UChar32 code;
if(nameChoice==U_UNICODE_10_CHAR_NAME) {
if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
return 0xffff;
}

View file

@ -62,7 +62,7 @@ U_NAMESPACE_BEGIN
* if(isFailure()) { handleFailure(); }
* }
* protected:
* virtual handleFailure() {
* virtual void handleFailure() const {
* log_failure(u_errorName(errorCode));
* exit(errorCode);
* }

View file

@ -39,7 +39,7 @@ U_CDECL_BEGIN
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
#define U_UNICODE_VERSION "5.1"
#define U_UNICODE_VERSION "5.2"
/**
* \file
@ -414,8 +414,22 @@ typedef enum UProperty {
See the uchar.h file documentation.
@stable ICU 3.4 */
UCHAR_POSIX_XDIGIT=48,
/** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @draft ICU 4.4 */
UCHAR_CASED=49,
/** Binary property Case_Ignorable. Used in context-sensitive case mappings. @draft ICU 4.4 */
UCHAR_CASE_IGNORABLE=50,
/** Binary property Changes_When_Lowercased. @draft ICU 4.4 */
UCHAR_CHANGES_WHEN_LOWERCASED=51,
/** Binary property Changes_When_Uppercased. @draft ICU 4.4 */
UCHAR_CHANGES_WHEN_UPPERCASED=52,
/** Binary property Changes_When_Titlecased. @draft ICU 4.4 */
UCHAR_CHANGES_WHEN_TITLECASED=53,
/** Binary property Changes_When_Casefolded. @draft ICU 4.4 */
UCHAR_CHANGES_WHEN_CASEFOLDED=54,
/** Binary property Changes_When_Casemapped. @draft ICU 4.4 */
UCHAR_CHANGES_WHEN_CASEMAPPED=55,
/** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
UCHAR_BINARY_LIMIT=49,
UCHAR_BINARY_LIMIT=56,
/** Enumerated property Bidi_Class.
Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
@ -1291,8 +1305,63 @@ enum UBlockCode {
/** @stable ICU 4.0 */
UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
/* New blocks in Unicode 5.2 */
/** @draft ICU 4.4 */
UBLOCK_SAMARITAN = 172, /*[0800]*/
/** @draft ICU 4.4 */
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/
/** @draft ICU 4.4 */
UBLOCK_TAI_THAM = 174, /*[1A20]*/
/** @draft ICU 4.4 */
UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
/** @draft ICU 4.4 */
UBLOCK_LISU = 176, /*[A4D0]*/
/** @draft ICU 4.4 */
UBLOCK_BAMUM = 177, /*[A6A0]*/
/** @draft ICU 4.4 */
UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/
/** @draft ICU 4.4 */
UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
/** @draft ICU 4.4 */
UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/
/** @draft ICU 4.4 */
UBLOCK_JAVANESE = 181, /*[A980]*/
/** @draft ICU 4.4 */
UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
/** @draft ICU 4.4 */
UBLOCK_TAI_VIET = 183, /*[AA80]*/
/** @draft ICU 4.4 */
UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
/** @draft ICU 4.4 */
UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/
/** @draft ICU 4.4 */
UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
/** @draft ICU 4.4 */
UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
/** @draft ICU 4.4 */
UBLOCK_AVESTAN = 188, /*[10B00]*/
/** @draft ICU 4.4 */
UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/
/** @draft ICU 4.4 */
UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
/** @draft ICU 4.4 */
UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
/** @draft ICU 4.4 */
UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
/** @draft ICU 4.4 */
UBLOCK_KAITHI = 193, /*[11080]*/
/** @draft ICU 4.4 */
UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
/** @draft ICU 4.4 */
UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/
/** @draft ICU 4.4 */
UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/
/** @draft ICU 4.4 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/
/** @stable ICU 2.0 */
UBLOCK_COUNT = 172,
UBLOCK_COUNT = 198,
/** @stable ICU 2.0 */
UBLOCK_INVALID_CODE=-1
@ -1337,6 +1406,7 @@ typedef enum UCharNameChoice {
U_UNICODE_CHAR_NAME,
U_UNICODE_10_CHAR_NAME,
U_EXTENDED_CHAR_NAME,
U_CHAR_NAME_ALIAS, /**< Corrected name from NameAliases.txt. @draft ICU 4.4 */
U_CHAR_NAME_CHOICE_COUNT
} UCharNameChoice;
@ -1465,6 +1535,8 @@ typedef enum UJoiningGroup {
U_JG_KHAPH, /**< @stable ICU 2.6 */
U_JG_ZHAIN, /**< @stable ICU 2.6 */
U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */
U_JG_FARSI_YEH, /**< @draft ICU 4.4 */
U_JG_NYA, /**< @draft ICU 4.4 */
U_JG_COUNT
} UJoiningGroup;
@ -1584,7 +1656,8 @@ typedef enum ULineBreak {
U_LB_JL = 33, /*[JL]*/
U_LB_JT = 34, /*[JT]*/
U_LB_JV = 35, /*[JV]*/
U_LB_COUNT = 36
U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
U_LB_COUNT = 37
} ULineBreak;
/**
@ -2457,6 +2530,9 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
* The ISO 10646 comment is an informative field in the Unicode Character
* Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
*
* Note: Unicode 5.2 removes all ISO comment data, resulting in empty strings
* returned for all characters.
*
* @param c The character (code point) for which to get the ISO comment.
* It must be <code>0<=c<=0x10ffff</code>.
* @param dest Destination address for copying the comment.

View file

@ -46,7 +46,7 @@
typedef enum UScriptCode {
USCRIPT_INVALID_CODE = -1,
USCRIPT_COMMON = 0 , /* Zyyy */
USCRIPT_INHERITED = 1, /* Qaai */
USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
USCRIPT_ARABIC = 2, /* Arab */
USCRIPT_ARMENIAN = 3, /* Armn */
USCRIPT_BENGALI = 4, /* Beng */
@ -156,7 +156,7 @@ typedef enum UScriptCode {
USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
USCRIPT_TENGWAR = 98, /* Teng */
USCRIPT_VAI = 99, /* Vaii */
USCRIPT_VISIBLE_SPEECH = 100, /* Visp */
USCRIPT_VISIBLE_SPEECH = 100,/* Visp */
USCRIPT_CUNEIFORM = 101,/* Xsux */
USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
@ -191,8 +191,14 @@ typedef enum UScriptCode {
USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
USCRIPT_SYMBOLS = 129,/* Zsym */
/* New script codes from ISO 15924 @draft ICU 4.4 */
USCRIPT_BAMUM = 130,/* Bamu */
USCRIPT_LISU = 131,/* Lisu */
USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */
/* Private use codes from Qaaa - Qabx are not supported*/
USCRIPT_CODE_LIMIT = 130
USCRIPT_CODE_LIMIT = 134
} UScriptCode;
/**

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2008, International Business Machines
* Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -169,13 +169,14 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
uchar_addPropertyStarts(&sa, &status);
upropsvec_addPropertyStarts(&sa, &status);
break;
case UPROPS_SRC_HST:
uhst_addPropertyStarts(&sa, &status);
break;
#if !UCONFIG_NO_NORMALIZATION
case UPROPS_SRC_NORM:
unorm_addPropertyStarts(&sa, &status);
break;
case UPROPS_SRC_CASE_AND_NORM:
ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
unorm_addPropertyStarts(&sa, &status);
break;
#endif
case UPROPS_SRC_CASE:
ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2008, International Business Machines
* Copyright (C) 2002-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -24,6 +24,7 @@
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "unicode/ustring.h"
#include "cstring.h"
#include "ucln_cmn.h"
#include "umutex.h"
@ -91,7 +92,7 @@ static const struct {
/*
* column and mask values for binary properties from u_getUnicodeProperties().
* Must be in order of corresponding UProperty,
* and there must be exacly one entry per binary UProperty.
* and there must be exactly one entry per binary UProperty.
*
* Properties with mask 0 are handled in code.
* For them, column is the UPropertySource value.
@ -144,7 +145,14 @@ static const struct {
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_BLANK */
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_GRAPH */
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_PRINT */
{ UPROPS_SRC_CHAR, 0 } /* UCHAR_POSIX_XDIGIT */
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_XDIGIT */
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CASED */
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CASE_IGNORABLE */
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CHANGES_WHEN_LOWERCASED */
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CHANGES_WHEN_UPPERCASED */
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CHANGES_WHEN_TITLECASED */
{ UPROPS_SRC_CASE_AND_NORM, 0 }, /* UCHAR_CHANGES_WHEN_CASEFOLDED */
{ UPROPS_SRC_CASE, 0 } /* UCHAR_CHANGES_WHEN_CASEMAPPED */
};
U_CAPI UBool U_EXPORT2
@ -214,16 +222,82 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
default:
break;
}
} else if(column==UPROPS_SRC_CASE_AND_NORM) {
#if !UCONFIG_NO_NORMALIZATION
UChar nfdBuffer[4];
const UChar *nfd=NULL;
int32_t nfdLength;
UErrorCode errorCode;
switch(which) {
case UCHAR_CHANGES_WHEN_CASEFOLDED:
if(unorm_haveData(&errorCode)) {
nfd=unorm_getCanonicalDecomposition(c, nfdBuffer, &nfdLength);
}
if(nfd!=NULL) {
/* c has a decomposition */
if(nfdLength==1) {
c=nfd[0]; /* single BMP code point */
} else if(nfdLength<=U16_MAX_LENGTH) {
int32_t i=0;
U16_NEXT(nfd, i, nfdLength, c);
if(i==nfdLength) {
/* single supplementary code point */
} else {
c=U_SENTINEL;
}
} else {
c=U_SENTINEL;
}
} else if(c<0) {
return FALSE; /* protect against bad input */
}
errorCode=U_ZERO_ERROR;
if(c>=0) {
/* single code point */
const UCaseProps *csp=ucase_getSingleton(&errorCode);
const UChar *resultString;
return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
} else {
/* guess some large but stack-friendly capacity */
UChar dest[2*UCASE_MAX_STRING_LENGTH];
int32_t destLength;
destLength=u_strFoldCase(dest, LENGTHOF(dest), nfd, nfdLength, U_FOLD_CASE_DEFAULT, &errorCode);
return (UBool)(U_SUCCESS(errorCode) && 0!=u_strCompare(nfd, nfdLength, dest, destLength, FALSE));
}
default:
break;
}
#endif
}
}
}
return FALSE;
}
/*
* Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
* Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
*
* The table is indexed by the Grapheme_Cluster_Break value; each entry's
* trailing comment names the GCB constant that the slot corresponds to,
* so the entries must stay in the order of those constants.
*/
static const UHangulSyllableType gcbToHst[]={
U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */
U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */
U_HST_NOT_APPLICABLE, /* U_GCB_CR */
U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */
U_HST_LEADING_JAMO, /* U_GCB_L */
U_HST_NOT_APPLICABLE, /* U_GCB_LF */
U_HST_LV_SYLLABLE, /* U_GCB_LV */
U_HST_LVT_SYLLABLE, /* U_GCB_LVT */
U_HST_TRAILING_JAMO, /* U_GCB_T */
U_HST_VOWEL_JAMO /* U_GCB_V */
/*
* Omit GCB values beyond what we need for hst.
* The lookup code must bounds-check the GCB value against the array length
* and treat any larger value as U_HST_NOT_APPLICABLE.
*/
};
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which) {
UErrorCode errorCode;
int32_t type;
if(which<UCHAR_BINARY_START) {
return 0; /* undefined */
@ -255,18 +329,22 @@ u_getIntPropertyValue(UChar32 c, UProperty which) {
return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
case UCHAR_LINE_BREAK:
return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT;
case UCHAR_NUMERIC_TYPE:
type=(int32_t)GET_NUMERIC_TYPE(u_getUnicodeProperties(c, -1));
if(type>U_NT_NUMERIC) {
/* keep internal variants of U_NT_NUMERIC from becoming visible */
type=U_NT_NUMERIC;
}
return type;
case UCHAR_NUMERIC_TYPE: {
int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1));
return UPROPS_NTV_GET_TYPE(ntv);
}
case UCHAR_SCRIPT:
errorCode=U_ZERO_ERROR;
return (int32_t)uscript_getScript(c, &errorCode);
case UCHAR_HANGUL_SYLLABLE_TYPE:
return uchar_getHST(c);
case UCHAR_HANGUL_SYLLABLE_TYPE: {
/* see comments on gcbToHst[] above */
int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
if(gcb<LENGTHOF(gcbToHst)) {
return gcbToHst[gcb];
} else {
return U_HST_NOT_APPLICABLE;
}
}
#if !UCONFIG_NO_NORMALIZATION
case UCHAR_NFD_QUICK_CHECK:
case UCHAR_NFKD_QUICK_CHECK:
@ -355,6 +433,15 @@ u_getIntPropertyMaxValue(UProperty which) {
}
}
/*
* TODO: Simplify, similar to binProps[].
* Use an array of column/source, mask, shift values to drive returning simple
* properties and their sources.
*
* TODO: Split the single propsvec into one per column, and have
* upropsvec_addPropertyStarts() pass a trie value function that gets the
* desired column's values.
*/
U_CFUNC UPropertySource U_EXPORT2
uprops_getSource(UProperty which) {
if(which<UCHAR_BINARY_START) {
@ -373,9 +460,6 @@ uprops_getSource(UProperty which) {
case UCHAR_NUMERIC_TYPE:
return UPROPS_SRC_CHAR;
case UCHAR_HANGUL_SYLLABLE_TYPE:
return UPROPS_SRC_HST;
case UCHAR_CANONICAL_COMBINING_CLASS:
case UCHAR_NFD_QUICK_CHECK:
case UCHAR_NFKD_QUICK_CHECK:
@ -538,7 +622,6 @@ uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode) {
unorm_addPropertyStarts(sa, pErrorCode);
#endif
uchar_addPropertyStarts(sa, pErrorCode);
uhst_addPropertyStarts(sa, pErrorCode);
ucase_addPropertyStarts(ucase_getSingleton(pErrorCode), sa, pErrorCode);
ubidi_addPropertyStarts(ubidi_getSingleton(pErrorCode), sa, pErrorCode);
}

View file

@ -48,45 +48,33 @@ enum {
/* definitions for the main properties words */
enum {
/* general category shift==0 0 (5 bits) */
UPROPS_NUMERIC_TYPE_SHIFT=5, /* 5 (3 bits) */
UPROPS_NUMERIC_VALUE_SHIFT=8 /* 8 (8 bits) */
/* reserved 5 (1 bit) */
UPROPS_NUMERIC_TYPE_VALUE_SHIFT=6 /* 6 (10 bits) */
};
#define GET_CATEGORY(props) ((props)&0x1f)
#define CAT_MASK(props) U_MASK(GET_CATEGORY(props))
#define GET_NUMERIC_TYPE(props) (((props)>>UPROPS_NUMERIC_TYPE_SHIFT)&7)
#define GET_NUMERIC_VALUE(props) (((props)>>UPROPS_NUMERIC_VALUE_SHIFT)&0xff)
#define GET_NUMERIC_TYPE_VALUE(props) ((props)>>UPROPS_NUMERIC_TYPE_VALUE_SHIFT)
/* internal numeric pseudo-types for special encodings of numeric values */
/* constants for the storage form of numeric types and values */
enum {
UPROPS_NT_FRACTION=4, /* ==U_NT_COUNT, must not change unless binary format version changes */
UPROPS_NT_LARGE,
UPROPS_NT_COUNT
UPROPS_NTV_NONE=0,
UPROPS_NTV_DECIMAL_START=1,
UPROPS_NTV_DIGIT_START=11,
UPROPS_NTV_NUMERIC_START=21,
UPROPS_NTV_FRACTION_START=0xb0,
UPROPS_NTV_LARGE_START=0x1e0,
UPROPS_NTV_RESERVED_START=0x300,
UPROPS_NTV_MAX_SMALL_INT=UPROPS_NTV_FRACTION_START-UPROPS_NTV_NUMERIC_START-1
};
/* encoding of fractional and large numbers */
enum {
UPROPS_MAX_SMALL_NUMBER=0xff,
UPROPS_FRACTION_NUM_SHIFT=3, /* numerator: bits 7..3 */
UPROPS_FRACTION_DEN_MASK=7, /* denominator: bits 2..0 */
UPROPS_FRACTION_MAX_NUM=31,
UPROPS_FRACTION_DEN_OFFSET=2, /* denominator values are 2..9 */
UPROPS_FRACTION_MIN_DEN=UPROPS_FRACTION_DEN_OFFSET,
UPROPS_FRACTION_MAX_DEN=UPROPS_FRACTION_MIN_DEN+UPROPS_FRACTION_DEN_MASK,
UPROPS_LARGE_MANT_SHIFT=4, /* mantissa: bits 7..4 */
UPROPS_LARGE_EXP_MASK=0xf, /* exponent: bits 3..0 */
UPROPS_LARGE_EXP_OFFSET=2, /* regular exponents 2..17 */
UPROPS_LARGE_EXP_OFFSET_EXTRA=18, /* extra large exponents 18..33 */
UPROPS_LARGE_MIN_EXP=UPROPS_LARGE_EXP_OFFSET,
UPROPS_LARGE_MAX_EXP=UPROPS_LARGE_MIN_EXP+UPROPS_LARGE_EXP_MASK,
UPROPS_LARGE_MAX_EXP_EXTRA=UPROPS_LARGE_EXP_OFFSET_EXTRA+UPROPS_LARGE_EXP_MASK
};
/*
 * Map a stored numeric-type-and-value (ntv) to its U_NT_* numeric type:
 *   ntv==UPROPS_NTV_NONE           -> U_NT_NONE
 *   ntv<UPROPS_NTV_DIGIT_START     -> U_NT_DECIMAL
 *   ntv<UPROPS_NTV_NUMERIC_START   -> U_NT_DIGIT
 *   anything else                  -> U_NT_NUMERIC
 *
 * The parameter is parenthesized at every use so that expression arguments
 * (e.g. "a&b" or "x?y:z") are compared as a whole rather than being split
 * by operator precedence.
 * Note: ntv may be evaluated up to three times; do not pass an argument
 * with side effects.
 */
#define UPROPS_NTV_GET_TYPE(ntv) \
    (((ntv)==UPROPS_NTV_NONE) ? U_NT_NONE : \
     ((ntv)<UPROPS_NTV_DIGIT_START) ? U_NT_DECIMAL : \
     ((ntv)<UPROPS_NTV_NUMERIC_START) ? U_NT_DIGIT : \
     U_NT_NUMERIC)
/* number of properties vector words */
#define UPROPS_VECTOR_WORDS 3
@ -210,13 +198,6 @@ u_getUnicodeProperties(UChar32 c, int32_t column);
U_CFUNC int32_t
uprv_getMaxValues(int32_t column);
/**
* Get the Hangul Syllable Type for c.
* @internal
*/
U_CFUNC UHangulSyllableType
uchar_getHST(UChar32 c);
/**
* Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
* @internal
@ -339,8 +320,6 @@ enum UPropertySource {
UPROPS_SRC_CHAR,
/** From uchar.c/uprops.icu properties vectors trie */
UPROPS_SRC_PROPSVEC,
/** Hangul_Syllable_Type, from uchar.c/uprops.icu */
UPROPS_SRC_HST,
/** From unames.c/unames.icu */
UPROPS_SRC_NAMES,
/** From unorm.cpp/unorm.icu */
@ -351,6 +330,8 @@ enum UPropertySource {
UPROPS_SRC_BIDI,
/** From uchar.c/uprops.icu main trie as well as properties vectors trie */
UPROPS_SRC_CHAR_AND_PROPSVEC,
/** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */
UPROPS_SRC_CASE_AND_NORM,
/** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
UPROPS_SRC_COUNT
};
@ -379,13 +360,6 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
U_CFUNC void U_EXPORT2
upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
/**
* Same as uchar_addPropertyStarts() but only for Hangul_Syllable_Type.
* @internal
*/
U_CFUNC void U_EXPORT2
uhst_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
/**
* Return a set of characters for property enumeration.
* For each two consecutive characters (start, limit) in the set,

View file

@ -1963,7 +1963,7 @@ LIB_VERSION_MAJOR=`echo $LIB_VERSION | sed 's/\..*//'`
{ $as_echo "$as_me:$LINENO: result: release $VERSION, library $LIB_VERSION" >&5
$as_echo "release $VERSION, library $LIB_VERSION" >&6; }
UNICODE_VERSION="5.1"
UNICODE_VERSION="5.2"
# Determine the host system

View file

@ -38,7 +38,7 @@ AC_SUBST(LIB_VERSION)
AC_SUBST(LIB_VERSION_MAJOR)
AC_MSG_RESULT([release $VERSION, library $LIB_VERSION])
UNICODE_VERSION="5.1"
UNICODE_VERSION="5.2"
AC_SUBST(UNICODE_VERSION)
# Determine the host system

View file

@ -499,8 +499,8 @@ $(COLBLDDIR)/ucadata.icu $(COLBLDDIR)/invuca.icu: $(UNICODEDATADIR)/FractionalUC
$(INVOKE) $(TOOLBINDIR)/genuca -s $(UNICODEDATADIR) -d $(COLBLDDIR) -i $(BUILDDIR)
# unames.icu
$(BUILDDIR)/unames.icu: $(UNICODEDATADIR)/UnicodeData.txt $(TOOLBINDIR)/gennames$(TOOLEXEEXT)
$(INVOKE) $(TOOLBINDIR)/gennames -1 -d $(BUILDDIR) $(UNICODEDATADIR)/UnicodeData.txt -u $(UNICODE_VERSION)
$(BUILDDIR)/unames.icu: $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/NameAliases.txt $(TOOLBINDIR)/gennames$(TOOLEXEEXT)
$(INVOKE) $(TOOLBINDIR)/gennames -1 -d $(BUILDDIR) $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/NameAliases.txt -u $(UNICODE_VERSION)
# cnvalias.icu
$(BUILDDIR)/cnvalias.icu: $(UCMSRCDIR)/convrtrs.txt $(TOOLBINDIR)/gencnval$(TOOLEXEEXT)

View file

@ -5,7 +5,7 @@
#
# Line Breaking Rules
# Implement default line breaking as defined by
# Unicode Standard Annex #14 Revision 22 for Unicode 5.1
# Unicode Standard Annex #14 Revision 24 for Unicode 5.2
# http://www.unicode.org/reports/tr14/
@ -61,12 +61,9 @@ $BB = [:LineBreak = Break_Before:];
$BK = [:LineBreak = Mandatory_Break:];
$B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
# Unicode 5.2 changes. Fix once line break property data is updated.
$CP = [\u0029 \u005d]; # Right Parenthesis and right square bracket.
$CL = [[:LineBreak = Close_Punctuation:] - $CP];
$CL = [:LineBreak = Close_Punctuation:];
$CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EX = [:LineBreak = Exclamation:];
$GL = [:LineBreak = Glue:];

View file

@ -13,7 +13,7 @@
U_ICUDATA_NAME=icudt43
##############################################################################
U_ICUDATA_ENDIAN_SUFFIX=l
UNICODE_VERSION=5.1
UNICODE_VERSION=5.2
ICU_LIB_TARGET=$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll
# ICUMAKE
@ -851,7 +851,7 @@ res_index:table(nofallback) {
# Targets for unames.icu
"$(ICUBLD_PKG)\unames.icu": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\gennames\$(CFG)\gennames.exe"
@echo Creating data file for Unicode Names
@"$(ICUTOOLS)\gennames\$(CFG)\gennames" -1 -u $(UNICODE_VERSION) -d "$(ICUBLD_PKG)" "$(ICUUNIDATA)\UnicodeData.txt"
@"$(ICUTOOLS)\gennames\$(CFG)\gennames" -1 -u $(UNICODE_VERSION) -d "$(ICUBLD_PKG)" "$(ICUUNIDATA)\UnicodeData.txt" "$(ICUUNIDATA)\NameAliases.txt"
# Targets for pnames.icu
# >> Depends on the Unicode data as well as uchar.h and uscript.h <<

View file

@ -1,18 +1,18 @@
# BidiMirroring-5.1.0.txt
# Date: 2007-10-26, 17:14:00 PDT [KW]
# BidiMirroring-5.2.0.txt
# Date: 2009-05-22, 12:44:00 PDT [KW]
#
# Bidi_Mirroring_Glyph Property
#
# This file is an informative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2007 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This data file lists characters that have the mirrored property
# where there is another Unicode character that typically has a glyph
# that is the mirror image of the original character's glyph.
# The repertoire covered by the file is Unicode 5.0.0.
# The repertoire covered by the file is Unicode 5.2.0.
#
# The file contains a list of lines with mappings from one code point
# to another one for character-based mirroring.
@ -32,7 +32,7 @@
# at http://www.unicode.org/unicode/reports/tr9/
#
# This file was originally created by Markus Scherer.
# Extended for Unicode 3.2, 4.0, 4.1, 5.0, and 5.1 by Ken Whistler.
# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, and 5.2 by Ken Whistler.
#
# ############################################################

View file

@ -1,10 +1,10 @@
# Blocks-5.1.0.txt
# Date: 2007-10-22, 17:10:00 PDT [KW]
# Blocks-5.2.0.txt
# Date: 2009-05-19, 16:21:00 PDT [KW]
#
# Unicode Character Database
# Copyright (c) 1991-2007 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Note: The casing of block names is not normative.
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
@ -18,7 +18,7 @@
# and underbars are ignored.
# For example, "Latin Extended-A" and "latin extended a" are equivalent.
# For more information on the comparison of property values,
# see UCD.html.
# see UAX #44: http://www.unicode.org/reports/tr44/
#
# All code points not explicitly listed for Block
# have the value No_Block.
@ -44,6 +44,7 @@
0750..077F; Arabic Supplement
0780..07BF; Thaana
07C0..07FF; NKo
0800..083F; Samaritan
0900..097F; Devanagari
0980..09FF; Bengali
0A00..0A7F; Gurmukhi
@ -72,15 +73,18 @@
1760..177F; Tagbanwa
1780..17FF; Khmer
1800..18AF; Mongolian
18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
1900..194F; Limbu
1950..197F; Tai Le
1980..19DF; New Tai Lue
19E0..19FF; Khmer Symbols
1A00..1A1F; Buginese
1A20..1AAF; Tai Tham
1B00..1B7F; Balinese
1B80..1BBF; Sundanese
1C00..1C4F; Lepcha
1C50..1C7F; Ol Chiki
1CD0..1CFF; Vedic Extensions
1D00..1D7F; Phonetic Extensions
1D80..1DBF; Phonetic Extensions Supplement
1DC0..1DFF; Combining Diacritical Marks Supplement
@ -137,17 +141,27 @@
4E00..9FFF; CJK Unified Ideographs
A000..A48F; Yi Syllables
A490..A4CF; Yi Radicals
A4D0..A4FF; Lisu
A500..A63F; Vai
A640..A69F; Cyrillic Extended-B
A6A0..A6FF; Bamum
A700..A71F; Modifier Tone Letters
A720..A7FF; Latin Extended-D
A800..A82F; Syloti Nagri
A830..A83F; Common Indic Number Forms
A840..A87F; Phags-pa
A880..A8DF; Saurashtra
A8E0..A8FF; Devanagari Extended
A900..A92F; Kayah Li
A930..A95F; Rejang
A960..A97F; Hangul Jamo Extended-A
A980..A9DF; Javanese
AA00..AA5F; Cham
AA60..AA7F; Myanmar Extended-A
AA80..AADF; Tai Viet
ABC0..ABFF; Meetei Mayek
AC00..D7AF; Hangul Syllables
D7B0..D7FF; Hangul Jamo Extended-B
D800..DB7F; High Surrogates
DB80..DBFF; High Private Use Surrogates
DC00..DFFF; Low Surrogates
@ -179,11 +193,20 @@ FFF0..FFFF; Specials
10450..1047F; Shavian
10480..104AF; Osmanya
10800..1083F; Cypriot Syllabary
10840..1085F; Imperial Aramaic
10900..1091F; Phoenician
10920..1093F; Lydian
10A00..10A5F; Kharoshthi
10A60..10A7F; Old South Arabian
10B00..10B3F; Avestan
10B40..10B5F; Inscriptional Parthian
10B60..10B7F; Inscriptional Pahlavi
10C00..10C4F; Old Turkic
10E60..10E7F; Rumi Numeral Symbols
11080..110CF; Kaithi
12000..123FF; Cuneiform
12400..1247F; Cuneiform Numbers and Punctuation
13000..1342F; Egyptian Hieroglyphs
1D000..1D0FF; Byzantine Musical Symbols
1D100..1D1FF; Musical Symbols
1D200..1D24F; Ancient Greek Musical Notation
@ -192,7 +215,10 @@ FFF0..FFFF; Specials
1D400..1D7FF; Mathematical Alphanumeric Symbols
1F000..1F02F; Mahjong Tiles
1F030..1F09F; Domino Tiles
1F100..1F1FF; Enclosed Alphanumeric Supplement
1F200..1F2FF; Enclosed Ideographic Supplement
20000..2A6DF; CJK Unified Ideographs Extension B
2A700..2B73F; CJK Unified Ideographs Extension C
2F800..2FA1F; CJK Compatibility Ideographs Supplement
E0000..E007F; Tags
E0100..E01EF; Variation Selectors Supplement

View file

@ -1,10 +1,10 @@
# CaseFolding-5.1.0.txt
# Date: 2008-03-03, 21:57:14 GMT [MD]
# CaseFolding-5.2.0.txt
# Date: 2009-05-28, 23:02:34 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Case Folding Properties
#
@ -495,6 +495,7 @@
051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA
0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
@ -983,8 +984,11 @@
2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA
2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK
2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A
2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA
2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK
2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL
2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL
2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
@ -1035,6 +1039,8 @@
2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA
A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO
A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE

View file

@ -1,18 +1,14 @@
# DerivedAge-5.1.0.txt
# Date: 2008-03-03, 21:57:14 GMT [MD]
# DerivedAge-5.2.0.txt
# Date: 2009-09-17, 22:52:52 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Unicode Character Database: Derived Property Data
# This file shows when various code points were first assigned in Unicode.
#
# Caution: When using the Age *property*, all assigned code points
# in each version are included, not just the newly assigned code points.
# For more information, see http://www.unicode.org/reports/tr18/
#
# Notes:
#
# - The term 'assigned' means that a previously reserved code point was assigned
@ -37,7 +33,13 @@
# ================================================
# Property: Age
# Property: Age
#
# Note: When using the Age property in regular expressions,
# an expression such as "\p{age=3.0}" matches all of the code points
# assigned in Version 3.0--that is, all the code points with a value
# less than or equal to 3.0 for the Age property.
# For more information, see [http://www.unicode.org/reports/tr18/].
# All code points not explicitly listed for Age
# have the value unassigned.
@ -960,4 +962,103 @@ FE24..FE26 ; 5.1 # [3] COMBINING MACRON LEFT HALF..COMBINING CONJOINING MAC
# Total code points: 1624
# ================================================
# Newly assigned in Unicode 5.2.0 (October, 2009)
0524..0525 ; 5.2 # [2] CYRILLIC CAPITAL LETTER PE WITH DESCENDER..CYRILLIC SMALL LETTER PE WITH DESCENDER
0800..082D ; 5.2 # [46] SAMARITAN LETTER ALAF..SAMARITAN MARK NEQUDAA
0830..083E ; 5.2 # [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
0900 ; 5.2 # DEVANAGARI SIGN INVERTED CANDRABINDU
094E ; 5.2 # DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
0955 ; 5.2 # DEVANAGARI VOWEL SIGN CANDRA LONG E
0979..097A ; 5.2 # [2] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER HEAVY YA
09FB ; 5.2 # BENGALI GANDA MARK
0FD5..0FD8 ; 5.2 # [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
109A..109D ; 5.2 # [4] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON AI
115A..115E ; 5.2 # [5] HANGUL CHOSEONG KIYEOK-TIKEUT..HANGUL CHOSEONG TIKEUT-RIEUL
11A3..11A7 ; 5.2 # [5] HANGUL JUNGSEONG A-EU..HANGUL JUNGSEONG O-YAE
11FA..11FF ; 5.2 # [6] HANGUL JONGSEONG KIYEOK-NIEUN..HANGUL JONGSEONG SSANGNIEUN
1400 ; 5.2 # CANADIAN SYLLABICS HYPHEN
1677..167F ; 5.2 # [9] CANADIAN SYLLABICS WOODS-CREE THWEE..CANADIAN SYLLABICS BLACKFOOT W
18B0..18F5 ; 5.2 # [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
19AA..19AB ; 5.2 # [2] NEW TAI LUE LETTER HIGH SUA..NEW TAI LUE LETTER LOW SUA
19DA ; 5.2 # NEW TAI LUE THAM DIGIT ONE
1A20..1A5E ; 5.2 # [63] TAI THAM LETTER HIGH KA..TAI THAM CONSONANT SIGN SA
1A60..1A7C ; 5.2 # [29] TAI THAM SIGN SAKOT..TAI THAM SIGN KHUEN-LUE KARAN
1A7F..1A89 ; 5.2 # [11] TAI THAM COMBINING CRYPTOGRAMMIC DOT..TAI THAM HORA DIGIT NINE
1A90..1A99 ; 5.2 # [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
1AA0..1AAD ; 5.2 # [14] TAI THAM SIGN WIANG..TAI THAM SIGN CAANG
1CD0..1CF2 ; 5.2 # [35] VEDIC TONE KARSHANA..VEDIC SIGN ARDHAVISARGA
1DFD ; 5.2 # COMBINING ALMOST EQUAL TO BELOW
20B6..20B8 ; 5.2 # [3] LIVRE TOURNOIS SIGN..TENGE SIGN
2150..2152 ; 5.2 # [3] VULGAR FRACTION ONE SEVENTH..VULGAR FRACTION ONE TENTH
2189 ; 5.2 # VULGAR FRACTION ZERO THIRDS
23E8 ; 5.2 # DECIMAL EXPONENT SYMBOL
269E..269F ; 5.2 # [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT
26BD..26BF ; 5.2 # [3] SOCCER BALL..SQUARED KEY
26C4..26CD ; 5.2 # [10] SNOWMAN WITHOUT SNOW..DISABLED CAR
26CF..26E1 ; 5.2 # [19] PICK..RESTRICTED LEFT ENTRY-2
26E3 ; 5.2 # HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
26E8..26FF ; 5.2 # [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2757 ; 5.2 # HEAVY EXCLAMATION MARK SYMBOL
2B55..2B59 ; 5.2 # [5] HEAVY LARGE CIRCLE..HEAVY CIRCLED SALTIRE
2C70 ; 5.2 # LATIN CAPITAL LETTER TURNED ALPHA
2C7E..2C7F ; 5.2 # [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL
2CEB..2CF1 ; 5.2 # [7] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC COMBINING SPIRITUS LENIS
2E31 ; 5.2 # WORD SEPARATOR MIDDLE DOT
3244..324F ; 5.2 # [12] CIRCLED IDEOGRAPH QUESTION..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
9FC4..9FCB ; 5.2 # [8] CJK UNIFIED IDEOGRAPH-9FC4..CJK UNIFIED IDEOGRAPH-9FCB
A4D0..A4FF ; 5.2 # [48] LISU LETTER BA..LISU PUNCTUATION FULL STOP
A6A0..A6F7 ; 5.2 # [88] BAMUM LETTER A..BAMUM QUESTION MARK
A830..A839 ; 5.2 # [10] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC QUANTITY MARK
A8E0..A8FB ; 5.2 # [28] COMBINING DEVANAGARI DIGIT ZERO..DEVANAGARI HEADSTROKE
A960..A97C ; 5.2 # [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
A980..A9CD ; 5.2 # [78] JAVANESE SIGN PANYANGGA..JAVANESE TURNED PADA PISELEH
A9CF..A9D9 ; 5.2 # [11] JAVANESE PANGRANGKEP..JAVANESE DIGIT NINE
A9DE..A9DF ; 5.2 # [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
AA60..AA7B ; 5.2 # [28] MYANMAR LETTER KHAMTI GA..MYANMAR SIGN PAO KAREN TONE
AA80..AAC2 ; 5.2 # [67] TAI VIET LETTER LOW KO..TAI VIET TONE MAI SONG
AADB..AADF ; 5.2 # [5] TAI VIET SYMBOL KON..TAI VIET SYMBOL KOI KOI
ABC0..ABED ; 5.2 # [46] MEETEI MAYEK LETTER KOK..MEETEI MAYEK APUN IYEK
ABF0..ABF9 ; 5.2 # [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
D7B0..D7C6 ; 5.2 # [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
D7CB..D7FB ; 5.2 # [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
FA6B..FA6D ; 5.2 # [3] CJK COMPATIBILITY IDEOGRAPH-FA6B..CJK COMPATIBILITY IDEOGRAPH-FA6D
10840..10855 ; 5.2 # [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW
10857..1085F ; 5.2 # [9] IMPERIAL ARAMAIC SECTION SIGN..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
1091A..1091B ; 5.2 # [2] PHOENICIAN NUMBER TWO..PHOENICIAN NUMBER THREE
10A60..10A7F ; 5.2 # [32] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN NUMERIC INDICATOR
10B00..10B35 ; 5.2 # [54] AVESTAN LETTER A..AVESTAN LETTER HE
10B39..10B55 ; 5.2 # [29] AVESTAN ABBREVIATION MARK..INSCRIPTIONAL PARTHIAN LETTER TAW
10B58..10B72 ; 5.2 # [27] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PAHLAVI LETTER TAW
10B78..10B7F ; 5.2 # [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
10C00..10C48 ; 5.2 # [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
10E60..10E7E ; 5.2 # [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
11080..110BC ; 5.2 # [61] KAITHI SIGN CANDRABINDU..KAITHI ENUMERATION SIGN
110BD ; 5.2 # KAITHI NUMBER SIGN
110BE..110C1 ; 5.2 # [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
13000..1342E ; 5.2 # [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
1F100..1F10A ; 5.2 # [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
1F110..1F12E ; 5.2 # [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
1F131 ; 5.2 # SQUARED LATIN CAPITAL LETTER B
1F13D ; 5.2 # SQUARED LATIN CAPITAL LETTER N
1F13F ; 5.2 # SQUARED LATIN CAPITAL LETTER P
1F142 ; 5.2 # SQUARED LATIN CAPITAL LETTER S
1F146 ; 5.2 # SQUARED LATIN CAPITAL LETTER W
1F14A..1F14E ; 5.2 # [5] SQUARED HV..SQUARED PPV
1F157 ; 5.2 # NEGATIVE CIRCLED LATIN CAPITAL LETTER H
1F15F ; 5.2 # NEGATIVE CIRCLED LATIN CAPITAL LETTER P
1F179 ; 5.2 # NEGATIVE SQUARED LATIN CAPITAL LETTER J
1F17B..1F17C ; 5.2 # [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
1F17F ; 5.2 # NEGATIVE SQUARED LATIN CAPITAL LETTER P
1F18A..1F18D ; 5.2 # [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
1F190 ; 5.2 # SQUARE DJ
1F200 ; 5.2 # SQUARE HIRAGANA HOKA
1F210..1F231 ; 5.2 # [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
1F240..1F248 ; 5.2 # [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
2A700..2B734 ; 5.2 # [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
# Total code points: 6648
# EOF

View file

@ -1,37 +1,44 @@
# DerivedBidiClass-5.1.0.txt
# Date: 2008-03-20, 17:54:42 GMT [MD]
# DerivedBidiClass-5.2.0.txt
# Date: 2009-08-26, 00:50:45 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
# Bidi Class (listing UnicodeData.txt, field 4: see UCD.html)
# Bidi Class (listing UnicodeData.txt, field 4: see UAX #44: http://www.unicode.org/reports/tr44/)
# Unlike other properties, unassigned code points in blocks
# reserved for right-to-left scripts are given either types R or AL.
#
# The unassigned characters that default to AL are:
# Arabic [U+0600 - U+06FF]
# Syriac [U+0700 - U+074F]
# Arabic_Supplement [U+0750 - U+077F]
# Thaana [U+0780 - U+07BF]
# Arabic_Presentation_Forms_A [U+FB50 - U+FDFF]
# Arabic_Presentation_Forms_B [U+FE70 - U+FEFF]
# minus noncharacter code points.
# The unassigned code points that default to AL are in the ranges:
# [\u0600-\u07BF \uFB50-\uFDFF \uFE70-\uFEFF]
#
# The unassigned characters that default to R are:
# Hebrew [U+0590 - U+05FF]
# NKo [U+07C0 - U+07FF]
# Cypriot_Syllabary [U+00010800 - U+0001083F]
# Phoenician [U+00010900 - U+0001091F]
# Lydian [U+00010920 - U+0001093F]
# Kharoshthi [U+00010A00 - U+00010A5F]
# and any otherwise in the ranges:
# U+0800 - U+08FF,
# U+FB1D - U+FB4F,
# U+00010840 - U+00010FFF
# Arabic: U+0600 - U+06FF
# Syriac: U+0700 - U+074F
# Arabic_Supplement: U+0750 - U+077F
# Thaana: U+0780 - U+07BF
# Arabic_Presentation_Forms_A:
# U+FB50 - U+FDFF
# Arabic_Presentation_Forms_B:
# U+FE70 - U+FEFF
# minus noncharacter code points.
#
# The unassigned code points that default to R are in the ranges:
# [\u0590-\u05FF \u07C0-\u08FF \uFB1D-\uFB4F \U00010800-\U00010FFF \U0001E800-\U0001EFFF]
#
# Hebrew: U+0590 - U+05FF
# NKo: U+07C0 - U+07FF
# Cypriot_Syllabary: U+10800 - U+1083F
# Phoenician: U+10900 - U+1091F
# Lydian: U+10920 - U+1093F
# Kharoshthi: U+10A00 - U+10A5F
# and any others in the ranges:
# U+0800 - U+08FF,
# U+FB1D - U+FB4F,
# U+10840 - U+10FFF,
# U+1E800 - U+1EFFF
#
# For all other cases:
@ -74,7 +81,7 @@
03A3..03F5 ; L # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL
03F7..0481 ; L # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
0482 ; L # So CYRILLIC THOUSANDS SIGN
048A..0523 ; L # L& [154] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK
048A..0525 ; L # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
0531..0556 ; L # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 ; L # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
055A..055F ; L # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
@ -85,6 +92,7 @@
093D ; L # Lo DEVANAGARI SIGN AVAGRAHA
093E..0940 ; L # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0949..094C ; L # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
094E ; L # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
0950 ; L # Lo DEVANAGARI OM
0958..0961 ; L # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
0964..0965 ; L # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
@ -92,7 +100,7 @@
0970 ; L # Po DEVANAGARI ABBREVIATION SIGN
0971 ; L # Lm DEVANAGARI SIGN HIGH SPACING DOT
0972 ; L # Lo DEVANAGARI LETTER CANDRA A
097B..097F ; L # Lo [5] DEVANAGARI LETTER GGA..DEVANAGARI LETTER BBA
0979..097F ; L # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
0982..0983 ; L # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
0985..098C ; L # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
098F..0990 ; L # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
@ -274,6 +282,7 @@
0FC7..0FCC ; L # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
0FCE..0FCF ; L # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
0FD0..0FD4 ; L # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
0FD5..0FD8 ; L # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
1000..102A ; L # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU
102B..102C ; L # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
1031 ; L # Mc MYANMAR VOWEL SIGN E
@ -296,15 +305,13 @@
108E ; L # Lo MYANMAR LETTER RUMAI PALAUNG FA
108F ; L # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
1090..1099 ; L # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
109A..109C ; L # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
109E..109F ; L # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
10A0..10C5 ; L # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
10D0..10FA ; L # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
10FB ; L # Po GEORGIAN PARAGRAPH SEPARATOR
10FC ; L # Lm MODIFIER LETTER GEORGIAN NAR
1100..1159 ; L # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH
115F..11A2 ; L # Lo [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA
11A8..11F9 ; L # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH
1200..1248 ; L # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
1100..1248 ; L # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA
124A..124D ; L # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
1250..1256 ; L # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
1258 ; L # Lo ETHIOPIC SYLLABLE QHWA
@ -327,7 +334,7 @@
13A0..13F4 ; L # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
1401..166C ; L # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
166D..166E ; L # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
166F..1676 ; L # Lo [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA
166F..167F ; L # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
1681..169A ; L # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
16A0..16EA ; L # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
16EB..16ED ; L # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
@ -355,6 +362,7 @@
1844..1877 ; L # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
1880..18A8 ; L # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
18AA ; L # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
18B0..18F5 ; L # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
1900..191C ; L # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
1923..1926 ; L # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
1929..192B ; L # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
@ -363,14 +371,25 @@
1946..194F ; L # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
1950..196D ; L # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI
1970..1974 ; L # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6
1980..19A9 ; L # Lo [42] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW XVA
1980..19AB ; L # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA
19B0..19C0 ; L # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
19C1..19C7 ; L # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
19C8..19C9 ; L # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
19D0..19D9 ; L # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
19D0..19DA ; L # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
1A00..1A16 ; L # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
1A19..1A1B ; L # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
1A1E..1A1F ; L # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
1A20..1A54 ; L # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA
1A55 ; L # Mc TAI THAM CONSONANT SIGN MEDIAL RA
1A57 ; L # Mc TAI THAM CONSONANT SIGN LA TANG LAI
1A61 ; L # Mc TAI THAM VOWEL SIGN A
1A63..1A64 ; L # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
1A6D..1A72 ; L # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
1A80..1A89 ; L # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
1A90..1A99 ; L # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
1AA0..1AA6 ; L # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
1AA7 ; L # Lm TAI THAM SIGN MAI YAMOK
1AA8..1AAD ; L # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
1B04 ; L # Mc BALINESE SIGN BISAH
1B05..1B33 ; L # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
1B35 ; L # Mc BALINESE VOWEL SIGN TEDUNG
@ -399,6 +418,11 @@
1C5A..1C77 ; L # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; L # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1C7E..1C7F ; L # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
1CD3 ; L # Po VEDIC SIGN NIHSHVASA
1CE1 ; L # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CE9..1CEC ; L # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
1CEE..1CF1 ; L # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
1CF2 ; L # Mc VEDIC SIGN ARDHAVISARGA
1D00..1D2B ; L # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
1D2C..1D61 ; L # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
1D62..1D77 ; L # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G
@ -425,8 +449,8 @@
1FF2..1FF4 ; L # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; L # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
200E ; L # Cf LEFT-TO-RIGHT MARK
2071 ; L # L& SUPERSCRIPT LATIN SMALL LETTER I
207F ; L # L& SUPERSCRIPT LATIN SMALL LETTER N
2071 ; L # Lm SUPERSCRIPT LATIN SMALL LETTER I
207F ; L # Lm SUPERSCRIPT LATIN SMALL LETTER N
2090..2094 ; L # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
2102 ; L # L& DOUBLE-STRUCK CAPITAL C
2107 ; L # L& EULER CONSTANT
@ -454,10 +478,10 @@
2800..28FF ; L # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678
2C00..2C2E ; L # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E ; L # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
2C60..2C6F ; L # L& [16] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN CAPITAL LETTER TURNED A
2C71..2C7C ; L # L& [12] LATIN SMALL LETTER V WITH RIGHT HOOK..LATIN SUBSCRIPT SMALL LETTER J
2C60..2C7C ; L # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J
2C7D ; L # Lm MODIFIER LETTER CAPITAL V
2C80..2CE4 ; L # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
2C7E..2CE4 ; L # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI
2CEB..2CEE ; L # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
2D00..2D25 ; L # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
2D30..2D65 ; L # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; L # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
@ -493,7 +517,7 @@
31F0..31FF ; L # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3200..321C ; L # So [29] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED HANGUL CIEUC U
3220..3229 ; L # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
322A..3243 ; L # So [26] PARENTHESIZED IDEOGRAPH MOON..PARENTHESIZED IDEOGRAPH REACH
322A..324F ; L # So [38] PARENTHESIZED IDEOGRAPH MOON..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
3260..327B ; L # So [28] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL HIEUH A
327F ; L # So KOREAN STANDARD SYMBOL
3280..3289 ; L # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
@ -504,10 +528,13 @@
337B..33DD ; L # So [99] SQUARE ERA NAME HEISEI..SQUARE WB
33E0..33FE ; L # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE
3400..4DB5 ; L # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FC3 ; L # Lo [20932] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FC3
4E00..9FCB ; L # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
A000..A014 ; L # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015 ; L # Lm YI SYLLABLE WU
A016..A48C ; L # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
A4D0..A4F7 ; L # Lo [40] LISU LETTER BA..LISU LETTER OE
A4F8..A4FD ; L # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
A4FE..A4FF ; L # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
A500..A60B ; L # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
A60C ; L # Lm VAI SYLLABLE LENGTHENER
A610..A61F ; L # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
@ -517,6 +544,9 @@ A640..A65F ; L # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETT
A662..A66D ; L # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
A66E ; L # Lo CYRILLIC LETTER MULTIOCULAR O
A680..A697 ; L # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
A6A0..A6E5 ; L # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
A6E6..A6EF ; L # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A6F2..A6F7 ; L # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
A722..A76F ; L # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
A770 ; L # Lm MODIFIER LETTER US
A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
@ -528,18 +558,33 @@ A807..A80A ; L # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
A80C..A822 ; L # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
A823..A824 ; L # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A827 ; L # Mc SYLOTI NAGRI VOWEL SIGN OO
A830..A835 ; L # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
A836..A837 ; L # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
A840..A873 ; L # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
A880..A881 ; L # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A882..A8B3 ; L # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
A8B4..A8C3 ; L # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
A8CE..A8CF ; L # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
A8D0..A8D9 ; L # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
A8F2..A8F7 ; L # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
A8F8..A8FA ; L # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
A8FB ; L # Lo DEVANAGARI HEADSTROKE
A900..A909 ; L # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
A90A..A925 ; L # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
A92E..A92F ; L # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
A930..A946 ; L # Lo [23] REJANG LETTER KA..REJANG LETTER A
A952..A953 ; L # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
A95F ; L # Po REJANG SECTION MARK
A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
A983 ; L # Mc JAVANESE SIGN WIGNYAN
A984..A9B2 ; L # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA
A9B4..A9B5 ; L # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
A9BA..A9BB ; L # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
A9BD..A9C0 ; L # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
A9C1..A9CD ; L # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
A9CF ; L # Lm JAVANESE PANGRANGKEP
A9D0..A9D9 ; L # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
A9DE..A9DF ; L # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
AA00..AA28 ; L # Lo [41] CHAM LETTER A..CHAM LETTER HA
AA2F..AA30 ; L # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
AA33..AA34 ; L # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
@ -548,10 +593,34 @@ AA44..AA4B ; L # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
AA4D ; L # Mc CHAM CONSONANT SIGN FINAL H
AA50..AA59 ; L # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
AA5C..AA5F ; L # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA
AA60..AA6F ; L # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA
AA70 ; L # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
AA71..AA76 ; L # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM
AA77..AA79 ; L # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
AA7A ; L # Lo MYANMAR LETTER AITON RA
AA7B ; L # Mc MYANMAR SIGN PAO KAREN TONE
AA80..AAAF ; L # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O
AAB1 ; L # Lo TAI VIET VOWEL AA
AAB5..AAB6 ; L # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O
AAB9..AABD ; L # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN
AAC0 ; L # Lo TAI VIET TONE MAI NUENG
AAC2 ; L # Lo TAI VIET TONE MAI SONG
AADB..AADC ; L # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG
AADD ; L # Lm TAI VIET SYMBOL SAM
AADE..AADF ; L # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
ABC0..ABE2 ; L # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
ABE3..ABE4 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
ABE6..ABE7 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
ABE9..ABEA ; L # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
ABEB ; L # Po MEETEI MAYEK CHEIKHEI
ABEC ; L # Mc MEETEI MAYEK LUM IYEK
ABF0..ABF9 ; L # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
AC00..D7A3 ; L # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
D7B0..D7C6 ; L # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
D7CB..D7FB ; L # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
E000..F8FF ; L # Co [6400] <private-use-E000>..<private-use-F8FF>
F900..FA2D ; L # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D
FA30..FA6A ; L # Lo [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
FA30..FA6D ; L # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; L # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
FB00..FB06 ; L # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FB13..FB17 ; L # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
@ -595,9 +664,17 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER
10400..1044F ; L # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
10450..1049D ; L # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
104A0..104A9 ; L # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
11082 ; L # Mc KAITHI SIGN VISARGA
11083..110AF ; L # Lo [45] KAITHI LETTER A..KAITHI LETTER HA
110B0..110B2 ; L # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
110B7..110B8 ; L # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
110BB..110BC ; L # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
110BD ; L # Cf KAITHI NUMBER SIGN
110BE..110C1 ; L # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
12000..1236E ; L # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
12400..12462 ; L # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
12470..12473 ; L # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
13000..1342E ; L # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
1D000..1D0F5 ; L # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
1D100..1D126 ; L # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
1D129..1D164 ; L # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
@ -630,31 +707,44 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER
1D6A8..1D6C0 ; L # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
1D6C1 ; L # Sm MATHEMATICAL BOLD NABLA
1D6C2..1D6DA ; L # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
1D6DB ; L # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
1D6DC..1D6FA ; L # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
1D6FB ; L # Sm MATHEMATICAL ITALIC NABLA
1D6FC..1D714 ; L # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
1D715 ; L # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
1D716..1D734 ; L # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
1D735 ; L # Sm MATHEMATICAL BOLD ITALIC NABLA
1D736..1D74E ; L # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
1D74F ; L # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
1D750..1D76E ; L # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
1D76F ; L # Sm MATHEMATICAL SANS-SERIF BOLD NABLA
1D770..1D788 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
1D789 ; L # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
1D78A..1D7A8 ; L # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
1D7A9 ; L # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
1D7AA..1D7C2 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
1D7C3 ; L # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
1D7C4..1D7CB ; L # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
1F110..1F12E ; L # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
1F131 ; L # So SQUARED LATIN CAPITAL LETTER B
1F13D ; L # So SQUARED LATIN CAPITAL LETTER N
1F13F ; L # So SQUARED LATIN CAPITAL LETTER P
1F142 ; L # So SQUARED LATIN CAPITAL LETTER S
1F146 ; L # So SQUARED LATIN CAPITAL LETTER W
1F14A..1F14E ; L # So [5] SQUARED HV..SQUARED PPV
1F157 ; L # So NEGATIVE CIRCLED LATIN CAPITAL LETTER H
1F15F ; L # So NEGATIVE CIRCLED LATIN CAPITAL LETTER P
1F179 ; L # So NEGATIVE SQUARED LATIN CAPITAL LETTER J
1F17B..1F17C ; L # So [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
1F17F ; L # So NEGATIVE SQUARED LATIN CAPITAL LETTER P
1F18A..1F18D ; L # So [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
1F190 ; L # So SQUARE DJ
1F200 ; L # So SQUARE HIRAGANA HOKA
1F210..1F231 ; L # So [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
1F240..1F248 ; L # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
20000..2A6D6 ; L # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; L # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2F800..2FA1D ; L # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD>
100000..10FFFD; L # Co [65534] <private-use-100000>..<private-use-10FFFD>
# The above property value applies to 869840 code points not listed here.
# Total code points: 1101792
# The above property value applies to 861492 code points not listed here.
# Total code points: 1099541
# ================================================
@ -675,7 +765,14 @@ F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD>
07CA..07EA ; R # Lo [33] NKO LETTER A..NKO LETTER JONA RA
07F4..07F5 ; R # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
07FA ; R # Lm NKO LAJANYALAN
07FB..08FF ; R # Cn [261] <reserved-07FB>..<reserved-08FF>
07FB..07FF ; R # Cn [5] <reserved-07FB>..<reserved-07FF>
0800..0815 ; R # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
081A ; R # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT
0824 ; R # Lm SAMARITAN MODIFIER LETTER SHORT A
0828 ; R # Lm SAMARITAN MODIFIER LETTER I
082E..082F ; R # Cn [2] <reserved-082E>..<reserved-082F>
0830..083E ; R # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
083F..08FF ; R # Cn [193] <reserved-083F>..<reserved-08FF>
200F ; R # Cf RIGHT-TO-LEFT MARK
FB1D ; R # Lo HEBREW LETTER YOD WITH HIRIQ
FB1F..FB28 ; R # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
@ -700,11 +797,14 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL
10839..1083B ; R # Cn [3] <reserved-10839>..<reserved-1083B>
1083C ; R # Lo CYPRIOT SYLLABLE ZA
1083D..1083E ; R # Cn [2] <reserved-1083D>..<reserved-1083E>
1083F ; R # Lo CYPRIOT SYLLABLE ZO
10840..108FF ; R # Cn [192] <reserved-10840>..<reserved-108FF>
1083F..10855 ; R # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW
10856 ; R # Cn <reserved-10856>
10857 ; R # Po IMPERIAL ARAMAIC SECTION SIGN
10858..1085F ; R # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
10860..108FF ; R # Cn [160] <reserved-10860>..<reserved-108FF>
10900..10915 ; R # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
10916..10919 ; R # No [4] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER ONE HUNDRED
1091A..1091E ; R # Cn [5] <reserved-1091A>..<reserved-1091E>
10916..1091B ; R # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
1091C..1091E ; R # Cn [3] <reserved-1091C>..<reserved-1091E>
10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C
1093A..1093E ; R # Cn [5] <reserved-1093A>..<reserved-1093E>
1093F ; R # Po LYDIAN TRIANGULAR MARK
@ -722,9 +822,26 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL
10A40..10A47 ; R # No [8] KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND
10A48..10A4F ; R # Cn [8] <reserved-10A48>..<reserved-10A4F>
10A50..10A58 ; R # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
10A59..10FFF ; R # Cn [1447] <reserved-10A59>..<reserved-10FFF>
10A59..10A5F ; R # Cn [7] <reserved-10A59>..<reserved-10A5F>
10A60..10A7C ; R # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
10A7D..10A7E ; R # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY
10A7F ; R # Po OLD SOUTH ARABIAN NUMERIC INDICATOR
10A80..10AFF ; R # Cn [128] <reserved-10A80>..<reserved-10AFF>
10B00..10B35 ; R # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE
10B36..10B38 ; R # Cn [3] <reserved-10B36>..<reserved-10B38>
10B40..10B55 ; R # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
10B56..10B57 ; R # Cn [2] <reserved-10B56>..<reserved-10B57>
10B58..10B5F ; R # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
10B60..10B72 ; R # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
10B73..10B77 ; R # Cn [5] <reserved-10B73>..<reserved-10B77>
10B78..10B7F ; R # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
10B80..10BFF ; R # Cn [128] <reserved-10B80>..<reserved-10BFF>
10C00..10C48 ; R # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
10C49..10E5F ; R # Cn [535] <reserved-10C49>..<reserved-10E5F>
10E7F..10FFF ; R # Cn [385] <reserved-10E7F>..<reserved-10FFF>
1E800..1EFFF ; R # Cn [2048] <reserved-1E800>..<reserved-1EFFF>
# Total code points: 2452
# Total code points: 4441
# ================================================
@ -740,8 +857,9 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL
2488..249B ; EN # No [20] DIGIT ONE FULL STOP..NUMBER TWENTY FULL STOP
FF10..FF19 ; EN # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
1D7CE..1D7FF ; EN # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
1F100..1F10A ; EN # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
# Total code points: 120
# Total code points: 131
# ================================================
@ -773,14 +891,17 @@ FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS
0609..060A ; ET # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
066A ; ET # Po ARABIC PERCENT SIGN
09F2..09F3 ; ET # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN
09FB ; ET # Sc BENGALI GANDA MARK
0AF1 ; ET # Sc GUJARATI RUPEE SIGN
0BF9 ; ET # Sc TAMIL RUPEE SIGN
0E3F ; ET # Sc THAI CURRENCY SYMBOL BAHT
17DB ; ET # Sc KHMER CURRENCY SYMBOL RIEL
2030..2034 ; ET # Po [5] PER MILLE SIGN..TRIPLE PRIME
20A0..20B5 ; ET # Sc [22] EURO-CURRENCY SIGN..CEDI SIGN
20A0..20B8 ; ET # Sc [25] EURO-CURRENCY SIGN..TENGE SIGN
212E ; ET # So ESTIMATED SYMBOL
2213 ; ET # Sm MINUS-OR-PLUS SIGN
A838 ; ET # Sc NORTH INDIC RUPEE MARK
A839 ; ET # So NORTH INDIC QUANTITY MARK
FE5F ; ET # Po SMALL NUMBER SIGN
FE69 ; ET # Sc SMALL DOLLAR SIGN
FE6A ; ET # Po SMALL PERCENT SIGN
@ -790,7 +911,7 @@ FF05 ; ET # Po FULLWIDTH PERCENT SIGN
FFE0..FFE1 ; ET # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
# Total code points: 57
# Total code points: 63
# ================================================
@ -800,8 +921,9 @@ FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
0660..0669 ; AN # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
066B..066C ; AN # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR
06DD ; AN # Cf ARABIC END OF AYAH
10E60..10E7E ; AN # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
# Total code points: 17
# Total code points: 48
# ================================================
@ -928,6 +1050,7 @@ FF1A ; CS # Po FULLWIDTH COLON
0F3C ; ON # Ps TIBETAN MARK ANG KHANG GYON
0F3D ; ON # Pe TIBETAN MARK ANG KHANG GYAS
1390..1399 ; ON # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
1400 ; ON # Pd CANADIAN SYLLABICS HYPHEN
169B ; ON # Ps OGHAM FEATHER MARK
169C ; ON # Pe OGHAM REVERSED FEATHER MARK
17F0..17F9 ; ON # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
@ -987,7 +1110,8 @@ FF1A ; CS # Po FULLWIDTH COLON
214A ; ON # So PROPERTY LINE
214B ; ON # Sm TURNED AMPERSAND
214C..214D ; ON # So [2] PER SIGN..AKTIESELSKAB
2153..215F ; ON # No [13] VULGAR FRACTION ONE THIRD..FRACTION NUMERATOR ONE
2150..215F ; ON # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
2189 ; ON # No VULGAR FRACTION ZERO THIRDS
2190..2194 ; ON # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
2195..2199 ; ON # So [5] UP DOWN ARROW..SOUTH WEST ARROW
219A..219B ; ON # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
@ -1023,7 +1147,7 @@ FF1A ; CS # Po FULLWIDTH COLON
239B..23B3 ; ON # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; ON # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; ON # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23E7 ; ON # So [6] WHITE TRAPEZIUM..ELECTRICAL INTERSECTION
23E2..23E8 ; ON # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
2400..2426 ; ON # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
2440..244A ; ON # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
2460..2487 ; ON # No [40] CIRCLED DIGIT ONE..PARENTHESIZED NUMBER TWENTY
@ -1036,18 +1160,18 @@ FF1A ; CS # Po FULLWIDTH COLON
25F8..25FF ; ON # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
2600..266E ; ON # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
266F ; ON # Sm MUSIC SHARP SIGN
2670..269D ; ON # So [46] WEST SYRIAC CROSS..OUTLINED WHITE STAR
26A0..26AB ; ON # So [12] WARNING SIGN..MEDIUM BLACK CIRCLE
26AD..26BC ; ON # So [16] MARRIAGE SYMBOL..SESQUIQUADRATE
26C0..26C3 ; ON # So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING
2670..26AB ; ON # So [60] WEST SYRIAC CROSS..MEDIUM BLACK CIRCLE
26AD..26CD ; ON # So [33] MARRIAGE SYMBOL..DISABLED CAR
26CF..26E1 ; ON # So [19] PICK..RESTRICTED LEFT ENTRY-2
26E3 ; ON # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
26E8..26FF ; ON # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2701..2704 ; ON # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
2706..2709 ; ON # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
270C..2727 ; ON # So [28] VICTORY HAND..WHITE FOUR POINTED STAR
2729..274B ; ON # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
274D ; ON # So SHADOWED WHITE CIRCLE
274F..2752 ; ON # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
2756 ; ON # So BLACK DIAMOND MINUS WHITE X
2758..275E ; ON # So [7] LIGHT VERTICAL BAR..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
2756..275E ; ON # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
2761..2767 ; ON # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
2768 ; ON # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
2769 ; ON # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
@ -1120,7 +1244,7 @@ FF1A ; CS # Po FULLWIDTH COLON
2B30..2B44 ; ON # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
2B45..2B46 ; ON # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
2B47..2B4C ; ON # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
2B50..2B54 ; ON # So [5] WHITE MEDIUM STAR..WHITE RIGHT-POINTING PENTAGON
2B50..2B59 ; ON # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
2CE5..2CEA ; ON # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
2CF9..2CFC ; ON # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER
2CFD ; ON # No COPTIC FRACTION ONE HALF
@ -1156,7 +1280,7 @@ FF1A ; CS # Po FULLWIDTH COLON
2E29 ; ON # Pe RIGHT DOUBLE PARENTHESIS
2E2A..2E2E ; ON # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
2E2F ; ON # Lm VERTICAL TILDE
2E30 ; ON # Po RING POINT
2E30..2E31 ; ON # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT
2E80..2E99 ; ON # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; ON # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; ON # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
@ -1299,13 +1423,19 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE
1018A ; ON # No GREEK ZERO SIGN
10190..1019B ; ON # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
1091F ; ON # Po PHOENICIAN WORD SEPARATOR
10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
1D245 ; ON # So GREEK MUSICAL LEIMMA
1D300..1D356 ; ON # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
1D6DB ; ON # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
1D715 ; ON # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
1D74F ; ON # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
1D789 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
1D7C3 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
1F000..1F02B ; ON # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
1F030..1F093 ; ON # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
# Total code points: 3439
# Total code points: 3523
# ================================================
@ -1374,11 +1504,15 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
0730..074A ; NSM # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07A6..07B0 ; NSM # Mn [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3 ; NSM # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
0901..0902 ; NSM # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0816..0819 ; NSM # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081B..0823 ; NSM # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0900..0902 ; NSM # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
093C ; NSM # Mn DEVANAGARI SIGN NUKTA
0941..0948 ; NSM # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
094D ; NSM # Mn DEVANAGARI SIGN VIRAMA
0951..0954 ; NSM # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
0951..0955 ; NSM # Mn [5] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN CANDRA LONG E
0962..0963 ; NSM # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0981 ; NSM # Mn BENGALI SIGN CANDRABINDU
09BC ; NSM # Mn BENGALI SIGN NUKTA
@ -1450,6 +1584,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
1082 ; NSM # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
1085..1086 ; NSM # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
108D ; NSM # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
109D ; NSM # Mn MYANMAR VOWEL SIGN AITON AI
135F ; NSM # Mn ETHIOPIC COMBINING GEMINATION MARK
1712..1714 ; NSM # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
1732..1734 ; NSM # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
@ -1466,6 +1601,13 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
1932 ; NSM # Mn LIMBU SMALL LETTER ANUSVARA
1939..193B ; NSM # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1A17..1A18 ; NSM # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A56 ; NSM # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A58..1A5E ; NSM # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
1A60 ; NSM # Mn TAI THAM SIGN SAKOT
1A62 ; NSM # Mn TAI THAM VOWEL SIGN MAI SAT
1A65..1A6C ; NSM # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
1A73..1A7C ; NSM # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34 ; NSM # Mn BALINESE SIGN REREKAN
1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
@ -1477,31 +1619,50 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
1BA8..1BA9 ; NSM # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
1C2C..1C33 ; NSM # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C36..1C37 ; NSM # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
1CD0..1CD2 ; NSM # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; NSM # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE2..1CE8 ; NSM # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; NSM # Mn VEDIC SIGN TIRYAK
1DC0..1DE6 ; NSM # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1DFE..1DFF ; NSM # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1DFD..1DFF ; NSM # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
20D0..20DC ; NSM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; NSM # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; NSM # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; NSM # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20F0 ; NSM # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
2CEF..2CF1 ; NSM # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
2DE0..2DFF ; NSM # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
302A..302F ; NSM # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; NSM # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
A66F ; NSM # Mn COMBINING CYRILLIC VZMET
A670..A672 ; NSM # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
A67C..A67D ; NSM # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
A6F0..A6F1 ; NSM # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A802 ; NSM # Mn SYLOTI NAGRI SIGN DVISVARA
A806 ; NSM # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; NSM # Mn SYLOTI NAGRI SIGN ANUSVARA
A825..A826 ; NSM # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A8C4 ; NSM # Mn SAURASHTRA SIGN VIRAMA
A8E0..A8F1 ; NSM # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A926..A92D ; NSM # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A947..A951 ; NSM # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A980..A982 ; NSM # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A9B3 ; NSM # Mn JAVANESE SIGN CECAK TELU
A9B6..A9B9 ; NSM # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BC ; NSM # Mn JAVANESE VOWEL SIGN PEPET
AA29..AA2E ; NSM # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA31..AA32 ; NSM # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
AA35..AA36 ; NSM # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
AA43 ; NSM # Mn CHAM CONSONANT SIGN FINAL NG
AA4C ; NSM # Mn CHAM CONSONANT SIGN FINAL M
AAB0 ; NSM # Mn TAI VIET MAI KANG
AAB2..AAB4 ; NSM # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB7..AAB8 ; NSM # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
AABE..AABF ; NSM # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
AAC1 ; NSM # Mn TAI VIET TONE MAI THO
ABE5 ; NSM # Mn MEETEI MAYEK VOWEL SIGN ANAP
ABE8 ; NSM # Mn MEETEI MAYEK VOWEL SIGN UNAP
ABED ; NSM # Mn MEETEI MAYEK APUN IYEK
FB1E ; NSM # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE00..FE0F ; NSM # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; NSM # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
@ -1511,6 +1672,9 @@ FE20..FE26 ; NSM # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOININ
10A0C..10A0F ; NSM # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A38..10A3A ; NSM # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; NSM # Mn KHAROSHTHI VIRAMA
11080..11081 ; NSM # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
110B3..110B6 ; NSM # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B9..110BA ; NSM # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
1D167..1D169 ; NSM # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D17B..1D182 ; NSM # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; NSM # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
@ -1518,7 +1682,7 @@ FE20..FE26 ; NSM # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOININ
1D242..1D244 ; NSM # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1043
# Total code points: 1173
# ================================================

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,10 @@
# DerivedJoiningGroup-5.1.0.txt
# Date: 2008-03-03, 21:57:35 GMT [MD]
# DerivedJoiningGroup-5.2.0.txt
# Date: 2009-05-22, 18:51:25 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -205,10 +205,10 @@
# ================================================
0646 ; Noon # Lo ARABIC LETTER NOON
06B9..06BD ; Noon # Lo [5] ARABIC LETTER NOON WITH DOT BELOW..ARABIC LETTER NOON WITH THREE DOTS ABOVE
06B9..06BC ; Noon # Lo [4] ARABIC LETTER NOON WITH DOT BELOW..ARABIC LETTER NOON WITH RING
0767..0769 ; Noon # Lo [3] ARABIC LETTER NOON WITH TWO DOTS BELOW..ARABIC LETTER NOON WITH SMALL V
# Total code points: 9
# Total code points: 8
# ================================================
@ -344,15 +344,12 @@
# ================================================
0626 ; Yeh # Lo ARABIC LETTER YEH WITH HAMZA ABOVE
063D..063F ; Yeh # Lo [3] ARABIC LETTER FARSI YEH WITH INVERTED V..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0649..064A ; Yeh # Lo [2] ARABIC LETTER ALEF MAKSURA..ARABIC LETTER YEH
0678 ; Yeh # Lo ARABIC LETTER HIGH HAMZA YEH
06CC ; Yeh # Lo ARABIC LETTER FARSI YEH
06CE ; Yeh # Lo ARABIC LETTER YEH WITH SMALL V
06D0..06D1 ; Yeh # Lo [2] ARABIC LETTER E..ARABIC LETTER YEH WITH THREE DOTS BELOW
0775..0777 ; Yeh # Lo [3] ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW
0777 ; Yeh # Lo ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW
# Total code points: 14
# Total code points: 7
# ================================================
@ -408,4 +405,19 @@
# Total code points: 2
# ================================================
063D..063F ; Farsi_Yeh # Lo [3] ARABIC LETTER FARSI YEH WITH INVERTED V..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
06CC ; Farsi_Yeh # Lo ARABIC LETTER FARSI YEH
06CE ; Farsi_Yeh # Lo ARABIC LETTER YEH WITH SMALL V
0775..0776 ; Farsi_Yeh # Lo [2] ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE
# Total code points: 7
# ================================================
06BD ; Nya # Lo ARABIC LETTER NOON WITH THREE DOTS ABOVE
# Total code points: 1
# EOF

View file

@ -1,10 +1,10 @@
# DerivedJoiningType-5.1.0.txt
# Date: 2008-03-03, 21:57:37 GMT [MD]
# DerivedJoiningType-5.2.0.txt
# Date: 2009-05-28, 20:37:39 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -121,11 +121,15 @@
0730..074A ; T # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07A6..07B0 ; T # Mn [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3 ; T # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
0901..0902 ; T # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0816..0819 ; T # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081B..0823 ; T # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0900..0902 ; T # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
093C ; T # Mn DEVANAGARI SIGN NUKTA
0941..0948 ; T # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
094D ; T # Mn DEVANAGARI SIGN VIRAMA
0951..0954 ; T # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
0951..0955 ; T # Mn [5] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN CANDRA LONG E
0962..0963 ; T # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0981 ; T # Mn BENGALI SIGN CANDRABINDU
09BC ; T # Mn BENGALI SIGN NUKTA
@ -199,6 +203,7 @@
1082 ; T # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
1085..1086 ; T # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
108D ; T # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
109D ; T # Mn MYANMAR VOWEL SIGN AITON AI
135F ; T # Mn ETHIOPIC COMBINING GEMINATION MARK
1712..1714 ; T # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
1732..1734 ; T # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
@ -216,6 +221,13 @@
1932 ; T # Mn LIMBU SMALL LETTER ANUSVARA
1939..193B ; T # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1A17..1A18 ; T # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A56 ; T # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A58..1A5E ; T # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
1A60 ; T # Mn TAI THAM SIGN SAKOT
1A62 ; T # Mn TAI THAM VOWEL SIGN MAI SAT
1A65..1A6C ; T # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
1A73..1A7C ; T # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34 ; T # Mn BALINESE SIGN REREKAN
1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
@ -227,8 +239,12 @@
1BA8..1BA9 ; T # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
1C2C..1C33 ; T # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C36..1C37 ; T # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
1CD0..1CD2 ; T # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; T # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE2..1CE8 ; T # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; T # Mn VEDIC SIGN TIRYAK
1DC0..1DE6 ; T # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1DFE..1DFF ; T # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1DFD..1DFF ; T # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200B ; T # Cf ZERO WIDTH SPACE
200E..200F ; T # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
202A..202E ; T # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
@ -239,24 +255,39 @@
20E1 ; T # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; T # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20F0 ; T # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
2CEF..2CF1 ; T # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
2DE0..2DFF ; T # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
302A..302F ; T # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; T # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
A66F ; T # Mn COMBINING CYRILLIC VZMET
A670..A672 ; T # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
A67C..A67D ; T # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
A6F0..A6F1 ; T # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A802 ; T # Mn SYLOTI NAGRI SIGN DVISVARA
A806 ; T # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; T # Mn SYLOTI NAGRI SIGN ANUSVARA
A825..A826 ; T # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A8C4 ; T # Mn SAURASHTRA SIGN VIRAMA
A8E0..A8F1 ; T # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A926..A92D ; T # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A947..A951 ; T # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A980..A982 ; T # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A9B3 ; T # Mn JAVANESE SIGN CECAK TELU
A9B6..A9B9 ; T # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BC ; T # Mn JAVANESE VOWEL SIGN PEPET
AA29..AA2E ; T # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA31..AA32 ; T # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
AA35..AA36 ; T # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
AA43 ; T # Mn CHAM CONSONANT SIGN FINAL NG
AA4C ; T # Mn CHAM CONSONANT SIGN FINAL M
AAB0 ; T # Mn TAI VIET MAI KANG
AAB2..AAB4 ; T # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB7..AAB8 ; T # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
AABE..AABF ; T # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
AAC1 ; T # Mn TAI VIET TONE MAI THO
ABE5 ; T # Mn MEETEI MAYEK VOWEL SIGN ANAP
ABE8 ; T # Mn MEETEI MAYEK VOWEL SIGN UNAP
ABED ; T # Mn MEETEI MAYEK APUN IYEK
FB1E ; T # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE00..FE0F ; T # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; T # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
@ -268,6 +299,10 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI
10A0C..10A0F ; T # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A38..10A3A ; T # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; T # Mn KHAROSHTHI VIRAMA
11080..11081 ; T # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
110B3..110B6 ; T # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B9..110BA ; T # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
110BD ; T # Cf KAITHI NUMBER SIGN
1D167..1D169 ; T # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D173..1D17A ; T # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
1D17B..1D182 ; T # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
@ -278,6 +313,6 @@ E0001 ; T # Cf LANGUAGE TAG
E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1177
# Total code points: 1308
# EOF

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,10 @@
# DerivedNumericValues-5.1.0.txt
# Date: 2008-03-03, 21:57:46 GMT [MD]
# DerivedNumericValues-5.2.0.txt
# Date: 2009-08-22, 04:58:28 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -52,20 +52,26 @@
1810 ; 0.0 ; ; 0 # Nd MONGOLIAN DIGIT ZERO
1946 ; 0.0 ; ; 0 # Nd LIMBU DIGIT ZERO
19D0 ; 0.0 ; ; 0 # Nd NEW TAI LUE DIGIT ZERO
1A80 ; 0.0 ; ; 0 # Nd TAI THAM HORA DIGIT ZERO
1A90 ; 0.0 ; ; 0 # Nd TAI THAM THAM DIGIT ZERO
1B50 ; 0.0 ; ; 0 # Nd BALINESE DIGIT ZERO
1BB0 ; 0.0 ; ; 0 # Nd SUNDANESE DIGIT ZERO
1C40 ; 0.0 ; ; 0 # Nd LEPCHA DIGIT ZERO
1C50 ; 0.0 ; ; 0 # Nd OL CHIKI DIGIT ZERO
2070 ; 0.0 ; ; 0 # No SUPERSCRIPT ZERO
2080 ; 0.0 ; ; 0 # No SUBSCRIPT ZERO
2189 ; 0.0 ; ; 0 # No VULGAR FRACTION ZERO THIRDS
24EA ; 0.0 ; ; 0 # No CIRCLED DIGIT ZERO
24FF ; 0.0 ; ; 0 # No NEGATIVE CIRCLED DIGIT ZERO
3007 ; 0.0 ; ; 0 # Nl IDEOGRAPHIC NUMBER ZERO
96F6 ; 0.0 ; ; 0 # Lo CJK UNIFIED IDEOGRAPH-96F6
A620 ; 0.0 ; ; 0 # Nd VAI DIGIT ZERO
A6EF ; 0.0 ; ; 0 # Nl BAMUM LETTER KOGHOM
A8D0 ; 0.0 ; ; 0 # Nd SAURASHTRA DIGIT ZERO
A900 ; 0.0 ; ; 0 # Nd KAYAH LI DIGIT ZERO
A9D0 ; 0.0 ; ; 0 # Nd JAVANESE DIGIT ZERO
AA50 ; 0.0 ; ; 0 # Nd CHAM DIGIT ZERO
ABF0 ; 0.0 ; ; 0 # Nd MEETEI MAYEK DIGIT ZERO
F9B2 ; 0.0 ; ; 0 # Lo CJK COMPATIBILITY IDEOGRAPH-F9B2
FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
1018A ; 0.0 ; ; 0 # No GREEK ZERO SIGN
@ -75,18 +81,46 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
1D7E2 ; 0.0 ; ; 0 # Nd MATHEMATICAL SANS-SERIF DIGIT ZERO
1D7EC ; 0.0 ; ; 0 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO
1D7F6 ; 0.0 ; ; 0 # Nd MATHEMATICAL MONOSPACE DIGIT ZERO
1F100..1F101 ; 0.0 ; ; 0 # No [2] DIGIT ZERO FULL STOP..DIGIT ZERO COMMA
# Total code points: 47
# Total code points: 55
# ================================================
215B ; 0.125 ; ; 1/8 # No VULGAR FRACTION ONE EIGHTH
1245F ; 0.125 ; ; 1/8 # Nl CUNEIFORM NUMERIC SIGN ONE EIGHTH ASH
09F4 ; 0.0625 ; ; 1/16 # No BENGALI CURRENCY NUMERATOR ONE
A833 ; 0.0625 ; ; 1/16 # No NORTH INDIC FRACTION ONE SIXTEENTH
# Total code points: 2
# ================================================
2152 ; 0.1 ; ; 1/10 # No VULGAR FRACTION ONE TENTH
# Total code points: 1
# ================================================
2151 ; 0.11111111 ; ; 1/9 # No VULGAR FRACTION ONE NINTH
# Total code points: 1
# ================================================
09F5 ; 0.125 ; ; 1/8 # No BENGALI CURRENCY NUMERATOR TWO
215B ; 0.125 ; ; 1/8 # No VULGAR FRACTION ONE EIGHTH
A834 ; 0.125 ; ; 1/8 # No NORTH INDIC FRACTION ONE EIGHTH
1245F ; 0.125 ; ; 1/8 # Nl CUNEIFORM NUMERIC SIGN ONE EIGHTH ASH
# Total code points: 4
# ================================================
2150 ; 0.14285714 ; ; 1/7 # No VULGAR FRACTION ONE SEVENTH
# Total code points: 1
# ================================================
2159 ; 0.16666667 ; ; 1/6 # No VULGAR FRACTION ONE SIXTH
12461 ; 0.16666667 ; ; 1/6 # Nl CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE SIXTH
@ -94,6 +128,13 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
# ================================================
09F6 ; 0.1875 ; ; 3/16 # No BENGALI CURRENCY NUMERATOR THREE
A835 ; 0.1875 ; ; 3/16 # No NORTH INDIC FRACTION THREE SIXTEENTHS
# Total code points: 2
# ================================================
2155 ; 0.2 ; ; 1/5 # No VULGAR FRACTION ONE FIFTH
# Total code points: 1
@ -101,20 +142,24 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
# ================================================
00BC ; 0.25 ; ; 1/4 # No VULGAR FRACTION ONE QUARTER
09F7 ; 0.25 ; ; 1/4 # No BENGALI CURRENCY NUMERATOR FOUR
0D73 ; 0.25 ; ; 1/4 # No MALAYALAM FRACTION ONE QUARTER
A830 ; 0.25 ; ; 1/4 # No NORTH INDIC FRACTION ONE QUARTER
10140 ; 0.25 ; ; 1/4 # Nl GREEK ACROPHONIC ATTIC ONE QUARTER
10E7C ; 0.25 ; ; 1/4 # No RUMI FRACTION ONE QUARTER
12460 ; 0.25 ; ; 1/4 # Nl CUNEIFORM NUMERIC SIGN ONE QUARTER ASH
12462 ; 0.25 ; ; 1/4 # Nl CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
# Total code points: 5
# Total code points: 8
# ================================================
2153 ; 0.33333333 ; ; 1/3 # No VULGAR FRACTION ONE THIRD
10E7D ; 0.33333333 ; ; 1/3 # No RUMI FRACTION ONE THIRD
1245A ; 0.33333333 ; ; 1/3 # Nl CUNEIFORM NUMERIC SIGN ONE THIRD DISH
1245D ; 0.33333333 ; ; 1/3 # Nl CUNEIFORM NUMERIC SIGN ONE THIRD VARIANT FORM A
# Total code points: 3
# Total code points: 4
# ================================================
@ -134,10 +179,12 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
0D74 ; 0.5 ; ; 1/2 # No MALAYALAM FRACTION ONE HALF
0F2A ; 0.5 ; ; 1/2 # No TIBETAN DIGIT HALF ONE
2CFD ; 0.5 ; ; 1/2 # No COPTIC FRACTION ONE HALF
A831 ; 0.5 ; ; 1/2 # No NORTH INDIC FRACTION ONE HALF
10141 ; 0.5 ; ; 1/2 # Nl GREEK ACROPHONIC ATTIC ONE HALF
10175..10176 ; 0.5 ; ; 1/2 # No [2] GREEK ONE HALF SIGN..GREEK ONE HALF SIGN ALTERNATE FORM
10E7B ; 0.5 ; ; 1/2 # No RUMI FRACTION ONE HALF
# Total code points: 7
# Total code points: 9
# ================================================
@ -155,18 +202,21 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
2154 ; 0.66666667 ; ; 2/3 # No VULGAR FRACTION TWO THIRDS
10177 ; 0.66666667 ; ; 2/3 # No GREEK TWO THIRDS SIGN
10E7E ; 0.66666667 ; ; 2/3 # No RUMI FRACTION TWO THIRDS
1245B ; 0.66666667 ; ; 2/3 # Nl CUNEIFORM NUMERIC SIGN TWO THIRDS DISH
1245E ; 0.66666667 ; ; 2/3 # Nl CUNEIFORM NUMERIC SIGN TWO THIRDS VARIANT FORM A
# Total code points: 4
# Total code points: 5
# ================================================
00BE ; 0.75 ; ; 3/4 # No VULGAR FRACTION THREE QUARTERS
09F8 ; 0.75 ; ; 3/4 # No BENGALI CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
0D75 ; 0.75 ; ; 3/4 # No MALAYALAM FRACTION THREE QUARTERS
A832 ; 0.75 ; ; 3/4 # No NORTH INDIC FRACTION THREE QUARTERS
10178 ; 0.75 ; ; 3/4 # No GREEK THREE QUARTERS SIGN
# Total code points: 3
# Total code points: 5
# ================================================
@ -196,7 +246,6 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
07C1 ; 1.0 ; ; 1 # Nd NKO DIGIT ONE
0967 ; 1.0 ; ; 1 # Nd DEVANAGARI DIGIT ONE
09E7 ; 1.0 ; ; 1 # Nd BENGALI DIGIT ONE
09F4 ; 1.0 ; ; 1 # No BENGALI CURRENCY NUMERATOR ONE
0A67 ; 1.0 ; ; 1 # Nd GURMUKHI DIGIT ONE
0AE7 ; 1.0 ; ; 1 # Nd GUJARATI DIGIT ONE
0B67 ; 1.0 ; ; 1 # Nd ORIYA DIGIT ONE
@ -217,6 +266,9 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
1811 ; 1.0 ; ; 1 # Nd MONGOLIAN DIGIT ONE
1947 ; 1.0 ; ; 1 # Nd LIMBU DIGIT ONE
19D1 ; 1.0 ; ; 1 # Nd NEW TAI LUE DIGIT ONE
19DA ; 1.0 ; ; 1 # Nd NEW TAI LUE THAM DIGIT ONE
1A81 ; 1.0 ; ; 1 # Nd TAI THAM HORA DIGIT ONE
1A91 ; 1.0 ; ; 1 # Nd TAI THAM THAM DIGIT ONE
1B51 ; 1.0 ; ; 1 # Nd BALINESE DIGIT ONE
1BB1 ; 1.0 ; ; 1 # Nd SUNDANESE DIGIT ONE
1C41 ; 1.0 ; ; 1 # Nd LEPCHA DIGIT ONE
@ -242,9 +294,12 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
5E7A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-5E7A
5F0C ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-5F0C
A621 ; 1.0 ; ; 1 # Nd VAI DIGIT ONE
A6E6 ; 1.0 ; ; 1 # Nl BAMUM LETTER MO
A8D1 ; 1.0 ; ; 1 # Nd SAURASHTRA DIGIT ONE
A901 ; 1.0 ; ; 1 # Nd KAYAH LI DIGIT ONE
A9D1 ; 1.0 ; ; 1 # Nd JAVANESE DIGIT ONE
AA51 ; 1.0 ; ; 1 # Nd CHAM DIGIT ONE
ABF1 ; 1.0 ; ; 1 # Nd MEETEI MAYEK DIGIT ONE
FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE
10107 ; 1.0 ; ; 1 # No AEGEAN NUMBER ONE
10142 ; 1.0 ; ; 1 # Nl GREEK ACROPHONIC ATTIC ONE DRACHMA
@ -252,8 +307,13 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE
10320 ; 1.0 ; ; 1 # No OLD ITALIC NUMERAL ONE
103D1 ; 1.0 ; ; 1 # Nl OLD PERSIAN NUMBER ONE
104A1 ; 1.0 ; ; 1 # Nd OSMANYA DIGIT ONE
10858 ; 1.0 ; ; 1 # No IMPERIAL ARAMAIC NUMBER ONE
10916 ; 1.0 ; ; 1 # No PHOENICIAN NUMBER ONE
10A40 ; 1.0 ; ; 1 # No KHAROSHTHI DIGIT ONE
10A7D ; 1.0 ; ; 1 # No OLD SOUTH ARABIAN NUMBER ONE
10B58 ; 1.0 ; ; 1 # No INSCRIPTIONAL PARTHIAN NUMBER ONE
10B78 ; 1.0 ; ; 1 # No INSCRIPTIONAL PAHLAVI NUMBER ONE
10E60 ; 1.0 ; ; 1 # No RUMI DIGIT ONE
12415 ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESH2
1241E ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESHU
1242C ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE SHARU
@ -266,9 +326,10 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE
1D7E3 ; 1.0 ; ; 1 # Nd MATHEMATICAL SANS-SERIF DIGIT ONE
1D7ED ; 1.0 ; ; 1 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT ONE
1D7F7 ; 1.0 ; ; 1 # Nd MATHEMATICAL MONOSPACE DIGIT ONE
1F102 ; 1.0 ; ; 1 # No DIGIT ONE COMMA
2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A
# Total code points: 80
# Total code points: 91
# ================================================
@ -285,7 +346,6 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE
07C2 ; 2.0 ; ; 2 # Nd NKO DIGIT TWO
0968 ; 2.0 ; ; 2 # Nd DEVANAGARI DIGIT TWO
09E8 ; 2.0 ; ; 2 # Nd BENGALI DIGIT TWO
09F5 ; 2.0 ; ; 2 # No BENGALI CURRENCY NUMERATOR TWO
0A68 ; 2.0 ; ; 2 # Nd GURMUKHI DIGIT TWO
0AE8 ; 2.0 ; ; 2 # Nd GUJARATI DIGIT TWO
0B68 ; 2.0 ; ; 2 # Nd ORIYA DIGIT TWO
@ -306,6 +366,8 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE
1812 ; 2.0 ; ; 2 # Nd MONGOLIAN DIGIT TWO
1948 ; 2.0 ; ; 2 # Nd LIMBU DIGIT TWO
19D2 ; 2.0 ; ; 2 # Nd NEW TAI LUE DIGIT TWO
1A82 ; 2.0 ; ; 2 # Nd TAI THAM HORA DIGIT TWO
1A92 ; 2.0 ; ; 2 # Nd TAI THAM THAM DIGIT TWO
1B52 ; 2.0 ; ; 2 # Nd BALINESE DIGIT TWO
1BB2 ; 2.0 ; ; 2 # Nd SUNDANESE DIGIT TWO
1C42 ; 2.0 ; ; 2 # Nd LEPCHA DIGIT TWO
@ -333,16 +395,24 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE
8CB3 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-8CB3
8D30 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-8D30
A622 ; 2.0 ; ; 2 # Nd VAI DIGIT TWO
A6E7 ; 2.0 ; ; 2 # Nl BAMUM LETTER MBAA
A8D2 ; 2.0 ; ; 2 # Nd SAURASHTRA DIGIT TWO
A902 ; 2.0 ; ; 2 # Nd KAYAH LI DIGIT TWO
A9D2 ; 2.0 ; ; 2 # Nd JAVANESE DIGIT TWO
AA52 ; 2.0 ; ; 2 # Nd CHAM DIGIT TWO
ABF2 ; 2.0 ; ; 2 # Nd MEETEI MAYEK DIGIT TWO
F978 ; 2.0 ; ; 2 # Lo CJK COMPATIBILITY IDEOGRAPH-F978
FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO
10108 ; 2.0 ; ; 2 # No AEGEAN NUMBER TWO
1015B..1015E ; 2.0 ; ; 2 # Nl [4] GREEK ACROPHONIC EPIDAUREAN TWO..GREEK ACROPHONIC EPIDAUREAN TWO DRACHMAS
103D2 ; 2.0 ; ; 2 # Nl OLD PERSIAN NUMBER TWO
104A2 ; 2.0 ; ; 2 # Nd OSMANYA DIGIT TWO
10859 ; 2.0 ; ; 2 # No IMPERIAL ARAMAIC NUMBER TWO
1091A ; 2.0 ; ; 2 # No PHOENICIAN NUMBER TWO
10A41 ; 2.0 ; ; 2 # No KHAROSHTHI DIGIT TWO
10B59 ; 2.0 ; ; 2 # No INSCRIPTIONAL PARTHIAN NUMBER TWO
10B79 ; 2.0 ; ; 2 # No INSCRIPTIONAL PAHLAVI NUMBER TWO
10E61 ; 2.0 ; ; 2 # No RUMI DIGIT TWO
12400 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO ASH
12416 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO GESH2
1241F ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO GESHU
@ -358,9 +428,10 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO
1D7E4 ; 2.0 ; ; 2 # Nd MATHEMATICAL SANS-SERIF DIGIT TWO
1D7EE ; 2.0 ; ; 2 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT TWO
1D7F8 ; 2.0 ; ; 2 # Nd MATHEMATICAL MONOSPACE DIGIT TWO
1F103 ; 2.0 ; ; 2 # No DIGIT TWO COMMA
22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390
# Total code points: 84
# Total code points: 94
# ================================================
@ -377,7 +448,6 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO
07C3 ; 3.0 ; ; 3 # Nd NKO DIGIT THREE
0969 ; 3.0 ; ; 3 # Nd DEVANAGARI DIGIT THREE
09E9 ; 3.0 ; ; 3 # Nd BENGALI DIGIT THREE
09F6 ; 3.0 ; ; 3 # No BENGALI CURRENCY NUMERATOR THREE
0A69 ; 3.0 ; ; 3 # Nd GURMUKHI DIGIT THREE
0AE9 ; 3.0 ; ; 3 # Nd GUJARATI DIGIT THREE
0B69 ; 3.0 ; ; 3 # Nd ORIYA DIGIT THREE
@ -398,6 +468,8 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO
1813 ; 3.0 ; ; 3 # Nd MONGOLIAN DIGIT THREE
1949 ; 3.0 ; ; 3 # Nd LIMBU DIGIT THREE
19D3 ; 3.0 ; ; 3 # Nd NEW TAI LUE DIGIT THREE
1A83 ; 3.0 ; ; 3 # Nd TAI THAM HORA DIGIT THREE
1A93 ; 3.0 ; ; 3 # Nd TAI THAM THAM DIGIT THREE
1B53 ; 3.0 ; ; 3 # Nd BALINESE DIGIT THREE
1BB3 ; 3.0 ; ; 3 # Nd SUNDANESE DIGIT THREE
1C43 ; 3.0 ; ; 3 # Nd LEPCHA DIGIT THREE
@ -421,14 +493,22 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO
53C1..53C4 ; 3.0 ; ; 3 # Lo [4] CJK UNIFIED IDEOGRAPH-53C1..CJK UNIFIED IDEOGRAPH-53C4
5F0E ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-5F0E
A623 ; 3.0 ; ; 3 # Nd VAI DIGIT THREE
A6E8 ; 3.0 ; ; 3 # Nl BAMUM LETTER TET
A8D3 ; 3.0 ; ; 3 # Nd SAURASHTRA DIGIT THREE
A903 ; 3.0 ; ; 3 # Nd KAYAH LI DIGIT THREE
A9D3 ; 3.0 ; ; 3 # Nd JAVANESE DIGIT THREE
AA53 ; 3.0 ; ; 3 # Nd CHAM DIGIT THREE
ABF3 ; 3.0 ; ; 3 # Nd MEETEI MAYEK DIGIT THREE
F96B ; 3.0 ; ; 3 # Lo CJK COMPATIBILITY IDEOGRAPH-F96B
FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE
10109 ; 3.0 ; ; 3 # No AEGEAN NUMBER THREE
104A3 ; 3.0 ; ; 3 # Nd OSMANYA DIGIT THREE
1085A ; 3.0 ; ; 3 # No IMPERIAL ARAMAIC NUMBER THREE
1091B ; 3.0 ; ; 3 # No PHOENICIAN NUMBER THREE
10A42 ; 3.0 ; ; 3 # No KHAROSHTHI DIGIT THREE
10B5A ; 3.0 ; ; 3 # No INSCRIPTIONAL PARTHIAN NUMBER THREE
10B7A ; 3.0 ; ; 3 # No INSCRIPTIONAL PAHLAVI NUMBER THREE
10E62 ; 3.0 ; ; 3 # No RUMI DIGIT THREE
12401 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE ASH
12408 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE DISH
12417 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE GESH2
@ -445,12 +525,13 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE
1D7E5 ; 3.0 ; ; 3 # Nd MATHEMATICAL SANS-SERIF DIGIT THREE
1D7EF ; 3.0 ; ; 3 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT THREE
1D7F9 ; 3.0 ; ; 3 # Nd MATHEMATICAL MONOSPACE DIGIT THREE
1F104 ; 3.0 ; ; 3 # No DIGIT THREE COMMA
20AFD ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-20AFD
20B19 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-20B19
22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998
23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B
# Total code points: 86
# Total code points: 96
# ================================================
@ -466,7 +547,6 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE
07C4 ; 4.0 ; ; 4 # Nd NKO DIGIT FOUR
096A ; 4.0 ; ; 4 # Nd DEVANAGARI DIGIT FOUR
09EA ; 4.0 ; ; 4 # Nd BENGALI DIGIT FOUR
09F7 ; 4.0 ; ; 4 # No BENGALI CURRENCY NUMERATOR FOUR
0A6A ; 4.0 ; ; 4 # Nd GURMUKHI DIGIT FOUR
0AEA ; 4.0 ; ; 4 # Nd GUJARATI DIGIT FOUR
0B6A ; 4.0 ; ; 4 # Nd ORIYA DIGIT FOUR
@ -485,6 +565,8 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE
1814 ; 4.0 ; ; 4 # Nd MONGOLIAN DIGIT FOUR
194A ; 4.0 ; ; 4 # Nd LIMBU DIGIT FOUR
19D4 ; 4.0 ; ; 4 # Nd NEW TAI LUE DIGIT FOUR
1A84 ; 4.0 ; ; 4 # Nd TAI THAM HORA DIGIT FOUR
1A94 ; 4.0 ; ; 4 # Nd TAI THAM THAM DIGIT FOUR
1B54 ; 4.0 ; ; 4 # Nd BALINESE DIGIT FOUR
1BB4 ; 4.0 ; ; 4 # Nd SUNDANESE DIGIT FOUR
1C44 ; 4.0 ; ; 4 # Nd LEPCHA DIGIT FOUR
@ -508,13 +590,19 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE
56DB ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-56DB
8086 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-8086
A624 ; 4.0 ; ; 4 # Nd VAI DIGIT FOUR
A6E9 ; 4.0 ; ; 4 # Nl BAMUM LETTER KPA
A8D4 ; 4.0 ; ; 4 # Nd SAURASHTRA DIGIT FOUR
A904 ; 4.0 ; ; 4 # Nd KAYAH LI DIGIT FOUR
A9D4 ; 4.0 ; ; 4 # Nd JAVANESE DIGIT FOUR
AA54 ; 4.0 ; ; 4 # Nd CHAM DIGIT FOUR
ABF4 ; 4.0 ; ; 4 # Nd MEETEI MAYEK DIGIT FOUR
FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR
1010A ; 4.0 ; ; 4 # No AEGEAN NUMBER FOUR
104A4 ; 4.0 ; ; 4 # Nd OSMANYA DIGIT FOUR
10A43 ; 4.0 ; ; 4 # No KHAROSHTHI DIGIT FOUR
10B5B ; 4.0 ; ; 4 # No INSCRIPTIONAL PARTHIAN NUMBER FOUR
10B7B ; 4.0 ; ; 4 # No INSCRIPTIONAL PAHLAVI NUMBER FOUR
10E63 ; 4.0 ; ; 4 # No RUMI DIGIT FOUR
12402 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR ASH
12409 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR DISH
1240F ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR U
@ -532,11 +620,12 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR
1D7E6 ; 4.0 ; ; 4 # Nd MATHEMATICAL SANS-SERIF DIGIT FOUR
1D7F0 ; 4.0 ; ; 4 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT FOUR
1D7FA ; 4.0 ; ; 4 # Nd MATHEMATICAL MONOSPACE DIGIT FOUR
1F105 ; 4.0 ; ; 4 # No DIGIT FOUR COMMA
20064 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-20064
200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2
2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D
# Total code points: 79
# Total code points: 87
# ================================================
@ -570,6 +659,8 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR
1815 ; 5.0 ; ; 5 # Nd MONGOLIAN DIGIT FIVE
194B ; 5.0 ; ; 5 # Nd LIMBU DIGIT FIVE
19D5 ; 5.0 ; ; 5 # Nd NEW TAI LUE DIGIT FIVE
1A85 ; 5.0 ; ; 5 # Nd TAI THAM HORA DIGIT FIVE
1A95 ; 5.0 ; ; 5 # Nd TAI THAM THAM DIGIT FIVE
1B55 ; 5.0 ; ; 5 # Nd BALINESE DIGIT FIVE
1BB5 ; 5.0 ; ; 5 # Nd SUNDANESE DIGIT FIVE
1C45 ; 5.0 ; ; 5 # Nd LEPCHA DIGIT FIVE
@ -593,9 +684,12 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR
4E94 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-4E94
4F0D ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-4F0D
A625 ; 5.0 ; ; 5 # Nd VAI DIGIT FIVE
A6EA ; 5.0 ; ; 5 # Nl BAMUM LETTER TEN
A8D5 ; 5.0 ; ; 5 # Nd SAURASHTRA DIGIT FIVE
A905 ; 5.0 ; ; 5 # Nd KAYAH LI DIGIT FIVE
A9D5 ; 5.0 ; ; 5 # Nd JAVANESE DIGIT FIVE
AA55 ; 5.0 ; ; 5 # Nd CHAM DIGIT FIVE
ABF5 ; 5.0 ; ; 5 # Nd MEETEI MAYEK DIGIT FIVE
FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE
1010B ; 5.0 ; ; 5 # No AEGEAN NUMBER FIVE
10143 ; 5.0 ; ; 5 # Nl GREEK ACROPHONIC ATTIC FIVE
@ -605,6 +699,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE
10173 ; 5.0 ; ; 5 # Nl GREEK ACROPHONIC DELPHIC FIVE MNAS
10321 ; 5.0 ; ; 5 # No OLD ITALIC NUMERAL FIVE
104A5 ; 5.0 ; ; 5 # Nd OSMANYA DIGIT FIVE
10E64 ; 5.0 ; ; 5 # No RUMI DIGIT FIVE
12403 ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE ASH
1240A ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE DISH
12410 ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE U
@ -621,9 +716,10 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE
1D7E7 ; 5.0 ; ; 5 # Nd MATHEMATICAL SANS-SERIF DIGIT FIVE
1D7F1 ; 5.0 ; ; 5 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT FIVE
1D7FB ; 5.0 ; ; 5 # Nd MATHEMATICAL MONOSPACE DIGIT FIVE
1F106 ; 5.0 ; ; 5 # No DIGIT FIVE COMMA
20121 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121
# Total code points: 77
# Total code points: 84
# ================================================
@ -657,6 +753,8 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE
1816 ; 6.0 ; ; 6 # Nd MONGOLIAN DIGIT SIX
194C ; 6.0 ; ; 6 # Nd LIMBU DIGIT SIX
19D6 ; 6.0 ; ; 6 # Nd NEW TAI LUE DIGIT SIX
1A86 ; 6.0 ; ; 6 # Nd TAI THAM HORA DIGIT SIX
1A96 ; 6.0 ; ; 6 # Nd TAI THAM THAM DIGIT SIX
1B56 ; 6.0 ; ; 6 # Nd BALINESE DIGIT SIX
1BB6 ; 6.0 ; ; 6 # Nd SUNDANESE DIGIT SIX
1C46 ; 6.0 ; ; 6 # Nd LEPCHA DIGIT SIX
@ -680,14 +778,18 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE
9646 ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-9646
9678 ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-9678
A626 ; 6.0 ; ; 6 # Nd VAI DIGIT SIX
A6EB ; 6.0 ; ; 6 # Nl BAMUM LETTER NTUU
A8D6 ; 6.0 ; ; 6 # Nd SAURASHTRA DIGIT SIX
A906 ; 6.0 ; ; 6 # Nd KAYAH LI DIGIT SIX
A9D6 ; 6.0 ; ; 6 # Nd JAVANESE DIGIT SIX
AA56 ; 6.0 ; ; 6 # Nd CHAM DIGIT SIX
ABF6 ; 6.0 ; ; 6 # Nd MEETEI MAYEK DIGIT SIX
F9D1 ; 6.0 ; ; 6 # Lo CJK COMPATIBILITY IDEOGRAPH-F9D1
F9D3 ; 6.0 ; ; 6 # Lo CJK COMPATIBILITY IDEOGRAPH-F9D3
FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX
1010C ; 6.0 ; ; 6 # No AEGEAN NUMBER SIX
104A6 ; 6.0 ; ; 6 # Nd OSMANYA DIGIT SIX
10E65 ; 6.0 ; ; 6 # No RUMI DIGIT SIX
12404 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX ASH
1240B ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX DISH
12411 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX U
@ -701,9 +803,10 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX
1D7E8 ; 6.0 ; ; 6 # Nd MATHEMATICAL SANS-SERIF DIGIT SIX
1D7F2 ; 6.0 ; ; 6 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT SIX
1D7FC ; 6.0 ; ; 6 # Nd MATHEMATICAL MONOSPACE DIGIT SIX
1F107 ; 6.0 ; ; 6 # No DIGIT SIX COMMA
20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA
# Total code points: 69
# Total code points: 76
# ================================================
@ -737,6 +840,8 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX
1817 ; 7.0 ; ; 7 # Nd MONGOLIAN DIGIT SEVEN
194D ; 7.0 ; ; 7 # Nd LIMBU DIGIT SEVEN
19D7 ; 7.0 ; ; 7 # Nd NEW TAI LUE DIGIT SEVEN
1A87 ; 7.0 ; ; 7 # Nd TAI THAM HORA DIGIT SEVEN
1A97 ; 7.0 ; ; 7 # Nd TAI THAM THAM DIGIT SEVEN
1B57 ; 7.0 ; ; 7 # Nd BALINESE DIGIT SEVEN
1BB7 ; 7.0 ; ; 7 # Nd SUNDANESE DIGIT SEVEN
1C47 ; 7.0 ; ; 7 # Nd LEPCHA DIGIT SEVEN
@ -760,12 +865,16 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX
67D2 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-67D2
6F06 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-6F06
A627 ; 7.0 ; ; 7 # Nd VAI DIGIT SEVEN
A6EC ; 7.0 ; ; 7 # Nl BAMUM LETTER SAMBA
A8D7 ; 7.0 ; ; 7 # Nd SAURASHTRA DIGIT SEVEN
A907 ; 7.0 ; ; 7 # Nd KAYAH LI DIGIT SEVEN
A9D7 ; 7.0 ; ; 7 # Nd JAVANESE DIGIT SEVEN
AA57 ; 7.0 ; ; 7 # Nd CHAM DIGIT SEVEN
ABF7 ; 7.0 ; ; 7 # Nd MEETEI MAYEK DIGIT SEVEN
FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN
1010D ; 7.0 ; ; 7 # No AEGEAN NUMBER SEVEN
104A7 ; 7.0 ; ; 7 # Nd OSMANYA DIGIT SEVEN
10E66 ; 7.0 ; ; 7 # No RUMI DIGIT SEVEN
12405 ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN ASH
1240C ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN DISH
12412 ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN U
@ -778,9 +887,10 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN
1D7E9 ; 7.0 ; ; 7 # Nd MATHEMATICAL SANS-SERIF DIGIT SEVEN
1D7F3 ; 7.0 ; ; 7 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN
1D7FD ; 7.0 ; ; 7 # Nd MATHEMATICAL MONOSPACE DIGIT SEVEN
1F108 ; 7.0 ; ; 7 # No DIGIT SEVEN COMMA
20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001
# Total code points: 68
# Total code points: 75
# ================================================
@ -814,6 +924,8 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN
1818 ; 8.0 ; ; 8 # Nd MONGOLIAN DIGIT EIGHT
194E ; 8.0 ; ; 8 # Nd LIMBU DIGIT EIGHT
19D8 ; 8.0 ; ; 8 # Nd NEW TAI LUE DIGIT EIGHT
1A88 ; 8.0 ; ; 8 # Nd TAI THAM HORA DIGIT EIGHT
1A98 ; 8.0 ; ; 8 # Nd TAI THAM THAM DIGIT EIGHT
1B58 ; 8.0 ; ; 8 # Nd BALINESE DIGIT EIGHT
1BB8 ; 8.0 ; ; 8 # Nd SUNDANESE DIGIT EIGHT
1C48 ; 8.0 ; ; 8 # Nd LEPCHA DIGIT EIGHT
@ -835,12 +947,16 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN
516B ; 8.0 ; ; 8 # Lo CJK UNIFIED IDEOGRAPH-516B
634C ; 8.0 ; ; 8 # Lo CJK UNIFIED IDEOGRAPH-634C
A628 ; 8.0 ; ; 8 # Nd VAI DIGIT EIGHT
A6ED ; 8.0 ; ; 8 # Nl BAMUM LETTER FAAMAE
A8D8 ; 8.0 ; ; 8 # Nd SAURASHTRA DIGIT EIGHT
A908 ; 8.0 ; ; 8 # Nd KAYAH LI DIGIT EIGHT
A9D8 ; 8.0 ; ; 8 # Nd JAVANESE DIGIT EIGHT
AA58 ; 8.0 ; ; 8 # Nd CHAM DIGIT EIGHT
ABF8 ; 8.0 ; ; 8 # Nd MEETEI MAYEK DIGIT EIGHT
FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT
1010E ; 8.0 ; ; 8 # No AEGEAN NUMBER EIGHT
104A8 ; 8.0 ; ; 8 # Nd OSMANYA DIGIT EIGHT
10E67 ; 8.0 ; ; 8 # No RUMI DIGIT EIGHT
12406 ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT ASH
1240D ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT DISH
12413 ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT U
@ -853,8 +969,9 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT
1D7EA ; 8.0 ; ; 8 # Nd MATHEMATICAL SANS-SERIF DIGIT EIGHT
1D7F4 ; 8.0 ; ; 8 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT EIGHT
1D7FE ; 8.0 ; ; 8 # Nd MATHEMATICAL MONOSPACE DIGIT EIGHT
1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA
# Total code points: 64
# Total code points: 71
# ================================================
@ -888,6 +1005,8 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT
1819 ; 9.0 ; ; 9 # Nd MONGOLIAN DIGIT NINE
194F ; 9.0 ; ; 9 # Nd LIMBU DIGIT NINE
19D9 ; 9.0 ; ; 9 # Nd NEW TAI LUE DIGIT NINE
1A89 ; 9.0 ; ; 9 # Nd TAI THAM HORA DIGIT NINE
1A99 ; 9.0 ; ; 9 # Nd TAI THAM THAM DIGIT NINE
1B59 ; 9.0 ; ; 9 # Nd BALINESE DIGIT NINE
1BB9 ; 9.0 ; ; 9 # Nd SUNDANESE DIGIT NINE
1C49 ; 9.0 ; ; 9 # Nd LEPCHA DIGIT NINE
@ -910,12 +1029,16 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT
5EFE ; 9.0 ; ; 9 # Lo CJK UNIFIED IDEOGRAPH-5EFE
7396 ; 9.0 ; ; 9 # Lo CJK UNIFIED IDEOGRAPH-7396
A629 ; 9.0 ; ; 9 # Nd VAI DIGIT NINE
A6EE ; 9.0 ; ; 9 # Nl BAMUM LETTER KOVUU
A8D9 ; 9.0 ; ; 9 # Nd SAURASHTRA DIGIT NINE
A909 ; 9.0 ; ; 9 # Nd KAYAH LI DIGIT NINE
A9D9 ; 9.0 ; ; 9 # Nd JAVANESE DIGIT NINE
AA59 ; 9.0 ; ; 9 # Nd CHAM DIGIT NINE
ABF9 ; 9.0 ; ; 9 # Nd MEETEI MAYEK DIGIT NINE
FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE
1010F ; 9.0 ; ; 9 # No AEGEAN NUMBER NINE
104A9 ; 9.0 ; ; 9 # Nd OSMANYA DIGIT NINE
10E68 ; 9.0 ; ; 9 # No RUMI DIGIT NINE
12407 ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE ASH
1240E ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE DISH
12414 ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE U
@ -928,9 +1051,10 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE
1D7EB ; 9.0 ; ; 9 # Nd MATHEMATICAL SANS-SERIF DIGIT NINE
1D7F5 ; 9.0 ; ; 9 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT NINE
1D7FF ; 9.0 ; ; 9 # Nd MATHEMATICAL MONOSPACE DIGIT NINE
1F10A ; 9.0 ; ; 9 # No DIGIT NINE COMMA
2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890
# Total code points: 68
# Total code points: 75
# ================================================
@ -961,11 +1085,15 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
10160..10164 ; 10.0 ; ; 10 # Nl [5] GREEK ACROPHONIC TROEZENIAN TEN..GREEK ACROPHONIC THESPIAN TEN
10322 ; 10.0 ; ; 10 # No OLD ITALIC NUMERAL TEN
103D3 ; 10.0 ; ; 10 # Nl OLD PERSIAN NUMBER TEN
1085B ; 10.0 ; ; 10 # No IMPERIAL ARAMAIC NUMBER TEN
10917 ; 10.0 ; ; 10 # No PHOENICIAN NUMBER TEN
10A44 ; 10.0 ; ; 10 # No KHAROSHTHI NUMBER TEN
10B5C ; 10.0 ; ; 10 # No INSCRIPTIONAL PARTHIAN NUMBER TEN
10B7C ; 10.0 ; ; 10 # No INSCRIPTIONAL PAHLAVI NUMBER TEN
10E69 ; 10.0 ; ; 10 # No RUMI NUMBER TEN
1D369 ; 10.0 ; ; 10 # No COUNTING ROD TENS DIGIT ONE
# Total code points: 34
# Total code points: 38
# ================================================
@ -1068,11 +1196,15 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
5EFF ; 20.0 ; ; 20 # Lo CJK UNIFIED IDEOGRAPH-5EFF
10111 ; 20.0 ; ; 20 # No AEGEAN NUMBER TWENTY
103D4 ; 20.0 ; ; 20 # Nl OLD PERSIAN NUMBER TWENTY
1085C ; 20.0 ; ; 20 # No IMPERIAL ARAMAIC NUMBER TWENTY
10918 ; 20.0 ; ; 20 # No PHOENICIAN NUMBER TWENTY
10A45 ; 20.0 ; ; 20 # No KHAROSHTHI NUMBER TWENTY
10B5D ; 20.0 ; ; 20 # No INSCRIPTIONAL PARTHIAN NUMBER TWENTY
10B7D ; 20.0 ; ; 20 # No INSCRIPTIONAL PAHLAVI NUMBER TWENTY
10E6A ; 20.0 ; ; 20 # No RUMI NUMBER TWENTY
1D36A ; 20.0 ; ; 20 # No COUNTING ROD TENS DIGIT TWO
# Total code points: 13
# Total code points: 17
# ================================================
@ -1136,10 +1268,11 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
5345 ; 30.0 ; ; 30 # Lo CJK UNIFIED IDEOGRAPH-5345
10112 ; 30.0 ; ; 30 # No AEGEAN NUMBER THIRTY
10165 ; 30.0 ; ; 30 # Nl GREEK ACROPHONIC THESPIAN THIRTY
10E6B ; 30.0 ; ; 30 # No RUMI NUMBER THIRTY
1D36B ; 30.0 ; ; 30 # No COUNTING ROD TENS DIGIT THREE
20983 ; 30.0 ; ; 30 # Lo CJK UNIFIED IDEOGRAPH-20983
# Total code points: 8
# Total code points: 9
# ================================================
@ -1201,11 +1334,12 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
32B5 ; 40.0 ; ; 40 # No CIRCLED NUMBER FORTY
534C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-534C
10113 ; 40.0 ; ; 40 # No AEGEAN NUMBER FORTY
10E6C ; 40.0 ; ; 40 # No RUMI NUMBER FORTY
1D36C ; 40.0 ; ; 40 # No COUNTING ROD TENS DIGIT FOUR
2098C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-2098C
2099C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-2099C
# Total code points: 7
# Total code points: 8
# ================================================
@ -1275,42 +1409,48 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
10166..10169 ; 50.0 ; ; 50 # Nl [4] GREEK ACROPHONIC TROEZENIAN FIFTY..GREEK ACROPHONIC THESPIAN FIFTY
10174 ; 50.0 ; ; 50 # Nl GREEK ACROPHONIC STRATIAN FIFTY MNAS
10323 ; 50.0 ; ; 50 # No OLD ITALIC NUMERAL FIFTY
10A7E ; 50.0 ; ; 50 # No OLD SOUTH ARABIAN NUMBER FIFTY
10E6D ; 50.0 ; ; 50 # No RUMI NUMBER FIFTY
1D36D ; 50.0 ; ; 50 # No COUNTING ROD TENS DIGIT FIVE
# Total code points: 16
# Total code points: 18
# ================================================
1377 ; 60.0 ; ; 60 # No ETHIOPIC NUMBER SIXTY
10115 ; 60.0 ; ; 60 # No AEGEAN NUMBER SIXTY
10E6E ; 60.0 ; ; 60 # No RUMI NUMBER SIXTY
1D36E ; 60.0 ; ; 60 # No COUNTING ROD TENS DIGIT SIX
# Total code points: 3
# Total code points: 4
# ================================================
1378 ; 70.0 ; ; 70 # No ETHIOPIC NUMBER SEVENTY
10116 ; 70.0 ; ; 70 # No AEGEAN NUMBER SEVENTY
10E6F ; 70.0 ; ; 70 # No RUMI NUMBER SEVENTY
1D36F ; 70.0 ; ; 70 # No COUNTING ROD TENS DIGIT SEVEN
# Total code points: 3
# Total code points: 4
# ================================================
1379 ; 80.0 ; ; 80 # No ETHIOPIC NUMBER EIGHTY
10117 ; 80.0 ; ; 80 # No AEGEAN NUMBER EIGHTY
10E70 ; 80.0 ; ; 80 # No RUMI NUMBER EIGHTY
1D370 ; 80.0 ; ; 80 # No COUNTING ROD TENS DIGIT EIGHT
# Total code points: 3
# Total code points: 4
# ================================================
137A ; 90.0 ; ; 90 # No ETHIOPIC NUMBER NINETY
10118 ; 90.0 ; ; 90 # No AEGEAN NUMBER NINETY
10341 ; 90.0 ; ; 90 # Nl GOTHIC LETTER NINETY
10E71 ; 90.0 ; ; 90 # No RUMI NUMBER NINETY
1D371 ; 90.0 ; ; 90 # No COUNTING ROD TENS DIGIT NINE
# Total code points: 4
# Total code points: 5
# ================================================
@ -1327,29 +1467,36 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
10152 ; 100.0 ; ; 100 # Nl GREEK ACROPHONIC ATTIC ONE HUNDRED STATERS
1016A ; 100.0 ; ; 100 # Nl GREEK ACROPHONIC THESPIAN ONE HUNDRED
103D5 ; 100.0 ; ; 100 # Nl OLD PERSIAN NUMBER HUNDRED
1085D ; 100.0 ; ; 100 # No IMPERIAL ARAMAIC NUMBER ONE HUNDRED
10919 ; 100.0 ; ; 100 # No PHOENICIAN NUMBER ONE HUNDRED
10A46 ; 100.0 ; ; 100 # No KHAROSHTHI NUMBER ONE HUNDRED
10B5E ; 100.0 ; ; 100 # No INSCRIPTIONAL PARTHIAN NUMBER ONE HUNDRED
10B7E ; 100.0 ; ; 100 # No INSCRIPTIONAL PAHLAVI NUMBER ONE HUNDRED
10E72 ; 100.0 ; ; 100 # No RUMI NUMBER ONE HUNDRED
# Total code points: 15
# Total code points: 19
# ================================================
1011A ; 200.0 ; ; 200 # No AEGEAN NUMBER TWO HUNDRED
# Total code points: 1
# ================================================
1011B ; 300.0 ; ; 300 # No AEGEAN NUMBER THREE HUNDRED
1016B ; 300.0 ; ; 300 # Nl GREEK ACROPHONIC THESPIAN THREE HUNDRED
10E73 ; 200.0 ; ; 200 # No RUMI NUMBER TWO HUNDRED
# Total code points: 2
# ================================================
1011C ; 400.0 ; ; 400 # No AEGEAN NUMBER FOUR HUNDRED
1011B ; 300.0 ; ; 300 # No AEGEAN NUMBER THREE HUNDRED
1016B ; 300.0 ; ; 300 # Nl GREEK ACROPHONIC THESPIAN THREE HUNDRED
10E74 ; 300.0 ; ; 300 # No RUMI NUMBER THREE HUNDRED
# Total code points: 1
# Total code points: 3
# ================================================
1011C ; 400.0 ; ; 400 # No AEGEAN NUMBER FOUR HUNDRED
10E75 ; 400.0 ; ; 400 # No RUMI NUMBER FOUR HUNDRED
# Total code points: 2
# ================================================
@ -1360,33 +1507,38 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
1014C ; 500.0 ; ; 500 # Nl GREEK ACROPHONIC ATTIC FIVE HUNDRED TALENTS
10153 ; 500.0 ; ; 500 # Nl GREEK ACROPHONIC ATTIC FIVE HUNDRED STATERS
1016C..10170 ; 500.0 ; ; 500 # Nl [5] GREEK ACROPHONIC EPIDAUREAN FIVE HUNDRED..GREEK ACROPHONIC NAXIAN FIVE HUNDRED
10E76 ; 500.0 ; ; 500 # No RUMI NUMBER FIVE HUNDRED
# Total code points: 11
# Total code points: 12
# ================================================
1011E ; 600.0 ; ; 600 # No AEGEAN NUMBER SIX HUNDRED
10E77 ; 600.0 ; ; 600 # No RUMI NUMBER SIX HUNDRED
# Total code points: 1
# Total code points: 2
# ================================================
1011F ; 700.0 ; ; 700 # No AEGEAN NUMBER SEVEN HUNDRED
10E78 ; 700.0 ; ; 700 # No RUMI NUMBER SEVEN HUNDRED
# Total code points: 1
# Total code points: 2
# ================================================
10120 ; 800.0 ; ; 800 # No AEGEAN NUMBER EIGHT HUNDRED
10E79 ; 800.0 ; ; 800 # No RUMI NUMBER EIGHT HUNDRED
# Total code points: 1
# Total code points: 2
# ================================================
10121 ; 900.0 ; ; 900 # No AEGEAN NUMBER NINE HUNDRED
1034A ; 900.0 ; ; 900 # Nl GOTHIC LETTER NINE HUNDRED
10E7A ; 900.0 ; ; 900 # No RUMI NUMBER NINE HUNDRED
# Total code points: 2
# Total code points: 3
# ================================================
@ -1401,9 +1553,12 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
1014D ; 1000.0 ; ; 1000 # Nl GREEK ACROPHONIC ATTIC ONE THOUSAND TALENTS
10154 ; 1000.0 ; ; 1000 # Nl GREEK ACROPHONIC ATTIC ONE THOUSAND STATERS
10171 ; 1000.0 ; ; 1000 # Nl GREEK ACROPHONIC THESPIAN ONE THOUSAND
1085E ; 1000.0 ; ; 1000 # No IMPERIAL ARAMAIC NUMBER ONE THOUSAND
10A47 ; 1000.0 ; ; 1000 # No KHAROSHTHI NUMBER ONE THOUSAND
10B5F ; 1000.0 ; ; 1000 # No INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
10B7F ; 1000.0 ; ; 1000 # No INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
# Total code points: 13
# Total code points: 16
# ================================================
@ -1465,8 +1620,9 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
842C ; 10000.0 ; ; 10000 # Lo CJK UNIFIED IDEOGRAPH-842C
1012B ; 10000.0 ; ; 10000 # No AEGEAN NUMBER TEN THOUSAND
10155 ; 10000.0 ; ; 10000 # Nl GREEK ACROPHONIC ATTIC TEN THOUSAND STATERS
1085F ; 10000.0 ; ; 10000 # No IMPERIAL ARAMAIC NUMBER TEN THOUSAND
# Total code points: 6
# Total code points: 7
# ================================================

View file

@ -1,27 +1,38 @@
# EastAsianWidth-5.1.0.txt
# Date: 2006-10-26, 16:58:00 PDT [KW]
# EastAsianWidth-5.2.0.txt
# Date: 2009-06-09, 17:47:00 PDT [KW]
#
# East Asian Width Properties
#
# This file is an informative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2007 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# The format is two fields separated by a semicolon.
# Field 0: Unicode value
# Field 1: East Asian Width property, consisting of one of the following values:
# "N", "A", "H", "W", "F", "Na"
# - All code points, assigned or unassigned, that are not listed
# - All code points, assigned or unassigned, that are not listed
# explicitly are given the value "N".
# The unassigned code points that default to "W" include ranges in the
# following blocks:
# CJK Unified Ideographs Extension A: U+3400..U+4DBF
# CJK Unified Ideographs: U+4E00..U+9FFF
# CJK Compatibility Ideographs: U+F900..U+FAFF
# CJK Unified Ideographs Extension B: U+20000..U+2A6DF
# CJK Unified Ideographs Extension C: U+2A700..U+2B73F
# CJK Compatibility Ideographs Supplement: U+2F800..U+2FA1F
# and any other reserved code points on
# Planes 2 and 3: U+20000..U+2FFFD
# U+30000..U+3FFFD
# - Characters ranges are specified as for other property files in
# the Unicode Character Database.
#
# The Unicode name of each character is provided in a comment for help
# in identifying the characters.
#
# See UAX #11: East Asian Character Width, for more information.
# See UAX #11: East Asian Width, for more information.
#
# @missing: 0000..10FFFF; N
0000..001F;N
@ -152,7 +163,7 @@
0410..044F;A
0450;N
0451;A
0452..0523;N
0452..0525;N
0531..0556;N
0559..055F;N
0561..0587;N
@ -168,11 +179,13 @@
070F..074A;N
074D..07B1;N
07C0..07FA;N
0901..0939;N
093C..094D;N
0950..0954;N
0800..082D;N
0830..083E;N
0900..0939;N
093C..094E;N
0950..0955;N
0958..0972;N
097B..097F;N
0979..097F;N
0981..0983;N
0985..098C;N
098F..0990;N
@ -186,7 +199,7 @@
09D7;N
09DC..09DD;N
09DF..09E3;N
09E6..09FA;N
09E6..09FB;N
0A01..0A03;N
0A05..0A0A;N
0A0F..0A10;N
@ -324,14 +337,14 @@
0F90..0F97;N
0F99..0FBC;N
0FBE..0FCC;N
0FCE..0FD4;N
1000..1099;N
109E..10C5;N
0FCE..0FD8;N
1000..10C5;N
10D0..10FC;N
1100..1159;W
115F;W
1100..115F;W
1160..11A2;N
11A3..11A7;W
11A8..11F9;N
11FA..11FF;W
1200..1248;N
124A..124D;N
1250..1256;N
@ -351,8 +364,7 @@
135F..137C;N
1380..1399;N
13A0..13F4;N
1401..1676;N
1680..169C;N
1400..169C;N
16A0..16F0;N
1700..170C;N
170E..1714;N
@ -368,17 +380,22 @@
1810..1819;N
1820..1877;N
1880..18AA;N
18B0..18F5;N
1900..191C;N
1920..192B;N
1930..193B;N
1940;N
1944..196D;N
1970..1974;N
1980..19A9;N
1980..19AB;N
19B0..19C9;N
19D0..19D9;N
19D0..19DA;N
19DE..1A1B;N
1A1E..1A1F;N
1A1E..1A5E;N
1A60..1A7C;N
1A7F..1A89;N
1A90..1A99;N
1AA0..1AAD;N
1B00..1B4B;N
1B50..1B7C;N
1B80..1BAA;N
@ -386,8 +403,9 @@
1C00..1C37;N
1C3B..1C49;N
1C4D..1C7F;N
1CD0..1CF2;N
1D00..1DE6;N
1DFE..1F15;N
1DFD..1F15;N
1F18..1F1D;N
1F20..1F45;N
1F48..1F4D;N
@ -438,7 +456,7 @@
20A9;H
20AA..20AB;N
20AC;A
20AD..20B5;N
20AD..20B8;N
20D0..20F0;N
2100..2102;N
2103;A
@ -456,7 +474,7 @@
2126;A
2127..212A;N
212B;A
212C..214F;N
212C..2152;N
2153..2154;A
2155..215A;N
215B..215E;A
@ -465,6 +483,7 @@
216C..216F;N
2170..2179;A
217A..2188;N
2189;A
2190..2199;A
219A..21B7;N
21B8..21B9;A
@ -534,7 +553,7 @@
2312;A
2313..2328;N
2329..232A;W
232B..23E7;N
232B..23E8;N
2400..2426;N
2440..244A;N
2460..24E9;A
@ -595,8 +614,14 @@
266E;N
266F;A
2670..269D;N
26A0..26BC;N
269E..269F;A
26A0..26BD;N
26BE..26BF;A
26C0..26C3;N
26C4..26CD;A
26CF..26E1;A
26E3;A
26E8..26FF;A
2701..2704;N
2706..2709;N
270C..2727;N
@ -606,6 +631,7 @@
274D;N
274F..2752;N
2756;N
2757;A
2758..275E;N
2761..2775;N
2776..277F;A
@ -620,11 +646,10 @@
2985..2986;Na
2987..2B4C;N
2B50..2B54;N
2B55..2B59;A
2C00..2C2E;N
2C30..2C5E;N
2C60..2C6F;N
2C71..2C7D;N
2C80..2CEA;N
2C60..2CF1;N
2CF9..2D25;N
2D30..2D65;N
2D6F;N
@ -637,7 +662,7 @@
2DC8..2DCE;N
2DD0..2DD6;N
2DD8..2DDE;N
2DE0..2E30;N
2DE0..2E31;N
2E80..2E99;W
2E9B..2EF3;W
2F00..2FD5;W
@ -652,37 +677,56 @@
3190..31B7;W
31C0..31E3;W
31F0..321E;W
3220..3243;W
3220..3247;W
3248..324F;A
3250..32FE;W
3300..33FF;W
3400..4DB5;W
4DB6..4DBF;W
4DC0..4DFF;N
4E00..9FC3;W
4E00..9FCB;W
9FCC..9FFF;W
A000..A48C;W
A490..A4C6;W
A500..A62B;N
A4D0..A62B;N
A640..A65F;N
A662..A673;N
A67C..A697;N
A6A0..A6F7;N
A700..A78C;N
A7FB..A82B;N
A830..A839;N
A840..A877;N
A880..A8C4;N
A8CE..A8D9;N
A8E0..A8FB;N
A900..A953;N
A95F;N
A960..A97C;W
A980..A9CD;N
A9CF..A9D9;N
A9DE..A9DF;N
AA00..AA36;N
AA40..AA4D;N
AA50..AA59;N
AA5C..AA5F;N
AA5C..AA7B;N
AA80..AAC2;N
AADB..AADF;N
ABC0..ABED;N
ABF0..ABF9;N
AC00..D7A3;W
D7B0..D7C6;W
D7CB..D7FB;W
D800..DB7F;N
DB80..DBFF;N
DC00..DFFF;N
E000..F8FF;A
F900..FA2D;W
FA30..FA6A;W
FA2E..FA2F;W
FA30..FA6D;W
FA6E..FA6F;W
FA70..FAD9;W
FADA..FAFF;W
FB00..FB06;N
FB13..FB17;N
FB1D..FB36;N
@ -741,8 +785,9 @@ FFFD;A
1080A..10835;N
10837..10838;N
1083C;N
1083F;N
10900..10919;N
1083F..10855;N
10857..1085F;N
10900..1091B;N
1091F..10939;N
1093F;N
10A00..10A03;N
@ -753,9 +798,18 @@ FFFD;A
10A38..10A3A;N
10A3F..10A47;N
10A50..10A58;N
10A60..10A7F;N
10B00..10B35;N
10B39..10B55;N
10B58..10B72;N
10B78..10B7F;N
10C00..10C48;N
10E60..10E7E;N
11080..110C1;N
12000..1236E;N
12400..12462;N
12470..12473;N
13000..1342E;N
1D000..1D0F5;N
1D100..1D126;N
1D129..1D1DD;N
@ -785,8 +839,29 @@ FFFD;A
1D7CE..1D7FF;N
1F000..1F02B;N
1F030..1F093;N
1F100..1F10A;A
1F110..1F12D;A
1F12E;N
1F131;A
1F13D;A
1F13F;A
1F142;A
1F146;A
1F14A..1F14E;A
1F157;A
1F15F;A
1F179;A
1F17B..1F17C;A
1F17F;A
1F18A..1F18D;A
1F190;A
1F200;W
1F210..1F231;W
1F240..1F248;W
20000..2A6D6;W
2A6D7..2F7FF;W
2A6D7..2A6FF;W
2A700..2B734;W
2B735..2F7FF;W
2F800..2FA1D;W
2FA1E..2FFFD;W
30000..3FFFD;W

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,10 @@
# GraphemeBreakProperty-5.1.0.txt
# Date: 2008-03-03, 21:57:47 GMT [MD]
# GraphemeBreakProperty-5.2.0.txt
# Date: 2009-06-09, 21:40:09 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -47,11 +47,12 @@
206A..206F ; Control
FEFF ; Control
FFF9..FFFB ; Control
110BD ; Control
1D173..1D17A ; Control
E0001 ; Control
E0020..E007F ; Control
# Total code points: 202
# Total code points: 203
# ================================================
@ -75,11 +76,15 @@ E0020..E007F ; Control
0730..074A ; Extend
07A6..07B0 ; Extend
07EB..07F3 ; Extend
0901..0902 ; Extend
0816..0819 ; Extend
081B..0823 ; Extend
0825..0827 ; Extend
0829..082D ; Extend
0900..0902 ; Extend
093C ; Extend
0941..0948 ; Extend
094D ; Extend
0951..0954 ; Extend
0951..0955 ; Extend
0962..0963 ; Extend
0981 ; Extend
09BC ; Extend
@ -170,6 +175,7 @@ E0020..E007F ; Control
1082 ; Extend
1085..1086 ; Extend
108D ; Extend
109D ; Extend
135F ; Extend
1712..1714 ; Extend
1732..1734 ; Extend
@ -186,6 +192,13 @@ E0020..E007F ; Control
1932 ; Extend
1939..193B ; Extend
1A17..1A18 ; Extend
1A56 ; Extend
1A58..1A5E ; Extend
1A60 ; Extend
1A62 ; Extend
1A65..1A6C ; Extend
1A73..1A7C ; Extend
1A7F ; Extend
1B00..1B03 ; Extend
1B34 ; Extend
1B36..1B3A ; Extend
@ -197,32 +210,51 @@ E0020..E007F ; Control
1BA8..1BA9 ; Extend
1C2C..1C33 ; Extend
1C36..1C37 ; Extend
1CD0..1CD2 ; Extend
1CD4..1CE0 ; Extend
1CE2..1CE8 ; Extend
1CED ; Extend
1DC0..1DE6 ; Extend
1DFE..1DFF ; Extend
1DFD..1DFF ; Extend
200C..200D ; Extend
20D0..20DC ; Extend
20DD..20E0 ; Extend
20E1 ; Extend
20E2..20E4 ; Extend
20E5..20F0 ; Extend
2CEF..2CF1 ; Extend
2DE0..2DFF ; Extend
302A..302F ; Extend
3099..309A ; Extend
A66F ; Extend
A670..A672 ; Extend
A67C..A67D ; Extend
A6F0..A6F1 ; Extend
A802 ; Extend
A806 ; Extend
A80B ; Extend
A825..A826 ; Extend
A8C4 ; Extend
A8E0..A8F1 ; Extend
A926..A92D ; Extend
A947..A951 ; Extend
A980..A982 ; Extend
A9B3 ; Extend
A9B6..A9B9 ; Extend
A9BC ; Extend
AA29..AA2E ; Extend
AA31..AA32 ; Extend
AA35..AA36 ; Extend
AA43 ; Extend
AA4C ; Extend
AAB0 ; Extend
AAB2..AAB4 ; Extend
AAB7..AAB8 ; Extend
AABE..AABF ; Extend
AAC1 ; Extend
ABE5 ; Extend
ABE8 ; Extend
ABED ; Extend
FB1E ; Extend
FE00..FE0F ; Extend
FE20..FE26 ; Extend
@ -233,6 +265,9 @@ FF9E..FF9F ; Extend
10A0C..10A0F ; Extend
10A38..10A3A ; Extend
10A3F ; Extend
11080..11081 ; Extend
110B3..110B6 ; Extend
110B9..110BA ; Extend
1D165 ; Extend
1D167..1D169 ; Extend
1D16E..1D172 ; Extend
@ -242,20 +277,24 @@ FF9E..FF9F ; Extend
1D242..1D244 ; Extend
E0100..E01EF ; Extend
# Total code points: 1075
# Total code points: 1205
# ================================================
0E40..0E44 ; Prepend
0EC0..0EC4 ; Prepend
AAB5..AAB6 ; Prepend
AAB9 ; Prepend
AABB..AABC ; Prepend
# Total code points: 10
# Total code points: 15
# ================================================
0903 ; SpacingMark
093E..0940 ; SpacingMark
0949..094C ; SpacingMark
094E ; SpacingMark
0982..0983 ; SpacingMark
09BF..09C0 ; SpacingMark
09C7..09C8 ; SpacingMark
@ -302,6 +341,7 @@ E0100..E01EF ; Extend
1083..1084 ; SpacingMark
1087..108C ; SpacingMark
108F ; SpacingMark
109A..109C ; SpacingMark
17B6 ; SpacingMark
17BE..17C5 ; SpacingMark
17C7..17C8 ; SpacingMark
@ -312,6 +352,11 @@ E0100..E01EF ; Extend
19B0..19C0 ; SpacingMark
19C8..19C9 ; SpacingMark
1A19..1A1B ; SpacingMark
1A55 ; SpacingMark
1A57 ; SpacingMark
1A61 ; SpacingMark
1A63..1A64 ; SpacingMark
1A6D..1A72 ; SpacingMark
1B04 ; SpacingMark
1B35 ; SpacingMark
1B3B ; SpacingMark
@ -323,37 +368,53 @@ E0100..E01EF ; Extend
1BAA ; SpacingMark
1C24..1C2B ; SpacingMark
1C34..1C35 ; SpacingMark
1CE1 ; SpacingMark
1CF2 ; SpacingMark
A823..A824 ; SpacingMark
A827 ; SpacingMark
A880..A881 ; SpacingMark
A8B4..A8C3 ; SpacingMark
A952..A953 ; SpacingMark
A983 ; SpacingMark
A9B4..A9B5 ; SpacingMark
A9BA..A9BB ; SpacingMark
A9BD..A9C0 ; SpacingMark
AA2F..AA30 ; SpacingMark
AA33..AA34 ; SpacingMark
AA4D ; SpacingMark
AA7B ; SpacingMark
ABE3..ABE4 ; SpacingMark
ABE6..ABE7 ; SpacingMark
ABE9..ABEA ; SpacingMark
ABEC ; SpacingMark
11082 ; SpacingMark
110B0..110B2 ; SpacingMark
110B7..110B8 ; SpacingMark
1D166 ; SpacingMark
1D16D ; SpacingMark
# Total code points: 217
# Total code points: 257
# ================================================
1100..1159 ; L
115F ; L
1100..115F ; L
A960..A97C ; L
# Total code points: 91
# Total code points: 125
# ================================================
1160..11A2 ; V
1160..11A7 ; V
D7B0..D7C6 ; V
# Total code points: 67
# Total code points: 95
# ================================================
11A8..11F9 ; T
11A8..11FF ; T
D7CB..D7FB ; T
# Total code points: 82
# Total code points: 137
# ================================================

View file

@ -1,5 +1,5 @@
# LineBreak-5.1.0.txt
# Date: 2007-12-10, 16:24:00 PST [KW]
# LineBreak-5.2.0.txt
# Date: 2009-08-17, 12:21:00 PDT [KW]
#
# Line Break Properties
#
@ -7,7 +7,7 @@
# Unicode Character Database.
# It contains both normative and informative data.
#
# Copyright (c) 1991-2007 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# The format is two fields separated by a semicolon.
@ -17,18 +17,29 @@
# "BK", "CR", "LF", "CM", "SG", "GL", "CB", "SP", "ZW",
# "NL", "WJ", "JL", "JV", "JT", "H2", "H3"
# Informative:
# "XX", "OP", "CL", "QU", "NS", "EX", "SY",
# "XX", "OP", "CL", "CP", "QU", "NS", "EX", "SY",
# "IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY",
# "BB", "BA", "SA", "AI", "B2"
# - All code points, assigned and unassigned, that are not listed
# - All code points, assigned and unassigned, that are not listed
# explicitly are given the value "XX".
# The unassigned code points that default to "ID" include ranges in the
# following blocks:
# CJK Unified Ideographs Extension A: U+3400..U+4DBF
# CJK Unified Ideographs: U+4E00..U+9FFF
# CJK Compatibility Ideographs: U+F900..U+FAFF
# CJK Unified Ideographs Extension B: U+20000..U+2A6DF
# CJK Unified Ideographs Extension C: U+2A700..U+2B73F
# CJK Compatibility Ideographs Supplement: U+2F800..U+2FA1F
# and any other reserved code points on
# Planes 2 and 3: U+20000..U+2FFFD
# U+30000..U+3FFFD
# - Characters ranges are specified as for other property files in
# the Unicode Character Database.
#
# The Unicode name of each character is provided in a comment for help
# in identifying the characters.
#
# See UAX #14: Line Breaking Properties, for more information
# See UAX #14: Unicode Line Breaking Algorithm, for more information
#
# @missing: 0000..10FFFF; XX
0000..0008;CM
@ -46,7 +57,7 @@
0026;AL
0027;QU
0028;OP
0029;CL
0029;CP
002A;AL
002B;PR
002C;IS
@ -60,7 +71,7 @@
0040..005A;AL
005B;OP
005C;PR
005D;CL
005D;CP
005E..007A;AL
007B;OP
007C;BA
@ -122,7 +133,7 @@
038E..03A1;AL
03A3..0482;AL
0483..0489;CM
048A..0523;AL
048A..0525;AL
0531..0556;AL
0559..055F;AL
0561..0587;AL
@ -181,19 +192,28 @@
07F8;IS
07F9;EX
07FA;AL
0901..0903;CM
0800..0815;AL
0816..0819;CM
081A;AL
081B..0823;CM
0824;AL
0825..0827;CM
0828;AL
0829..082D;CM
0830..083E;AL
0900..0903;CM
0904..0939;AL
093C;CM
093D;AL
093E..094D;CM
093E..094E;CM
0950;AL
0951..0954;CM
0951..0955;CM
0958..0961;AL
0962..0963;CM
0964..0965;BA
0966..096F;NU
0970..0972;AL
097B..097F;AL
0979..097F;AL
0981..0983;CM
0985..098C;AL
098F..0990;AL
@ -213,8 +233,11 @@
09E2..09E3;CM
09E6..09EF;NU
09F0..09F1;AL
09F2..09F3;PR
09F4..09FA;AL
09F2..09F3;PO
09F4..09F8;AL
09F9;PO
09FA;AL
09FB;PR
0A01..0A03;CM
0A05..0A0A;AL
0A0F..0A10;AL
@ -420,20 +443,19 @@
0FD0..0FD1;BB
0FD2;BA
0FD3;BB
0FD4;AL
0FD4..0FD8;AL
1000..103F;SA
1040..1049;NU
104A..104B;BA
104C..104F;AL
1050..108F;SA
1090..1099;NU
109E..109F;SA
109A..109F;SA
10A0..10C5;AL
10D0..10FC;AL
1100..1159;JL
115F;JL
1160..11A2;JV
11A8..11F9;JT
1100..115F;JL
1160..11A7;JV
11A8..11FF;JT
1200..1248;AL
124A..124D;AL
1250..1256;AL
@ -456,7 +478,8 @@
1362..137C;AL
1380..1399;AL
13A0..13F4;AL
1401..1676;AL
1400;BA
1401..167F;AL
1680;BA
1681..169A;AL
169B;OP
@ -500,6 +523,7 @@
1880..18A8;AL
18A9;CM
18AA;AL
18B0..18F5;AL
1900..191C;AL
1920..192B;CM
1930..193B;CM
@ -508,19 +532,27 @@
1946..194F;NU
1950..196D;SA
1970..1974;SA
1980..19A9;SA
1980..19AB;SA
19B0..19C9;SA
19D0..19D9;NU
19D0..19DA;NU
19DE..19DF;SA
19E0..1A16;AL
1A17..1A1B;CM
1A1E..1A1F;AL
1A20..1A5E;SA
1A60..1A7C;SA
1A7F;CM
1A80..1A89;NU
1A90..1A99;NU
1AA0..1AAD;SA
1B00..1B04;CM
1B05..1B33;AL
1B34..1B44;CM
1B45..1B4B;AL
1B50..1B59;NU
1B5A..1B60;BA
1B5A..1B5B;BA
1B5C;AL
1B5D..1B60;BA
1B61..1B6A;AL
1B6B..1B73;CM
1B74..1B7C;AL
@ -537,9 +569,16 @@
1C50..1C59;NU
1C5A..1C7D;AL
1C7E..1C7F;BA
1CD0..1CD2;CM
1CD3;AL
1CD4..1CE8;CM
1CE9..1CEC;AL
1CED;CM
1CEE..1CF1;AL
1CF2;CM
1D00..1DBF;AL
1DC0..1DE6;CM
1DFE..1DFF;CM
1DFD..1DFF;CM
1E00..1F15;AL
1F18..1F1D;AL
1F20..1F45;AL
@ -615,6 +654,8 @@
20A0..20A6;PR
20A7;PO
20A8..20B5;PR
20B6;PO
20B7..20B8;PR
20D0..20F0;CM
2100..2102;AL
2103;PO
@ -630,8 +671,7 @@
2121..2122;AI
2123..212A;AL
212B;AI
212C..214F;AL
2153;AL
212C..2153;AL
2154..2155;AI
2156..215A;AL
215B;AI
@ -642,6 +682,7 @@
216C..216F;AL
2170..2179;AI
217A..2188;AL
2189;AI
2190..2199;AI
219A..21D1;AL
21D2;AI
@ -709,7 +750,7 @@
2313..2328;AL
2329;OP
232A;CL
232B..23E7;AL
232B..23E8;AL
2400..2426;AL
2440..244A;AL
2460..24FE;AI
@ -770,8 +811,14 @@
266E;AL
266F;AI
2670..269D;AL
26A0..26BC;AL
269E..269F;AI
26A0..26BD;AL
26BE..26BF;AI
26C0..26C3;AL
26C4..26CD;AI
26CF..26E1;AI
26E3;AI
26E8..26FF;AI
2701..2704;AL
2706..2709;AL
270C..2727;AL
@ -779,6 +826,7 @@
274D;AL
274F..2752;AL
2756;AL
2757;AI
2758..275A;AL
275B..275E;QU
2761;AL
@ -851,11 +899,11 @@
29FD;CL
29FE..2B4C;AL
2B50..2B54;AL
2B55..2B59;AI
2C00..2C2E;AL
2C30..2C5E;AL
2C60..2C6F;AL
2C71..2C7D;AL
2C80..2CEA;AL
2C60..2CEE;AL
2CEF..2CF1;CM
2CF9;EX
2CFA..2CFC;BA
2CFD;AL
@ -895,7 +943,7 @@
2E2A..2E2D;BA
2E2E;EX
2E2F;AL
2E30;BA
2E30..2E31;BA
2E80..2E99;ID
2E9B..2EF3;ID
2F00..2FD5;ID
@ -986,16 +1034,21 @@
31C0..31E3;ID
31F0..31FF;NS
3200..321E;ID
3220..3243;ID
3220..3247;ID
3248..324F;AI
3250..32FE;ID
3300..33FF;ID
3400..4DB5;ID
4DB6..4DBF;ID
4DC0..4DFF;AL
4E00..9FC3;ID
4E00..9FCB;ID
9FCC..9FFF;ID
A000..A014;ID
A015;NS
A016..A48C;ID
A490..A4C6;ID
A4D0..A4FD;AL
A4FE..A4FF;BA
A500..A60C;AL
A60D;BA
A60E;EX
@ -1009,6 +1062,10 @@ A66F..A672;CM
A673;AL
A67C..A67D;CM
A67E..A697;AL
A6A0..A6EF;AL
A6F0..A6F1;CM
A6F2;AL
A6F3..A6F7;BA
A700..A78C;AL
A7FB..A801;AL
A802;CM
@ -1019,6 +1076,9 @@ A80B;CM
A80C..A822;AL
A823..A827;CM
A828..A82B;AL
A830..A837;AL
A838;PO
A839;AL
A840..A873;AL
A874..A875;BB
A876..A877;EX
@ -1027,6 +1087,8 @@ A882..A8B3;AL
A8B4..A8C4;CM
A8CE..A8CF;BA
A8D0..A8D9;NU
A8E0..A8F1;CM
A8F2..A8FB;AL
A900..A909;NU
A90A..A925;AL
A926..A92D;CM
@ -1034,6 +1096,16 @@ A92E..A92F;BA
A930..A946;AL
A947..A953;CM
A95F;AL
A960..A97C;JL
A980..A983;CM
A984..A9B2;AL
A9B3..A9C0;CM
A9C1..A9C6;AL
A9C7..A9C9;BA
A9CA..A9CD;AL
A9CF;AL
A9D0..A9D9;NU
A9DE..A9DF;AL
AA00..AA28;AL
AA29..AA36;CM
AA40..AA42;AL
@ -1043,6 +1115,14 @@ AA4C..AA4D;CM
AA50..AA59;NU
AA5C;AL
AA5D..AA5F;BA
AA60..AA7B;SA
AA80..AAC2;SA
AADB..AADF;SA
ABC0..ABE2;AL
ABE3..ABEA;CM
ABEB;BA
ABEC..ABED;CM
ABF0..ABF9;NU
AC00;H2
AC01..AC1B;H3
AC1C;H2
@ -1841,13 +1921,18 @@ D76C;H2
D76D..D787;H3
D788;H2
D789..D7A3;H3
D7B0..D7C6;JV
D7CB..D7FB;JT
D800..DB7F;SG
DB80..DBFF;SG
DC00..DFFF;SG
E000..F8FF;XX
F900..FA2D;ID
FA30..FA6A;ID
FA2E..FA2F;ID
FA30..FA6D;ID
FA6E..FA6F;ID
FA70..FAD9;ID
FADA..FAFF;ID
FB00..FB06;AL
FB13..FB17;AL
FB1D;AL
@ -1993,8 +2078,10 @@ FFFD;AI
1080A..10835;AL
10837..10838;AL
1083C;AL
1083F;AL
10900..10919;AL
1083F..10855;AL
10857;BA
10858..1085F;AL
10900..1091B;AL
1091F;BA
10920..10939;AL
1093F;AL
@ -2010,9 +2097,36 @@ FFFD;AI
10A40..10A47;AL
10A50..10A57;BA
10A58;AL
10A60..10A7F;AL
10B00..10B35;AL
10B39..10B3F;BA
10B40..10B55;AL
10B58..10B72;AL
10B78..10B7F;AL
10C00..10C48;AL
10E60..10E7E;AL
11080..11082;CM
11083..110AF;AL
110B0..110BA;CM
110BB..110BD;AL
110BE..110C1;BA
12000..1236E;AL
12400..12462;AL
12470..12473;BA
13000..13257;AL
13258..1325A;OP
1325B..1325D;CL
1325E..13281;AL
13282;CL
13283..13285;AL
13286;OP
13287;CL
13288;OP
13289;CL
1328A..13378;AL
13379;OP
1337A..1337B;CL
1337C..1342E;AL
1D000..1D0F5;AL
1D100..1D126;AL
1D129..1D164;AL
@ -2052,8 +2166,32 @@ FFFD;AI
1D7CE..1D7FF;NU
1F000..1F02B;AL
1F030..1F093;AL
1F100..1F10A;AI
1F110..1F12D;AI
1F12E;AL
1F131;AI
1F13D;AI
1F13F;AI
1F142;AI
1F146;AI
1F14A..1F14E;AI
1F157;AI
1F15F;AI
1F179;AI
1F17B..1F17C;AI
1F17F;AI
1F18A..1F18D;AI
1F190;AI
1F200;ID
1F210..1F231;ID
1F240..1F248;ID
20000..2A6D6;ID
2A6D7..2A6FF;ID
2A700..2B734;ID
2B735..2F7FF;ID
2F800..2FA1D;ID
2FA1E..2FFFD;ID
30000..3FFFD;ID
E0001;CM
E0020..E007F;CM
E0100..E01EF;CM

View file

@ -0,0 +1,40 @@
# NameAliases-5.2.0.txt
# Date: 2009-05-22, 13:05:00 PDT [KW]
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 2005-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This file defines the formal name aliases for Unicode characters.
#
# For informative aliases see NamesList.txt
#
# For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/
#
# FORMAT
#
# Each line has two fields
# First field: Code point
# Second field: Alias
#
# In case multiple aliases are assigned, additional aliases
# would be provided on separate lines
#
#-----------------------------------------------------------------
01A2;LATIN CAPITAL LETTER GHA
01A3;LATIN SMALL LETTER GHA
0CDE;KANNADA LETTER LLLA
0E9D;LAO LETTER FO FON
0E9F;LAO LETTER FO FAY
0EA3;LAO LETTER RO
0EA5;LAO LETTER LO
0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN
A015;YI SYLLABLE ITERATION MARK
FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET
1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS
# Total code points: 11
# EOF

View file

@ -1,10 +1,10 @@
# NormalizationCorrections-5.1.0.txt
# Date: 2007-10-29, 11:30:00 PDT [KW]
# NormalizationCorrections-5.2.0.txt
# Date: 2009-05-22, 13:54:00 PDT [KW]
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2007 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# The normalization stabilization policy of the Unicode

View file

@ -1,10 +1,10 @@
# NormalizationTest-5.1.0.txt
# Date: 2008-03-03, 21:58:00 GMT [MD]
# NormalizationTest-5.2.0.txt
# Date: 2009-08-22, 04:58:39 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Normalization Test Suite
# Format:
@ -1253,6 +1253,9 @@
2147;2147;2147;0065;0065;
2148;2148;2148;0069;0069;
2149;2149;2149;006A;006A;
2150;2150;2150;0031 2044 0037;0031 2044 0037;
2151;2151;2151;0031 2044 0039;0031 2044 0039;
2152;2152;2152;0031 2044 0031 0030;0031 2044 0031 0030;
2153;2153;2153;0031 2044 0033;0031 2044 0033;
2154;2154;2154;0032 2044 0033;0032 2044 0033;
2155;2155;2155;0031 2044 0035;0031 2044 0035;
@ -1298,6 +1301,7 @@
217D;217D;217D;0063;0063;
217E;217E;217E;0064;0064;
217F;217F;217F;006D;006D;
2189;2189;2189;0030 2044 0033;0030 2044 0033;
219A;219A;2190 0338;219A;2190 0338;
219B;219B;2192 0338;219B;2192 0338;
21AE;21AE;2194 0338;21AE;2194 0338;
@ -1953,6 +1957,10 @@
3241;3241;3241;0028 4F11 0029;0028 4F11 0029;
3242;3242;3242;0028 81EA 0029;0028 81EA 0029;
3243;3243;3243;0028 81F3 0029;0028 81F3 0029;
3244;3244;3244;554F;554F;
3245;3245;3245;5E7C;5E7C;
3246;3246;3246;6587;6587;
3247;3247;3247;7B8F;7B8F;
3250;3250;3250;0050 0054 0045;0050 0054 0045;
3251;3251;3251;0032 0031;0032 0031;
3252;3252;3252;0032 0032;0032 0032;
@ -13905,6 +13913,9 @@ FA67;9038;9038;9038;9038;
FA68;96E3;96E3;96E3;96E3;
FA69;97FF;97FF;97FF;97FF;
FA6A;983B;983B;983B;983B;
FA6B;6075;6075;6075;6075;
FA6C;242EE;242EE;242EE;242EE;
FA6D;8218;8218;8218;8218;
FA70;4E26;4E26;4E26;4E26;
FA71;51B5;51B5;51B5;51B5;
FA72;5168;5168;5168;5168;
@ -15090,6 +15101,9 @@ FFEB;FFEB;FFEB;2192;2192;
FFEC;FFEC;FFEC;2193;2193;
FFED;FFED;FFED;25A0;25A0;
FFEE;FFEE;FFEE;25CB;25CB;
1109A;1109A;11099 110BA;1109A;11099 110BA;
1109C;1109C;1109B 110BA;1109C;1109B 110BA;
110AB;110AB;110A5 110BA;110AB;110A5 110BA;
1D15E;1D157 1D165;1D157 1D165;1D157 1D165;1D157 1D165;
1D15F;1D158 1D165;1D158 1D165;1D158 1D165;1D158 1D165;
1D160;1D158 1D165 1D16E;1D158 1D165 1D16E;1D158 1D165 1D16E;1D158 1D165 1D16E;
@ -16099,6 +16113,103 @@ FFEE;FFEE;FFEE;25CB;25CB;
1D7FD;1D7FD;1D7FD;0037;0037;
1D7FE;1D7FE;1D7FE;0038;0038;
1D7FF;1D7FF;1D7FF;0039;0039;
1F100;1F100;1F100;0030 002E;0030 002E;
1F101;1F101;1F101;0030 002C;0030 002C;
1F102;1F102;1F102;0031 002C;0031 002C;
1F103;1F103;1F103;0032 002C;0032 002C;
1F104;1F104;1F104;0033 002C;0033 002C;
1F105;1F105;1F105;0034 002C;0034 002C;
1F106;1F106;1F106;0035 002C;0035 002C;
1F107;1F107;1F107;0036 002C;0036 002C;
1F108;1F108;1F108;0037 002C;0037 002C;
1F109;1F109;1F109;0038 002C;0038 002C;
1F10A;1F10A;1F10A;0039 002C;0039 002C;
1F110;1F110;1F110;0028 0041 0029;0028 0041 0029;
1F111;1F111;1F111;0028 0042 0029;0028 0042 0029;
1F112;1F112;1F112;0028 0043 0029;0028 0043 0029;
1F113;1F113;1F113;0028 0044 0029;0028 0044 0029;
1F114;1F114;1F114;0028 0045 0029;0028 0045 0029;
1F115;1F115;1F115;0028 0046 0029;0028 0046 0029;
1F116;1F116;1F116;0028 0047 0029;0028 0047 0029;
1F117;1F117;1F117;0028 0048 0029;0028 0048 0029;
1F118;1F118;1F118;0028 0049 0029;0028 0049 0029;
1F119;1F119;1F119;0028 004A 0029;0028 004A 0029;
1F11A;1F11A;1F11A;0028 004B 0029;0028 004B 0029;
1F11B;1F11B;1F11B;0028 004C 0029;0028 004C 0029;
1F11C;1F11C;1F11C;0028 004D 0029;0028 004D 0029;
1F11D;1F11D;1F11D;0028 004E 0029;0028 004E 0029;
1F11E;1F11E;1F11E;0028 004F 0029;0028 004F 0029;
1F11F;1F11F;1F11F;0028 0050 0029;0028 0050 0029;
1F120;1F120;1F120;0028 0051 0029;0028 0051 0029;
1F121;1F121;1F121;0028 0052 0029;0028 0052 0029;
1F122;1F122;1F122;0028 0053 0029;0028 0053 0029;
1F123;1F123;1F123;0028 0054 0029;0028 0054 0029;
1F124;1F124;1F124;0028 0055 0029;0028 0055 0029;
1F125;1F125;1F125;0028 0056 0029;0028 0056 0029;
1F126;1F126;1F126;0028 0057 0029;0028 0057 0029;
1F127;1F127;1F127;0028 0058 0029;0028 0058 0029;
1F128;1F128;1F128;0028 0059 0029;0028 0059 0029;
1F129;1F129;1F129;0028 005A 0029;0028 005A 0029;
1F12A;1F12A;1F12A;3014 0053 3015;3014 0053 3015;
1F12B;1F12B;1F12B;0043;0043;
1F12C;1F12C;1F12C;0052;0052;
1F12D;1F12D;1F12D;0043 0044;0043 0044;
1F12E;1F12E;1F12E;0057 005A;0057 005A;
1F131;1F131;1F131;0042;0042;
1F13D;1F13D;1F13D;004E;004E;
1F13F;1F13F;1F13F;0050;0050;
1F142;1F142;1F142;0053;0053;
1F146;1F146;1F146;0057;0057;
1F14A;1F14A;1F14A;0048 0056;0048 0056;
1F14B;1F14B;1F14B;004D 0056;004D 0056;
1F14C;1F14C;1F14C;0053 0044;0053 0044;
1F14D;1F14D;1F14D;0053 0053;0053 0053;
1F14E;1F14E;1F14E;0050 0050 0056;0050 0050 0056;
1F190;1F190;1F190;0044 004A;0044 004A;
1F200;1F200;1F200;307B 304B;307B 304B;
1F210;1F210;1F210;624B;624B;
1F211;1F211;1F211;5B57;5B57;
1F212;1F212;1F212;53CC;53CC;
1F213;1F213;1F213;30C7;30C6 3099;
1F214;1F214;1F214;4E8C;4E8C;
1F215;1F215;1F215;591A;591A;
1F216;1F216;1F216;89E3;89E3;
1F217;1F217;1F217;5929;5929;
1F218;1F218;1F218;4EA4;4EA4;
1F219;1F219;1F219;6620;6620;
1F21A;1F21A;1F21A;7121;7121;
1F21B;1F21B;1F21B;6599;6599;
1F21C;1F21C;1F21C;524D;524D;
1F21D;1F21D;1F21D;5F8C;5F8C;
1F21E;1F21E;1F21E;518D;518D;
1F21F;1F21F;1F21F;65B0;65B0;
1F220;1F220;1F220;521D;521D;
1F221;1F221;1F221;7D42;7D42;
1F222;1F222;1F222;751F;751F;
1F223;1F223;1F223;8CA9;8CA9;
1F224;1F224;1F224;58F0;58F0;
1F225;1F225;1F225;5439;5439;
1F226;1F226;1F226;6F14;6F14;
1F227;1F227;1F227;6295;6295;
1F228;1F228;1F228;6355;6355;
1F229;1F229;1F229;4E00;4E00;
1F22A;1F22A;1F22A;4E09;4E09;
1F22B;1F22B;1F22B;904A;904A;
1F22C;1F22C;1F22C;5DE6;5DE6;
1F22D;1F22D;1F22D;4E2D;4E2D;
1F22E;1F22E;1F22E;53F3;53F3;
1F22F;1F22F;1F22F;6307;6307;
1F230;1F230;1F230;8D70;8D70;
1F231;1F231;1F231;6253;6253;
1F240;1F240;1F240;3014 672C 3015;3014 672C 3015;
1F241;1F241;1F241;3014 4E09 3015;3014 4E09 3015;
1F242;1F242;1F242;3014 4E8C 3015;3014 4E8C 3015;
1F243;1F243;1F243;3014 5B89 3015;3014 5B89 3015;
1F244;1F244;1F244;3014 70B9 3015;3014 70B9 3015;
1F245;1F245;1F245;3014 6253 3015;3014 6253 3015;
1F246;1F246;1F246;3014 76D7 3015;3014 76D7 3015;
1F247;1F247;1F247;3014 52DD 3015;3014 52DD 3015;
1F248;1F248;1F248;3014 6557 3015;3014 6557 3015;
2F800;4E3D;4E3D;4E3D;4E3D;
2F801;4E38;4E38;4E38;4E38;
2F802;4E41;4E41;4E41;4E41;
@ -17154,6 +17265,48 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 07F2 059A 0316 302A 0062;0061 302A 07F2 0316 059A 0062;0061 302A 07F2 0316 059A 0062;0061 302A 07F2 0316 059A 0062;0061 302A 07F2 0316 059A 0062;
0061 0315 0300 05AE 07F3 0062;00E0 05AE 07F3 0315 0062;0061 05AE 0300 07F3 0315 0062;00E0 05AE 07F3 0315 0062;0061 05AE 0300 07F3 0315 0062;
0061 07F3 0315 0300 05AE 0062;0061 05AE 07F3 0300 0315 0062;0061 05AE 07F3 0300 0315 0062;0061 05AE 07F3 0300 0315 0062;0061 05AE 07F3 0300 0315 0062;
0061 0315 0300 05AE 0816 0062;00E0 05AE 0816 0315 0062;0061 05AE 0300 0816 0315 0062;00E0 05AE 0816 0315 0062;0061 05AE 0300 0816 0315 0062;
0061 0816 0315 0300 05AE 0062;0061 05AE 0816 0300 0315 0062;0061 05AE 0816 0300 0315 0062;0061 05AE 0816 0300 0315 0062;0061 05AE 0816 0300 0315 0062;
0061 0315 0300 05AE 0817 0062;00E0 05AE 0817 0315 0062;0061 05AE 0300 0817 0315 0062;00E0 05AE 0817 0315 0062;0061 05AE 0300 0817 0315 0062;
0061 0817 0315 0300 05AE 0062;0061 05AE 0817 0300 0315 0062;0061 05AE 0817 0300 0315 0062;0061 05AE 0817 0300 0315 0062;0061 05AE 0817 0300 0315 0062;
0061 0315 0300 05AE 0818 0062;00E0 05AE 0818 0315 0062;0061 05AE 0300 0818 0315 0062;00E0 05AE 0818 0315 0062;0061 05AE 0300 0818 0315 0062;
0061 0818 0315 0300 05AE 0062;0061 05AE 0818 0300 0315 0062;0061 05AE 0818 0300 0315 0062;0061 05AE 0818 0300 0315 0062;0061 05AE 0818 0300 0315 0062;
0061 0315 0300 05AE 0819 0062;00E0 05AE 0819 0315 0062;0061 05AE 0300 0819 0315 0062;00E0 05AE 0819 0315 0062;0061 05AE 0300 0819 0315 0062;
0061 0819 0315 0300 05AE 0062;0061 05AE 0819 0300 0315 0062;0061 05AE 0819 0300 0315 0062;0061 05AE 0819 0300 0315 0062;0061 05AE 0819 0300 0315 0062;
0061 0315 0300 05AE 081B 0062;00E0 05AE 081B 0315 0062;0061 05AE 0300 081B 0315 0062;00E0 05AE 081B 0315 0062;0061 05AE 0300 081B 0315 0062;
0061 081B 0315 0300 05AE 0062;0061 05AE 081B 0300 0315 0062;0061 05AE 081B 0300 0315 0062;0061 05AE 081B 0300 0315 0062;0061 05AE 081B 0300 0315 0062;
0061 0315 0300 05AE 081C 0062;00E0 05AE 081C 0315 0062;0061 05AE 0300 081C 0315 0062;00E0 05AE 081C 0315 0062;0061 05AE 0300 081C 0315 0062;
0061 081C 0315 0300 05AE 0062;0061 05AE 081C 0300 0315 0062;0061 05AE 081C 0300 0315 0062;0061 05AE 081C 0300 0315 0062;0061 05AE 081C 0300 0315 0062;
0061 0315 0300 05AE 081D 0062;00E0 05AE 081D 0315 0062;0061 05AE 0300 081D 0315 0062;00E0 05AE 081D 0315 0062;0061 05AE 0300 081D 0315 0062;
0061 081D 0315 0300 05AE 0062;0061 05AE 081D 0300 0315 0062;0061 05AE 081D 0300 0315 0062;0061 05AE 081D 0300 0315 0062;0061 05AE 081D 0300 0315 0062;
0061 0315 0300 05AE 081E 0062;00E0 05AE 081E 0315 0062;0061 05AE 0300 081E 0315 0062;00E0 05AE 081E 0315 0062;0061 05AE 0300 081E 0315 0062;
0061 081E 0315 0300 05AE 0062;0061 05AE 081E 0300 0315 0062;0061 05AE 081E 0300 0315 0062;0061 05AE 081E 0300 0315 0062;0061 05AE 081E 0300 0315 0062;
0061 0315 0300 05AE 081F 0062;00E0 05AE 081F 0315 0062;0061 05AE 0300 081F 0315 0062;00E0 05AE 081F 0315 0062;0061 05AE 0300 081F 0315 0062;
0061 081F 0315 0300 05AE 0062;0061 05AE 081F 0300 0315 0062;0061 05AE 081F 0300 0315 0062;0061 05AE 081F 0300 0315 0062;0061 05AE 081F 0300 0315 0062;
0061 0315 0300 05AE 0820 0062;00E0 05AE 0820 0315 0062;0061 05AE 0300 0820 0315 0062;00E0 05AE 0820 0315 0062;0061 05AE 0300 0820 0315 0062;
0061 0820 0315 0300 05AE 0062;0061 05AE 0820 0300 0315 0062;0061 05AE 0820 0300 0315 0062;0061 05AE 0820 0300 0315 0062;0061 05AE 0820 0300 0315 0062;
0061 0315 0300 05AE 0821 0062;00E0 05AE 0821 0315 0062;0061 05AE 0300 0821 0315 0062;00E0 05AE 0821 0315 0062;0061 05AE 0300 0821 0315 0062;
0061 0821 0315 0300 05AE 0062;0061 05AE 0821 0300 0315 0062;0061 05AE 0821 0300 0315 0062;0061 05AE 0821 0300 0315 0062;0061 05AE 0821 0300 0315 0062;
0061 0315 0300 05AE 0822 0062;00E0 05AE 0822 0315 0062;0061 05AE 0300 0822 0315 0062;00E0 05AE 0822 0315 0062;0061 05AE 0300 0822 0315 0062;
0061 0822 0315 0300 05AE 0062;0061 05AE 0822 0300 0315 0062;0061 05AE 0822 0300 0315 0062;0061 05AE 0822 0300 0315 0062;0061 05AE 0822 0300 0315 0062;
0061 0315 0300 05AE 0823 0062;00E0 05AE 0823 0315 0062;0061 05AE 0300 0823 0315 0062;00E0 05AE 0823 0315 0062;0061 05AE 0300 0823 0315 0062;
0061 0823 0315 0300 05AE 0062;0061 05AE 0823 0300 0315 0062;0061 05AE 0823 0300 0315 0062;0061 05AE 0823 0300 0315 0062;0061 05AE 0823 0300 0315 0062;
0061 0315 0300 05AE 0825 0062;00E0 05AE 0825 0315 0062;0061 05AE 0300 0825 0315 0062;00E0 05AE 0825 0315 0062;0061 05AE 0300 0825 0315 0062;
0061 0825 0315 0300 05AE 0062;0061 05AE 0825 0300 0315 0062;0061 05AE 0825 0300 0315 0062;0061 05AE 0825 0300 0315 0062;0061 05AE 0825 0300 0315 0062;
0061 0315 0300 05AE 0826 0062;00E0 05AE 0826 0315 0062;0061 05AE 0300 0826 0315 0062;00E0 05AE 0826 0315 0062;0061 05AE 0300 0826 0315 0062;
0061 0826 0315 0300 05AE 0062;0061 05AE 0826 0300 0315 0062;0061 05AE 0826 0300 0315 0062;0061 05AE 0826 0300 0315 0062;0061 05AE 0826 0300 0315 0062;
0061 0315 0300 05AE 0827 0062;00E0 05AE 0827 0315 0062;0061 05AE 0300 0827 0315 0062;00E0 05AE 0827 0315 0062;0061 05AE 0300 0827 0315 0062;
0061 0827 0315 0300 05AE 0062;0061 05AE 0827 0300 0315 0062;0061 05AE 0827 0300 0315 0062;0061 05AE 0827 0300 0315 0062;0061 05AE 0827 0300 0315 0062;
0061 0315 0300 05AE 0829 0062;00E0 05AE 0829 0315 0062;0061 05AE 0300 0829 0315 0062;00E0 05AE 0829 0315 0062;0061 05AE 0300 0829 0315 0062;
0061 0829 0315 0300 05AE 0062;0061 05AE 0829 0300 0315 0062;0061 05AE 0829 0300 0315 0062;0061 05AE 0829 0300 0315 0062;0061 05AE 0829 0300 0315 0062;
0061 0315 0300 05AE 082A 0062;00E0 05AE 082A 0315 0062;0061 05AE 0300 082A 0315 0062;00E0 05AE 082A 0315 0062;0061 05AE 0300 082A 0315 0062;
0061 082A 0315 0300 05AE 0062;0061 05AE 082A 0300 0315 0062;0061 05AE 082A 0300 0315 0062;0061 05AE 082A 0300 0315 0062;0061 05AE 082A 0300 0315 0062;
0061 0315 0300 05AE 082B 0062;00E0 05AE 082B 0315 0062;0061 05AE 0300 082B 0315 0062;00E0 05AE 082B 0315 0062;0061 05AE 0300 082B 0315 0062;
0061 082B 0315 0300 05AE 0062;0061 05AE 082B 0300 0315 0062;0061 05AE 082B 0300 0315 0062;0061 05AE 082B 0300 0315 0062;0061 05AE 082B 0300 0315 0062;
0061 0315 0300 05AE 082C 0062;00E0 05AE 082C 0315 0062;0061 05AE 0300 082C 0315 0062;00E0 05AE 082C 0315 0062;0061 05AE 0300 082C 0315 0062;
0061 082C 0315 0300 05AE 0062;0061 05AE 082C 0300 0315 0062;0061 05AE 082C 0300 0315 0062;0061 05AE 082C 0300 0315 0062;0061 05AE 082C 0300 0315 0062;
0061 0315 0300 05AE 082D 0062;00E0 05AE 082D 0315 0062;0061 05AE 0300 082D 0315 0062;00E0 05AE 082D 0315 0062;0061 05AE 0300 082D 0315 0062;
0061 082D 0315 0300 05AE 0062;0061 05AE 082D 0300 0315 0062;0061 05AE 082D 0300 0315 0062;0061 05AE 082D 0300 0315 0062;0061 05AE 082D 0300 0315 0062;
0061 3099 093C 0334 093C 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;
0061 093C 3099 093C 0334 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;
0061 05B0 094D 3099 094D 0062;0061 3099 094D 094D 05B0 0062;0061 3099 094D 094D 05B0 0062;0061 3099 094D 094D 05B0 0062;0061 3099 094D 094D 05B0 0062;
@ -17292,6 +17445,26 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 1A17 0315 0300 05AE 0062;0061 05AE 1A17 0300 0315 0062;0061 05AE 1A17 0300 0315 0062;0061 05AE 1A17 0300 0315 0062;0061 05AE 1A17 0300 0315 0062;
0061 059A 0316 302A 1A18 0062;0061 302A 0316 1A18 059A 0062;0061 302A 0316 1A18 059A 0062;0061 302A 0316 1A18 059A 0062;0061 302A 0316 1A18 059A 0062;
0061 1A18 059A 0316 302A 0062;0061 302A 1A18 0316 059A 0062;0061 302A 1A18 0316 059A 0062;0061 302A 1A18 0316 059A 0062;0061 302A 1A18 0316 059A 0062;
0061 05B0 094D 3099 1A60 0062;0061 3099 094D 1A60 05B0 0062;0061 3099 094D 1A60 05B0 0062;0061 3099 094D 1A60 05B0 0062;0061 3099 094D 1A60 05B0 0062;
0061 1A60 05B0 094D 3099 0062;0061 3099 1A60 094D 05B0 0062;0061 3099 1A60 094D 05B0 0062;0061 3099 1A60 094D 05B0 0062;0061 3099 1A60 094D 05B0 0062;
0061 0315 0300 05AE 1A75 0062;00E0 05AE 1A75 0315 0062;0061 05AE 0300 1A75 0315 0062;00E0 05AE 1A75 0315 0062;0061 05AE 0300 1A75 0315 0062;
0061 1A75 0315 0300 05AE 0062;0061 05AE 1A75 0300 0315 0062;0061 05AE 1A75 0300 0315 0062;0061 05AE 1A75 0300 0315 0062;0061 05AE 1A75 0300 0315 0062;
0061 0315 0300 05AE 1A76 0062;00E0 05AE 1A76 0315 0062;0061 05AE 0300 1A76 0315 0062;00E0 05AE 1A76 0315 0062;0061 05AE 0300 1A76 0315 0062;
0061 1A76 0315 0300 05AE 0062;0061 05AE 1A76 0300 0315 0062;0061 05AE 1A76 0300 0315 0062;0061 05AE 1A76 0300 0315 0062;0061 05AE 1A76 0300 0315 0062;
0061 0315 0300 05AE 1A77 0062;00E0 05AE 1A77 0315 0062;0061 05AE 0300 1A77 0315 0062;00E0 05AE 1A77 0315 0062;0061 05AE 0300 1A77 0315 0062;
0061 1A77 0315 0300 05AE 0062;0061 05AE 1A77 0300 0315 0062;0061 05AE 1A77 0300 0315 0062;0061 05AE 1A77 0300 0315 0062;0061 05AE 1A77 0300 0315 0062;
0061 0315 0300 05AE 1A78 0062;00E0 05AE 1A78 0315 0062;0061 05AE 0300 1A78 0315 0062;00E0 05AE 1A78 0315 0062;0061 05AE 0300 1A78 0315 0062;
0061 1A78 0315 0300 05AE 0062;0061 05AE 1A78 0300 0315 0062;0061 05AE 1A78 0300 0315 0062;0061 05AE 1A78 0300 0315 0062;0061 05AE 1A78 0300 0315 0062;
0061 0315 0300 05AE 1A79 0062;00E0 05AE 1A79 0315 0062;0061 05AE 0300 1A79 0315 0062;00E0 05AE 1A79 0315 0062;0061 05AE 0300 1A79 0315 0062;
0061 1A79 0315 0300 05AE 0062;0061 05AE 1A79 0300 0315 0062;0061 05AE 1A79 0300 0315 0062;0061 05AE 1A79 0300 0315 0062;0061 05AE 1A79 0300 0315 0062;
0061 0315 0300 05AE 1A7A 0062;00E0 05AE 1A7A 0315 0062;0061 05AE 0300 1A7A 0315 0062;00E0 05AE 1A7A 0315 0062;0061 05AE 0300 1A7A 0315 0062;
0061 1A7A 0315 0300 05AE 0062;0061 05AE 1A7A 0300 0315 0062;0061 05AE 1A7A 0300 0315 0062;0061 05AE 1A7A 0300 0315 0062;0061 05AE 1A7A 0300 0315 0062;
0061 0315 0300 05AE 1A7B 0062;00E0 05AE 1A7B 0315 0062;0061 05AE 0300 1A7B 0315 0062;00E0 05AE 1A7B 0315 0062;0061 05AE 0300 1A7B 0315 0062;
0061 1A7B 0315 0300 05AE 0062;0061 05AE 1A7B 0300 0315 0062;0061 05AE 1A7B 0300 0315 0062;0061 05AE 1A7B 0300 0315 0062;0061 05AE 1A7B 0300 0315 0062;
0061 0315 0300 05AE 1A7C 0062;00E0 05AE 1A7C 0315 0062;0061 05AE 0300 1A7C 0315 0062;00E0 05AE 1A7C 0315 0062;0061 05AE 0300 1A7C 0315 0062;
0061 1A7C 0315 0300 05AE 0062;0061 05AE 1A7C 0300 0315 0062;0061 05AE 1A7C 0300 0315 0062;0061 05AE 1A7C 0300 0315 0062;0061 05AE 1A7C 0300 0315 0062;
0061 059A 0316 302A 1A7F 0062;0061 302A 0316 1A7F 059A 0062;0061 302A 0316 1A7F 059A 0062;0061 302A 0316 1A7F 059A 0062;0061 302A 0316 1A7F 059A 0062;
0061 1A7F 059A 0316 302A 0062;0061 302A 1A7F 0316 059A 0062;0061 302A 1A7F 0316 059A 0062;0061 302A 1A7F 0316 059A 0062;0061 302A 1A7F 0316 059A 0062;
0061 3099 093C 0334 1B34 0062;0061 0334 093C 1B34 3099 0062;0061 0334 093C 1B34 3099 0062;0061 0334 093C 1B34 3099 0062;0061 0334 093C 1B34 3099 0062;
0061 1B34 3099 093C 0334 0062;0061 0334 1B34 093C 3099 0062;0061 0334 1B34 093C 3099 0062;0061 0334 1B34 093C 3099 0062;0061 0334 1B34 093C 3099 0062;
0061 05B0 094D 3099 1B44 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062;
@ -17318,6 +17491,54 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 1BAA 05B0 094D 3099 0062;0061 3099 1BAA 094D 05B0 0062;0061 3099 1BAA 094D 05B0 0062;0061 3099 1BAA 094D 05B0 0062;0061 3099 1BAA 094D 05B0 0062;
0061 3099 093C 0334 1C37 0062;0061 0334 093C 1C37 3099 0062;0061 0334 093C 1C37 3099 0062;0061 0334 093C 1C37 3099 0062;0061 0334 093C 1C37 3099 0062;
0061 1C37 3099 093C 0334 0062;0061 0334 1C37 093C 3099 0062;0061 0334 1C37 093C 3099 0062;0061 0334 1C37 093C 3099 0062;0061 0334 1C37 093C 3099 0062;
0061 0315 0300 05AE 1CD0 0062;00E0 05AE 1CD0 0315 0062;0061 05AE 0300 1CD0 0315 0062;00E0 05AE 1CD0 0315 0062;0061 05AE 0300 1CD0 0315 0062;
0061 1CD0 0315 0300 05AE 0062;0061 05AE 1CD0 0300 0315 0062;0061 05AE 1CD0 0300 0315 0062;0061 05AE 1CD0 0300 0315 0062;0061 05AE 1CD0 0300 0315 0062;
0061 0315 0300 05AE 1CD1 0062;00E0 05AE 1CD1 0315 0062;0061 05AE 0300 1CD1 0315 0062;00E0 05AE 1CD1 0315 0062;0061 05AE 0300 1CD1 0315 0062;
0061 1CD1 0315 0300 05AE 0062;0061 05AE 1CD1 0300 0315 0062;0061 05AE 1CD1 0300 0315 0062;0061 05AE 1CD1 0300 0315 0062;0061 05AE 1CD1 0300 0315 0062;
0061 0315 0300 05AE 1CD2 0062;00E0 05AE 1CD2 0315 0062;0061 05AE 0300 1CD2 0315 0062;00E0 05AE 1CD2 0315 0062;0061 05AE 0300 1CD2 0315 0062;
0061 1CD2 0315 0300 05AE 0062;0061 05AE 1CD2 0300 0315 0062;0061 05AE 1CD2 0300 0315 0062;0061 05AE 1CD2 0300 0315 0062;0061 05AE 1CD2 0300 0315 0062;
0061 093C 0334 1CD4 0062;0061 0334 1CD4 093C 0062;0061 0334 1CD4 093C 0062;0061 0334 1CD4 093C 0062;0061 0334 1CD4 093C 0062;
0061 1CD4 093C 0334 0062;0061 1CD4 0334 093C 0062;0061 1CD4 0334 093C 0062;0061 1CD4 0334 093C 0062;0061 1CD4 0334 093C 0062;
0061 059A 0316 302A 1CD5 0062;0061 302A 0316 1CD5 059A 0062;0061 302A 0316 1CD5 059A 0062;0061 302A 0316 1CD5 059A 0062;0061 302A 0316 1CD5 059A 0062;
0061 1CD5 059A 0316 302A 0062;0061 302A 1CD5 0316 059A 0062;0061 302A 1CD5 0316 059A 0062;0061 302A 1CD5 0316 059A 0062;0061 302A 1CD5 0316 059A 0062;
0061 059A 0316 302A 1CD6 0062;0061 302A 0316 1CD6 059A 0062;0061 302A 0316 1CD6 059A 0062;0061 302A 0316 1CD6 059A 0062;0061 302A 0316 1CD6 059A 0062;
0061 1CD6 059A 0316 302A 0062;0061 302A 1CD6 0316 059A 0062;0061 302A 1CD6 0316 059A 0062;0061 302A 1CD6 0316 059A 0062;0061 302A 1CD6 0316 059A 0062;
0061 059A 0316 302A 1CD7 0062;0061 302A 0316 1CD7 059A 0062;0061 302A 0316 1CD7 059A 0062;0061 302A 0316 1CD7 059A 0062;0061 302A 0316 1CD7 059A 0062;
0061 1CD7 059A 0316 302A 0062;0061 302A 1CD7 0316 059A 0062;0061 302A 1CD7 0316 059A 0062;0061 302A 1CD7 0316 059A 0062;0061 302A 1CD7 0316 059A 0062;
0061 059A 0316 302A 1CD8 0062;0061 302A 0316 1CD8 059A 0062;0061 302A 0316 1CD8 059A 0062;0061 302A 0316 1CD8 059A 0062;0061 302A 0316 1CD8 059A 0062;
0061 1CD8 059A 0316 302A 0062;0061 302A 1CD8 0316 059A 0062;0061 302A 1CD8 0316 059A 0062;0061 302A 1CD8 0316 059A 0062;0061 302A 1CD8 0316 059A 0062;
0061 059A 0316 302A 1CD9 0062;0061 302A 0316 1CD9 059A 0062;0061 302A 0316 1CD9 059A 0062;0061 302A 0316 1CD9 059A 0062;0061 302A 0316 1CD9 059A 0062;
0061 1CD9 059A 0316 302A 0062;0061 302A 1CD9 0316 059A 0062;0061 302A 1CD9 0316 059A 0062;0061 302A 1CD9 0316 059A 0062;0061 302A 1CD9 0316 059A 0062;
0061 0315 0300 05AE 1CDA 0062;00E0 05AE 1CDA 0315 0062;0061 05AE 0300 1CDA 0315 0062;00E0 05AE 1CDA 0315 0062;0061 05AE 0300 1CDA 0315 0062;
0061 1CDA 0315 0300 05AE 0062;0061 05AE 1CDA 0300 0315 0062;0061 05AE 1CDA 0300 0315 0062;0061 05AE 1CDA 0300 0315 0062;0061 05AE 1CDA 0300 0315 0062;
0061 0315 0300 05AE 1CDB 0062;00E0 05AE 1CDB 0315 0062;0061 05AE 0300 1CDB 0315 0062;00E0 05AE 1CDB 0315 0062;0061 05AE 0300 1CDB 0315 0062;
0061 1CDB 0315 0300 05AE 0062;0061 05AE 1CDB 0300 0315 0062;0061 05AE 1CDB 0300 0315 0062;0061 05AE 1CDB 0300 0315 0062;0061 05AE 1CDB 0300 0315 0062;
0061 059A 0316 302A 1CDC 0062;0061 302A 0316 1CDC 059A 0062;0061 302A 0316 1CDC 059A 0062;0061 302A 0316 1CDC 059A 0062;0061 302A 0316 1CDC 059A 0062;
0061 1CDC 059A 0316 302A 0062;0061 302A 1CDC 0316 059A 0062;0061 302A 1CDC 0316 059A 0062;0061 302A 1CDC 0316 059A 0062;0061 302A 1CDC 0316 059A 0062;
0061 059A 0316 302A 1CDD 0062;0061 302A 0316 1CDD 059A 0062;0061 302A 0316 1CDD 059A 0062;0061 302A 0316 1CDD 059A 0062;0061 302A 0316 1CDD 059A 0062;
0061 1CDD 059A 0316 302A 0062;0061 302A 1CDD 0316 059A 0062;0061 302A 1CDD 0316 059A 0062;0061 302A 1CDD 0316 059A 0062;0061 302A 1CDD 0316 059A 0062;
0061 059A 0316 302A 1CDE 0062;0061 302A 0316 1CDE 059A 0062;0061 302A 0316 1CDE 059A 0062;0061 302A 0316 1CDE 059A 0062;0061 302A 0316 1CDE 059A 0062;
0061 1CDE 059A 0316 302A 0062;0061 302A 1CDE 0316 059A 0062;0061 302A 1CDE 0316 059A 0062;0061 302A 1CDE 0316 059A 0062;0061 302A 1CDE 0316 059A 0062;
0061 059A 0316 302A 1CDF 0062;0061 302A 0316 1CDF 059A 0062;0061 302A 0316 1CDF 059A 0062;0061 302A 0316 1CDF 059A 0062;0061 302A 0316 1CDF 059A 0062;
0061 1CDF 059A 0316 302A 0062;0061 302A 1CDF 0316 059A 0062;0061 302A 1CDF 0316 059A 0062;0061 302A 1CDF 0316 059A 0062;0061 302A 1CDF 0316 059A 0062;
0061 0315 0300 05AE 1CE0 0062;00E0 05AE 1CE0 0315 0062;0061 05AE 0300 1CE0 0315 0062;00E0 05AE 1CE0 0315 0062;0061 05AE 0300 1CE0 0315 0062;
0061 1CE0 0315 0300 05AE 0062;0061 05AE 1CE0 0300 0315 0062;0061 05AE 1CE0 0300 0315 0062;0061 05AE 1CE0 0300 0315 0062;0061 05AE 1CE0 0300 0315 0062;
0061 093C 0334 1CE2 0062;0061 0334 1CE2 093C 0062;0061 0334 1CE2 093C 0062;0061 0334 1CE2 093C 0062;0061 0334 1CE2 093C 0062;
0061 1CE2 093C 0334 0062;0061 1CE2 0334 093C 0062;0061 1CE2 0334 093C 0062;0061 1CE2 0334 093C 0062;0061 1CE2 0334 093C 0062;
0061 093C 0334 1CE3 0062;0061 0334 1CE3 093C 0062;0061 0334 1CE3 093C 0062;0061 0334 1CE3 093C 0062;0061 0334 1CE3 093C 0062;
0061 1CE3 093C 0334 0062;0061 1CE3 0334 093C 0062;0061 1CE3 0334 093C 0062;0061 1CE3 0334 093C 0062;0061 1CE3 0334 093C 0062;
0061 093C 0334 1CE4 0062;0061 0334 1CE4 093C 0062;0061 0334 1CE4 093C 0062;0061 0334 1CE4 093C 0062;0061 0334 1CE4 093C 0062;
0061 1CE4 093C 0334 0062;0061 1CE4 0334 093C 0062;0061 1CE4 0334 093C 0062;0061 1CE4 0334 093C 0062;0061 1CE4 0334 093C 0062;
0061 093C 0334 1CE5 0062;0061 0334 1CE5 093C 0062;0061 0334 1CE5 093C 0062;0061 0334 1CE5 093C 0062;0061 0334 1CE5 093C 0062;
0061 1CE5 093C 0334 0062;0061 1CE5 0334 093C 0062;0061 1CE5 0334 093C 0062;0061 1CE5 0334 093C 0062;0061 1CE5 0334 093C 0062;
0061 093C 0334 1CE6 0062;0061 0334 1CE6 093C 0062;0061 0334 1CE6 093C 0062;0061 0334 1CE6 093C 0062;0061 0334 1CE6 093C 0062;
0061 1CE6 093C 0334 0062;0061 1CE6 0334 093C 0062;0061 1CE6 0334 093C 0062;0061 1CE6 0334 093C 0062;0061 1CE6 0334 093C 0062;
0061 093C 0334 1CE7 0062;0061 0334 1CE7 093C 0062;0061 0334 1CE7 093C 0062;0061 0334 1CE7 093C 0062;0061 0334 1CE7 093C 0062;
0061 1CE7 093C 0334 0062;0061 1CE7 0334 093C 0062;0061 1CE7 0334 093C 0062;0061 1CE7 0334 093C 0062;0061 1CE7 0334 093C 0062;
0061 093C 0334 1CE8 0062;0061 0334 1CE8 093C 0062;0061 0334 1CE8 093C 0062;0061 0334 1CE8 093C 0062;0061 0334 1CE8 093C 0062;
0061 1CE8 093C 0334 0062;0061 1CE8 0334 093C 0062;0061 1CE8 0334 093C 0062;0061 1CE8 0334 093C 0062;0061 1CE8 0334 093C 0062;
0061 059A 0316 302A 1CED 0062;0061 302A 0316 1CED 059A 0062;0061 302A 0316 1CED 059A 0062;0061 302A 0316 1CED 059A 0062;0061 302A 0316 1CED 059A 0062;
0061 1CED 059A 0316 302A 0062;0061 302A 1CED 0316 059A 0062;0061 302A 1CED 0316 059A 0062;0061 302A 1CED 0316 059A 0062;0061 302A 1CED 0316 059A 0062;
0061 0315 0300 05AE 1DC0 0062;00E0 05AE 1DC0 0315 0062;0061 05AE 0300 1DC0 0315 0062;00E0 05AE 1DC0 0315 0062;0061 05AE 0300 1DC0 0315 0062;
0061 1DC0 0315 0300 05AE 0062;0061 05AE 1DC0 0300 0315 0062;0061 05AE 1DC0 0300 0315 0062;0061 05AE 1DC0 0300 0315 0062;0061 05AE 1DC0 0300 0315 0062;
0061 0315 0300 05AE 1DC1 0062;00E0 05AE 1DC1 0315 0062;0061 05AE 0300 1DC1 0315 0062;00E0 05AE 1DC1 0315 0062;0061 05AE 0300 1DC1 0315 0062;
@ -17396,6 +17617,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 1DE5 0315 0300 05AE 0062;0061 05AE 1DE5 0300 0315 0062;0061 05AE 1DE5 0300 0315 0062;0061 05AE 1DE5 0300 0315 0062;0061 05AE 1DE5 0300 0315 0062;
0061 0315 0300 05AE 1DE6 0062;00E0 05AE 1DE6 0315 0062;0061 05AE 0300 1DE6 0315 0062;00E0 05AE 1DE6 0315 0062;0061 05AE 0300 1DE6 0315 0062;
0061 1DE6 0315 0300 05AE 0062;0061 05AE 1DE6 0300 0315 0062;0061 05AE 1DE6 0300 0315 0062;0061 05AE 1DE6 0300 0315 0062;0061 05AE 1DE6 0300 0315 0062;
0061 059A 0316 302A 1DFD 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;
0061 1DFD 059A 0316 302A 0062;0061 302A 1DFD 0316 059A 0062;0061 302A 1DFD 0316 059A 0062;0061 302A 1DFD 0316 059A 0062;0061 302A 1DFD 0316 059A 0062;
0061 0315 0300 05AE 1DFE 0062;00E0 05AE 1DFE 0315 0062;0061 05AE 0300 1DFE 0315 0062;00E0 05AE 1DFE 0315 0062;0061 05AE 0300 1DFE 0315 0062;
0061 1DFE 0315 0300 05AE 0062;0061 05AE 1DFE 0300 0315 0062;0061 05AE 1DFE 0300 0315 0062;0061 05AE 1DFE 0300 0315 0062;0061 05AE 1DFE 0300 0315 0062;
0061 059A 0316 302A 1DFF 0062;0061 302A 0316 1DFF 059A 0062;0061 302A 0316 1DFF 059A 0062;0061 302A 0316 1DFF 059A 0062;0061 302A 0316 1DFF 059A 0062;
@ -17452,6 +17675,12 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 20EF 059A 0316 302A 0062;0061 302A 20EF 0316 059A 0062;0061 302A 20EF 0316 059A 0062;0061 302A 20EF 0316 059A 0062;0061 302A 20EF 0316 059A 0062;
0061 0315 0300 05AE 20F0 0062;00E0 05AE 20F0 0315 0062;0061 05AE 0300 20F0 0315 0062;00E0 05AE 20F0 0315 0062;0061 05AE 0300 20F0 0315 0062;
0061 20F0 0315 0300 05AE 0062;0061 05AE 20F0 0300 0315 0062;0061 05AE 20F0 0300 0315 0062;0061 05AE 20F0 0300 0315 0062;0061 05AE 20F0 0300 0315 0062;
0061 0315 0300 05AE 2CEF 0062;00E0 05AE 2CEF 0315 0062;0061 05AE 0300 2CEF 0315 0062;00E0 05AE 2CEF 0315 0062;0061 05AE 0300 2CEF 0315 0062;
0061 2CEF 0315 0300 05AE 0062;0061 05AE 2CEF 0300 0315 0062;0061 05AE 2CEF 0300 0315 0062;0061 05AE 2CEF 0300 0315 0062;0061 05AE 2CEF 0300 0315 0062;
0061 0315 0300 05AE 2CF0 0062;00E0 05AE 2CF0 0315 0062;0061 05AE 0300 2CF0 0315 0062;00E0 05AE 2CF0 0315 0062;0061 05AE 0300 2CF0 0315 0062;
0061 2CF0 0315 0300 05AE 0062;0061 05AE 2CF0 0300 0315 0062;0061 05AE 2CF0 0300 0315 0062;0061 05AE 2CF0 0300 0315 0062;0061 05AE 2CF0 0300 0315 0062;
0061 0315 0300 05AE 2CF1 0062;00E0 05AE 2CF1 0315 0062;0061 05AE 0300 2CF1 0315 0062;00E0 05AE 2CF1 0315 0062;0061 05AE 0300 2CF1 0315 0062;
0061 2CF1 0315 0300 05AE 0062;0061 05AE 2CF1 0300 0315 0062;0061 05AE 2CF1 0300 0315 0062;0061 05AE 2CF1 0300 0315 0062;0061 05AE 2CF1 0300 0315 0062;
0061 0315 0300 05AE 2DE0 0062;00E0 05AE 2DE0 0315 0062;0061 05AE 0300 2DE0 0315 0062;00E0 05AE 2DE0 0315 0062;0061 05AE 0300 2DE0 0315 0062;
0061 2DE0 0315 0300 05AE 0062;0061 05AE 2DE0 0300 0315 0062;0061 05AE 2DE0 0300 0315 0062;0061 05AE 2DE0 0300 0315 0062;0061 05AE 2DE0 0300 0315 0062;
0061 0315 0300 05AE 2DE1 0062;00E0 05AE 2DE1 0315 0062;0061 05AE 0300 2DE1 0315 0062;00E0 05AE 2DE1 0315 0062;0061 05AE 0300 2DE1 0315 0062;
@ -17538,10 +17767,50 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 A67C 0315 0300 05AE 0062;0061 05AE A67C 0300 0315 0062;0061 05AE A67C 0300 0315 0062;0061 05AE A67C 0300 0315 0062;0061 05AE A67C 0300 0315 0062;
0061 0315 0300 05AE A67D 0062;00E0 05AE A67D 0315 0062;0061 05AE 0300 A67D 0315 0062;00E0 05AE A67D 0315 0062;0061 05AE 0300 A67D 0315 0062;
0061 A67D 0315 0300 05AE 0062;0061 05AE A67D 0300 0315 0062;0061 05AE A67D 0300 0315 0062;0061 05AE A67D 0300 0315 0062;0061 05AE A67D 0300 0315 0062;
0061 0315 0300 05AE A6F0 0062;00E0 05AE A6F0 0315 0062;0061 05AE 0300 A6F0 0315 0062;00E0 05AE A6F0 0315 0062;0061 05AE 0300 A6F0 0315 0062;
0061 A6F0 0315 0300 05AE 0062;0061 05AE A6F0 0300 0315 0062;0061 05AE A6F0 0300 0315 0062;0061 05AE A6F0 0300 0315 0062;0061 05AE A6F0 0300 0315 0062;
0061 0315 0300 05AE A6F1 0062;00E0 05AE A6F1 0315 0062;0061 05AE 0300 A6F1 0315 0062;00E0 05AE A6F1 0315 0062;0061 05AE 0300 A6F1 0315 0062;
0061 A6F1 0315 0300 05AE 0062;0061 05AE A6F1 0300 0315 0062;0061 05AE A6F1 0300 0315 0062;0061 05AE A6F1 0300 0315 0062;0061 05AE A6F1 0300 0315 0062;
0061 05B0 094D 3099 A806 0062;0061 3099 094D A806 05B0 0062;0061 3099 094D A806 05B0 0062;0061 3099 094D A806 05B0 0062;0061 3099 094D A806 05B0 0062;
0061 A806 05B0 094D 3099 0062;0061 3099 A806 094D 05B0 0062;0061 3099 A806 094D 05B0 0062;0061 3099 A806 094D 05B0 0062;0061 3099 A806 094D 05B0 0062;
0061 05B0 094D 3099 A8C4 0062;0061 3099 094D A8C4 05B0 0062;0061 3099 094D A8C4 05B0 0062;0061 3099 094D A8C4 05B0 0062;0061 3099 094D A8C4 05B0 0062;
0061 A8C4 05B0 094D 3099 0062;0061 3099 A8C4 094D 05B0 0062;0061 3099 A8C4 094D 05B0 0062;0061 3099 A8C4 094D 05B0 0062;0061 3099 A8C4 094D 05B0 0062;
0061 0315 0300 05AE A8E0 0062;00E0 05AE A8E0 0315 0062;0061 05AE 0300 A8E0 0315 0062;00E0 05AE A8E0 0315 0062;0061 05AE 0300 A8E0 0315 0062;
0061 A8E0 0315 0300 05AE 0062;0061 05AE A8E0 0300 0315 0062;0061 05AE A8E0 0300 0315 0062;0061 05AE A8E0 0300 0315 0062;0061 05AE A8E0 0300 0315 0062;
0061 0315 0300 05AE A8E1 0062;00E0 05AE A8E1 0315 0062;0061 05AE 0300 A8E1 0315 0062;00E0 05AE A8E1 0315 0062;0061 05AE 0300 A8E1 0315 0062;
0061 A8E1 0315 0300 05AE 0062;0061 05AE A8E1 0300 0315 0062;0061 05AE A8E1 0300 0315 0062;0061 05AE A8E1 0300 0315 0062;0061 05AE A8E1 0300 0315 0062;
0061 0315 0300 05AE A8E2 0062;00E0 05AE A8E2 0315 0062;0061 05AE 0300 A8E2 0315 0062;00E0 05AE A8E2 0315 0062;0061 05AE 0300 A8E2 0315 0062;
0061 A8E2 0315 0300 05AE 0062;0061 05AE A8E2 0300 0315 0062;0061 05AE A8E2 0300 0315 0062;0061 05AE A8E2 0300 0315 0062;0061 05AE A8E2 0300 0315 0062;
0061 0315 0300 05AE A8E3 0062;00E0 05AE A8E3 0315 0062;0061 05AE 0300 A8E3 0315 0062;00E0 05AE A8E3 0315 0062;0061 05AE 0300 A8E3 0315 0062;
0061 A8E3 0315 0300 05AE 0062;0061 05AE A8E3 0300 0315 0062;0061 05AE A8E3 0300 0315 0062;0061 05AE A8E3 0300 0315 0062;0061 05AE A8E3 0300 0315 0062;
0061 0315 0300 05AE A8E4 0062;00E0 05AE A8E4 0315 0062;0061 05AE 0300 A8E4 0315 0062;00E0 05AE A8E4 0315 0062;0061 05AE 0300 A8E4 0315 0062;
0061 A8E4 0315 0300 05AE 0062;0061 05AE A8E4 0300 0315 0062;0061 05AE A8E4 0300 0315 0062;0061 05AE A8E4 0300 0315 0062;0061 05AE A8E4 0300 0315 0062;
0061 0315 0300 05AE A8E5 0062;00E0 05AE A8E5 0315 0062;0061 05AE 0300 A8E5 0315 0062;00E0 05AE A8E5 0315 0062;0061 05AE 0300 A8E5 0315 0062;
0061 A8E5 0315 0300 05AE 0062;0061 05AE A8E5 0300 0315 0062;0061 05AE A8E5 0300 0315 0062;0061 05AE A8E5 0300 0315 0062;0061 05AE A8E5 0300 0315 0062;
0061 0315 0300 05AE A8E6 0062;00E0 05AE A8E6 0315 0062;0061 05AE 0300 A8E6 0315 0062;00E0 05AE A8E6 0315 0062;0061 05AE 0300 A8E6 0315 0062;
0061 A8E6 0315 0300 05AE 0062;0061 05AE A8E6 0300 0315 0062;0061 05AE A8E6 0300 0315 0062;0061 05AE A8E6 0300 0315 0062;0061 05AE A8E6 0300 0315 0062;
0061 0315 0300 05AE A8E7 0062;00E0 05AE A8E7 0315 0062;0061 05AE 0300 A8E7 0315 0062;00E0 05AE A8E7 0315 0062;0061 05AE 0300 A8E7 0315 0062;
0061 A8E7 0315 0300 05AE 0062;0061 05AE A8E7 0300 0315 0062;0061 05AE A8E7 0300 0315 0062;0061 05AE A8E7 0300 0315 0062;0061 05AE A8E7 0300 0315 0062;
0061 0315 0300 05AE A8E8 0062;00E0 05AE A8E8 0315 0062;0061 05AE 0300 A8E8 0315 0062;00E0 05AE A8E8 0315 0062;0061 05AE 0300 A8E8 0315 0062;
0061 A8E8 0315 0300 05AE 0062;0061 05AE A8E8 0300 0315 0062;0061 05AE A8E8 0300 0315 0062;0061 05AE A8E8 0300 0315 0062;0061 05AE A8E8 0300 0315 0062;
0061 0315 0300 05AE A8E9 0062;00E0 05AE A8E9 0315 0062;0061 05AE 0300 A8E9 0315 0062;00E0 05AE A8E9 0315 0062;0061 05AE 0300 A8E9 0315 0062;
0061 A8E9 0315 0300 05AE 0062;0061 05AE A8E9 0300 0315 0062;0061 05AE A8E9 0300 0315 0062;0061 05AE A8E9 0300 0315 0062;0061 05AE A8E9 0300 0315 0062;
0061 0315 0300 05AE A8EA 0062;00E0 05AE A8EA 0315 0062;0061 05AE 0300 A8EA 0315 0062;00E0 05AE A8EA 0315 0062;0061 05AE 0300 A8EA 0315 0062;
0061 A8EA 0315 0300 05AE 0062;0061 05AE A8EA 0300 0315 0062;0061 05AE A8EA 0300 0315 0062;0061 05AE A8EA 0300 0315 0062;0061 05AE A8EA 0300 0315 0062;
0061 0315 0300 05AE A8EB 0062;00E0 05AE A8EB 0315 0062;0061 05AE 0300 A8EB 0315 0062;00E0 05AE A8EB 0315 0062;0061 05AE 0300 A8EB 0315 0062;
0061 A8EB 0315 0300 05AE 0062;0061 05AE A8EB 0300 0315 0062;0061 05AE A8EB 0300 0315 0062;0061 05AE A8EB 0300 0315 0062;0061 05AE A8EB 0300 0315 0062;
0061 0315 0300 05AE A8EC 0062;00E0 05AE A8EC 0315 0062;0061 05AE 0300 A8EC 0315 0062;00E0 05AE A8EC 0315 0062;0061 05AE 0300 A8EC 0315 0062;
0061 A8EC 0315 0300 05AE 0062;0061 05AE A8EC 0300 0315 0062;0061 05AE A8EC 0300 0315 0062;0061 05AE A8EC 0300 0315 0062;0061 05AE A8EC 0300 0315 0062;
0061 0315 0300 05AE A8ED 0062;00E0 05AE A8ED 0315 0062;0061 05AE 0300 A8ED 0315 0062;00E0 05AE A8ED 0315 0062;0061 05AE 0300 A8ED 0315 0062;
0061 A8ED 0315 0300 05AE 0062;0061 05AE A8ED 0300 0315 0062;0061 05AE A8ED 0300 0315 0062;0061 05AE A8ED 0300 0315 0062;0061 05AE A8ED 0300 0315 0062;
0061 0315 0300 05AE A8EE 0062;00E0 05AE A8EE 0315 0062;0061 05AE 0300 A8EE 0315 0062;00E0 05AE A8EE 0315 0062;0061 05AE 0300 A8EE 0315 0062;
0061 A8EE 0315 0300 05AE 0062;0061 05AE A8EE 0300 0315 0062;0061 05AE A8EE 0300 0315 0062;0061 05AE A8EE 0300 0315 0062;0061 05AE A8EE 0300 0315 0062;
0061 0315 0300 05AE A8EF 0062;00E0 05AE A8EF 0315 0062;0061 05AE 0300 A8EF 0315 0062;00E0 05AE A8EF 0315 0062;0061 05AE 0300 A8EF 0315 0062;
0061 A8EF 0315 0300 05AE 0062;0061 05AE A8EF 0300 0315 0062;0061 05AE A8EF 0300 0315 0062;0061 05AE A8EF 0300 0315 0062;0061 05AE A8EF 0300 0315 0062;
0061 0315 0300 05AE A8F0 0062;00E0 05AE A8F0 0315 0062;0061 05AE 0300 A8F0 0315 0062;00E0 05AE A8F0 0315 0062;0061 05AE 0300 A8F0 0315 0062;
0061 A8F0 0315 0300 05AE 0062;0061 05AE A8F0 0300 0315 0062;0061 05AE A8F0 0300 0315 0062;0061 05AE A8F0 0300 0315 0062;0061 05AE A8F0 0300 0315 0062;
0061 0315 0300 05AE A8F1 0062;00E0 05AE A8F1 0315 0062;0061 05AE 0300 A8F1 0315 0062;00E0 05AE A8F1 0315 0062;0061 05AE 0300 A8F1 0315 0062;
0061 A8F1 0315 0300 05AE 0062;0061 05AE A8F1 0300 0315 0062;0061 05AE A8F1 0300 0315 0062;0061 05AE A8F1 0300 0315 0062;0061 05AE A8F1 0300 0315 0062;
0061 059A 0316 302A A92B 0062;0061 302A 0316 A92B 059A 0062;0061 302A 0316 A92B 059A 0062;0061 302A 0316 A92B 059A 0062;0061 302A 0316 A92B 059A 0062;
0061 A92B 059A 0316 302A 0062;0061 302A A92B 0316 059A 0062;0061 302A A92B 0316 059A 0062;0061 302A A92B 0316 059A 0062;0061 302A A92B 0316 059A 0062;
0061 059A 0316 302A A92C 0062;0061 302A 0316 A92C 059A 0062;0061 302A 0316 A92C 059A 0062;0061 302A 0316 A92C 059A 0062;0061 302A 0316 A92C 059A 0062;
@ -17550,6 +17819,30 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 A92D 059A 0316 302A 0062;0061 302A A92D 0316 059A 0062;0061 302A A92D 0316 059A 0062;0061 302A A92D 0316 059A 0062;0061 302A A92D 0316 059A 0062;
0061 05B0 094D 3099 A953 0062;0061 3099 094D A953 05B0 0062;0061 3099 094D A953 05B0 0062;0061 3099 094D A953 05B0 0062;0061 3099 094D A953 05B0 0062;
0061 A953 05B0 094D 3099 0062;0061 3099 A953 094D 05B0 0062;0061 3099 A953 094D 05B0 0062;0061 3099 A953 094D 05B0 0062;0061 3099 A953 094D 05B0 0062;
0061 3099 093C 0334 A9B3 0062;0061 0334 093C A9B3 3099 0062;0061 0334 093C A9B3 3099 0062;0061 0334 093C A9B3 3099 0062;0061 0334 093C A9B3 3099 0062;
0061 A9B3 3099 093C 0334 0062;0061 0334 A9B3 093C 3099 0062;0061 0334 A9B3 093C 3099 0062;0061 0334 A9B3 093C 3099 0062;0061 0334 A9B3 093C 3099 0062;
0061 05B0 094D 3099 A9C0 0062;0061 3099 094D A9C0 05B0 0062;0061 3099 094D A9C0 05B0 0062;0061 3099 094D A9C0 05B0 0062;0061 3099 094D A9C0 05B0 0062;
0061 A9C0 05B0 094D 3099 0062;0061 3099 A9C0 094D 05B0 0062;0061 3099 A9C0 094D 05B0 0062;0061 3099 A9C0 094D 05B0 0062;0061 3099 A9C0 094D 05B0 0062;
0061 0315 0300 05AE AAB0 0062;00E0 05AE AAB0 0315 0062;0061 05AE 0300 AAB0 0315 0062;00E0 05AE AAB0 0315 0062;0061 05AE 0300 AAB0 0315 0062;
0061 AAB0 0315 0300 05AE 0062;0061 05AE AAB0 0300 0315 0062;0061 05AE AAB0 0300 0315 0062;0061 05AE AAB0 0300 0315 0062;0061 05AE AAB0 0300 0315 0062;
0061 0315 0300 05AE AAB2 0062;00E0 05AE AAB2 0315 0062;0061 05AE 0300 AAB2 0315 0062;00E0 05AE AAB2 0315 0062;0061 05AE 0300 AAB2 0315 0062;
0061 AAB2 0315 0300 05AE 0062;0061 05AE AAB2 0300 0315 0062;0061 05AE AAB2 0300 0315 0062;0061 05AE AAB2 0300 0315 0062;0061 05AE AAB2 0300 0315 0062;
0061 0315 0300 05AE AAB3 0062;00E0 05AE AAB3 0315 0062;0061 05AE 0300 AAB3 0315 0062;00E0 05AE AAB3 0315 0062;0061 05AE 0300 AAB3 0315 0062;
0061 AAB3 0315 0300 05AE 0062;0061 05AE AAB3 0300 0315 0062;0061 05AE AAB3 0300 0315 0062;0061 05AE AAB3 0300 0315 0062;0061 05AE AAB3 0300 0315 0062;
0061 059A 0316 302A AAB4 0062;0061 302A 0316 AAB4 059A 0062;0061 302A 0316 AAB4 059A 0062;0061 302A 0316 AAB4 059A 0062;0061 302A 0316 AAB4 059A 0062;
0061 AAB4 059A 0316 302A 0062;0061 302A AAB4 0316 059A 0062;0061 302A AAB4 0316 059A 0062;0061 302A AAB4 0316 059A 0062;0061 302A AAB4 0316 059A 0062;
0061 0315 0300 05AE AAB7 0062;00E0 05AE AAB7 0315 0062;0061 05AE 0300 AAB7 0315 0062;00E0 05AE AAB7 0315 0062;0061 05AE 0300 AAB7 0315 0062;
0061 AAB7 0315 0300 05AE 0062;0061 05AE AAB7 0300 0315 0062;0061 05AE AAB7 0300 0315 0062;0061 05AE AAB7 0300 0315 0062;0061 05AE AAB7 0300 0315 0062;
0061 0315 0300 05AE AAB8 0062;00E0 05AE AAB8 0315 0062;0061 05AE 0300 AAB8 0315 0062;00E0 05AE AAB8 0315 0062;0061 05AE 0300 AAB8 0315 0062;
0061 AAB8 0315 0300 05AE 0062;0061 05AE AAB8 0300 0315 0062;0061 05AE AAB8 0300 0315 0062;0061 05AE AAB8 0300 0315 0062;0061 05AE AAB8 0300 0315 0062;
0061 0315 0300 05AE AABE 0062;00E0 05AE AABE 0315 0062;0061 05AE 0300 AABE 0315 0062;00E0 05AE AABE 0315 0062;0061 05AE 0300 AABE 0315 0062;
0061 AABE 0315 0300 05AE 0062;0061 05AE AABE 0300 0315 0062;0061 05AE AABE 0300 0315 0062;0061 05AE AABE 0300 0315 0062;0061 05AE AABE 0300 0315 0062;
0061 0315 0300 05AE AABF 0062;00E0 05AE AABF 0315 0062;0061 05AE 0300 AABF 0315 0062;00E0 05AE AABF 0315 0062;0061 05AE 0300 AABF 0315 0062;
0061 AABF 0315 0300 05AE 0062;0061 05AE AABF 0300 0315 0062;0061 05AE AABF 0300 0315 0062;0061 05AE AABF 0300 0315 0062;0061 05AE AABF 0300 0315 0062;
0061 0315 0300 05AE AAC1 0062;00E0 05AE AAC1 0315 0062;0061 05AE 0300 AAC1 0315 0062;00E0 05AE AAC1 0315 0062;0061 05AE 0300 AAC1 0315 0062;
0061 AAC1 0315 0300 05AE 0062;0061 05AE AAC1 0300 0315 0062;0061 05AE AAC1 0300 0315 0062;0061 05AE AAC1 0300 0315 0062;0061 05AE AAC1 0300 0315 0062;
0061 05B0 094D 3099 ABED 0062;0061 3099 094D ABED 05B0 0062;0061 3099 094D ABED 05B0 0062;0061 3099 094D ABED 05B0 0062;0061 3099 094D ABED 05B0 0062;
0061 ABED 05B0 094D 3099 0062;0061 3099 ABED 094D 05B0 0062;0061 3099 ABED 094D 05B0 0062;0061 3099 ABED 094D 05B0 0062;0061 3099 ABED 094D 05B0 0062;
0061 064B FB1E 05C2 FB1E 0062;0061 05C2 FB1E FB1E 064B 0062;0061 05C2 FB1E FB1E 064B 0062;0061 05C2 FB1E FB1E 064B 0062;0061 05C2 FB1E FB1E 064B 0062;
0061 FB1E 064B FB1E 05C2 0062;0061 05C2 FB1E FB1E 064B 0062;0061 05C2 FB1E FB1E 064B 0062;0061 05C2 FB1E FB1E 064B 0062;0061 05C2 FB1E FB1E 064B 0062;
0061 0315 0300 05AE FE20 0062;00E0 05AE FE20 0315 0062;0061 05AE 0300 FE20 0315 0062;00E0 05AE FE20 0315 0062;0061 05AE 0300 FE20 0315 0062;
@ -17580,6 +17873,10 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 10A3A 059A 0316 302A 0062;0061 302A 10A3A 0316 059A 0062;0061 302A 10A3A 0316 059A 0062;0061 302A 10A3A 0316 059A 0062;0061 302A 10A3A 0316 059A 0062;
0061 05B0 094D 3099 10A3F 0062;0061 3099 094D 10A3F 05B0 0062;0061 3099 094D 10A3F 05B0 0062;0061 3099 094D 10A3F 05B0 0062;0061 3099 094D 10A3F 05B0 0062;
0061 10A3F 05B0 094D 3099 0062;0061 3099 10A3F 094D 05B0 0062;0061 3099 10A3F 094D 05B0 0062;0061 3099 10A3F 094D 05B0 0062;0061 3099 10A3F 094D 05B0 0062;
0061 05B0 094D 3099 110B9 0062;0061 3099 094D 110B9 05B0 0062;0061 3099 094D 110B9 05B0 0062;0061 3099 094D 110B9 05B0 0062;0061 3099 094D 110B9 05B0 0062;
0061 110B9 05B0 094D 3099 0062;0061 3099 110B9 094D 05B0 0062;0061 3099 110B9 094D 05B0 0062;0061 3099 110B9 094D 05B0 0062;0061 3099 110B9 094D 05B0 0062;
0061 3099 093C 0334 110BA 0062;0061 0334 093C 110BA 3099 0062;0061 0334 093C 110BA 3099 0062;0061 0334 093C 110BA 3099 0062;0061 0334 093C 110BA 3099 0062;
0061 110BA 3099 093C 0334 0062;0061 0334 110BA 093C 3099 0062;0061 0334 110BA 093C 3099 0062;0061 0334 110BA 093C 3099 0062;0061 0334 110BA 093C 3099 0062;
0061 302A 031B 1DCE 1D165 0062;0061 1DCE 031B 1D165 302A 0062;0061 1DCE 031B 1D165 302A 0062;0061 1DCE 031B 1D165 302A 0062;0061 1DCE 031B 1D165 302A 0062;
0061 1D165 302A 031B 1DCE 0062;0061 1DCE 1D165 031B 302A 0062;0061 1DCE 1D165 031B 302A 0062;0061 1DCE 1D165 031B 302A 0062;0061 1DCE 1D165 031B 302A 0062;
0061 302A 031B 1DCE 1D166 0062;0061 1DCE 031B 1D166 302A 0062;0061 1DCE 031B 1D166 302A 0062;0061 1DCE 031B 1D166 302A 0062;0061 1DCE 031B 1D166 302A 0062;

View file

@ -1,10 +1,10 @@
# PropList-5.1.0.txt
# Date: 2008-03-20, 17:55:27 GMT [MD]
# PropList-5.2.0.txt
# Date: 2009-08-22, 04:58:40 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -41,6 +41,7 @@
002D ; Dash
058A ; Dash
05BE ; Dash
1400 ; Dash
1806 ; Dash
2010..2015 ; Dash
2053 ; Dash
@ -57,7 +58,7 @@ FE58 ; Dash
FE63 ; Dash
FF0D ; Dash
# Total code points: 24
# Total code points: 25
# ================================================
@ -124,6 +125,7 @@ FF63 ; Quotation_Mark
0700..070A ; Terminal_Punctuation
070C ; Terminal_Punctuation
07F8..07F9 ; Terminal_Punctuation
0830..083E ; Terminal_Punctuation
0964..0965 ; Terminal_Punctuation
0E5A..0E5B ; Terminal_Punctuation
0F08 ; Terminal_Punctuation
@ -137,6 +139,7 @@ FF63 ; Quotation_Mark
1802..1805 ; Terminal_Punctuation
1808..1809 ; Terminal_Punctuation
1944..1945 ; Terminal_Punctuation
1AA8..1AAB ; Terminal_Punctuation
1B5A..1B5B ; Terminal_Punctuation
1B5D..1B5F ; Terminal_Punctuation
1C3B..1C3F ; Terminal_Punctuation
@ -145,11 +148,16 @@ FF63 ; Quotation_Mark
2047..2049 ; Terminal_Punctuation
2E2E ; Terminal_Punctuation
3001..3002 ; Terminal_Punctuation
A4FE..A4FF ; Terminal_Punctuation
A60D..A60F ; Terminal_Punctuation
A6F3..A6F7 ; Terminal_Punctuation
A876..A877 ; Terminal_Punctuation
A8CE..A8CF ; Terminal_Punctuation
A92F ; Terminal_Punctuation
A9C7..A9C9 ; Terminal_Punctuation
AA5D..AA5F ; Terminal_Punctuation
AADF ; Terminal_Punctuation
ABEB ; Terminal_Punctuation
FE50..FE52 ; Terminal_Punctuation
FE54..FE57 ; Terminal_Punctuation
FF01 ; Terminal_Punctuation
@ -161,10 +169,13 @@ FF61 ; Terminal_Punctuation
FF64 ; Terminal_Punctuation
1039F ; Terminal_Punctuation
103D0 ; Terminal_Punctuation
10857 ; Terminal_Punctuation
1091F ; Terminal_Punctuation
10B3A..10B3F ; Terminal_Punctuation
110BE..110C1 ; Terminal_Punctuation
12470..12473 ; Terminal_Punctuation
# Total code points: 119
# Total code points: 161
# ================================================
@ -347,11 +358,17 @@ FF41..FF46 ; Hex_Digit
0711 ; Other_Alphabetic
0730..073F ; Other_Alphabetic
07A6..07B0 ; Other_Alphabetic
0901..0902 ; Other_Alphabetic
0816..0817 ; Other_Alphabetic
081B..0823 ; Other_Alphabetic
0825..0827 ; Other_Alphabetic
0829..082C ; Other_Alphabetic
0900..0902 ; Other_Alphabetic
0903 ; Other_Alphabetic
093E..0940 ; Other_Alphabetic
0941..0948 ; Other_Alphabetic
0949..094C ; Other_Alphabetic
094E ; Other_Alphabetic
0955 ; Other_Alphabetic
0962..0963 ; Other_Alphabetic
0981 ; Other_Alphabetic
0982..0983 ; Other_Alphabetic
@ -454,6 +471,8 @@ FF41..FF46 ; Hex_Digit
1082 ; Other_Alphabetic
1083..1084 ; Other_Alphabetic
1085..1086 ; Other_Alphabetic
109C ; Other_Alphabetic
109D ; Other_Alphabetic
135F ; Other_Alphabetic
1712..1713 ; Other_Alphabetic
1732..1733 ; Other_Alphabetic
@ -476,6 +495,16 @@ FF41..FF46 ; Hex_Digit
19C8..19C9 ; Other_Alphabetic
1A17..1A18 ; Other_Alphabetic
1A19..1A1B ; Other_Alphabetic
1A55 ; Other_Alphabetic
1A56 ; Other_Alphabetic
1A57 ; Other_Alphabetic
1A58..1A5E ; Other_Alphabetic
1A61 ; Other_Alphabetic
1A62 ; Other_Alphabetic
1A63..1A64 ; Other_Alphabetic
1A65..1A6C ; Other_Alphabetic
1A6D..1A72 ; Other_Alphabetic
1A73..1A74 ; Other_Alphabetic
1B00..1B03 ; Other_Alphabetic
1B04 ; Other_Alphabetic
1B35 ; Other_Alphabetic
@ -494,6 +523,7 @@ FF41..FF46 ; Hex_Digit
1C24..1C2B ; Other_Alphabetic
1C2C..1C33 ; Other_Alphabetic
1C34..1C35 ; Other_Alphabetic
1CF2 ; Other_Alphabetic
24B6..24E9 ; Other_Alphabetic
2DE0..2DFF ; Other_Alphabetic
A823..A824 ; Other_Alphabetic
@ -504,6 +534,14 @@ A8B4..A8C3 ; Other_Alphabetic
A926..A92A ; Other_Alphabetic
A947..A951 ; Other_Alphabetic
A952 ; Other_Alphabetic
A980..A982 ; Other_Alphabetic
A983 ; Other_Alphabetic
A9B3 ; Other_Alphabetic
A9B4..A9B5 ; Other_Alphabetic
A9B6..A9B9 ; Other_Alphabetic
A9BA..A9BB ; Other_Alphabetic
A9BC ; Other_Alphabetic
A9BD..A9BF ; Other_Alphabetic
AA29..AA2E ; Other_Alphabetic
AA2F..AA30 ; Other_Alphabetic
AA31..AA32 ; Other_Alphabetic
@ -512,12 +550,25 @@ AA35..AA36 ; Other_Alphabetic
AA43 ; Other_Alphabetic
AA4C ; Other_Alphabetic
AA4D ; Other_Alphabetic
AAB0 ; Other_Alphabetic
AAB2..AAB4 ; Other_Alphabetic
AAB7..AAB8 ; Other_Alphabetic
AABE ; Other_Alphabetic
ABE3..ABE4 ; Other_Alphabetic
ABE5 ; Other_Alphabetic
ABE6..ABE7 ; Other_Alphabetic
ABE8 ; Other_Alphabetic
ABE9..ABEA ; Other_Alphabetic
FB1E ; Other_Alphabetic
10A01..10A03 ; Other_Alphabetic
10A05..10A06 ; Other_Alphabetic
10A0C..10A0F ; Other_Alphabetic
11082 ; Other_Alphabetic
110B0..110B2 ; Other_Alphabetic
110B3..110B6 ; Other_Alphabetic
110B7..110B8 ; Other_Alphabetic
# Total code points: 663
# Total code points: 759
# ================================================
@ -526,14 +577,15 @@ FB1E ; Other_Alphabetic
3021..3029 ; Ideographic
3038..303A ; Ideographic
3400..4DB5 ; Ideographic
4E00..9FC3 ; Ideographic
4E00..9FCB ; Ideographic
F900..FA2D ; Ideographic
FA30..FA6A ; Ideographic
FA30..FA6D ; Ideographic
FA70..FAD9 ; Ideographic
20000..2A6D6 ; Ideographic
2A700..2B734 ; Ideographic
2F800..2FA1D ; Ideographic
# Total code points: 71248
# Total code points: 75408
# ================================================
@ -577,6 +629,7 @@ FA70..FAD9 ; Ideographic
07A6..07B0 ; Diacritic
07EB..07F3 ; Diacritic
07F4..07F5 ; Diacritic
0818..0819 ; Diacritic
093C ; Diacritic
094D ; Diacritic
0951..0954 ; Diacritic
@ -611,25 +664,35 @@ FA70..FAD9 ; Ideographic
1087..108C ; Diacritic
108D ; Diacritic
108F ; Diacritic
109A..109B ; Diacritic
17C9..17D3 ; Diacritic
17DD ; Diacritic
1939..193B ; Diacritic
1A75..1A7C ; Diacritic
1A7F ; Diacritic
1B34 ; Diacritic
1B44 ; Diacritic
1B6B..1B73 ; Diacritic
1BAA ; Diacritic
1C36..1C37 ; Diacritic
1C78..1C7D ; Diacritic
1CD0..1CD2 ; Diacritic
1CD3 ; Diacritic
1CD4..1CE0 ; Diacritic
1CE1 ; Diacritic
1CE2..1CE8 ; Diacritic
1CED ; Diacritic
1D2C..1D61 ; Diacritic
1D62..1D6A ; Diacritic
1DC4..1DCF ; Diacritic
1DFE..1DFF ; Diacritic
1DFD..1DFF ; Diacritic
1FBD ; Diacritic
1FBF..1FC1 ; Diacritic
1FCD..1FCF ; Diacritic
1FDD..1FDF ; Diacritic
1FED..1FEF ; Diacritic
1FFD..1FFE ; Diacritic
2CEF..2CF1 ; Diacritic
2E2F ; Diacritic
302A..302F ; Diacritic
3099..309A ; Diacritic
@ -638,13 +701,24 @@ FA70..FAD9 ; Ideographic
A66F ; Diacritic
A67C..A67D ; Diacritic
A67F ; Diacritic
A6F0..A6F1 ; Diacritic
A717..A71F ; Diacritic
A720..A721 ; Diacritic
A788 ; Diacritic
A8C4 ; Diacritic
A8E0..A8F1 ; Diacritic
A92B..A92D ; Diacritic
A92E ; Diacritic
A953 ; Diacritic
A9B3 ; Diacritic
A9C0 ; Diacritic
AA7B ; Diacritic
AABF ; Diacritic
AAC0 ; Diacritic
AAC1 ; Diacritic
AAC2 ; Diacritic
ABEC ; Diacritic
ABED ; Diacritic
FB1E ; Diacritic
FE20..FE26 ; Diacritic
FF3E ; Diacritic
@ -652,13 +726,14 @@ FF40 ; Diacritic
FF70 ; Diacritic
FF9E..FF9F ; Diacritic
FFE3 ; Diacritic
110B9..110BA ; Diacritic
1D167..1D169 ; Diacritic
1D16D..1D172 ; Diacritic
1D17B..1D182 ; Diacritic
1D185..1D18B ; Diacritic
1D1AA..1D1AD ; Diacritic
# Total code points: 565
# Total code points: 639
# ================================================
@ -669,6 +744,7 @@ FFE3 ; Diacritic
0E46 ; Extender
0EC6 ; Extender
1843 ; Extender
1AA7 ; Extender
1C36 ; Extender
1C7B ; Extender
3005 ; Extender
@ -677,9 +753,12 @@ FFE3 ; Diacritic
30FC..30FE ; Extender
A015 ; Extender
A60C ; Extender
A9CF ; Extender
AA70 ; Extender
AADD ; Extender
FF70 ; Extender
# Total code points: 24
# Total code points: 28
# ================================================
@ -774,7 +853,7 @@ FF9E..FF9F ; Other_Grapheme_Extend
# ================================================
3400..4DB5 ; Unified_Ideograph
4E00..9FC3 ; Unified_Ideograph
4E00..9FCB ; Unified_Ideograph
FA0E..FA0F ; Unified_Ideograph
FA11 ; Unified_Ideograph
FA13..FA14 ; Unified_Ideograph
@ -783,8 +862,9 @@ FA21 ; Unified_Ideograph
FA23..FA24 ; Unified_Ideograph
FA27..FA29 ; Unified_Ideograph
20000..2A6D6 ; Unified_Ideograph
2A700..2B734 ; Unified_Ideograph
# Total code points: 70237
# Total code points: 74394
# ================================================
@ -803,14 +883,17 @@ E01F0..E0FFF ; Other_Default_Ignorable_Code_Point
# ================================================
0340..0341 ; Deprecated
17A3 ; Deprecated
17D3 ; Deprecated
0149 ; Deprecated
0F77 ; Deprecated
0F79 ; Deprecated
17A3..17A4 ; Deprecated
206A..206F ; Deprecated
2329 ; Deprecated
232A ; Deprecated
E0001 ; Deprecated
E0020..E007F ; Deprecated
# Total code points: 107
# Total code points: 110
# ================================================
@ -852,8 +935,11 @@ E0020..E007F ; Deprecated
0E40..0E44 ; Logical_Order_Exception
0EC0..0EC4 ; Logical_Order_Exception
AAB5..AAB6 ; Logical_Order_Exception
AAB9 ; Logical_Order_Exception
AABB..AABC ; Logical_Order_Exception
# Total code points: 10
# Total code points: 15
# ================================================
@ -899,19 +985,25 @@ E0020..E007F ; Deprecated
2047..2049 ; STerm
2E2E ; STerm
3002 ; STerm
A4FF ; STerm
A60E..A60F ; STerm
A6F3 ; STerm
A6F7 ; STerm
A876..A877 ; STerm
A8CE..A8CF ; STerm
A92F ; STerm
A9C8..A9C9 ; STerm
AA5D..AA5F ; STerm
ABEB ; STerm
FE52 ; STerm
FE56..FE57 ; STerm
FF01 ; STerm
FF0E ; STerm
FF1F ; STerm
FF61 ; STerm
110BE..110C1 ; STerm
# Total code points: 56
# Total code points: 66
# ================================================
@ -1024,8 +1116,8 @@ E0100..E01EF ; Variation_Selector
239B..23B3 ; Pattern_Syntax
23B4..23DB ; Pattern_Syntax
23DC..23E1 ; Pattern_Syntax
23E2..23E7 ; Pattern_Syntax
23E8..23FF ; Pattern_Syntax
23E2..23E8 ; Pattern_Syntax
23E9..23FF ; Pattern_Syntax
2400..2426 ; Pattern_Syntax
2427..243F ; Pattern_Syntax
2440..244A ; Pattern_Syntax
@ -1038,12 +1130,14 @@ E0100..E01EF ; Variation_Selector
25F8..25FF ; Pattern_Syntax
2600..266E ; Pattern_Syntax
266F ; Pattern_Syntax
2670..269D ; Pattern_Syntax
269E..269F ; Pattern_Syntax
26A0..26BC ; Pattern_Syntax
26BD..26BF ; Pattern_Syntax
26C0..26C3 ; Pattern_Syntax
26C4..2700 ; Pattern_Syntax
2670..26CD ; Pattern_Syntax
26CE ; Pattern_Syntax
26CF..26E1 ; Pattern_Syntax
26E2 ; Pattern_Syntax
26E3 ; Pattern_Syntax
26E4..26E7 ; Pattern_Syntax
26E8..26FF ; Pattern_Syntax
2700 ; Pattern_Syntax
2701..2704 ; Pattern_Syntax
2705 ; Pattern_Syntax
2706..2709 ; Pattern_Syntax
@ -1056,9 +1150,7 @@ E0100..E01EF ; Variation_Selector
274E ; Pattern_Syntax
274F..2752 ; Pattern_Syntax
2753..2755 ; Pattern_Syntax
2756 ; Pattern_Syntax
2757 ; Pattern_Syntax
2758..275E ; Pattern_Syntax
2756..275E ; Pattern_Syntax
275F..2760 ; Pattern_Syntax
2761..2767 ; Pattern_Syntax
2768 ; Pattern_Syntax
@ -1138,8 +1230,8 @@ E0100..E01EF ; Variation_Selector
2B45..2B46 ; Pattern_Syntax
2B47..2B4C ; Pattern_Syntax
2B4D..2B4F ; Pattern_Syntax
2B50..2B54 ; Pattern_Syntax
2B55..2BFF ; Pattern_Syntax
2B50..2B59 ; Pattern_Syntax
2B5A..2BFF ; Pattern_Syntax
2E00..2E01 ; Pattern_Syntax
2E02 ; Pattern_Syntax
2E03 ; Pattern_Syntax
@ -1171,8 +1263,8 @@ E0100..E01EF ; Variation_Selector
2E29 ; Pattern_Syntax
2E2A..2E2E ; Pattern_Syntax
2E2F ; Pattern_Syntax
2E30 ; Pattern_Syntax
2E31..2E7F ; Pattern_Syntax
2E30..2E31 ; Pattern_Syntax
2E32..2E7F ; Pattern_Syntax
3001..3003 ; Pattern_Syntax
3008 ; Pattern_Syntax
3009 ; Pattern_Syntax

View file

@ -1,10 +1,10 @@
# PropertyAliases-5.1.0.txt
# Date: 2008-02-28, 12:28:00 PST [KW]
# PropertyAliases-5.2.0.txt
# Date: 2009-08-24, 03:26:46 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# This file contains aliases for properties used in the UCD.
# These names can be used for XML formats of UCD data, for regular-expression
@ -32,7 +32,7 @@
# NOTE: Property value names are NOT unique across properties. For example:
#
# AL means Arabic Letter for the Bidi_Class property, and
# AL means Alpha_Left for the Combining_Class property, and
# AL means Above_Left for the Combining_Class property, and
# AL means Alphabetic for the Line_Break property.
#
# In addition, some property names may be the same as some property value names.
@ -43,14 +43,16 @@
#
# The combination of property value and property name is, however, unique.
#
# For more information, see UTS #18: Regular Expression Guidelines
# For more information, see UTS #18: Unicode Regular Expressions
# ================================================
# ================================================
# Numeric Properties
# ================================================
ccc ; Canonical_Combining_Class
cjkAccountingNumeric; kAccountingNumeric
cjkOtherNumeric; kOtherNumeric
cjkPrimaryNumeric; kPrimaryNumeric
nv ; Numeric_Value
# ================================================
@ -58,9 +60,11 @@ nv ; Numeric_Value
# ================================================
bmg ; Bidi_Mirroring_Glyph
cf ; Case_Folding
cjkCompatibilityVariant; kCompatibilityVariant
dm ; Decomposition_Mapping
FC_NFKC ; FC_NFKC_Closure
lc ; Lowercase_Mapping
NFKC_CF ; NFKC_Casefold
scf ; Simple_Case_Folding ; sfc
slc ; Simple_Lowercase_Mapping
stc ; Simple_Titlecase_Mapping
@ -71,11 +75,22 @@ uc ; Uppercase_Mapping
# ================================================
# Miscellaneous Properties
# ================================================
cjkIICore ; kIICore
cjkIRG_GSource; kIRG_GSource
cjkIRG_HSource; kIRG_HSource
cjkIRG_JSource; kIRG_JSource
cjkIRG_KPSource; kIRG_KPSource
cjkIRG_KSource; kIRG_KSource
cjkIRG_MSource; kIRG_MSource
cjkIRG_TSource; kIRG_TSource
cjkIRG_USource; kIRG_USource
cjkIRG_VSource; kIRG_VSource
cjkRSUnicode; kRSUnicode ; Unicode_Radical_Stroke; URS
isc ; ISO_Comment
JSN ; Jamo_Short_Name
na ; Name
na1 ; Unicode_1_Name
URS ; Unicode_Radical_Stroke
Name_Alias; Name_Alias
# ================================================
# Catalog Properties
@ -88,6 +103,7 @@ sc ; Script
# Enumerated Properties
# ================================================
bc ; Bidi_Class
ccc ; Canonical_Combining_Class
dt ; Decomposition_Type
ea ; East_Asian_Width
gc ; General_Category
@ -111,8 +127,16 @@ AHex ; ASCII_Hex_Digit
Alpha ; Alphabetic
Bidi_C ; Bidi_Control
Bidi_M ; Bidi_Mirrored
Cased ; Cased
CE ; Composition_Exclusion
CI ; Case_Ignorable
Comp_Ex ; Full_Composition_Exclusion
CWCF ; Changes_When_Casefolded
CWCM ; Changes_When_Casemapped
CWKCF ; Changes_When_NFKC_Casefolded
CWL ; Changes_When_Lowercased
CWT ; Changes_When_Titlecased
CWU ; Changes_When_Uppercased
Dash ; Dash
Dep ; Deprecated
DI ; Default_Ignorable_Code_Point
@ -160,6 +184,6 @@ XO_NFKC ; Expands_On_NFKC
XO_NFKD ; Expands_On_NFKD
# ================================================
# Total: 88
# Total: 112
# EOF

View file

@ -1,10 +1,10 @@
# PropertyValueAliases-5.1.0.txt
# Date: 2008-03-03, 21:58:08 GMT [MD]
# PropertyValueAliases-5.2.0.txt
# Date: 2009-08-24, 03:27:01 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# This file contains aliases for property values used in the UCD.
# These names can be used for XML formats of UCD data, for regular-expression
@ -40,7 +40,7 @@
# NOTE: Property value names are NOT unique across properties. For example:
#
# AL means Arabic Letter for the Bidi_Class property, and
# AL means Alpha_Left for the Combining_Class property, and
# AL means Above_Left for the Combining_Class property, and
# AL means Alphabetic for the Line_Break property.
#
# In addition, some property names may be the same as some property value names.
@ -51,7 +51,7 @@
#
# The combination of property value and property name is, however, unique.
#
# For more information, see UTS #18: Regular Expression Guidelines
# For more information, see UTS #18: Unicode Regular Expressions
# ================================================
@ -72,6 +72,7 @@ age; n/a ; 4.0
age; n/a ; 4.1
age; n/a ; 5.0
age; n/a ; 5.1
age; n/a ; 5.2
age; n/a ; unassigned
# Alphabetic (Alpha)
@ -128,7 +129,9 @@ blk; n/a ; Arabic_Presentation_Forms_B
blk; n/a ; Arabic_Supplement
blk; n/a ; Armenian
blk; n/a ; Arrows
blk; n/a ; Avestan
blk; n/a ; Balinese
blk; n/a ; Bamum
blk; n/a ; Basic_Latin ; ASCII
blk; n/a ; Bengali
blk; n/a ; Block_Elements
@ -152,10 +155,12 @@ blk; n/a ; CJK_Symbols_And_Punctuation
blk; n/a ; CJK_Unified_Ideographs
blk; n/a ; CJK_Unified_Ideographs_Extension_A
blk; n/a ; CJK_Unified_Ideographs_Extension_B
blk; n/a ; CJK_Unified_Ideographs_Extension_C
blk; n/a ; Combining_Diacritical_Marks
blk; n/a ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols
blk; n/a ; Combining_Diacritical_Marks_Supplement
blk; n/a ; Combining_Half_Marks
blk; n/a ; Common_Indic_Number_Forms
blk; n/a ; Control_Pictures
blk; n/a ; Coptic
blk; n/a ; Counting_Rod_Numerals
@ -169,10 +174,14 @@ blk; n/a ; Cyrillic_Extended_B
blk; n/a ; Cyrillic_Supplement ; Cyrillic_Supplementary
blk; n/a ; Deseret
blk; n/a ; Devanagari
blk; n/a ; Devanagari_Extended
blk; n/a ; Dingbats
blk; n/a ; Domino_Tiles
blk; n/a ; Egyptian_Hieroglyphs
blk; n/a ; Enclosed_Alphanumeric_Supplement
blk; n/a ; Enclosed_Alphanumerics
blk; n/a ; Enclosed_CJK_Letters_And_Months
blk; n/a ; Enclosed_Ideographic_Supplement
blk; n/a ; Ethiopic
blk; n/a ; Ethiopic_Extended
blk; n/a ; Ethiopic_Supplement
@ -189,6 +198,8 @@ blk; n/a ; Gurmukhi
blk; n/a ; Halfwidth_And_Fullwidth_Forms
blk; n/a ; Hangul_Compatibility_Jamo
blk; n/a ; Hangul_Jamo
blk; n/a ; Hangul_Jamo_Extended_A
blk; n/a ; Hangul_Jamo_Extended_B
blk; n/a ; Hangul_Syllables
blk; n/a ; Hanunoo
blk; n/a ; Hebrew
@ -196,7 +207,12 @@ blk; n/a ; High_Private_Use_Surrogates
blk; n/a ; High_Surrogates
blk; n/a ; Hiragana
blk; n/a ; Ideographic_Description_Characters
blk; n/a ; Imperial_Aramaic
blk; n/a ; Inscriptional_Pahlavi
blk; n/a ; Inscriptional_Parthian
blk; n/a ; IPA_Extensions
blk; n/a ; Javanese
blk; n/a ; Kaithi
blk; n/a ; Kanbun
blk; n/a ; Kangxi_Radicals
blk; n/a ; Kannada
@ -218,6 +234,7 @@ blk; n/a ; Letterlike_Symbols
blk; n/a ; Limbu
blk; n/a ; Linear_B_Ideograms
blk; n/a ; Linear_B_Syllabary
blk; n/a ; Lisu
blk; n/a ; Low_Surrogates
blk; n/a ; Lycian
blk; n/a ; Lydian
@ -225,6 +242,7 @@ blk; n/a ; Mahjong_Tiles
blk; n/a ; Malayalam
blk; n/a ; Mathematical_Alphanumeric_Symbols
blk; n/a ; Mathematical_Operators
blk; n/a ; Meetei_Mayek
blk; n/a ; Miscellaneous_Mathematical_Symbols_A
blk; n/a ; Miscellaneous_Mathematical_Symbols_B
blk; n/a ; Miscellaneous_Symbols
@ -234,6 +252,7 @@ blk; n/a ; Modifier_Tone_Letters
blk; n/a ; Mongolian
blk; n/a ; Musical_Symbols
blk; n/a ; Myanmar
blk; n/a ; Myanmar_Extended_A
blk; n/a ; New_Tai_Lue
blk; n/a ; NKo
blk; n/a ; No_Block
@ -242,6 +261,8 @@ blk; n/a ; Ogham
blk; n/a ; Ol_Chiki
blk; n/a ; Old_Italic
blk; n/a ; Old_Persian
blk; n/a ; Old_South_Arabian
blk; n/a ; Old_Turkic
blk; n/a ; Optical_Character_Recognition
blk; n/a ; Oriya
blk; n/a ; Osmanya
@ -252,7 +273,9 @@ blk; n/a ; Phonetic_Extensions
blk; n/a ; Phonetic_Extensions_Supplement
blk; n/a ; Private_Use_Area ; Private_Use
blk; n/a ; Rejang
blk; n/a ; Rumi_Numeral_Symbols
blk; n/a ; Runic
blk; n/a ; Samaritan
blk; n/a ; Saurashtra
blk; n/a ; Shavian
blk; n/a ; Sinhala
@ -273,6 +296,8 @@ blk; n/a ; Tagalog
blk; n/a ; Tagbanwa
blk; n/a ; Tags
blk; n/a ; Tai_Le
blk; n/a ; Tai_Tham
blk; n/a ; Tai_Viet
blk; n/a ; Tai_Xuan_Jing_Symbols
blk; n/a ; Tamil
blk; n/a ; Telugu
@ -282,9 +307,11 @@ blk; n/a ; Tibetan
blk; n/a ; Tifinagh
blk; n/a ; Ugaritic
blk; n/a ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
blk; n/a ; Unified_Canadian_Aboriginal_Syllabics_Extended
blk; n/a ; Vai
blk; n/a ; Variation_Selectors
blk; n/a ; Variation_Selectors_Supplement
blk; n/a ; Vedic_Extensions
blk; n/a ; Vertical_Forms
blk; n/a ; Yi_Radicals
blk; n/a ; Yi_Syllables
@ -299,6 +326,7 @@ ccc; 8; KV ; Kana_Voicing
ccc; 9; VR ; Virama
ccc; 200; ATBL ; Attached_Below_Left
ccc; 202; ATB ; Attached_Below
ccc; 214; ATA ; Attached_Above
ccc; 216; ATAR ; Attached_Above_Right
ccc; 218; BL ; Below_Left
ccc; 220; B ; Below
@ -316,6 +344,46 @@ ccc; 240; IS ; Iota_Subscript
# @missing: 0000..10FFFF; Case_Folding; <code point>
# Case_Ignorable (CI)
CI ; N ; No ; F ; False
CI ; Y ; Yes ; T ; True
# Cased (Cased)
Cased; N ; No ; F ; False
Cased; Y ; Yes ; T ; True
# Changes_When_Casefolded (CWCF)
CWCF; N ; No ; F ; False
CWCF; Y ; Yes ; T ; True
# Changes_When_Casemapped (CWCM)
CWCM; N ; No ; F ; False
CWCM; Y ; Yes ; T ; True
# Changes_When_Lowercased (CWL)
CWL; N ; No ; F ; False
CWL; Y ; Yes ; T ; True
# Changes_When_NFKC_Casefolded (CWKCF)
CWKCF; N ; No ; F ; False
CWKCF; Y ; Yes ; T ; True
# Changes_When_Titlecased (CWT)
CWT; N ; No ; F ; False
CWT; Y ; Yes ; T ; True
# Changes_When_Uppercased (CWU)
CWU; N ; No ; F ; False
CWU; Y ; Yes ; T ; True
# Composition_Exclusion (CE)
CE ; N ; No ; F ; False
@ -600,6 +668,7 @@ jg ; n/a ; Burushaski_Yeh_Barree
jg ; n/a ; Dal
jg ; n/a ; Dalath_Rish
jg ; n/a ; E
jg ; n/a ; Farsi_Yeh
jg ; n/a ; Fe
jg ; n/a ; Feh
jg ; n/a ; Final_Semkath
@ -622,6 +691,7 @@ jg ; n/a ; Mim
jg ; n/a ; No_Joining_Group
jg ; n/a ; Noon
jg ; n/a ; Nun
jg ; n/a ; Nya
jg ; n/a ; Pe
jg ; n/a ; Qaf
jg ; n/a ; Qaph
@ -667,6 +737,7 @@ lb ; BK ; Mandatory_Break
lb ; CB ; Contingent_Break
lb ; CL ; Close_Punctuation
lb ; CM ; Combining_Mark
lb ; CP ; Close_Parenthesis
lb ; CR ; Carriage_Return
lb ; EX ; Exclamation
lb ; GL ; Glue
@ -725,6 +796,10 @@ NFC_QC; Y ; Yes
NFD_QC; N ; No
NFD_QC; Y ; Yes
# NFKC_Casefold (NFKC_CF)
# @missing: 0000..10FFFF; NFKC_Casefold; <code point>
# NFKC_Quick_Check (NFKC_QC)
NFKC_QC; M ; Maybe
@ -740,6 +815,10 @@ NFKD_QC; Y ; Yes
# @missing: 0000..10FFFF; Name; <none>
# Name_Alias (Name_Alias)
# @missing: 0000..10FFFF; Name_Alias; <none>
# Noncharacter_Code_Point (NChar)
NChar; N ; No ; F ; False
@ -824,8 +903,11 @@ STerm; Y ; Yes ; T
# Script (sc)
sc ; Arab ; Arabic
sc ; Armi ; Imperial_Aramaic
sc ; Armn ; Armenian
sc ; Avst ; Avestan
sc ; Bali ; Balinese
sc ; Bamu ; Bamum
sc ; Beng ; Bengali
sc ; Bopo ; Bopomofo
sc ; Brai ; Braille
@ -840,6 +922,7 @@ sc ; Cprt ; Cypriot
sc ; Cyrl ; Cyrillic
sc ; Deva ; Devanagari
sc ; Dsrt ; Deseret
sc ; Egyp ; Egyptian_Hieroglyphs
sc ; Ethi ; Ethiopic
sc ; Geor ; Georgian
sc ; Glag ; Glagolitic
@ -854,31 +937,40 @@ sc ; Hebr ; Hebrew
sc ; Hira ; Hiragana
sc ; Hrkt ; Katakana_Or_Hiragana
sc ; Ital ; Old_Italic
sc ; Java ; Javanese
sc ; Kali ; Kayah_Li
sc ; Kana ; Katakana
sc ; Khar ; Kharoshthi
sc ; Khmr ; Khmer
sc ; Knda ; Kannada
sc ; Kthi ; Kaithi
sc ; Lana ; Tai_Tham
sc ; Laoo ; Lao
sc ; Latn ; Latin
sc ; Lepc ; Lepcha
sc ; Limb ; Limbu
sc ; Linb ; Linear_B
sc ; Lisu ; Lisu
sc ; Lyci ; Lycian
sc ; Lydi ; Lydian
sc ; Mlym ; Malayalam
sc ; Mong ; Mongolian
sc ; Mtei ; Meetei_Mayek
sc ; Mymr ; Myanmar
sc ; Nkoo ; Nko
sc ; Ogam ; Ogham
sc ; Olck ; Ol_Chiki
sc ; Orkh ; Old_Turkic
sc ; Orya ; Oriya
sc ; Osma ; Osmanya
sc ; Phag ; Phags_Pa
sc ; Phli ; Inscriptional_Pahlavi
sc ; Phnx ; Phoenician
sc ; Qaai ; Inherited
sc ; Prti ; Inscriptional_Parthian
sc ; Rjng ; Rejang
sc ; Runr ; Runic
sc ; Samr ; Samaritan
sc ; Sarb ; Old_South_Arabian
sc ; Saur ; Saurashtra
sc ; Shaw ; Shavian
sc ; Sinh ; Sinhala
@ -889,6 +981,7 @@ sc ; Tagb ; Tagbanwa
sc ; Tale ; Tai_Le
sc ; Talu ; New_Tai_Lue
sc ; Taml ; Tamil
sc ; Tavt ; Tai_Viet
sc ; Telu ; Telugu
sc ; Tfng ; Tifinagh
sc ; Tglg ; Tagalog
@ -900,6 +993,7 @@ sc ; Vaii ; Vai
sc ; Xpeo ; Old_Persian
sc ; Xsux ; Cuneiform
sc ; Yiii ; Yi
sc ; Zinh ; Inherited ; Qaai
sc ; Zyyy ; Common
sc ; Zzzz ; Unknown
@ -955,10 +1049,6 @@ Term; Y ; Yes ; T
# @missing: 0000..10FFFF; Unicode_1_Name; <none>
# Unicode_Radical_Stroke (URS)
# @missing: 0000..10FFFF; Unicode_Radical_Stroke; <none>
# Unified_Ideograph (UIdeo)
UIdeo; N ; No ; F ; False
@ -1009,4 +1099,64 @@ XIDC; Y ; Yes ; T
XIDS; N ; No ; F ; False
XIDS; Y ; Yes ; T ; True
# cjkAccountingNumeric (cjkAccountingNumeric)
# @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
# cjkCompatibilityVariant (cjkCompatibilityVariant)
# @missing: 0000..10FFFF; cjkCompatibilityVariant; <code point>
# cjkIICore (cjkIICore)
# @missing: 0000..10FFFF; cjkIICore; <none>
# cjkIRG_GSource (cjkIRG_GSource)
# @missing: 0000..10FFFF; cjkIRG_GSource; <none>
# cjkIRG_HSource (cjkIRG_HSource)
# @missing: 0000..10FFFF; cjkIRG_HSource; <none>
# cjkIRG_JSource (cjkIRG_JSource)
# @missing: 0000..10FFFF; cjkIRG_JSource; <none>
# cjkIRG_KPSource (cjkIRG_KPSource)
# @missing: 0000..10FFFF; cjkIRG_KPSource; <none>
# cjkIRG_KSource (cjkIRG_KSource)
# @missing: 0000..10FFFF; cjkIRG_KSource; <none>
# cjkIRG_MSource (cjkIRG_MSource)
# @missing: 0000..10FFFF; cjkIRG_MSource; <none>
# cjkIRG_TSource (cjkIRG_TSource)
# @missing: 0000..10FFFF; cjkIRG_TSource; <none>
# cjkIRG_USource (cjkIRG_USource)
# @missing: 0000..10FFFF; cjkIRG_USource; <none>
# cjkIRG_VSource (cjkIRG_VSource)
# @missing: 0000..10FFFF; cjkIRG_VSource; <none>
# cjkOtherNumeric (cjkOtherNumeric)
# @missing: 0000..10FFFF; cjkOtherNumeric; NaN
# cjkPrimaryNumeric (cjkPrimaryNumeric)
# @missing: 0000..10FFFF; cjkPrimaryNumeric; NaN
# cjkRSUnicode (cjkRSUnicode)
# @missing: 0000..10FFFF; cjkRSUnicode; <none>
# EOF

View file

@ -1,10 +1,10 @@
# Scripts-5.1.0.txt
# Date: 2008-03-20, 17:55:33 GMT [MD]
# Scripts-5.2.0.txt
# Date: 2009-08-22, 04:58:43 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -94,11 +94,17 @@
0970 ; Common
0CF1..0CF2 ; Common
0E3F ; Common
0FD5..0FD8 ; Common
10FB ; Common
16EB..16ED ; Common
1735..1736 ; Common
1802..1803 ; Common
1805 ; Common
1CD3 ; Common
1CE1 ; Common
1CE9..1CEC ; Common
1CEE..1CF1 ; Common
1CF2 ; Common
2000..200A ; Common
200B ; Common
200E..200F ; Common
@ -142,7 +148,7 @@
208A..208C ; Common
208D ; Common
208E ; Common
20A0..20B5 ; Common
20A0..20B8 ; Common
2100..2101 ; Common
2102 ; Common
2103..2106 ; Common
@ -173,7 +179,8 @@
214B ; Common
214C..214D ; Common
214F ; Common
2153..215F ; Common
2150..215F ; Common
2189 ; Common
2190..2194 ; Common
2195..2199 ; Common
219A..219B ; Common
@ -206,7 +213,7 @@
239B..23B3 ; Common
23B4..23DB ; Common
23DC..23E1 ; Common
23E2..23E7 ; Common
23E2..23E8 ; Common
2400..2426 ; Common
2440..244A ; Common
2460..249B ; Common
@ -220,17 +227,17 @@
25F8..25FF ; Common
2600..266E ; Common
266F ; Common
2670..269D ; Common
26A0..26BC ; Common
26C0..26C3 ; Common
2670..26CD ; Common
26CF..26E1 ; Common
26E3 ; Common
26E8..26FF ; Common
2701..2704 ; Common
2706..2709 ; Common
270C..2727 ; Common
2729..274B ; Common
274D ; Common
274F..2752 ; Common
2756 ; Common
2758..275E ; Common
2756..275E ; Common
2761..2767 ; Common
2768 ; Common
2769 ; Common
@ -303,7 +310,7 @@
2B30..2B44 ; Common
2B45..2B46 ; Common
2B47..2B4C ; Common
2B50..2B54 ; Common
2B50..2B59 ; Common
2E00..2E01 ; Common
2E02 ; Common
2E03 ; Common
@ -335,7 +342,7 @@
2E29 ; Common
2E2A..2E2E ; Common
2E2F ; Common
2E30 ; Common
2E30..2E31 ; Common
2FF0..2FFB ; Common
3000 ; Common
3001..3003 ; Common
@ -379,8 +386,7 @@
3196..319F ; Common
31C0..31E3 ; Common
3220..3229 ; Common
322A..3243 ; Common
3250 ; Common
322A..3250 ; Common
3251..325F ; Common
327F ; Common
3280..3289 ; Common
@ -394,6 +400,10 @@ A717..A71F ; Common
A720..A721 ; Common
A788 ; Common
A789..A78A ; Common
A830..A835 ; Common
A836..A837 ; Common
A838 ; Common
A839 ; Common
FD3E ; Common
FD3F ; Common
FDFD ; Common
@ -545,10 +555,27 @@ FFFC..FFFD ; Common
1D7CE..1D7FF ; Common
1F000..1F02B ; Common
1F030..1F093 ; Common
1F100..1F10A ; Common
1F110..1F12E ; Common
1F131 ; Common
1F13D ; Common
1F13F ; Common
1F142 ; Common
1F146 ; Common
1F14A..1F14E ; Common
1F157 ; Common
1F15F ; Common
1F179 ; Common
1F17B..1F17C ; Common
1F17F ; Common
1F18A..1F18D ; Common
1F190 ; Common
1F210..1F231 ; Common
1F240..1F248 ; Common
E0001 ; Common
E0020..E007F ; Common
# Total code points: 5178
# Total code points: 5395
# ================================================
@ -583,9 +610,9 @@ E0020..E007F ; Common
2160..2182 ; Latin
2183..2184 ; Latin
2185..2188 ; Latin
2C60..2C6F ; Latin
2C71..2C7C ; Latin
2C60..2C7C ; Latin
2C7D ; Latin
2C7E..2C7F ; Latin
A722..A76F ; Latin
A770 ; Latin
A771..A787 ; Latin
@ -595,7 +622,7 @@ FB00..FB06 ; Latin
FF21..FF3A ; Latin
FF41..FF5A ; Latin
# Total code points: 1241
# Total code points: 1244
# ================================================
@ -657,9 +684,10 @@ FF41..FF5A ; Latin
0400..0481 ; Cyrillic
0482 ; Cyrillic
0483..0487 ; Cyrillic
0483..0484 ; Cyrillic
0487 ; Cyrillic
0488..0489 ; Cyrillic
048A..0523 ; Cyrillic
048A..0525 ; Cyrillic
1D2B ; Cyrillic
1D78 ; Cyrillic
2DE0..2DFF ; Cyrillic
@ -752,8 +780,9 @@ FDF0..FDFB ; Arabic
FDFC ; Arabic
FE70..FE74 ; Arabic
FE76..FEFC ; Arabic
10E60..10E7E ; Arabic
# Total code points: 999
# Total code points: 1030
# ================================================
@ -777,7 +806,7 @@ FE76..FEFC ; Arabic
# ================================================
0901..0902 ; Devanagari
0900..0902 ; Devanagari
0903 ; Devanagari
0904..0939 ; Devanagari
093C ; Devanagari
@ -786,16 +815,21 @@ FE76..FEFC ; Arabic
0941..0948 ; Devanagari
0949..094C ; Devanagari
094D ; Devanagari
094E ; Devanagari
0950 ; Devanagari
0953..0954 ; Devanagari
0953..0955 ; Devanagari
0958..0961 ; Devanagari
0962..0963 ; Devanagari
0966..096F ; Devanagari
0971 ; Devanagari
0972 ; Devanagari
097B..097F ; Devanagari
0979..097F ; Devanagari
A8E0..A8F1 ; Devanagari
A8F2..A8F7 ; Devanagari
A8F8..A8FA ; Devanagari
A8FB ; Devanagari
# Total code points: 107
# Total code points: 140
# ================================================
@ -824,8 +858,9 @@ FE76..FEFC ; Arabic
09F2..09F3 ; Bengali
09F4..09F9 ; Bengali
09FA ; Bengali
09FB ; Bengali
# Total code points: 91
# Total code points: 92
# ================================================
@ -1142,9 +1177,17 @@ FE76..FEFC ; Arabic
108E ; Myanmar
108F ; Myanmar
1090..1099 ; Myanmar
109A..109C ; Myanmar
109D ; Myanmar
109E..109F ; Myanmar
AA60..AA6F ; Myanmar
AA70 ; Myanmar
AA71..AA76 ; Myanmar
AA77..AA79 ; Myanmar
AA7A ; Myanmar
AA7B ; Myanmar
# Total code points: 156
# Total code points: 188
# ================================================
@ -1157,20 +1200,21 @@ FE76..FEFC ; Arabic
# ================================================
1100..1159 ; Hangul
115F..11A2 ; Hangul
11A8..11F9 ; Hangul
1100..11FF ; Hangul
3131..318E ; Hangul
3200..321E ; Hangul
3260..327E ; Hangul
A960..A97C ; Hangul
AC00..D7A3 ; Hangul
D7B0..D7C6 ; Hangul
D7CB..D7FB ; Hangul
FFA0..FFBE ; Hangul
FFC2..FFC7 ; Hangul
FFCA..FFCF ; Hangul
FFD2..FFD7 ; Hangul
FFDA..FFDC ; Hangul
# Total code points: 11620
# Total code points: 11737
# ================================================
@ -1216,11 +1260,13 @@ FFDA..FFDC ; Hangul
# ================================================
1400 ; Canadian_Aboriginal
1401..166C ; Canadian_Aboriginal
166D..166E ; Canadian_Aboriginal
166F..1676 ; Canadian_Aboriginal
166F..167F ; Canadian_Aboriginal
18B0..18F5 ; Canadian_Aboriginal
# Total code points: 630
# Total code points: 710
# ================================================
@ -1283,8 +1329,9 @@ FFDA..FFDC ; Hangul
3041..3096 ; Hiragana
309D..309E ; Hiragana
309F ; Hiragana
1F200 ; Hiragana
# Total code points: 89
# Total code points: 90
# ================================================
@ -1317,14 +1364,15 @@ FF71..FF9D ; Katakana
3038..303A ; Han
303B ; Han
3400..4DB5 ; Han
4E00..9FC3 ; Han
4E00..9FCB ; Han
F900..FA2D ; Han
FA30..FA6A ; Han
FA30..FA6D ; Han
FA70..FAD9 ; Han
20000..2A6D6 ; Han
2A700..2B734 ; Han
2F800..2FA1D ; Han
# Total code points: 71578
# Total code points: 75738
# ================================================
@ -1360,11 +1408,16 @@ A490..A4C6 ; Yi
# ================================================
0300..036F ; Inherited
0485..0486 ; Inherited
064B..0655 ; Inherited
0670 ; Inherited
0951..0952 ; Inherited
1CD0..1CD2 ; Inherited
1CD4..1CE0 ; Inherited
1CE2..1CE8 ; Inherited
1CED ; Inherited
1DC0..1DE6 ; Inherited
1DFE..1DFF ; Inherited
1DFD..1DFF ; Inherited
200C..200D ; Inherited
20D0..20DC ; Inherited
20DD..20E0 ; Inherited
@ -1382,7 +1435,7 @@ FE20..FE26 ; Inherited
1D1AA..1D1AD ; Inherited
E0100..E01EF ; Inherited
# Total code points: 496
# Total code points: 523
# ================================================
@ -1501,22 +1554,24 @@ E0100..E01EF ; Inherited
03E2..03EF ; Coptic
2C80..2CE4 ; Coptic
2CE5..2CEA ; Coptic
2CEB..2CEE ; Coptic
2CEF..2CF1 ; Coptic
2CF9..2CFC ; Coptic
2CFD ; Coptic
2CFE..2CFF ; Coptic
# Total code points: 128
# Total code points: 135
# ================================================
1980..19A9 ; New_Tai_Lue
1980..19AB ; New_Tai_Lue
19B0..19C0 ; New_Tai_Lue
19C1..19C7 ; New_Tai_Lue
19C8..19C9 ; New_Tai_Lue
19D0..19D9 ; New_Tai_Lue
19D0..19DA ; New_Tai_Lue
19DE..19DF ; New_Tai_Lue
# Total code points: 80
# Total code points: 83
# ================================================
@ -1606,10 +1661,10 @@ A828..A82B ; Syloti_Nagri
# ================================================
10900..10915 ; Phoenician
10916..10919 ; Phoenician
10916..1091B ; Phoenician
1091F ; Phoenician
# Total code points: 27
# Total code points: 29
# ================================================
@ -1744,4 +1799,174 @@ AA5C..AA5F ; Cham
# Total code points: 83
# ================================================
1A20..1A54 ; Tai_Tham
1A55 ; Tai_Tham
1A56 ; Tai_Tham
1A57 ; Tai_Tham
1A58..1A5E ; Tai_Tham
1A60 ; Tai_Tham
1A61 ; Tai_Tham
1A62 ; Tai_Tham
1A63..1A64 ; Tai_Tham
1A65..1A6C ; Tai_Tham
1A6D..1A72 ; Tai_Tham
1A73..1A7C ; Tai_Tham
1A7F ; Tai_Tham
1A80..1A89 ; Tai_Tham
1A90..1A99 ; Tai_Tham
1AA0..1AA6 ; Tai_Tham
1AA7 ; Tai_Tham
1AA8..1AAD ; Tai_Tham
# Total code points: 127
# ================================================
AA80..AAAF ; Tai_Viet
AAB0 ; Tai_Viet
AAB1 ; Tai_Viet
AAB2..AAB4 ; Tai_Viet
AAB5..AAB6 ; Tai_Viet
AAB7..AAB8 ; Tai_Viet
AAB9..AABD ; Tai_Viet
AABE..AABF ; Tai_Viet
AAC0 ; Tai_Viet
AAC1 ; Tai_Viet
AAC2 ; Tai_Viet
AADB..AADC ; Tai_Viet
AADD ; Tai_Viet
AADE..AADF ; Tai_Viet
# Total code points: 72
# ================================================
10B00..10B35 ; Avestan
10B39..10B3F ; Avestan
# Total code points: 61
# ================================================
13000..1342E ; Egyptian_Hieroglyphs
# Total code points: 1071
# ================================================
0800..0815 ; Samaritan
0816..0819 ; Samaritan
081A ; Samaritan
081B..0823 ; Samaritan
0824 ; Samaritan
0825..0827 ; Samaritan
0828 ; Samaritan
0829..082D ; Samaritan
0830..083E ; Samaritan
# Total code points: 61
# ================================================
A4D0..A4F7 ; Lisu
A4F8..A4FD ; Lisu
A4FE..A4FF ; Lisu
# Total code points: 48
# ================================================
A6A0..A6E5 ; Bamum
A6E6..A6EF ; Bamum
A6F0..A6F1 ; Bamum
A6F2..A6F7 ; Bamum
# Total code points: 88
# ================================================
A980..A982 ; Javanese
A983 ; Javanese
A984..A9B2 ; Javanese
A9B3 ; Javanese
A9B4..A9B5 ; Javanese
A9B6..A9B9 ; Javanese
A9BA..A9BB ; Javanese
A9BC ; Javanese
A9BD..A9C0 ; Javanese
A9C1..A9CD ; Javanese
A9CF ; Javanese
A9D0..A9D9 ; Javanese
A9DE..A9DF ; Javanese
# Total code points: 91
# ================================================
ABC0..ABE2 ; Meetei_Mayek
ABE3..ABE4 ; Meetei_Mayek
ABE5 ; Meetei_Mayek
ABE6..ABE7 ; Meetei_Mayek
ABE8 ; Meetei_Mayek
ABE9..ABEA ; Meetei_Mayek
ABEB ; Meetei_Mayek
ABEC ; Meetei_Mayek
ABED ; Meetei_Mayek
ABF0..ABF9 ; Meetei_Mayek
# Total code points: 56
# ================================================
10840..10855 ; Imperial_Aramaic
10857 ; Imperial_Aramaic
10858..1085F ; Imperial_Aramaic
# Total code points: 31
# ================================================
10A60..10A7C ; Old_South_Arabian
10A7D..10A7E ; Old_South_Arabian
10A7F ; Old_South_Arabian
# Total code points: 32
# ================================================
10B40..10B55 ; Inscriptional_Parthian
10B58..10B5F ; Inscriptional_Parthian
# Total code points: 30
# ================================================
10B60..10B72 ; Inscriptional_Pahlavi
10B78..10B7F ; Inscriptional_Pahlavi
# Total code points: 27
# ================================================
10C00..10C48 ; Old_Turkic
# Total code points: 73
# ================================================
11080..11081 ; Kaithi
11082 ; Kaithi
11083..110AF ; Kaithi
110B0..110B2 ; Kaithi
110B3..110B6 ; Kaithi
110B7..110B8 ; Kaithi
110B9..110BA ; Kaithi
110BB..110BC ; Kaithi
110BD ; Kaithi
110BE..110C1 ; Kaithi
# Total code points: 66
# EOF

View file

@ -1,10 +1,10 @@
# SentenceBreakProperty-5.1.0.txt
# Date: 2008-03-20, 17:55:34 GMT [MD]
# SentenceBreakProperty-5.2.0.txt
# Date: 2009-08-22, 04:58:44 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -49,14 +49,19 @@
0730..074A ; Extend
07A6..07B0 ; Extend
07EB..07F3 ; Extend
0901..0902 ; Extend
0816..0819 ; Extend
081B..0823 ; Extend
0825..0827 ; Extend
0829..082D ; Extend
0900..0902 ; Extend
0903 ; Extend
093C ; Extend
093E..0940 ; Extend
0941..0948 ; Extend
0949..094C ; Extend
094D ; Extend
0951..0954 ; Extend
094E ; Extend
0951..0955 ; Extend
0962..0963 ; Extend
0981 ; Extend
0982..0983 ; Extend
@ -181,6 +186,8 @@
1087..108C ; Extend
108D ; Extend
108F ; Extend
109A..109C ; Extend
109D ; Extend
135F ; Extend
1712..1714 ; Extend
1732..1734 ; Extend
@ -207,6 +214,18 @@
19C8..19C9 ; Extend
1A17..1A18 ; Extend
1A19..1A1B ; Extend
1A55 ; Extend
1A56 ; Extend
1A57 ; Extend
1A58..1A5E ; Extend
1A60 ; Extend
1A61 ; Extend
1A62 ; Extend
1A63..1A64 ; Extend
1A65..1A6C ; Extend
1A6D..1A72 ; Extend
1A73..1A7C ; Extend
1A7F ; Extend
1B00..1B03 ; Extend
1B04 ; Extend
1B34 ; Extend
@ -229,20 +248,28 @@
1C2C..1C33 ; Extend
1C34..1C35 ; Extend
1C36..1C37 ; Extend
1CD0..1CD2 ; Extend
1CD4..1CE0 ; Extend
1CE1 ; Extend
1CE2..1CE8 ; Extend
1CED ; Extend
1CF2 ; Extend
1DC0..1DE6 ; Extend
1DFE..1DFF ; Extend
1DFD..1DFF ; Extend
200C..200D ; Extend
20D0..20DC ; Extend
20DD..20E0 ; Extend
20E1 ; Extend
20E2..20E4 ; Extend
20E5..20F0 ; Extend
2CEF..2CF1 ; Extend
2DE0..2DFF ; Extend
302A..302F ; Extend
3099..309A ; Extend
A66F ; Extend
A670..A672 ; Extend
A67C..A67D ; Extend
A6F0..A6F1 ; Extend
A802 ; Extend
A806 ; Extend
A80B ; Extend
@ -252,9 +279,18 @@ A827 ; Extend
A880..A881 ; Extend
A8B4..A8C3 ; Extend
A8C4 ; Extend
A8E0..A8F1 ; Extend
A926..A92D ; Extend
A947..A951 ; Extend
A952..A953 ; Extend
A980..A982 ; Extend
A983 ; Extend
A9B3 ; Extend
A9B4..A9B5 ; Extend
A9B6..A9B9 ; Extend
A9BA..A9BB ; Extend
A9BC ; Extend
A9BD..A9C0 ; Extend
AA29..AA2E ; Extend
AA2F..AA30 ; Extend
AA31..AA32 ; Extend
@ -263,6 +299,19 @@ AA35..AA36 ; Extend
AA43 ; Extend
AA4C ; Extend
AA4D ; Extend
AA7B ; Extend
AAB0 ; Extend
AAB2..AAB4 ; Extend
AAB7..AAB8 ; Extend
AABE..AABF ; Extend
AAC1 ; Extend
ABE3..ABE4 ; Extend
ABE5 ; Extend
ABE6..ABE7 ; Extend
ABE8 ; Extend
ABE9..ABEA ; Extend
ABEC ; Extend
ABED ; Extend
FB1E ; Extend
FE00..FE0F ; Extend
FE20..FE26 ; Extend
@ -273,6 +322,12 @@ FF9E..FF9F ; Extend
10A0C..10A0F ; Extend
10A38..10A3A ; Extend
10A3F ; Extend
11080..11081 ; Extend
11082 ; Extend
110B0..110B2 ; Extend
110B3..110B6 ; Extend
110B7..110B8 ; Extend
110B9..110BA ; Extend
1D165..1D166 ; Extend
1D167..1D169 ; Extend
1D16D..1D172 ; Extend
@ -282,7 +337,7 @@ FF9E..FF9F ; Extend
1D242..1D244 ; Extend
E0100..E01EF ; Extend
# Total code points: 1285
# Total code points: 1455
# ================================================
@ -306,11 +361,12 @@ E0100..E01EF ; Extend
206A..206F ; Format
FEFF ; Format
FFF9..FFFB ; Format
110BD ; Format
1D173..1D17A ; Format
E0001 ; Format
E0020..E007F ; Format
# Total code points: 137
# Total code points: 138
# ================================================
@ -598,6 +654,7 @@ E0020..E007F ; Format
051F ; Lower
0521 ; Lower
0523 ; Lower
0525 ; Lower
0561..0587 ; Lower
1D00..1D2B ; Lower
1D2C..1D61 ; Lower
@ -749,8 +806,6 @@ E0020..E007F ; Format
1FE0..1FE7 ; Lower
1FF2..1FF4 ; Lower
1FF6..1FF7 ; Lower
2071 ; Lower
207F ; Lower
2090..2094 ; Lower
210A ; Lower
210E..210F ; Lower
@ -824,6 +879,8 @@ E0020..E007F ; Format
2CDF ; Lower
2CE1 ; Lower
2CE3..2CE4 ; Lower
2CEC ; Lower
2CEE ; Lower
2D00..2D25 ; Lower
A641 ; Lower
A643 ; Lower
@ -940,7 +997,7 @@ FF41..FF5A ; Lower
1D7C4..1D7C9 ; Lower
1D7CB ; Lower
# Total code points: 1906
# Total code points: 1907
# ================================================
@ -1208,6 +1265,7 @@ FF41..FF5A ; Lower
051E ; Upper
0520 ; Upper
0522 ; Upper
0524 ; Upper
0531..0556 ; Upper
10A0..10C5 ; Upper
1E00 ; Upper
@ -1374,10 +1432,10 @@ FF41..FF5A ; Lower
2C67 ; Upper
2C69 ; Upper
2C6B ; Upper
2C6D..2C6F ; Upper
2C6D..2C70 ; Upper
2C72 ; Upper
2C75 ; Upper
2C80 ; Upper
2C7E..2C80 ; Upper
2C82 ; Upper
2C84 ; Upper
2C86 ; Upper
@ -1427,6 +1485,8 @@ FF41..FF5A ; Lower
2CDE ; Upper
2CE0 ; Upper
2CE2 ; Upper
2CEB ; Upper
2CED ; Upper
A640 ; Upper
A642 ; Upper
A644 ; Upper
@ -1541,7 +1601,7 @@ FF21..FF3A ; Upper
1D790..1D7A8 ; Upper
1D7CA ; Upper
# Total code points: 1494
# Total code points: 1500
# ================================================
@ -1574,13 +1634,17 @@ FF21..FF3A ; Upper
07CA..07EA ; OLetter
07F4..07F5 ; OLetter
07FA ; OLetter
0800..0815 ; OLetter
081A ; OLetter
0824 ; OLetter
0828 ; OLetter
0904..0939 ; OLetter
093D ; OLetter
0950 ; OLetter
0958..0961 ; OLetter
0971 ; OLetter
0972 ; OLetter
097B..097F ; OLetter
0979..097F ; OLetter
0985..098C ; OLetter
098F..0990 ; OLetter
0993..09A8 ; OLetter
@ -1696,10 +1760,7 @@ FF21..FF3A ; Upper
108E ; OLetter
10D0..10FA ; OLetter
10FC ; OLetter
1100..1159 ; OLetter
115F..11A2 ; OLetter
11A8..11F9 ; OLetter
1200..1248 ; OLetter
1100..1248 ; OLetter
124A..124D ; OLetter
1250..1256 ; OLetter
1258 ; OLetter
@ -1718,7 +1779,7 @@ FF21..FF3A ; Upper
1380..138F ; OLetter
13A0..13F4 ; OLetter
1401..166C ; OLetter
166F..1676 ; OLetter
166F..167F ; OLetter
1681..169A ; OLetter
16A0..16EA ; OLetter
16EE..16F0 ; OLetter
@ -1736,12 +1797,15 @@ FF21..FF3A ; Upper
1844..1877 ; OLetter
1880..18A8 ; OLetter
18AA ; OLetter
18B0..18F5 ; OLetter
1900..191C ; OLetter
1950..196D ; OLetter
1970..1974 ; OLetter
1980..19A9 ; OLetter
1980..19AB ; OLetter
19C1..19C7 ; OLetter
1A00..1A16 ; OLetter
1A20..1A54 ; OLetter
1AA7 ; OLetter
1B05..1B33 ; OLetter
1B45..1B4B ; OLetter
1B83..1BA0 ; OLetter
@ -1750,6 +1814,10 @@ FF21..FF3A ; Upper
1C4D..1C4F ; OLetter
1C5A..1C77 ; OLetter
1C78..1C7D ; OLetter
1CE9..1CEC ; OLetter
1CEE..1CF1 ; OLetter
2071 ; OLetter
207F ; OLetter
2135..2138 ; OLetter
2180..2182 ; OLetter
2185..2188 ; OLetter
@ -1784,16 +1852,20 @@ FF21..FF3A ; Upper
31A0..31B7 ; OLetter
31F0..31FF ; OLetter
3400..4DB5 ; OLetter
4E00..9FC3 ; OLetter
4E00..9FCB ; OLetter
A000..A014 ; OLetter
A015 ; OLetter
A016..A48C ; OLetter
A4D0..A4F7 ; OLetter
A4F8..A4FD ; OLetter
A500..A60B ; OLetter
A60C ; OLetter
A610..A61F ; OLetter
A62A..A62B ; OLetter
A66E ; OLetter
A67F ; OLetter
A6A0..A6E5 ; OLetter
A6E6..A6EF ; OLetter
A717..A71F ; OLetter
A788 ; OLetter
A7FB..A801 ; OLetter
@ -1802,14 +1874,34 @@ A807..A80A ; OLetter
A80C..A822 ; OLetter
A840..A873 ; OLetter
A882..A8B3 ; OLetter
A8F2..A8F7 ; OLetter
A8FB ; OLetter
A90A..A925 ; OLetter
A930..A946 ; OLetter
A960..A97C ; OLetter
A984..A9B2 ; OLetter
A9CF ; OLetter
AA00..AA28 ; OLetter
AA40..AA42 ; OLetter
AA44..AA4B ; OLetter
AA60..AA6F ; OLetter
AA70 ; OLetter
AA71..AA76 ; OLetter
AA7A ; OLetter
AA80..AAAF ; OLetter
AAB1 ; OLetter
AAB5..AAB6 ; OLetter
AAB9..AABD ; OLetter
AAC0 ; OLetter
AAC2 ; OLetter
AADB..AADC ; OLetter
AADD ; OLetter
ABC0..ABE2 ; OLetter
AC00..D7A3 ; OLetter
D7B0..D7C6 ; OLetter
D7CB..D7FB ; OLetter
F900..FA2D ; OLetter
FA30..FA6A ; OLetter
FA30..FA6D ; OLetter
FA70..FAD9 ; OLetter
FB1D ; OLetter
FB1F..FB28 ; OLetter
@ -1858,19 +1950,27 @@ FFDA..FFDC ; OLetter
1080A..10835 ; OLetter
10837..10838 ; OLetter
1083C ; OLetter
1083F ; OLetter
1083F..10855 ; OLetter
10900..10915 ; OLetter
10920..10939 ; OLetter
10A00 ; OLetter
10A10..10A13 ; OLetter
10A15..10A17 ; OLetter
10A19..10A33 ; OLetter
10A60..10A7C ; OLetter
10B00..10B35 ; OLetter
10B40..10B55 ; OLetter
10B60..10B72 ; OLetter
10C00..10C48 ; OLetter
11083..110AF ; OLetter
12000..1236E ; OLetter
12400..12462 ; OLetter
13000..1342E ; OLetter
20000..2A6D6 ; OLetter
2A700..2B734 ; OLetter
2F800..2FA1D ; OLetter
# Total code points: 90320
# Total code points: 96405
# ================================================
@ -1896,7 +1996,9 @@ FFDA..FFDC ; OLetter
17E0..17E9 ; Numeric
1810..1819 ; Numeric
1946..194F ; Numeric
19D0..19D9 ; Numeric
19D0..19DA ; Numeric
1A80..1A89 ; Numeric
1A90..1A99 ; Numeric
1B50..1B59 ; Numeric
1BB0..1BB9 ; Numeric
1C40..1C49 ; Numeric
@ -1904,11 +2006,13 @@ FFDA..FFDC ; OLetter
A620..A629 ; Numeric
A8D0..A8D9 ; Numeric
A900..A909 ; Numeric
A9D0..A9D9 ; Numeric
AA50..AA59 ; Numeric
ABF0..ABF9 ; Numeric
104A0..104A9 ; Numeric
1D7CE..1D7FF ; Numeric
# Total code points: 362
# Total code points: 403
# ================================================
@ -1946,17 +2050,23 @@ FF0E ; ATerm
2047..2049 ; STerm
2E2E ; STerm
3002 ; STerm
A4FF ; STerm
A60E..A60F ; STerm
A6F3 ; STerm
A6F7 ; STerm
A876..A877 ; STerm
A8CE..A8CF ; STerm
A92F ; STerm
A9C8..A9C9 ; STerm
AA5D..AA5F ; STerm
ABEB ; STerm
FE56..FE57 ; STerm
FF01 ; STerm
FF1F ; STerm
FF61 ; STerm
110BE..110C1 ; STerm
# Total code points: 53
# Total code points: 63
# ================================================

View file

@ -1,10 +1,10 @@
# SpecialCasing-5.1.0.txt
# Date: 2008-03-03, 21:58:10 GMT [MD]
# SpecialCasing-5.2.0.txt
# Date: 2009-09-22, 23:25:59 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Special Casing Properties
#
@ -106,11 +106,11 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
# IMPORTANT-when capitalizing iota-subscript (0345)
# It MUST be in normalized form--moved to the end of any sequence of combining marks.
# This is because logically it represents a following base character!
# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript>
# It should never be the first character in a word, so in titlecasing it can be left as is.
# IMPORTANT-when iota-subscript (0345) is uppercased or titlecased,
# the result will be incorrect unless the iota-subscript is moved to the end
# of any sequence of combining marks. Otherwise, the accents will go on the capital iota.
# This process can be achieved by first transforming the text to NFC before casing.
# E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
# The following cases are already in the UnicodeData file, so are only commented here.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,10 @@
# WordBreakProperty-5.1.0.txt
# Date: 2008-03-20, 17:55:36 GMT [MD]
# WordBreakProperty-5.2.0.txt
# Date: 2009-07-12, 04:17:35 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -58,14 +58,19 @@
0730..074A ; Extend
07A6..07B0 ; Extend
07EB..07F3 ; Extend
0901..0902 ; Extend
0816..0819 ; Extend
081B..0823 ; Extend
0825..0827 ; Extend
0829..082D ; Extend
0900..0902 ; Extend
0903 ; Extend
093C ; Extend
093E..0940 ; Extend
0941..0948 ; Extend
0949..094C ; Extend
094D ; Extend
0951..0954 ; Extend
094E ; Extend
0951..0955 ; Extend
0962..0963 ; Extend
0981 ; Extend
0982..0983 ; Extend
@ -190,6 +195,8 @@
1087..108C ; Extend
108D ; Extend
108F ; Extend
109A..109C ; Extend
109D ; Extend
135F ; Extend
1712..1714 ; Extend
1732..1734 ; Extend
@ -216,6 +223,18 @@
19C8..19C9 ; Extend
1A17..1A18 ; Extend
1A19..1A1B ; Extend
1A55 ; Extend
1A56 ; Extend
1A57 ; Extend
1A58..1A5E ; Extend
1A60 ; Extend
1A61 ; Extend
1A62 ; Extend
1A63..1A64 ; Extend
1A65..1A6C ; Extend
1A6D..1A72 ; Extend
1A73..1A7C ; Extend
1A7F ; Extend
1B00..1B03 ; Extend
1B04 ; Extend
1B34 ; Extend
@ -238,20 +257,28 @@
1C2C..1C33 ; Extend
1C34..1C35 ; Extend
1C36..1C37 ; Extend
1CD0..1CD2 ; Extend
1CD4..1CE0 ; Extend
1CE1 ; Extend
1CE2..1CE8 ; Extend
1CED ; Extend
1CF2 ; Extend
1DC0..1DE6 ; Extend
1DFE..1DFF ; Extend
1DFD..1DFF ; Extend
200C..200D ; Extend
20D0..20DC ; Extend
20DD..20E0 ; Extend
20E1 ; Extend
20E2..20E4 ; Extend
20E5..20F0 ; Extend
2CEF..2CF1 ; Extend
2DE0..2DFF ; Extend
302A..302F ; Extend
3099..309A ; Extend
A66F ; Extend
A670..A672 ; Extend
A67C..A67D ; Extend
A6F0..A6F1 ; Extend
A802 ; Extend
A806 ; Extend
A80B ; Extend
@ -261,9 +288,18 @@ A827 ; Extend
A880..A881 ; Extend
A8B4..A8C3 ; Extend
A8C4 ; Extend
A8E0..A8F1 ; Extend
A926..A92D ; Extend
A947..A951 ; Extend
A952..A953 ; Extend
A980..A982 ; Extend
A983 ; Extend
A9B3 ; Extend
A9B4..A9B5 ; Extend
A9B6..A9B9 ; Extend
A9BA..A9BB ; Extend
A9BC ; Extend
A9BD..A9C0 ; Extend
AA29..AA2E ; Extend
AA2F..AA30 ; Extend
AA31..AA32 ; Extend
@ -272,6 +308,19 @@ AA35..AA36 ; Extend
AA43 ; Extend
AA4C ; Extend
AA4D ; Extend
AA7B ; Extend
AAB0 ; Extend
AAB2..AAB4 ; Extend
AAB7..AAB8 ; Extend
AABE..AABF ; Extend
AAC1 ; Extend
ABE3..ABE4 ; Extend
ABE5 ; Extend
ABE6..ABE7 ; Extend
ABE8 ; Extend
ABE9..ABEA ; Extend
ABEC ; Extend
ABED ; Extend
FB1E ; Extend
FE00..FE0F ; Extend
FE20..FE26 ; Extend
@ -282,6 +331,12 @@ FF9E..FF9F ; Extend
10A0C..10A0F ; Extend
10A38..10A3A ; Extend
10A3F ; Extend
11080..11081 ; Extend
11082 ; Extend
110B0..110B2 ; Extend
110B3..110B6 ; Extend
110B7..110B8 ; Extend
110B9..110BA ; Extend
1D165..1D166 ; Extend
1D167..1D169 ; Extend
1D16D..1D172 ; Extend
@ -291,7 +346,7 @@ FF9E..FF9F ; Extend
1D242..1D244 ; Extend
E0100..E01EF ; Extend
# Total code points: 1285
# Total code points: 1455
# ================================================
@ -300,13 +355,13 @@ E0100..E01EF ; Extend
06DD ; Format
070F ; Format
17B4..17B5 ; Format
200B ; Format
200E..200F ; Format
202A..202E ; Format
2060..2064 ; Format
206A..206F ; Format
FEFF ; Format
FFF9..FFFB ; Format
110BD ; Format
1D173..1D17A ; Format
E0001 ; Format
E0020..E007F ; Format
@ -362,7 +417,7 @@ FF71..FF9D ; Katakana
038E..03A1 ; ALetter
03A3..03F5 ; ALetter
03F7..0481 ; ALetter
048A..0523 ; ALetter
048A..0525 ; ALetter
0531..0556 ; ALetter
0559 ; ALetter
0561..0587 ; ALetter
@ -386,13 +441,17 @@ FF71..FF9D ; Katakana
07CA..07EA ; ALetter
07F4..07F5 ; ALetter
07FA ; ALetter
0800..0815 ; ALetter
081A ; ALetter
0824 ; ALetter
0828 ; ALetter
0904..0939 ; ALetter
093D ; ALetter
0950 ; ALetter
0958..0961 ; ALetter
0971 ; ALetter
0972 ; ALetter
097B..097F ; ALetter
0979..097F ; ALetter
0985..098C ; ALetter
098F..0990 ; ALetter
0993..09A8 ; ALetter
@ -479,10 +538,7 @@ FF71..FF9D ; Katakana
10A0..10C5 ; ALetter
10D0..10FA ; ALetter
10FC ; ALetter
1100..1159 ; ALetter
115F..11A2 ; ALetter
11A8..11F9 ; ALetter
1200..1248 ; ALetter
1100..1248 ; ALetter
124A..124D ; ALetter
1250..1256 ; ALetter
1258 ; ALetter
@ -501,7 +557,7 @@ FF71..FF9D ; Katakana
1380..138F ; ALetter
13A0..13F4 ; ALetter
1401..166C ; ALetter
166F..1676 ; ALetter
166F..167F ; ALetter
1681..169A ; ALetter
16A0..16EA ; ALetter
16EE..16F0 ; ALetter
@ -516,6 +572,7 @@ FF71..FF9D ; Katakana
1844..1877 ; ALetter
1880..18A8 ; ALetter
18AA ; ALetter
18B0..18F5 ; ALetter
1900..191C ; ALetter
1A00..1A16 ; ALetter
1B05..1B33 ; ALetter
@ -526,6 +583,8 @@ FF71..FF9D ; Katakana
1C4D..1C4F ; ALetter
1C5A..1C77 ; ALetter
1C78..1C7D ; ALetter
1CE9..1CEC ; ALetter
1CEE..1CF1 ; ALetter
1D00..1D2B ; ALetter
1D2C..1D61 ; ALetter
1D62..1D77 ; ALetter
@ -575,10 +634,10 @@ FF71..FF9D ; Katakana
24B6..24E9 ; ALetter
2C00..2C2E ; ALetter
2C30..2C5E ; ALetter
2C60..2C6F ; ALetter
2C71..2C7C ; ALetter
2C60..2C7C ; ALetter
2C7D ; ALetter
2C80..2CE4 ; ALetter
2C7E..2CE4 ; ALetter
2CEB..2CEE ; ALetter
2D00..2D25 ; ALetter
2D30..2D65 ; ALetter
2D6F ; ALetter
@ -601,6 +660,8 @@ FF71..FF9D ; Katakana
A000..A014 ; ALetter
A015 ; ALetter
A016..A48C ; ALetter
A4D0..A4F7 ; ALetter
A4F8..A4FD ; ALetter
A500..A60B ; ALetter
A60C ; ALetter
A610..A61F ; ALetter
@ -610,6 +671,8 @@ A662..A66D ; ALetter
A66E ; ALetter
A67F ; ALetter
A680..A697 ; ALetter
A6A0..A6E5 ; ALetter
A6E6..A6EF ; ALetter
A717..A71F ; ALetter
A722..A76F ; ALetter
A770 ; ALetter
@ -622,12 +685,20 @@ A807..A80A ; ALetter
A80C..A822 ; ALetter
A840..A873 ; ALetter
A882..A8B3 ; ALetter
A8F2..A8F7 ; ALetter
A8FB ; ALetter
A90A..A925 ; ALetter
A930..A946 ; ALetter
A960..A97C ; ALetter
A984..A9B2 ; ALetter
A9CF ; ALetter
AA00..AA28 ; ALetter
AA40..AA42 ; ALetter
AA44..AA4B ; ALetter
ABC0..ABE2 ; ALetter
AC00..D7A3 ; ALetter
D7B0..D7C6 ; ALetter
D7CB..D7FB ; ALetter
FB00..FB06 ; ALetter
FB13..FB17 ; ALetter
FB1D ; ALetter
@ -677,15 +748,22 @@ FFDA..FFDC ; ALetter
1080A..10835 ; ALetter
10837..10838 ; ALetter
1083C ; ALetter
1083F ; ALetter
1083F..10855 ; ALetter
10900..10915 ; ALetter
10920..10939 ; ALetter
10A00 ; ALetter
10A10..10A13 ; ALetter
10A15..10A17 ; ALetter
10A19..10A33 ; ALetter
10A60..10A7C ; ALetter
10B00..10B35 ; ALetter
10B40..10B55 ; ALetter
10B60..10B72 ; ALetter
10C00..10C48 ; ALetter
11083..110AF ; ALetter
12000..1236E ; ALetter
12400..12462 ; ALetter
13000..1342E ; ALetter
1D400..1D454 ; ALetter
1D456..1D49C ; ALetter
1D49E..1D49F ; ALetter
@ -717,7 +795,7 @@ FFDA..FFDC ; ALetter
1D7AA..1D7C2 ; ALetter
1D7C4..1D7CB ; ALetter
# Total code points: 21903
# Total code points: 23694
# ================================================
@ -788,7 +866,9 @@ FF0E ; MidNumLet
17E0..17E9 ; Numeric
1810..1819 ; Numeric
1946..194F ; Numeric
19D0..19D9 ; Numeric
19D0..19DA ; Numeric
1A80..1A89 ; Numeric
1A90..1A99 ; Numeric
1B50..1B59 ; Numeric
1BB0..1BB9 ; Numeric
1C40..1C49 ; Numeric
@ -796,11 +876,13 @@ FF0E ; MidNumLet
A620..A629 ; Numeric
A8D0..A8D9 ; Numeric
A900..A909 ; Numeric
A9D0..A9D9 ; Numeric
AA50..AA59 ; Numeric
ABF0..ABF9 ; Numeric
104A0..104A9 ; Numeric
1D7CE..1D7FF ; Numeric
# Total code points: 361
# Total code points: 402
# ================================================

View file

@ -1,4 +1,4 @@
* Copyright (C) 2004-2008, International Business Machines
* Copyright (C) 2004-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: changes.txt
@ -13,6 +13,250 @@
---------------------------------------------------------------------------- ***
Unicode 5.2 update
*** related ICU Trac tickets
7084 Unicode 5.2
7167 verify collation bytes
7235 Java test NAME_ALIAS
7236 Java DerivedCoreProperties.txt test
7237 Java BidiTest.txt
7238 UTrie2 in core unidata
7239 test for tailoring gaps
7240 Java fix CollationMiscTest
7243 update layout engine for Unicode 5.2
*** Unicode version numbers
- makedata.mak
- uchar.h
- configure.in & configure
- update ucdVersion in gennames.c if an algorithmic range changes
*** data files & enums & parser code
* file preparation
python source\tools\genprops\misc\ucdcopy.py "C:\Documents and Settings\mscherer\My Documents\unicode\ucd\5.2.0" C:\svn\icuproj\icu\trunk\source\data\unidata
- includes finding files regardless of version numbers,
copying them, and performing the equivalent processing of the
ucdstrip and ucdmerge tools on the desired set of files
* notes on changes
- PropertyAliases.txt
moved from numeric to enumerated:
ccc ; Canonical_Combining_Class
new string properties:
NFKC_CF ; NFKC_Casefold
Name_Alias; Name_Alias
new binary properties:
Cased ; Cased
CI ; Case_Ignorable
CWCF ; Changes_When_Casefolded
CWCM ; Changes_When_Casemapped
CWKCF ; Changes_When_NFKC_Casefolded
CWL ; Changes_When_Lowercased
CWT ; Changes_When_Titlecased
CWU ; Changes_When_Uppercased
new CJK Unihan properties (not supported by ICU)
- PropertyValueAliases.txt
new block names
new scripts
one script code change:
sc ; Qaai ; Inherited
->
sc ; Zinh ; Inherited ; Qaai
new Line_Break (lb) value:
lb ; CP ; Close_Parenthesis
new Joining_Group (jg) values: Farsi_Yeh, Nya
other new values:
ccc; 214; ATA ; Attached_Above
- DerivedBidiClass.txt
new default-R range: U+1E800 - U+1EFFF
- UnicodeData.txt
all of the ISO comments are gone
new CJK block end:
9FC3;<CJK Ideograph, Last> -> 9FCB;<CJK Ideograph, Last>
new CJK block:
2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;;
2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;;
* genpname
- run preparse.pl
+ cd \svn\icuproj\icu\trunk\source\tools\genpname
+ make sure that data.h is writable
+ perl preparse.pl \svn\icuproj\icu\trunk > out.txt
+ preparse.pl complains with errors like the following:
Error: sc:Egyp already set to Egyptian_Hieroglyphs, cannot set to Egyp at preparse.pl line 1322, <GEN6> line 34.
This is because ICU 4.0 had scripts from ISO 15924 which are now
added to Unicode 5.2, and the Perl script shows a conflict between SyntheticPropertyValueAliases.txt
and PropertyValueAliases.txt.
-> Removed duplicate script entries from SyntheticPropertyValueAliases.txt:
Egyp, Java, Lana, Mtei, Orkh, Armi, Avst, Kthi, Phli, Prti, Samr, Tavt
+ preparse.pl complains with errors about block names missing from uchar.h; add them
* uchar.h & uscript.h & uprops.h & uprops.c & genprops
- new block & script values
+ 26 new blocks
copy new blocks from Blocks.txt
MS VC++ 2008 regular expression:
find "^{[0-9A-F]+}\.\.{[0-9A-F]+}; {[A-Z].+}$"
replace with " UBLOCK_\3 = 172, /*[\1]*/"
+ several new script values already added in ICU 4.0 for ISO 15924 coverage
(removed from SyntheticPropertyValueAliases.txt, see genpname notes above)
+ 3 new script values added for ISO 15924 and Unicode 5.2 coverage
+ 1 new script value added for ISO 15924 coverage (not in Unicode 5.2)
(added to SyntheticPropertyValueAliases.txt)
- new Joining Group (JG) values: Farsi_Yeh, Nya
- new Line_Break (lb) value:
lb ; CP ; Close_Parenthesis
* hardcoded Unihan range end/limit
- Unihan range end moves from 9FC3 to 9FCB
search for both 9FC3 (end) and 9FC4 (limit) (regex 9FC[34], case-insensitive)
+ do change gennames.c
* Compare definitions of new binary properties with what we used to use
in algorithms, to see if the definitions changed.
- Verified that definitions for Cased and Case_Ignorable are unchanged.
The gencase tool now parses the newly public Case_Ignorable values
in case the definition changes in the future.
* uchar.c & uprops.h & uprops.c & genprops
- new numeric values that didn't exist in Unicode data before:
1/7, 1/9, 1/10, 3/10, 1/16, 3/16
the ones with denominators >9 cannot be supported by uprops.icu formatVersion 5,
therefore redesign the encoding of numeric types and values for formatVersion 6;
design for simple numbers up to at least 144 ("one gross"),
large values up to at least 10^20,
and fractions with numerators -1..17 and denominators 1..16
to cover current and expected future values
(e.g., more Han numeric values, Meroitic twelfths)
* reimplement Hangul_Syllable_Type for new Jamo characters
- the old code assumed that all Jamo characters are in the 11xx block
- Unicode 5.2 fills holes there and adds new Jamo characters in
A960..A97F; Hangul Jamo Extended-A
and in
D7B0..D7FF; Hangul Jamo Extended-B
- Hangul_Syllable_Type can be trivially derived from a subset of
Grapheme_Cluster_Break values
* build Unicode data source code for hardcoding core data
C:\svn\icuproj\icu\trunk\source\data>NMAKE /f makedata.mak ICUMAKE=\svn\icuproj\icu\trunk\source\data\ CFG=x86\release uni-core-data
ICU data make path is \svn\icuproj\icu\trunk\source\data\
ICU root path is \svn\icuproj\icu\trunk
Information: cannot find "ucmlocal.mk". Not building user-additional converter files.
Information: cannot find "brklocal.mk". Not building user-additional break iterator files.
Information: cannot find "reslocal.mk". Not building user-additional resource bundle files.
Information: cannot find "collocal.mk". Not building user-additional resource bundle files.
Information: cannot find "rbnflocal.mk". Not building user-additional resource bundle files.
Information: cannot find "trnslocal.mk". Not building user-additional transliterator files.
Information: cannot find "misclocal.mk". Not building user-additional miscellaenous files.
Information: cannot find "spreplocal.mk". Not building user-additional stringprep files.
Creating data file for Unicode Property Names
Creating data file for Unicode Character Properties
Creating data file for Unicode Case Mapping Properties
Creating data file for Unicode BiDi/Shaping Properties
Creating data file for Unicode Normalization
Unicode .icu files built to "\svn\icuproj\icu\trunk\source\data\out\build\icudt43l"
Unicode .c source files built to "\svn\icuproj\icu\trunk\source\data\out\tmp"
- copy the .c source files to C:\svn\icuproj\icu\trunk\source\common
and rebuild the common library
*** UCA
- update FractionalUCA.txt with new canonical closure (output from Mark's Unicode tools)
- update source/data/unidata/UCARules.txt with UCA_Rules_SHORT.txt from Mark's Unicode tools
- update source/test/testdata/CollationTest_*.txt with output from Mark's Unicode tools
[ Begin obsolete instructions:
Starting with UCA 5.2, we use the CollationTest_*_SHORT.txt files not the *_STUB.txt files.
- generate the source/test/testdata/CollationTest_*_STUB.txt files via source/tools/genuca/genteststub.py
on Windows:
python C:\svn\icuproj\icu\trunk\source\tools\genuca\genteststub.py CollationTest_NON_IGNORABLE_SHORT.txt CollationTest_NON_IGNORABLE_STUB.txt
python C:\svn\icuproj\icu\trunk\source\tools\genuca\genteststub.py CollationTest_SHIFTED_SHORT.txt CollationTest_SHIFTED_STUB.txt
End obsolete instructions]
- run all tests with the *_SHORT.txt or the full files (the full ones have comments)
not just the *_STUB.txt files
- note on intltest: if collate/UCAConformanceTest fails, then
utility/MultithreadTest/TestCollators will fail as well;
fix the conformance test before looking into the multi-thread test
*** Implement Cased & Case_Ignorable properties
- via UProperty; call ucase.h functions ucase_getType() and ucase_getTypeOrIgnorable()
- Problem: These properties should be disjoint, but aren't
- UTC 2009nov decision: skip all Case_Ignorable regardless of whether they are Cased or not
- change ucase.icu to be able to store any combination of Cased and Case_Ignorable
*** Implement Changes_When_Xyz properties
- without stored data
*** Implement Name_Alias property
- add it as another name field in unames.icu
- make it available via u_charName() and UCharNameChoice
- consider it in u_charFromName()
*** Break iterators
* Update break iterator rules to new UAX versions and new property values
* Update source/test/testdata/<boundary>Test.txt files from <unicode.org ucd>/ucd/auxiliary
*** new BidiTest file
- review format and data
- copy BidiTest.txt to source/test/testdata
- write test code using this data
- fix ICU code where it fails the conformance test
*** Java
- generally, find and update code corresponding to C/C++
- UCharacter.UnicodeBlock constants:
a) add an _ID integer per new block, update COUNT
b) add a class instance per new block
Visual Studio regex:
find UBLOCK_{[^ ]+} = [0-9]+, {/.+}
replace with public static final UnicodeBlock \1 = new UnicodeBlock("\1", \1_ID); \2
- CHAR_NAME_ALIAS -> UCharacter.getNameAlias() and getCharFromNameAlias()
- port test changes to Java
>> DONE with the above, TODO the following <<
*** LayoutEngine script information
* Run ICU4J com.ibm.icu.dev.tool.layout.ScriptNameBuilder. This generates LEScripts.h, LELanguage.h,
ScriptAndLanguageTags.h and ScriptAndLanguageTags.cpp in the working directory. (It also generates
ScriptRunData.cpp, which is no longer needed.)
The generated files have a current copyright date and "@draft" statement.
-> Eric Mader wrote in email on 20090930:
"I think the tool has been modified to update @draft to @stable for
older scripts and to add @draft for new scripts.
(I worked with an intern on this last year.)
You should check the output after you run it."
* copy the above files into <icu>/source/layout, replacing the old files.
Add new default entries to the indicClassTables array in <icu>/source/layout/IndicClassTables.cpp
and the complexTable array in <icu>/source/layoutex/ParagraphLayout.cpp. (This step should be automated...)
-> Eric Mader wrote in email on 20090930:
"This is just a matter of making sure that all the per-script tables have
entries for any new scripts that were added.
If any new Indic characters were added, then the class tables in
IndicClassTables.cpp should be updated to reflect this.
John Emmons should know how to do this if it's required."
* rebuild the layout and layoutex libraries.
*** Documentation
- Update User Guide
+ Jamo_Short_Name, sfc->scf, binary property value aliases
---------------------------------------------------------------------------- ***
Unicode 5.1 update
*** related ICU Trac tickets

View file

@ -284,20 +284,17 @@ void TestProperty()
int32_t tempLength;
UErrorCode status = U_ZERO_ERROR;
/*
All the collations have the same version in an ICU
version.
ICU 2.0 currVersionArray = {0x18, 0xC0, 0x02, 0x02};
ICU 2.1 currVersionArray = {0x19, 0x00, 0x03, 0x03};
ICU 2.2 currVersionArray = {0x21, 0x40, 0x04, 0x04};
ICU 2.4 currVersionArray = {0x21, 0x40, 0x04, 0x04};
ICU 2.6 currVersionArray = {0x21, 0x40, 0x03, 0x03};
ICU 2.8 currVersionArray = {0x29, 0x80, 0x00, 0x04};
ICU 3.4 currVersionArray = {0x31, 0xC0, 0x00, 0x04};
*/
UVersionInfo currVersionArray = {0x31, 0xC0, 0x00, 0x29};
/* ICU 3.4 had UCA 4.1 */
/*UVersionInfo currUCAVersionArray = {4, 1, 0, 0};*/
UVersionInfo currUCAVersionArray = {5, 1, 0, 0};
* Expected version of the English collator.
* Currently, the major/minor version numbers change when the builder code
* changes,
* number 2 is from the tailoring data version and
* number 3 is the UCA version.
* This changes with every UCA version change, and the expected value
* needs to be adjusted.
* Same in intltest/apicoll.cpp.
*/
UVersionInfo currVersionArray = {0x31, 0xC0, 0x00, 0x2A};
UVersionInfo currUCAVersionArray = {5, 2, 0, 0};
UVersionInfo versionArray = {0, 0, 0, 0};
UVersionInfo versionUCAArray = {0, 0, 0, 0};
@ -886,7 +883,7 @@ void TestOpenVsOpenRules(){
int32_t sortKeyLen1, sortKeyLen2;
uint8_t *sortKey1 = NULL, *sortKey2 = NULL;
ULocaleData *uld;
uint32_t x, y, z;
int32_t x, y, z;
USet *eSet;
int32_t eSize;
int strSize;
@ -998,36 +995,6 @@ void TestOpenVsOpenRules(){
void TestSortKey()
{
uint8_t *sortk1 = NULL, *sortk2 = NULL, *sortk3 = NULL, *sortkEmpty = NULL;
uint8_t sortk2_compat[] = {
/* 3.9 key, from UCA 5.1 */
0x2c, 0x2e, 0x30, 0x32, 0x2c, 0x01,
0x09, 0x01, 0x09, 0x01, 0x2b, 0x01,
0x92, 0x93, 0x94, 0x95, 0x92, 0x0
/* 3.6 key, from UCA 5.0 */
/*
0x29, 0x2b, 0x2d, 0x2f, 0x29, 0x01,
0x09, 0x01, 0x09, 0x01, 0x28, 0x01,
0x92, 0x93, 0x94, 0x95, 0x92, 0x00
*/
/* 3.4 key, from UCA 4.1 */
/* 0x28, 0x2a, 0x2c, 0x2e, 0x28, 0x01, 0x09, 0x01, 0x09, 0x01, 0x27, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00 */
/* 2.6.1 key */
/* 0x26, 0x28, 0x2A, 0x2C, 0x26, 0x01, 0x09, 0x01, 0x09, 0x01, 0x25, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00 */
/* 2.2 key */
/*0x1D, 0x1F, 0x21, 0x23, 0x1D, 0x01, 0x09, 0x01, 0x09, 0x01, 0x1C, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00*/
/* 2.0 key */
/*0x19, 0x1B, 0x1D, 0x1F, 0x19, 0x01, 0x09, 0x01, 0x09, 0x01, 0x18, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00*/
/* 1.8.1 key.*/
/*0x19, 0x1B, 0x1D, 0x1F, 0x19, 0x01, 0x0A, 0x01, 0x0A, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00*/
/*this is a 1.8 sortkey */
/*0x17, 0x19, 0x1B, 0x1D, 0x17, 0x01, 0x08, 0x01, 0x08, 0x00*/
/*this is a 1.7 sortkey */
/*0x02, 0x54, 0x02, 0x55, 0x02, 0x56, 0x02, 0x57, 0x02, 0x54, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00*/
/* this is a 1.6 sortkey */
/*0x00, 0x53, 0x00, 0x54, 0x00, 0x55, 0x00, 0x56, 0x00, 0x53, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00*/
};
int32_t sortklen, osortklen;
uint32_t toStringLen=0;
UCollator *col;
@ -1104,9 +1071,7 @@ void TestSortKey()
doAssert( (memcmp(sortk2, sortk1, sortklen) < 0), "Result should be \"abcda\" < \"Abcda\"");
doAssert( (memcmp(sortk2, sortk3, sortklen) == 0), "Result should be \"abcda\" == \"abcda\"");
doAssert( (memcmp(sortk2, sortk2_compat, sortklen) == 0), "Binary format for 'abcda' sortkey different!");
resultP = ucol_sortKeyToString(col, sortk2_compat, toStringBuffer, &toStringLen);
resultP = ucol_sortKeyToString(col, sortk3, toStringBuffer, &toStringLen);
doAssert( (resultP != 0), "sortKeyToString failed!");
#if 1 /* verbose log of sortkeys */
@ -1978,7 +1943,17 @@ static void TestShortString(void)
int32_t expectedOffset;
uint32_t expectedIdentifier;
} testCases[] = {
{"LDE_RDE_KPHONEBOOK_T0041_ZLATN","B2C00_KPHONEBOOK_LDE", "de@collation=phonebook", U_USING_FALLBACK_WARNING, 0, 0 },
/*
* The following expectedOutput contains a collation weight (2D00 from UCA 5.2)
* which is the primary weight for the character named by the T item
* in the input (T0041, that is, U+0041 'A').
* When that character gets a different weight in FractionalUCA.txt,
* the expectedOutput needs to be adjusted.
* That is, when we upgrade to a new UCA version or change collation
* in such a way that the absolute weight for 'A' changes,
* we will get a test failure here and need to adjust the test case.
*/
{"LDE_RDE_KPHONEBOOK_T0041_ZLATN","B2D00_KPHONEBOOK_LDE", "de@collation=phonebook", U_USING_FALLBACK_WARNING, 0, 0 },
{"LEN_RUS_NO_AS_S4","AS_LEN_NO_S4", NULL, U_USING_FALLBACK_WARNING, 0, 0 },
{"LDE_VPHONEBOOK_EO_SI","EO_KPHONEBOOK_LDE_SI", "de@collation=phonebook", U_ZERO_ERROR, 0, 0 },
{"LDE_Kphonebook","KPHONEBOOK_LDE", "de@collation=phonebook", U_ZERO_ERROR, 0, 0 },

View file

@ -494,7 +494,7 @@ char *aescstrdup(const UChar* unichars,int32_t length){
newString = (char*)ctst_malloc ( sizeof(char) * 8 * (length +1));
target = newString;
targetLimit = newString+sizeof(char) * 8 * (length +1);
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, &cb, &p, &errorCode);
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, &cb, &p, &errorCode);
ucnv_fromUnicode(conv,&target,targetLimit, &unichars, (UChar*)(unichars+length),NULL,TRUE,&errorCode);
ucnv_close(conv);
*target = '\0';

View file

@ -1619,8 +1619,8 @@ static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
}
if ((primary != 0 && primary < primarymax)
|| ((primary & 0xFF) == 0xFF) || (((primary>>8) & 0xFF) == 0xFF)
|| ((primary & 0xFF) && ((primary & 0xFF) <= 0x03))
|| (((primary>>8) & 0xFF) && ((primary>>8) & 0xFF) <= 0x03)
|| ((primary & 0xFF) && ((primary & 0xFF) <= 2))
|| (((primary>>8) & 0xFF) && ((primary>>8) & 0xFF) <= 2)
|| (primary >= 0xFE00 && !isContinuation(ce))) {
log_err("UCA primary weight out of bounds: %04X for string starting with %04X\n",
primary, codepoints[0]);
@ -1659,6 +1659,7 @@ static void TestCEValidity()
int count = 0;
int maxCount = 0;
UChar contextCPs[3];
UChar32 c;
UParseError parseError;
if (U_FAILURE(status)) {
log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
@ -1682,12 +1683,18 @@ static void TestCEValidity()
}
log_verbose("Testing UCA elements for the whole range of unicode characters\n");
codepoints[0] = 0;
while (codepoints[0] < 0xFFFF) {
if (u_isdefined((UChar32)codepoints[0])) {
for (c = 0; c <= 0xffff; ++c) {
if (u_isdefined(c)) {
codepoints[0] = (UChar)c;
checkCEValidity(coll, codepoints, 1, 5, 86);
}
codepoints[0] ++;
}
for (; c <= 0x10ffff; ++c) {
if (u_isdefined(c)) {
int32_t i = 0;
U16_APPEND_UNSAFE(codepoints, i, c);
checkCEValidity(coll, codepoints, i, 5, 86);
}
}
ucol_close(coll);

View file

@ -2324,7 +2324,7 @@ static void TestGetLocale(void) {
const char *req = "es_AR_BUENOSAIRES", *valid, *actual;
obj = ucol_open(req, &ec);
if (U_FAILURE(ec)) {
log_err("ucol_open failed\n");
log_err("ucol_open failed - %s\n", u_errorName(ec));
return;
}
valid = ucol_getLocaleByType(obj, ULOC_VALID_LOCALE, &ec);

View file

@ -3618,9 +3618,16 @@ static void TestRuleOptions(void) {
{ "b", "\\u02d0", "a", "\\u02d1"}, 4
},
/*
* The character in the second ordering test string
* has to match the character that has the [last regular] weight
* which changes with each UCA version.
* See the bottom of FractionalUCA.txt which says something like
* [last regular [CE 27, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
*/
{ "&[before 1][last regular]<b"
"&[last regular]<a",
{ "b", "\\uD808\\uDF6E", "a", "\\u4e00" }, 4
{ "b", "\\U0001342E", "a", "\\u4e00" }, 4
},
{ "&[before 1][first implicit]<b"
@ -4720,34 +4727,6 @@ static void TestTailorNULL( void ) {
ucol_close(coll);
}
static void
TestThaiSortKey(void)
{
UChar yamakan = 0x0E4E;
UErrorCode status = U_ZERO_ERROR;
uint8_t key[256];
int32_t keyLen = 0;
/* NOTE: there is a Thai tailoring that moves Yammakan. It should not move it, */
/* since it stays in the same relative position. This should be addressed in CLDR */
/* UCA 4.0 uint8_t expectedKey[256] = { 0x01, 0xd9, 0xb2, 0x01, 0x05, 0x00 }; */
/* UCA 4.1 uint8_t expectedKey[256] = { 0x01, 0xdb, 0x3a, 0x01, 0x05, 0x00 }; */
/* UCA 5.0 uint8_t expectedKey[256] = { 0x01, 0xdc, 0xce, 0x01, 0x05, 0x00 }; */
/* UCA 5.1 moves Yammakan */
uint8_t expectedKey[256] = { 0x01, 0xe0, 0x4e, 0x01, 0x05, 0x00 };
UCollator *coll = ucol_open("th", &status);
if(U_FAILURE(status)) {
log_err_status(status, "Could not open a collator, exiting (%s)\n", u_errorName(status));
return;
}
keyLen = ucol_getSortKey(coll, &yamakan, 1, key, 256);
if(strcmp((char *)key, (char *)expectedKey)) {
log_err("Yammakan key is different from ICU 4.0!\n");
}
ucol_close(coll);
}
static void
TestUpperFirstQuaternary(void)
{
@ -5050,11 +5029,14 @@ TestTailor6179(void)
{ 0xFDD0,0x009E, 0}
};
/* UCA5.1, the value may increase in later version. */
/*
* These values from FractionalUCA.txt will change,
* and need to be updated here.
*/
uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0};
uint8_t lastPrimaryIgnCE[6]={1, 0xE7, 0xB9, 1, 5, 0};
uint8_t lastPrimaryIgnCE[6]={1, 0xE3, 0xC9, 1, 5, 0};
uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
uint8_t lastSecondaryIgnCE[6]={1, 1, 0x05, 0};
uint8_t lastSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
/* Test [Last Primary ignorable] */
@ -5508,7 +5490,6 @@ void addMiscCollTest(TestNode** root)
TEST(TestBeforeTightening);
/*TEST(TestMoreBefore);*/
TEST(TestTailorNULL);
TEST(TestThaiSortKey);
TEST(TestUpperFirstQuaternary);
TEST(TestJ4960);
TEST(TestJ5223);

View file

@ -304,17 +304,19 @@ void TestUScriptCodeAPI(){
* Whenever this happens, the long script names here need to be updated.
*/
static const char* expectedLong[] = {
"Balinese", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
"Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kayah_Li", "Latf", "Latg",
"Lepcha", "Lina", "Mand", "Maya", "Mero", "Nko", "Orkh", "Perm", "Phags_Pa", "Phoenician",
"Balinese", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
"Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg",
"Lepcha", "Lina", "Mand", "Maya", "Mero", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
"Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
"Zxxx", "Unknown",
"Carian", "Jpan", "Lana", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
"Moon", "Mtei",
"Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
"Moon", "Meetei_Mayek",
/* new in ICU 4.0 */
"Armi", "Avst", "Cakm", "Kore",
"Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
"Imperial_Aramaic", "Avestan", "Cakm", "Kore",
"Kaithi", "Mani", "Inscriptional_Pahlavi", "Phlp", "Phlv", "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
"Zmth", "Zsym",
/* new in ICU 4.4 */
"Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
};
static const char* expectedShort[] = {
"Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
@ -328,8 +330,14 @@ void TestUScriptCodeAPI(){
"Armi", "Avst", "Cakm", "Kore",
"Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
"Zmth", "Zsym",
/* new in ICU 4.4 */
"Bamu", "Lisu", "Nkgb", "Sarb",
};
int32_t j = 0;
if(LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
log_err("need to add new script codes in cucdapi.c!\n");
return;
}
for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
const char* name = uscript_getName((UScriptCode)i);
if(name==NULL || strcmp(name,expectedLong[j])!=0){

View file

@ -519,7 +519,7 @@ static void TestMisc()
{
static const UChar sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
static const UChar sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
static const UChar sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6b };
static const UChar sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
static const UChar sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
static const UChar sampleBase[] = {0x0061, 0x0031, 0x03d2};
static const UChar sampleNonBase[] = {0x002B, 0x0020, 0x203B};
@ -1151,6 +1151,8 @@ enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCatego
{ 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
{ 0x10800, U_LEFT_TO_RIGHT },
{ 0x11000, U_RIGHT_TO_LEFT },
{ 0x1E800, U_LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
{ 0x1F000, U_RIGHT_TO_LEFT },
{ 0x110000, U_LEFT_TO_RIGHT }
};
@ -1495,10 +1497,19 @@ static int32_t MakeDir(char* str)
static const struct {
uint32_t code;
const char *name, *oldName, *extName;
const char *name, *oldName, *extName, *alias;
} names[]={
{0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
{0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
{0x01a2, "LATIN CAPITAL LETTER OI",
"LATIN CAPITAL LETTER O I",
"LATIN CAPITAL LETTER OI",
"LATIN CAPITAL LETTER GHA"},
{0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
"LATIN SMALL LETTER DOTLESS J BAR HOOK",
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
{0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
"TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
"TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
{0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
{0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
{0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
@ -1508,6 +1519,9 @@ static const struct {
{0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FULLWIDTH LEFT PARENTHESIS" },
{0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
{0xffff, "", "", "<noncharacter-FFFF>" },
{0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
"BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
"BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
{0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
};
@ -1516,6 +1530,7 @@ enumCharNamesFn(void *context,
UChar32 code, UCharNameChoice nameChoice,
const char *name, int32_t length) {
int32_t *pCount=(int32_t *)context;
const char *expected;
int i;
if(length<=0 || length!=(int32_t)strlen(name)) {
@ -1539,8 +1554,15 @@ enumCharNamesFn(void *context,
}
break;
case U_UNICODE_10_CHAR_NAME:
if(names[i].oldName[0]==0 || 0!=strcmp(name, names[i].oldName)) {
log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, names[i].oldName);
expected=names[i].oldName;
if(expected[0]==0 || 0!=strcmp(name, expected)) {
log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
}
break;
case U_CHAR_NAME_ALIAS:
expected=names[i].alias;
if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
}
break;
case U_CHAR_NAME_CHOICE_COUNT:
@ -1596,6 +1618,7 @@ TestCharNames() {
static char name[80];
UErrorCode errorCode=U_ZERO_ERROR;
struct enumExtCharNamesContext extContext;
const char *expected;
int32_t length;
UChar32 c;
int32_t i;
@ -1656,6 +1679,35 @@ TestCharNames() {
log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
}
}
/* Unicode character name alias */
length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
if(U_FAILURE(errorCode)) {
log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
return;
}
expected=names[i].alias;
if(expected==NULL) {
expected="";
}
if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
names[i].code, name, length, expected);
}
/* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
if(expected[0]!=0 /* && length>0 */) {
c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("u_charFromName(%s - alias) error %s\n",
expected, u_errorName(errorCode));
return;
}
if(c!=(UChar32)names[i].code) {
log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
expected, c, names[i].code);
}
}
}
/* test u_enumCharNames() */
@ -2213,7 +2265,8 @@ TestAdditionalProperties() {
{ 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
{ 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
{ 0x0341, UCHAR_DEPRECATED, TRUE },
{ 0x0149, UCHAR_DEPRECATED, TRUE }, /* changed in Unicode 5.2 */
{ 0x0341, UCHAR_DEPRECATED, FALSE }, /* changed in Unicode 5.2 */
{ 0xe0041, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to 5.1 */
{ 0xe0100, UCHAR_DEPRECATED, FALSE },
@ -2327,7 +2380,8 @@ TestAdditionalProperties() {
{ 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
/* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
{ 0xd7d7, UCHAR_GENERAL_CATEGORY, 0 },
{ 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
{ 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER }, /* changed in Unicode 5.2 */
{ 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
{ 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
@ -2366,27 +2420,43 @@ TestAdditionalProperties() {
/* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
{ 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
{ 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
{ 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
{ 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
{ 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
{ 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
{ 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
{ 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
{ 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
{ 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
{ 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
{ 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
{ 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
{ 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
{ 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
{ 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
{ 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
{ 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
{ 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
{ 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
{ 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
{ 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
{ 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
{ 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
{ 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
{ 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
{ 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
@ -2542,6 +2612,8 @@ TestAdditionalProperties() {
/* test u_hasBinaryProperty() and u_getIntPropertyValue() */
for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
const char *whichName;
if(props[i][0]<0) {
/* Unicode version break */
if(uVersion<props[i][1]) {
@ -2553,19 +2625,20 @@ TestAdditionalProperties() {
c=(UChar32)props[i][0];
which=(UProperty)props[i][1];
whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
if(which<UCHAR_INT_START) {
result=u_hasBinaryProperty(c, which);
if(result!=props[i][2]) {
log_err("error: u_hasBinaryProperty(U+%04lx, %d)=%d is wrong (props[%d])\n",
c, which, result, i);
log_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d])\n",
c, whichName, result, i);
}
}
result=u_getIntPropertyValue(c, which);
if(result!=props[i][2]) {
log_err("error: u_getIntPropertyValue(U+%04lx, 0x1000+%d)=%d is wrong, should be %d (props[%d])\n",
c, (int32_t)which-0x1000, result, props[i][2], i);
log_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d])\n",
c, whichName, result, props[i][2], i);
}
/* test separate functions, too */
@ -2611,7 +2684,14 @@ TestNumericProperties(void) {
{ 0x0F33, U_NT_NUMERIC, -1./2. },
{ 0x0C66, U_NT_DECIMAL, 0 },
{ 0x96f6, U_NT_NUMERIC, 0 },
{ 0xa833, U_NT_NUMERIC, 1./16. },
{ 0x2152, U_NT_NUMERIC, 1./10. },
{ 0x2151, U_NT_NUMERIC, 1./9. },
{ 0x1245f, U_NT_NUMERIC, 1./8. },
{ 0x2150, U_NT_NUMERIC, 1./7. },
{ 0x2159, U_NT_NUMERIC, 1./6. },
{ 0x09f6, U_NT_NUMERIC, 3./16. },
{ 0x2155, U_NT_NUMERIC, 1./5. },
{ 0x00BD, U_NT_NUMERIC, 1./2. },
{ 0x0031, U_NT_DECIMAL, 1. },
{ 0x4e00, U_NT_NUMERIC, 1. },
@ -2647,12 +2727,14 @@ TestNumericProperties(void) {
{ 0x4e07, U_NT_NUMERIC, 10000. },
{ 0x4ebf, U_NT_NUMERIC, 100000000. },
{ 0x5146, U_NT_NUMERIC, 1000000000000. },
{ -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
{ 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
{ 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
{ 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
{ 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
{ 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
{ 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE }
{ 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
{ 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
};
double nv;

View file

@ -94,17 +94,16 @@ CollationAPITest::TestProperty(/* char* par */)
UErrorCode success = U_ZERO_ERROR;
Collator *col = 0;
/*
All the collations have the same version in an ICU
version.
ICU 2.0 currVersionArray = {0x18, 0xC0, 0x02, 0x02};
ICU 2.1 currVersionArray = {0x19, 0x00, 0x03, 0x03};
ICU 2.2 currVersionArray = {0x21, 0x40, 0x04, 0x04};
ICU 2.4 currVersionArray = {0x21, 0x40, 0x04, 0x04};
ICU 2.6 currVersionArray = {0x21, 0x40, 0x03, 0x03};
ICU 2.8 currVersionArray = {0x29, 0x80, 0x00, 0x04};
ICU 3.4 currVersionArray = {0x31, 0xC0, 0x00, 0x04};
*/
UVersionInfo currVersionArray = {0x31, 0xC0, 0x00, 0x29};
* Expected version of the English collator.
* Currently, the major/minor version numbers change when the builder code
* changes,
* number 2 is from the tailoring data version and
* number 3 is the UCA version.
* This changes with every UCA version change, and the expected value
* needs to be adjusted.
* Same in cintltst/capitst.c.
*/
UVersionInfo currVersionArray = {0x31, 0xC0, 0x00, 0x2A};
UVersionInfo versionArray;
int i = 0;
@ -126,7 +125,7 @@ CollationAPITest::TestProperty(/* char* par */)
col->getVersion(versionArray);
for (i=0; i<4; ++i) {
if (versionArray[i] != currVersionArray[i]) {
errln("Testing ucol_getVersion() - unexpected result: %d.%d.%d.%d",
errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
break;
}
@ -593,16 +592,6 @@ CollationAPITest::TestCollationKey(/* char* par */)
const uint8_t* byteArray1 = sortk1.getByteArray(cnt1);
const uint8_t* byteArray2 = sortk2.getByteArray(cnt2);
/*
this is a bad test since it is dependent on the version of uca data,
which changes
will remove it.
const char sortk2_compat[] = {
// this is a 1.8 sortkey
0x17, 0x19, 0x1B, 0x1D, 0x17, 0x01, 0x08, 0x01, 0x08, 0x00
};
*/
const uint8_t* byteArray3 = 0;
byteArray3 = sortk1.getByteArray(cnt3);
@ -612,10 +601,6 @@ CollationAPITest::TestCollationKey(/* char* par */)
CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2);
CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4);
/*
doAssert(memcmp(byteArray2, sortk2_compat, strlen(sortk2_compat)) == 0,
"Binary format for 'abcda' sortkey different!");
*/
doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed.");
doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed.");
doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed");
@ -1119,53 +1104,6 @@ void CollationAPITest::TestSortKey()
/* Need to use identical strength */
col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status);
uint8_t key2compat[] = {
/* 3.9 key, from UCA 5.1 */
0x2c, 0x2e, 0x30, 0x32, 0x2c, 0x01,
0x09, 0x01, 0x09, 0x01, 0x2b, 0x01,
0x92, 0x93, 0x94, 0x95, 0x92, 0x0
/* 3.6 key, from UCA 5.0 */
/*
0x29, 0x2b, 0x2d, 0x2f, 0x29, 0x01,
0x09, 0x01, 0x09, 0x01, 0x28, 0x01,
0x92, 0x93, 0x94, 0x95, 0x92, 0x00
*/
/* 3.4 key, from UCA 4.1 */
/*
0x28, 0x2a, 0x2c, 0x2e, 0x28, 0x01,
0x09, 0x01, 0x09, 0x01, 0x27, 0x01,
0x92, 0x93, 0x94, 0x95, 0x92, 0x00
*/
/* 2.6.1 key */
/*
0x26, 0x28, 0x2A, 0x2C, 0x26, 0x01,
0x09, 0x01, 0x09, 0x01, 0x25, 0x01,
0x92, 0x93, 0x94, 0x95, 0x92, 0x00
*/
/* 2.2 key */
/*
0x1D, 0x1F, 0x21, 0x23, 0x1D, 0x01,
0x09, 0x01, 0x09, 0x01, 0x1C, 0x01,
0x92, 0x93, 0x94, 0x95, 0x92, 0x00
*/
/* 2.0 key */
/*
0x19, 0x1B, 0x1D, 0x1F, 0x19,
0x01, 0x09, 0x01, 0x09, 0x01,
0x18, 0x01,
0x92, 0x93, 0x94, 0x95, 0x92,
0x00
*/
/* 1.8.1 key.*/
/*
0x19, 0x1B, 0x1D, 0x1F, 0x19,
0x01, 0x0A, 0x01, 0x0A, 0x01,
0x92, 0x93, 0x94, 0x95, 0x92,
0x00
*/
};
UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0},
test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0},
test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0};
@ -1192,10 +1130,11 @@ void CollationAPITest::TestSortKey()
doAssert(key2.compareTo(key3) == Collator::EQUAL,
"Result should be \"abcda\" == \"abcda\"");
// Clone the key2 sortkey for later.
int32_t keylength = 0;
doAssert(strcmp((const char *)(key2.getByteArray(keylength)),
(const char *)key2compat) == 0,
"Binary format for 'abcda' sortkey different!");
const uint8_t *key2primary_alias = key2.getByteArray(keylength);
LocalArray<uint8_t> key2primary(new uint8_t[keylength]);
memcpy(key2primary.getAlias(), key2primary_alias, keylength);
col->getSortKey(test1, sortkey1, 64);
col->getSortKey(test2, sortkey2, 64);
@ -1255,8 +1194,8 @@ void CollationAPITest::TestSortKey()
"Result should be \"abcda\" == \"abcda\"");
tempkey = key2.getByteArray(keylength);
doAssert(memcmp(tempkey, key2compat, keylength - 1) == 0,
"Binary format for 'abcda' sortkey different!");
doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0,
"Binary format for 'abcda' sortkey different for secondary strength!");
col->getSortKey(test1, sortkey1, 64);
col->getSortKey(test2, sortkey2, 64);

View file

@ -0,0 +1,490 @@
/*
*******************************************************************************
*
* Copyright (C) 2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: bidiconf.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009oct16
* created by: Markus W. Scherer
*
* BiDi conformance test, using the Unicode BidiTest.txt file.
*/
#include <stdio.h>
#include <string.h>
#include "unicode/utypes.h"
#include "unicode/ubidi.h"
#include "unicode/errorcode.h"
#include "unicode/putil.h"
#include "unicode/unistr.h"
#include "intltest.h"
#include "uparse.h"
// Test class that reads BidiTest.txt line by line and compares the levels and
// visual ordering computed by the UBiDi API with the expected values in the file.
// All parsing state is kept in members so that error reports can refer to it.
class BiDiConformanceTest : public IntlTest {
public:
BiDiConformanceTest() :
directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
errorCount(0) {}
// Test framework dispatcher: maps a test index to a test name and optionally runs it.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
// The one test in this class: runs all of BidiTest.txt through ubidi_setPara().
void TestBidiTest();
private:
char *getUnidataPath(char path[]);
// Parsers for the "@Levels:" and "@Reorder:" lines and for the list of
// BiDi class names at the start of a data line. Each returns FALSE on a parse error.
UBool parseLevels(const char *start);
UBool parseOrdering(const char *start);
UBool parseInputStringFromBiDiClasses(const char *&start);
// Comparison of actual results against levels[] / ordering[];
// each reports a failure via printErrorLine() and returns FALSE on mismatch.
UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
const char *paraLevelName);
UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName);
// Prints the current input line, input string and paragraph level; increments errorCount.
void printErrorLine(const char *paraLevelName);
// Current line of BidiTest.txt, as read by fgets() (comments and trailing whitespace removed).
char line[10000];
// Expected levels from the most recent @Levels: line; 'x' is stored as UBIDI_DEFAULT_LTR.
UBiDiLevel levels[1000];
// Bit 0 is set if any expected level is even, bit 1 if any is odd ('x' entries do not count).
uint32_t directionBits;
// Expected logical indexes in visual order, from the most recent @Reorder: line.
int32_t ordering[1000];
// Number of lines read so far from BidiTest.txt.
int32_t lineNumber;
// Number of valid entries in levels[].
int32_t levelsCount;
// Number of valid entries in ordering[].
int32_t orderingCount;
// Number of failures reported so far; TestBidiTest() stops reading once this reaches 10.
int32_t errorCount;
// Input text for ubidi_setPara(), one stand-in character per BiDi class on the data line.
UnicodeString inputString;
};
// Factory used by the top-level test driver.
// Returns a newly allocated BiDiConformanceTest; the caller deletes it.
extern IntlTest *createBiDiConformanceTest() {
    IntlTest *test=new BiDiConformanceTest();
    return test;
}
// Test framework dispatcher.
// Sets name to the name of the test with the given index (or "" when the index
// is past the last test) and, if exec is TRUE, runs that test.
// par is not used here.
void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par) {
if(exec) {
logln("TestSuite BiDiConformanceTest: ");
}
switch (index) {
// TESTCASE expands to a complete "case" label including its own break.
TESTCASE(0, TestBidiTest);
default:
name="";
break; // needed to end the loop
}
}
// TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
// Finds the "unidata" folder by probing for UnicodeData.txt in two candidate locations.
// On success, path holds the folder name (with the "UnicodeData.txt" basename cut off,
// so it still ends with the file separator) and path is returned.
// Returns NULL if the file cannot be opened in either location.
// Note: path is filled with strcpy()/strcat() without any length check;
// the caller must supply a buffer large enough for the longest candidate path.
char *BiDiConformanceTest::getUnidataPath(char path[]) {
IcuTestErrorCode errorCode(*this, "getUnidataPath");
const int kUnicodeDataTxtLength=15; // strlen("UnicodeData.txt")
// Look inside ICU_DATA first.
strcpy(path, pathToDataDirectory());
strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
// Opening the file is only an existence check; it is closed again right away.
FILE *f=fopen(path, "r");
if(f!=NULL) {
fclose(f);
*(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename.
return path;
}
// As a fallback, try to guess where the source data was located
// at the time ICU was built, and look there.
# ifdef U_TOPSRCDIR
strcpy(path, U_TOPSRCDIR U_FILE_SEP_STRING "data");
# else
// Go four folder levels up from the test data location, then into "data".
strcpy(path, loadTestData(errorCode));
strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
U_FILE_SEP_STRING "data");
# endif
strcat(path, U_FILE_SEP_STRING);
strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
f=fopen(path, "r");
if(f!=NULL) {
fclose(f);
*(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename.
return path;
}
return NULL;
}
// Scoped owners for a stdio FILE and a UBiDi object: the generated classes
// call fclose() / ubidi_close() on the held pointer in their destructors.
// NOTE(review): fclose(NULL) is undefined behavior in standard C, so a
// LocalStdioFilePointer should not be destroyed while holding NULL unless
// U_DEFINE_LOCAL_OPEN_POINTER itself guards against NULL -- confirm.
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close);
// TODO: Make "public" in uparse.h.
// TRUE for space, tab, CR and LF. The argument is evaluated up to four times,
// so it must not have side effects.
#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
// Parses the values of an "@Levels:" line into levels[] and levelsCount,
// and collects in directionBits whether even (bit 0) and/or odd (bit 1)
// levels occur.
// Each value is either a decimal level number or 'x', which marks a position
// whose level is not checked; 'x' is stored as UBIDI_DEFAULT_LTR.
// start points just after the "@Levels:" prefix.
// Returns FALSE (after reporting the error) if a value cannot be parsed,
// is out of range, or if there are more values than levels[] can hold.
UBool BiDiConformanceTest::parseLevels(const char *start) {
    const int32_t capacity=(int32_t)(sizeof(levels)/sizeof(levels[0]));
    directionBits=0;
    levelsCount=0;
    while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
        // The line buffer can hold many more tokens than levels[] has slots,
        // so guard against writing past the end of the array.
        if(levelsCount>=capacity) {
            errln("@Levels: too many values (more than %d) at %s", (int)capacity, start);
            return FALSE;
        }
        if(*start=='x') {
            levels[levelsCount++]=UBIDI_DEFAULT_LTR;
            ++start;
        } else {
            char *end;
            uint32_t value=(uint32_t)strtoul(start, &end, 10);
            // The number must be non-empty, properly terminated, and a valid level.
            if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
                errln("@Levels: parse error at %s", start);
                return FALSE;
            }
            levels[levelsCount++]=(UBiDiLevel)value;
            directionBits|=(1<<(value&1));
            start=end;
        }
    }
    return TRUE;
}
// Parses the values of an "@Reorder:" line into ordering[] and orderingCount.
// Each value is a decimal logical index below 1000.
// start points just after the "@Reorder:" prefix.
// Returns FALSE (after reporting the error) if a value cannot be parsed,
// is out of range, or if there are more values than ordering[] can hold.
UBool BiDiConformanceTest::parseOrdering(const char *start) {
    const int32_t capacity=(int32_t)(sizeof(ordering)/sizeof(ordering[0]));
    orderingCount=0;
    while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
        // The line buffer can hold many more tokens than ordering[] has slots,
        // so guard against writing past the end of the array.
        if(orderingCount>=capacity) {
            errln("@Reorder: too many values (more than %d) at %s", (int)capacity, start);
            return FALSE;
        }
        char *end;
        uint32_t value=(uint32_t)strtoul(start, &end, 10);
        // The number must be non-empty, properly terminated, and in range.
        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) {
            errln("@Reorder: parse error at %s", start);
            return FALSE;
        }
        ordering[orderingCount++]=(int32_t)value;
        start=end;
    }
    return TRUE;
}
// Stand-in character for each BiDi class, indexed by the UCharDirection value.
// parseInputStringFromBiDiClasses() builds the input text from these characters,
// and biDiConfUBiDiClassCallback() maps them back to their classes.
static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
0x6c, // 'l' for L
0x52, // 'R' for R
0x33, // '3' for EN
0x2d, // '-' for ES
0x25, // '%' for ET
0x39, // '9' for AN
0x2c, // ',' for CS
0x2f, // '/' for B
0x5f, // '_' for S
0x20, // ' ' for WS
0x3d, // '=' for ON
0x65, // 'e' for LRE
0x6f, // 'o' for LRO
0x41, // 'A' for AL
0x45, // 'E' for RLE
0x4f, // 'O' for RLO
0x2a, // '*' for PDF
0x60, // '`' for NSM
0x7c // '|' for BN
};
U_CDECL_BEGIN
// Class callback installed via ubidi_setClassCallback():
// reverse lookup of c in charFromBiDiClass[].
// Returns the index of the first matching table entry as the BiDi class,
// or U_BIDI_CLASS_DEFAULT for a character that is not in the table
// (which should not occur during testing). context is not used.
static UCharDirection U_CALLCONV
biDiConfUBiDiClassCallback(const void *context, UChar32 c) {
    int classIndex=0;
    while(classIndex<U_CHAR_DIRECTION_COUNT) {
        if(charFromBiDiClass[classIndex]==c) {
            return (UCharDirection)classIndex;
        }
        ++classIndex;
    }
    return U_BIDI_CLASS_DEFAULT;
}
U_CDECL_END
// Length of the short name of each BiDi class, indexed by the UCharDirection value
// (same order as charFromBiDiClass[]: "L"=1, "R"=1, "EN"=2, ... "NSM"=3, "BN"=2).
// The extra last entry (0) is for U_CHAR_DIRECTION_COUNT, which the parser
// uses as the "no class recognized" value.
static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
};
// Parses a whitespace-separated list of short BiDi class names (L, R, EN, ...)
// and sets inputString to the corresponding stand-in characters from charFromBiDiClass[].
// Parsing stops at a ';' or at the end of the string; on return, start points there.
// Returns FALSE (after reporting the error) if a name is not recognized
// or is not followed by whitespace, ';' or the end of the string.
UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
inputString.remove();
/*
* Lengthy but fast BiDi class parser.
* A simple parser could terminate or extract the name string and use
* int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
* but that makes this test take significantly more time.
*/
while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
// U_CHAR_DIRECTION_COUNT serves as "nothing recognized yet".
UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
// Compare each character once until we have a match on
// a complete, short BiDi class name.
if(start[0]=='L') {
// L, LRE or LRO; "LR" followed by anything else is not recognized.
if(start[1]=='R') {
if(start[2]=='E') {
biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
} else if(start[2]=='O') {
biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
}
} else {
biDiClass=U_LEFT_TO_RIGHT;
}
} else if(start[0]=='R') {
// R, RLE or RLO; "RL" followed by anything else is not recognized.
if(start[1]=='L') {
if(start[2]=='E') {
biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
} else if(start[2]=='O') {
biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
}
} else {
biDiClass=U_RIGHT_TO_LEFT;
}
} else if(start[0]=='E') {
// EN, ES or ET.
if(start[1]=='N') {
biDiClass=U_EUROPEAN_NUMBER;
} else if(start[1]=='S') {
biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
} else if(start[1]=='T') {
biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
}
} else if(start[0]=='A') {
// AL or AN.
if(start[1]=='L') {
biDiClass=U_RIGHT_TO_LEFT_ARABIC;
} else if(start[1]=='N') {
biDiClass=U_ARABIC_NUMBER;
}
} else if(start[0]=='C' && start[1]=='S') {
biDiClass=U_COMMON_NUMBER_SEPARATOR;
} else if(start[0]=='B') {
// BN or B.
if(start[1]=='N') {
biDiClass=U_BOUNDARY_NEUTRAL;
} else {
biDiClass=U_BLOCK_SEPARATOR;
}
} else if(start[0]=='S') {
biDiClass=U_SEGMENT_SEPARATOR;
} else if(start[0]=='W' && start[1]=='S') {
biDiClass=U_WHITE_SPACE_NEUTRAL;
} else if(start[0]=='O' && start[1]=='N') {
biDiClass=U_OTHER_NEUTRAL;
} else if(start[0]=='P' && start[1]=='D' && start[2]=='F') {
biDiClass=U_POP_DIRECTIONAL_FORMAT;
} else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
biDiClass=U_DIR_NON_SPACING_MARK;
}
// Now we verify that the class name is terminated properly,
// and not just the start of a longer word.
// (For an unrecognized name the table yields length 0, so c is start[0].)
int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
char c=start[biDiClassNameLength];
if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) {
errln("BiDi class string not recognized at %s", start);
return FALSE;
}
inputString.append(charFromBiDiClass[biDiClass]);
start+=biDiClassNameLength;
}
return TRUE;
}
void BiDiConformanceTest::TestBidiTest() {
IcuTestErrorCode errorCode(*this, "TestBidiTest");
const char *sourceTestDataPath=getSourceTestData(errorCode);
if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
"folder (getSourceTestData())")) {
return;
}
char bidiTestPath[400];
strcpy(bidiTestPath, sourceTestDataPath);
strcat(bidiTestPath, "BidiTest.txt");
LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
if(bidiTestFile.isNull()) {
errln("unable to open %s", bidiTestPath);
return;
}
LocalUBiDiPointer ubidi(ubidi_open());
ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
NULL, NULL, errorCode);
if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
return;
}
lineNumber=0;
levelsCount=0;
orderingCount=0;
errorCount=0;
while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
++lineNumber;
// Remove trailing comments and whitespace.
char *commentStart=strchr(line, '#');
if(commentStart!=NULL) {
*commentStart=0;
}
u_rtrim(line);
const char *start=u_skipWhitespace(line);
if(*start==0) {
continue; // Skip empty and comment-only lines.
}
if(*start=='@') {
++start;
if(0==strncmp(start, "Levels:", 7)) {
if(!parseLevels(start+7)) {
return;
}
} else if(0==strncmp(start, "Reorder:", 8)) {
if(!parseOrdering(start+8)) {
return;
}
}
// Skip unknown @Xyz: ...
} else {
if(!parseInputStringFromBiDiClasses(start)) {
return;
}
start=u_skipWhitespace(start);
if(*start!=';') {
errln("missing ; separator on input line %s", line);
return;
}
start=u_skipWhitespace(start+1);
char *end;
uint32_t bitset=(uint32_t)strtoul(start, &end, 10);
if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
errln("input bitset parse error at %s", start);
return;
}
// Loop over the bitset.
static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1 };
static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL" };
for(int i=0; i<=2; ++i) {
if(bitset&(1<<i)) {
ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
paraLevels[i], NULL, errorCode);
const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
errln("Input line %d: %s", (int)lineNumber, line);
return;
}
if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()),
paraLevelNames[i])) {
// continue outerLoop; does not exist in C++
// so just break out of the inner loop.
break;
}
if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) {
// continue outerLoop; does not exist in C++
// so just break out of the inner loop.
break;
}
}
}
}
}
}
// Returns the character used to display a level in error output:
// 0x30 ('0') plus the level for values below UBIDI_DEFAULT_LTR,
// otherwise 'x' (the "level not checked" marker).
static UChar printLevel(UBiDiLevel level) {
    return level<UBIDI_DEFAULT_LTR ? (UChar)(0x30+level) : (UChar)0x78;  // 0x78: 'x'
}
// Computes which level parities occur in actualLevels[0..actualCount[:
// bit 0 is set if any level is even, bit 1 if any level is odd.
static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
    uint32_t bits=0;
    int32_t index=0;
    while(index<actualCount) {
        const int parity=actualLevels[index]&1;
        bits|=(1<<parity);
        ++index;
    }
    return bits;
}
// Compares the levels computed by UBiDi with the expected levels[] from the
// most recent @Levels: line.
// Positions whose expected level is 'x' (stored as a value >=UBIDI_DEFAULT_LTR)
// are not compared.
// On failure, reports the current input line plus the expected and actual
// levels, and returns FALSE; otherwise returns TRUE.
UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
                                       const char *paraLevelName) {
    UBool failed=FALSE;
    if(levelsCount!=actualCount) {
        errln("Wrong number of level values; expected %d actual %d",
              (int)levelsCount, (int)actualCount);
        failed=TRUE;
    } else {
        // Find the first position that is checked and differs.
        int32_t pos=0;
        while(pos<actualCount &&
                (levels[pos]==actualLevels[pos] || levels[pos]>=UBIDI_DEFAULT_LTR)) {
            ++pos;
        }
        // A difference is tolerated when ICU used a shortcut:
        // Since the text is unidirectional, it did not store the resolved
        // levels but just returns all levels as the paragraph level 0 or 1.
        // The reordering result is the same, so this is fine.
        if(pos<actualCount &&
                (directionBits==3 ||
                 directionBits!=getDirectionBits(actualLevels, actualCount))) {
            errln("Wrong level value at index %d; expected %d actual %d",
                  (int)pos, levels[pos], actualLevels[pos]);
            failed=TRUE;
        }
    }
    if(!failed) {
        return TRUE;
    }
    printErrorLine(paraLevelName);
    UnicodeString els("Expected levels:   ");
    for(int32_t e=0; e<levelsCount; ++e) {
        els.append(0x20).append(printLevel(levels[e]));
    }
    UnicodeString als("Actual   levels:   ");
    for(int32_t a=0; a<actualCount; ++a) {
        als.append(0x20).append(printLevel(actualLevels[a]));
    }
    errln(els);
    errln(als);
    return FALSE;
}
// Compares the visual ordering computed by UBiDi with the expected ordering[]
// from the most recent @Reorder: line.
// On failure, reports the current input line plus the expected and actual
// ordering, and returns FALSE; otherwise returns TRUE.
//
// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
// does not work for custom BiDi class assignments
// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
// Therefore we just skip the indexes for BiDi controls while comparing
// with the expected ordering that has them omitted.
UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) {
UBool isOk=TRUE;
IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()");
int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls
// i counts all visual positions; visualIndex counts only those that are not BiDi controls.
int32_t i, visualIndex;
// Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
// and loop over each run's indexes, but that seems unnecessary for this test code.
for(i=visualIndex=0; i<resultLength; ++i) {
int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
errln("Input line %d: %s", (int)lineNumber, line);
return FALSE;
}
// An expected level of 'x' (stored as UBIDI_DEFAULT_LTR) marks such a position.
if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
continue; // BiDi control, omitted from expected ordering.
}
// Positions beyond orderingCount are only counted here;
// the length mismatch is reported after the loop.
if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
errln("Wrong ordering value at visual index %d; expected %d actual %d",
(int)visualIndex, ordering[visualIndex], logicalIndex);
isOk=FALSE;
break;
}
++visualIndex;
}
// visualIndex is now the visual length minus the BiDi controls,
// which should match the length of the BidiTest.txt ordering.
if(isOk && orderingCount!=visualIndex) {
errln("Wrong number of ordering values; expected %d actual %d",
(int)orderingCount, (int)visualIndex);
isOk=FALSE;
}
if(!isOk) {
printErrorLine(paraLevelName);
// Each index is printed as the single character 0x30+index,
// which is a decimal digit only for indexes 0..9.
UnicodeString eord("Expected ordering: ");
for(i=0; i<orderingCount; ++i) {
eord.append(0x20).append(0x30+ordering[i]);
}
UnicodeString aord("Actual ordering: ");
for(i=0; i<resultLength; ++i) {
// The error code is not checked in this diagnostic loop.
int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
aord.append(0x20).append(0x30+logicalIndex);
}
}
errln(eord);
errln(aord);
}
return isOk;
}
void BiDiConformanceTest::printErrorLine(const char *paraLevelName) {
++errorCount;
errln("Input line %5d: %s", (int)lineNumber, line);
errln(UnicodeString("Input string: ")+inputString);
errln("Para level: %s", paraLevelName);
}

View file

@ -62,7 +62,7 @@ UnicodeString toString(int32_t n);
name = #test; \
if (exec) { \
logln(#test "---"); \
logln((UnicodeString)""); \
logln(); \
test(); \
} \
break
@ -71,6 +71,7 @@ class IntlTest : public TestLog {
public:
IntlTest();
// TestLog has a virtual destructor.
virtual UBool runTest( char* name = NULL, char* par = NULL ); // not to be overidden
@ -260,4 +261,118 @@ extern UnicodeString CharsToUnicodeString(const char* chars);
/* alias for CharsToUnicodeString */
extern UnicodeString ctou(const char* chars);
// TODO(markus): Propose as public API.
/**
 * Base class for "smart pointer" classes that own an object or array
 * for the duration of a scope.
 *
 * Does not throw exceptions.
 * Do not use this base class directly, since it does not delete its pointer.
 * A subclass must implement methods that delete the pointer:
 * Destructor and adoptInstead().
 *
 * There is no operator T *() provided because the programmer must decide
 * whether to use getAlias() (without transfer of ownership) or orphan()
 * (with transfer of ownership and NULLing of the pointer).
 */
template<typename T>
class /* U_COMMON_API */ LocalPointerBase {
public:
    // Takes ownership.
    explicit LocalPointerBase(T *p=NULL) : ptr(p) {}
    // Does nothing in this base class; a subclass destructor
    // must delete (or close) the object it owns.
    ~LocalPointerBase() { /* delete ptr; */ }
    // NULL checks.
    UBool isNull() const { return ptr==NULL; }
    UBool isValid() const { return ptr!=NULL; }
    // Comparisons with simple pointers, so that existing code
    // with ==NULL and !=NULL need not be changed.
    bool operator==(const T *other) const { return ptr==other; }
    bool operator!=(const T *other) const { return ptr!=other; }
    // Access without ownership change.
    T *getAlias() const { return ptr; }
    T &operator*() const { return *ptr; }
    T *operator->() const { return ptr; }
    // Gives up ownership; the internal pointer becomes NULL.
    T *orphan() {
        T *p=ptr;
        ptr=NULL;
        return p;
    }
    // Adopts (takes ownership of) the object passed in.
    // This base class version does not delete the previously owned object;
    // a subclass must override this to delete (or close) it first.
    void adoptInstead(T *p) {
        // delete ptr;
        ptr=p;
    }
protected:
    T *ptr;
private:
    // No comparison operators with other LocalPointerBase's.
    bool operator==(const LocalPointerBase &other);
    bool operator!=(const LocalPointerBase &other);
    // No ownership transfer: No copy constructor, no assignment operator.
    LocalPointerBase(const LocalPointerBase &other);
    void operator=(const LocalPointerBase &other);
    // No heap allocation. Use only on the stack.
    static void * U_EXPORT2 operator new(size_t size);
    static void * U_EXPORT2 operator new[](size_t size);
#if U_HAVE_PLACEMENT_NEW
    static void * U_EXPORT2 operator new(size_t, void *ptr);
#endif
};
// TODO: Option 1: Destructor and adoptInstead() and possible future methods
// call a protected handleDelete() which is virtual and is the single method
// which a subclass must override.
// Con: Requires the LocalPointerBase class to be virtual, doubles its size,
// and the linker picks an arbitrary .o file for the vtable.
// virtual void handleDelete() {
// delete ptr;
// }
// Option 2: Require all subclasses to override all deleting methods
// (destructor and adoptInstead()) if they need to use something other than
// the delete operator.
// Con: More methods need to be overwritten, and if we add further deleting
// methods, then that set increases further.
// Also, using a subclass instance via the base class API would not work,
// but that would be outside the intended usage.
// Scoped owner of a single object allocated with new:
// the object is released with the delete operator when the owner goes out
// of scope or adopts a different object.
template<typename T>
class /* U_COMMON_API */ LocalPointer : public LocalPointerBase<T> {
public:
    // Takes ownership of p.
    explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {}
    // Deletes the object it owns.
    ~LocalPointer() {
        delete this->ptr;
    }
    // Deletes the object it owns and takes ownership of p instead.
    void adoptInstead(T *p) {
        delete this->ptr;
        this->ptr=p;
    }
};
// Scoped owner of an array allocated with new[]:
// the array is released with the delete[] operator when the owner goes out
// of scope or adopts a different array.
template<typename T>
class /* U_COMMON_API */ LocalArray : public LocalPointerBase<T> {
public:
    // Takes ownership of p.
    explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {}
    // Deletes the array it owns.
    ~LocalArray() {
        delete[] this->ptr;
    }
    // Deletes the array it owns and takes ownership of p instead.
    void adoptInstead(T *p) {
        delete[] this->ptr;
        this->ptr=p;
    }
    // Element access; no bounds checking.
    T &operator[](ptrdiff_t i) const { return this->ptr[i]; }
};
// Defines a LocalPointerBase subclass named LocalPointerClassName which owns
// a Type object and releases it with closeFunction(pointer)
// rather than with the delete operator.
// The closeFunction is only called with a non-NULL pointer, so that functions
// like fclose(), which must not be called with NULL, can be used.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
    class /* U_COMMON_API */ LocalPointerClassName : public LocalPointerBase<Type> { \
    public: \
        explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
        ~LocalPointerClassName() { if(ptr!=NULL) { closeFunction(ptr); } } \
        void adoptInstead(Type *p) { \
            if(ptr!=NULL) { closeFunction(ptr); } \
            ptr=p; \
        } \
    }
#endif // _INTLTEST

View file

@ -635,6 +635,46 @@
<Filter
Name="configuration"
>
<File
RelativePath=".\simplethread.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
DisableLanguageExtensions="false"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
DisableLanguageExtensions="false"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
DisableLanguageExtensions="false"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
DisableLanguageExtensions="false"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\simplethread.h"
>
</File>
<File
RelativePath=".\strtest.cpp"
>
@ -1597,46 +1637,14 @@
>
</File>
</Filter>
<File
RelativePath=".\simplethread.cpp"
<Filter
Name="bidi"
>
<FileConfiguration
Name="Debug|Win32"
<File
RelativePath=".\bidiconf.cpp"
>
<Tool
Name="VCCLCompilerTool"
DisableLanguageExtensions="false"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
DisableLanguageExtensions="false"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
DisableLanguageExtensions="false"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
DisableLanguageExtensions="false"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\simplethread.h"
>
</File>
</File>
</Filter>
</Files>
<Globals>
</Globals>

View file

@ -35,6 +35,8 @@
#include "convtest.h"
#include "csdetest.h"
extern IntlTest *createBiDiConformanceTest();
#define CASE_SUITE(id, suite) case id: \
name = #suite; \
if(exec) { \
@ -193,6 +195,16 @@ void MajorTestLevel::runIndexedTest( int32_t index, UBool exec, const char* &nam
#endif
break;
case 15: name = "bidi";
if (exec) {
logln("TestSuite bidi---"); logln();
IntlTest *test = createBiDiConformanceTest();
callTest(*test, par);
delete test;
}
break;
default: name = ""; break;
}

View file

@ -3314,6 +3314,7 @@ RBBILineMonkey::RBBILineMonkey()
fH3 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H3}]"), status);
fCL = new UnicodeSet(UNICODE_STRING_SIMPLE("[[\\p{Line_break=CL}]-[\\u0029\\u005d]]"), status); // TODO: fix when props are updated.
fCP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u0029\\u005d]"), status);
fCP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CP}]"), status);
fEX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=EX}]"), status);
fIN = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IN}]"), status);
fJL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JL}]"), status);
@ -3389,7 +3390,7 @@ RBBILineMonkey::RBBILineMonkey()
"((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?"
"\\p{Line_Break=NU}\\p{Line_Break=CM}*"
"((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*"
"((\\p{Line_Break=CL}|[\\u0029\\u005d])\\p{Line_Break=CM}*)?" // TODO: p{Line_Break=CP}
"((\\p{Line_Break=CL}|\\p{Line_Break=CP})\\p{Line_Break=CM}*)?"
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?";
fNumberMatcher = new RegexMatcher(

View file

@ -13,8 +13,13 @@
static const UChar HEX[16]={48,49,50,51,52,53,54,55,56,57,65,66,67,68,69,70};
UnicodeString TestUtility::hex(UChar ch) {
UnicodeString buf;
UnicodeString &TestUtility::appendHex(UnicodeString &buf, UChar32 ch) {
if (ch >= 0x10000) {
if (ch >= 0x100000) {
buf.append(HEX[0xF&(ch>>20)]);
}
buf.append(HEX[0xF&(ch>>16)]);
}
buf.append(HEX[0xF&(ch>>12)]);
buf.append(HEX[0xF&(ch>>8)]);
buf.append(HEX[0xF&(ch>>4)]);
@ -22,16 +27,26 @@ UnicodeString TestUtility::hex(UChar ch) {
return buf;
}
UnicodeString TestUtility::hex(UChar32 ch) {
UnicodeString buf;
appendHex(buf, ch);
return buf;
}
UnicodeString TestUtility::hex(const UnicodeString& s) {
return hex(s, 44 /*,*/);
}
UnicodeString TestUtility::hex(const UnicodeString& s, UChar sep) {
if (s.length() == 0) return "";
UnicodeString result = hex(s.charAt(0));
for (int32_t i = 1; i < s.length(); ++i) {
result.append(sep);
result.append(hex(s.charAt(i)));
UnicodeString result;
if (s.isEmpty()) return result;
UChar32 c;
for (int32_t i = 0; i < s.length(); i += U16_LENGTH(c)) {
c = s.char32At(i);
if (i > 0) {
result.append(sep);
}
appendHex(result, c);
}
return result;
}

View file

@ -19,8 +19,9 @@
class TestUtility {
public:
static UnicodeString &appendHex(UnicodeString &buf, UChar32 ch);
static UnicodeString hex(UChar ch);
static UnicodeString hex(UChar32 ch);
static UnicodeString hex(const UnicodeString& s);

View file

@ -955,12 +955,15 @@ void RTTest::logRoundTripFailure(const UnicodeString& from,
30FF ; 3.2 # KATAKANA DIGRAPH KOTO
31F0..31FF ; 3.2 # [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
Unicode 5.2 added another Hiragana character:
1F200 ; 5.2 # SQUARE HIRAGANA HOKA
We will not add them to the rules until they are more supported (e.g. in fonts on Windows)
A bug has been filed to remind us to do this: #1979.
*/
static const char KATAKANA[] = "[[[:katakana:][\\u30A1-\\u30FA\\u30FC]]-[\\u30FF\\u31F0-\\u31FF]]";
static const char HIRAGANA[] = "[[[:hiragana:][\\u3040-\\u3094]]-[\\u3095-\\u3096\\u309F-\\u30A0]]";
static const char HIRAGANA[] = "[[[:hiragana:][\\u3040-\\u3094]]-[\\u3095-\\u3096\\u309F-\\u30A0\\U0001F200-\\U0001F2FF]]";
static const char LENGTH[] = "[\\u30FC]";
static const char HALFWIDTH_KATAKANA[] = "[\\uFF65-\\uFF9D]";
static const char KATAKANA_ITERATION[] = "[\\u30FD\\u30FE]";

View file

@ -265,13 +265,12 @@ IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const Unico
}
UErrorCode status = U_ZERO_ERROR;
CollationElementIterator* c = ((RuleBasedCollator *)col)->createCollationElementIterator( source );
LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
logln("Testing iterating source: "+source);
backAndForth(*c);
c->setText(target, status);
logln("Testing iterating target: "+target);
backAndForth(*c);
delete c;
}
}
@ -386,8 +385,10 @@ UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeStr
for (i = 0; i < byteCount; i += 1)
{
if (i != 0) {
target += " ";
}
appendHex(bytes[i], 2, target);
target += " ";
}
target += "]";
@ -399,7 +400,7 @@ void IntlTestCollator::backAndForth(CollationElementIterator &iter)
{
// Run through the iterator forwards and stick it into an array
int32_t orderLength = 0;
Order *orders = getOrders(iter, orderLength);
LocalArray<Order> orders(getOrders(iter, orderLength));
UErrorCode status = U_ZERO_ERROR;
// Now go through it backwards and make sure we get the same values
@ -436,7 +437,7 @@ void IntlTestCollator::backAndForth(CollationElementIterator &iter)
errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
orders[index].order, o);
//break;
goto bail;
return;
}
}
}
@ -446,7 +447,7 @@ void IntlTestCollator::backAndForth(CollationElementIterator &iter)
errln("Mismatched offset at index %d: %d vs. %d", index,
orders[index].offset, offset);
//break;
goto bail;
return;
}
#endif
@ -485,9 +486,6 @@ void IntlTestCollator::backAndForth(CollationElementIterator &iter)
}
errln("");
}
bail:
delete[] orders;
}
@ -499,7 +497,7 @@ IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &i
{
int32_t maxSize = 100;
int32_t size = 0;
Order *orders = new Order[maxSize];
LocalArray<Order> orders(new Order[maxSize]);
UErrorCode status = U_ZERO_ERROR;
int32_t offset = iter.getOffset();
@ -511,9 +509,8 @@ IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &i
maxSize *= 2;
Order *temp = new Order[maxSize];
uprv_memcpy(temp, orders, size * sizeof(Order));
delete[] orders;
orders = temp;
uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
orders.adoptInstead(temp);
}
orders[size].order = order;
@ -527,13 +524,12 @@ IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &i
{
Order *temp = new Order[size];
uprv_memcpy(temp, orders, size * sizeof(Order));
delete[] orders;
orders = temp;
uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
orders.adoptInstead(temp);
}
orderLength = size;
return orders;
return orders.orphan();
}
#endif /* #if !UCONFIG_NO_COLLATION */

View file

@ -558,8 +558,7 @@ public:
virtual void run()
{
fTraceInfo = 1;
NumberFormat *formatter = NULL;
NumberFormat *percentFormatter = NULL;
LocalPointer<NumberFormat> percentFormatter;
UErrorCode status = U_ZERO_ERROR;
#if 0
@ -638,13 +637,13 @@ public:
int32_t iteration;
status = U_ZERO_ERROR;
formatter = NumberFormat::createInstance(Locale::getEnglish(),status);
LocalPointer<NumberFormat> formatter(NumberFormat::createInstance(Locale::getEnglish(),status));
if(U_FAILURE(status)) {
error("Error on NumberFormat::createInstance().");
goto cleanupAndReturn;
}
percentFormatter = NumberFormat::createPercentInstance(Locale::getFrench(),status);
percentFormatter.adoptInstead(NumberFormat::createPercentInstance(Locale::getFrench(),status));
if(U_FAILURE(status)) {
error("Error on NumberFormat::createPercentInstance().");
goto cleanupAndReturn;
@ -746,9 +745,6 @@ public:
} /* end of for loop */
cleanupAndReturn:
delete formatter;
delete percentFormatter;
// while (fNum == 4) {SimpleThread::sleep(10000);} // Force a failure by preventing thread from finishing
fTraceInfo = 2;
}
@ -771,14 +767,14 @@ void MultithreadTest::TestThreadedIntl()
//
logln("Spawning: %d threads * %d iterations each.",
kFormatThreadThreads, kFormatThreadIterations);
FormatThreadTest *tests = new FormatThreadTest[kFormatThreadThreads];
LocalArray<FormatThreadTest> tests(new FormatThreadTest[kFormatThreadThreads]);
for(int32_t j = 0; j < kFormatThreadThreads; j++) {
tests[j].fNum = j;
int32_t threadStatus = tests[j].start();
if (threadStatus != 0) {
errln("System Error %d starting thread number %d.", threadStatus, j);
SimpleThread::errorFunc();
goto cleanupAndReturn;
return;
}
haveDisplayedInfo[j] = FALSE;
}
@ -819,8 +815,6 @@ void MultithreadTest::TestThreadedIntl()
//
// All threads have finished.
//
cleanupAndReturn:
delete [] tests;
}
#endif /* #if !UCONFIG_NO_FORMATTING */
@ -1016,8 +1010,7 @@ void MultithreadTest::TestCollators()
int32_t noSpawned = 0;
int32_t spawnResult = 0;
CollatorThreadTest *tests;
tests = new CollatorThreadTest[kCollatorThreadThreads];
LocalArray<CollatorThreadTest> tests(new CollatorThreadTest[kCollatorThreadThreads]);
logln(UnicodeString("Spawning: ") + kCollatorThreadThreads + " threads * " + kFormatThreadIterations + " iterations each.");
int32_t j = 0;
@ -1077,7 +1070,6 @@ void MultithreadTest::TestCollators()
SimpleThread::errorFunc();
}
ucol_close(coll);
delete[] tests;
//for(i = 0; i < lineNum; i++) {
//delete[] lines[i].buff;
//}
@ -1160,6 +1152,9 @@ void MultithreadTest::TestString()
UnicodeString *testString = new UnicodeString("This is the original test string.");
// Not using LocalArray<StringThreadTest2> tests[kStringThreadThreads];
// because we don't always want to delete them.
// See the comments below the cleanupAndReturn label.
StringThreadTest2 *tests[kStringThreadThreads];
for(j = 0; j < kStringThreadThreads; j++) {
tests[j] = new StringThreadTest2(testString, j);

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2008, International Business Machines Corporation and
* Copyright (c) 1997-2009, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -1311,7 +1311,8 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u05C5\\u05C7\\u0610-\\u061A\\u0622-\\u0626\\u064B-\\u065E"
"\\u0670\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4"
"\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
"\\u07F3\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958"
"\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-"
"\\u082D\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958"
"-\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33"
"\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C"
"\\u0B48\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD"
@ -1322,35 +1323,40 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
"\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
"\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u135F\\u1714\\u1734"
"\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06\\u1B08"
"\\u1B0A\\u1B0C\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41"
"\\u1B43\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1DC0-\\u1DE6"
"\\u1DFE-\\u1E99\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-"
"\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59"
"\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC"
"\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-"
"\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFD\\u2000\\u2001\\u20D0-"
"\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A\\u212B\\u219A\\u219B"
"\\u21AE\\u21CD-\\u21CF\\u2204\\u2209\\u220C\\u2224\\u2226\\u2241"
"\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-\\u2271\\u2274\\u2275"
"\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285\\u2288\\u2289\\u22AC-"
"\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED\\u2329\\u232A\\u2ADC"
"\\u2DE0-\\u2DFF\\u302A-\\u302F\\u304C\\u304E\\u3050\\u3052"
"\\u3054\\u3056\\u3058\\u305A\\u305C\\u305E\\u3060\\u3062\\u3065"
"\\u3067\\u3069\\u3070\\u3071\\u3073\\u3074\\u3076\\u3077\\u3079"
"\\u307A\\u307C\\u307D\\u3094\\u3099\\u309A\\u309E\\u30AC\\u30AE"
"\\u30B0\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0"
"\\u30C2\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6"
"\\u30D7\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE"
"\\uA66F\\uA67C\\uA67D\\uA806\\uA8C4\\uA92B-\\uA92D\\uA953\\uAC00"
"-\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
"\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6A\\uFA70-"
"\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E"
"\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFB4E\\uFE20-\\uFE26"
"\\U000101FD\\U00010A0D\\U00010A0F\\U00010A38-\\U00010A3A\\U00010"
"A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-"
"\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001"
"D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002F800-\\U0002FA1D]"
"\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75"
"-\\u1A7C\\u1A7F\\u1B06\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12\\u1B34"
"\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73\\u1BAA"
"\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8\\u1CED"
"\\u1DC0-\\u1DE6\\u1DFD-\\u1E99\\u1E9B\\u1EA0-\\u1EF9\\u1F00-"
"\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-"
"\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4"
"\\u1FB6-\\u1FBC\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-"
"\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFD\\u2000"
"\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A"
"\\u212B\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204\\u2209\\u220C"
"\\u2224\\u2226\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-"
"\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285"
"\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED"
"\\u2329\\u232A\\u2ADC\\u2CEF-\\u2CF1\\u2DE0-\\u2DFF\\u302A-"
"\\u302F\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A"
"\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071"
"\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094"
"\\u3099\\u309A\\u309E\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4\\u30B6"
"\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7\\u30C9"
"\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA\\u30DC"
"\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\uA66F\\uA67C\\uA67D\\uA6F0"
"\\uA6F1\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-\\uA92D\\uA953"
"\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF"
"\\uAAC1\\uABED\\uAC00-\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12"
"\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D"
"\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-\\uFB36"
"\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-"
"\\uFB4E\\uFE20-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010A"
"38-\\U00010A3A\\U00010A3F\\U0001109A\\U0001109C\\U000110AB"
"\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
"D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
"\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
"F800-\\U0002FA1D]"
, ""), errorCode);
skipSets[UNORM_NFC].applyPattern(UnicodeString(
@ -1373,7 +1379,8 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627\\u0648\\u064A-"
"\\u065E\\u0670\\u06C1\\u06D2\\u06D5-\\u06DC\\u06DF-\\u06E4"
"\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
"\\u07F3\\u0928\\u0930\\u0933\\u093C\\u094D\\u0951-\\u0954\\u0958"
"\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-"
"\\u082D\\u0928\\u0930\\u0933\\u093C\\u094D\\u0951-\\u0954\\u0958"
"-\\u095F\\u09BC\\u09BE\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF"
"\\u0A33\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD"
"\\u0B3C\\u0B3E\\u0B47\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92"
@ -1386,84 +1393,88 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
"\\u0FC6\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u1100-\\u1112"
"\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2"
"\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09"
"\\u1B0B\\u1B0D\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F"
"\\u1B42\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1DC0-\\u1DE6"
"\\u1DFE-\\u1E03\\u1E0A-\\u1E0F\\u1E12-\\u1E1B\\u1E20-\\u1E27"
"\\u1E2A-\\u1E41\\u1E44-\\u1E53\\u1E58-\\u1E7D\\u1E80-\\u1E87"
"\\u1E8E-\\u1E91\\u1E96-\\u1E99\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9"
"\\u1F00-\\u1F11\\u1F18\\u1F19\\u1F20-\\u1F31\\u1F38\\u1F39"
"\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50\\u1F51\\u1F59\\u1F60-\\u1F71"
"\\u1F73-\\u1F75\\u1F77\\u1F79\\u1F7B-\\u1F7D\\u1F80\\u1F81"
"\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98\\u1F99\\u1FA0\\u1FA1\\u1FA8"
"\\u1FA9\\u1FB3\\u1FB6\\u1FBB\\u1FBC\\u1FBE\\u1FBF\\u1FC3\\u1FC6"
"\\u1FC9\\u1FCB\\u1FCC\\u1FD3\\u1FDB\\u1FE3\\u1FEB\\u1FEE\\u1FEF"
"\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE\\u2000\\u2001\\u20D0-"
"\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A\\u212B\\u2190\\u2192"
"\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208\\u220B\\u2223\\u2225"
"\\u223C\\u2243\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272"
"\\u2273\\u2276\\u2277\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287"
"\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5\\u2329"
"\\u232A\\u2ADC\\u2DE0-\\u2DFF\\u302A-\\u302F\\u3046\\u304B"
"\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059\\u305B\\u305D"
"\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072\\u3075\\u3078"
"\\u307B\\u3099\\u309A\\u309D\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1"
"\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4"
"\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2"
"\\u30FD\\uA66F\\uA67C\\uA67D\\uA806\\uA8C4\\uA92B-\\uA92D\\uA953"
"\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8\\uACC4\\uACE0"
"\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4\\uADC0\\uADDC"
"\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0\\uAEBC\\uAED8"
"\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C\\uAFB8\\uAFD4"
"\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C\\uB098\\uB0B4\\uB0D0"
"\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178\\uB194\\uB1B0\\uB1CC"
"\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274\\uB290\\uB2AC\\uB2C8"
"\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370\\uB38C\\uB3A8\\uB3C4"
"\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C\\uB488\\uB4A4\\uB4C0"
"\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568\\uB584\\uB5A0\\uB5BC"
"\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664\\uB680\\uB69C\\uB6B8"
"\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760\\uB77C\\uB798\\uB7B4"
"\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C\\uB878\\uB894\\uB8B0"
"\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958\\uB974\\uB990\\uB9AC"
"\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70\\uBA8C\\uBAA8"
"\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C\\uBB88\\uBBA4"
"\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68\\uBC84\\uBCA0"
"\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64\\uBD80\\uBD9C"
"\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60\\uBE7C\\uBE98"
"\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C\\uBF78\\uBF94"
"\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C\\uC058\\uC074\\uC090"
"\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138\\uC154\\uC170\\uC18C"
"\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234\\uC250\\uC26C\\uC288"
"\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C\\uC368\\uC384"
"\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448\\uC464\\uC480"
"\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544\\uC560\\uC57C"
"\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624\\uC640\\uC65C\\uC678"
"\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C\\uC758\\uC774"
"\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838\\uC854\\uC870"
"\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934\\uC950\\uC96C"
"\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30\\uCA4C\\uCA68"
"\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C\\uCB48\\uCB64"
"\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28\\uCC44\\uCC60"
"\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24\\uCD40\\uCD5C"
"\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20\\uCE3C\\uCE58"
"\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C\\uCF38\\uCF54"
"\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018\\uD034\\uD050"
"\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114\\uD130\\uD14C"
"\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210\\uD22C\\uD248"
"\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C\\uD328\\uD344"
"\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408\\uD424\\uD440"
"\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504\\uD520\\uD53C"
"\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600\\uD61C\\uD638"
"\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC\\uD718\\uD734"
"\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-"
"\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-"
"\\uFA6A\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-\\uFB36\\uFB38-"
"\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFB4E\\uFE20"
"-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010A38-\\U00010A3A"
"\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001"
"D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD"
"\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002F800-"
"\\U0002FA1D]"
"\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75-"
"\\u1A7C\\u1A7F\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D\\u1B11\\u1B34"
"\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44\\u1B6B-\\u1B73"
"\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8"
"\\u1CED\\u1DC0-\\u1DE6\\u1DFD-\\u1E03\\u1E0A-\\u1E0F\\u1E12-"
"\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53\\u1E58-"
"\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E99\\u1EA0-"
"\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19\\u1F20-"
"\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50\\u1F51"
"\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79\\u1F7B-"
"\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98\\u1F99"
"\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB\\u1FBC\\u1FBE"
"\\u1FBF\\u1FC3\\u1FC6\\u1FC9\\u1FCB\\u1FCC\\u1FD3\\u1FDB\\u1FE3"
"\\u1FEB\\u1FEE\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE\\u2000"
"\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A"
"\\u212B\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208"
"\\u220B\\u2223\\u2225\\u223C\\u2243\\u2245\\u2248\\u224D\\u2261"
"\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277\\u227A-\\u227D\\u2282"
"\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB"
"\\u22B2-\\u22B5\\u2329\\u232A\\u2ADC\\u2CEF-\\u2CF1\\u2DE0-"
"\\u2DFF\\u302A-\\u302F\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053"
"\\u3055\\u3057\\u3059\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066"
"\\u3068\\u306F\\u3072\\u3075\\u3078\\u307B\\u3099\\u309A\\u309D"
"\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9"
"\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2"
"\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2\\u30FD\\uA66F\\uA67C\\uA67D"
"\\uA6F0\\uA6F1\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-\\uA92D"
"\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE"
"\\uAABF\\uAAC1\\uABED\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C"
"\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88"
"\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84"
"\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80"
"\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C"
"\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178"
"\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274"
"\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370"
"\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C"
"\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568"
"\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664"
"\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760"
"\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C"
"\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958"
"\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54"
"\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50"
"\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C"
"\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48"
"\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44"
"\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40"
"\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C"
"\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138"
"\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234"
"\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330"
"\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C"
"\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528"
"\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624"
"\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720"
"\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C"
"\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918"
"\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14"
"\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10"
"\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C"
"\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08"
"\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04"
"\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00"
"\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC"
"\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8"
"\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4"
"\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0"
"\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC"
"\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8"
"\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4"
"\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0"
"\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10"
"\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-"
"\\uFA2D\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-"
"\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
"-\\uFB4E\\uFE20-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010"
"A38-\\U00010A3A\\U00010A3F\\U00011099\\U0001109B\\U000110A5"
"\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
"D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
"\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
"F800-\\U0002FA1D]"
, ""), errorCode);
skipSets[UNORM_NFKD].applyPattern(UnicodeString(
@ -1486,6 +1497,7 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"-\\u061A\\u0622-\\u0626\\u064B-\\u065E\\u0670\\u0675-\\u0678"
"\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7"
"\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-\\u07F3"
"\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D"
"\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958-"
"\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36"
"\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B48"
@ -1497,58 +1509,66 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80"
"-\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
"\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u10FC\\u135F\\u1714"
"\\u1734\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06"
"\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40"
"\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1D2C-"
"\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-\\u1D6A\\u1D78"
"\\u1D9B-\\u1DE6\\u1DFE-\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15"
"\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57"
"\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-"
"\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-"
"\\u1FF4\\u1FF6-\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-"
"\\u2026\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-"
"\\u2049\\u2057\\u205F\\u2070\\u2071\\u2074-\\u208E\\u2090-"
"\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2100-"
"\\u2103\\u2105-\\u2107\\u2109-\\u2113\\u2115\\u2116\\u2119-"
"\\u211D\\u2120-\\u2122\\u2124\\u2126\\u2128\\u212A-\\u212D"
"\\u212F-\\u2131\\u2133-\\u2139\\u213B-\\u2140\\u2145-\\u2149"
"\\u2153-\\u217F\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204"
"\\u2209\\u220C\\u2224\\u2226\\u222C\\u222D\\u222F\\u2230\\u2241"
"\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-\\u2271\\u2274\\u2275"
"\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285\\u2288\\u2289\\u22AC-"
"\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED\\u2329\\u232A\\u2460-"
"\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC\\u2C7C\\u2C7D\\u2D6F\\u2DE0"
"-\\u2DFF\\u2E9F\\u2EF3\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F"
"\\u3036\\u3038-\\u303A\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056"
"\\u3058\\u305A\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069"
"\\u3070\\u3071\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C"
"\\u307D\\u3094\\u3099-\\u309C\\u309E\\u309F\\u30AC\\u30AE\\u30B0"
"\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2"
"\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7"
"\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\u30FF"
"\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3243"
"\\u3250-\\u327E\\u3280-\\u32FE\\u3300-\\u33FF\\uA66F\\uA67C"
"\\uA67D\\uA770\\uA806\\uA8C4\\uA92B-\\uA92D\\uA953\\uAC00-"
"\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
"\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6A\\uFA70-"
"\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D-\\uFB36\\uFB38-"
"\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3"
"-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-"
"\\uFE19\\uFE20-\\uFE26\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-"
"\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC"
"\\uFF01-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7"
"\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010"
"A0D\\U00010A0F\\U00010A38-\\U00010A3A\\U00010A3F\\U0001D15E-"
"\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001"
"D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0"
"\\U0001D242-\\U0001D244\\U0001D400-\\U0001D454\\U0001D456-"
"\\U0001D49C\\U0001D49E\\U0001D49F\\U0001D4A2\\U0001D4A5\\U0001D4"
"A6\\U0001D4A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB"
"\\U0001D4BD-\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-"
"\\U0001D50A\\U0001D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001"
"D51E-\\U0001D539\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544"
"\\U0001D546\\U0001D54A-\\U0001D550\\U0001D552-\\U0001D6A5\\U0001"
"D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]"
"\\u1734\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60"
"\\u1A75-\\u1A7C\\u1A7F\\u1B06\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12"
"\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73"
"\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8"
"\\u1CED\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-"
"\\u1D6A\\u1D78\\u1D9B-\\u1DE6\\u1DFD-\\u1E9B\\u1EA0-\\u1EF9"
"\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D"
"\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-"
"\\u1FB4\\u1FB6-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-"
"\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFE\\u2000-\\u200A\\u2011"
"\\u2017\\u2024-\\u2026\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C"
"\\u203E\\u2047-\\u2049\\u2057\\u205F\\u2070\\u2071\\u2074-"
"\\u208E\\u2090-\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1\\u20E5-"
"\\u20F0\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113\\u2115"
"\\u2116\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2126\\u2128"
"\\u212A-\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-\\u2140"
"\\u2145-\\u2149\\u2150-\\u217F\\u2189\\u219A\\u219B\\u21AE"
"\\u21CD-\\u21CF\\u2204\\u2209\\u220C\\u2224\\u2226\\u222C\\u222D"
"\\u222F\\u2230\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-"
"\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285"
"\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED"
"\\u2329\\u232A\\u2460-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC"
"\\u2C7C\\u2C7D\\u2CEF-\\u2CF1\\u2D6F\\u2DE0-\\u2DFF\\u2E9F"
"\\u2EF3\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F\\u3036\\u3038-"
"\\u303A\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A"
"\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071"
"\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094"
"\\u3099-\\u309C\\u309E\\u309F\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4"
"\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7"
"\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA"
"\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\u30FF\\u3131-"
"\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3247\\u3250-"
"\\u327E\\u3280-\\u32FE\\u3300-\\u33FF\\uA66F\\uA67C\\uA67D"
"\\uA6F0\\uA6F1\\uA770\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-"
"\\uA92D\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8"
"\\uAABE\\uAABF\\uAAC1\\uABED\\uAC00-\\uD7A3\\uF900-\\uFA0D"
"\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A"
"-\\uFA2D\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-"
"\\uFB17\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41"
"\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F"
"\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE20-\\uFE26"
"\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B"
"\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-"
"\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-"
"\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010A0D\\U00010A0F\\U00010A"
"38-\\U00010A3A\\U00010A3F\\U0001109A\\U0001109C\\U000110AB"
"\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
"D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
"\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0001"
"D400-\\U0001D454\\U0001D456-\\U0001D49C\\U0001D49E\\U0001D49F"
"\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4A9-\\U0001D4AC\\U0001D"
"4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C3\\U0001D4C5-"
"\\U0001D505\\U0001D507-\\U0001D50A\\U0001D50D-\\U0001D514\\U0001"
"D516-\\U0001D51C\\U0001D51E-\\U0001D539\\U0001D53B-\\U0001D53E"
"\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
"D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF"
"\\U0001F100-\\U0001F10A\\U0001F110-\\U0001F12E\\U0001F131\\U0001"
"F13D\\U0001F13F\\U0001F142\\U0001F146\\U0001F14A-\\U0001F14E"
"\\U0001F190\\U0001F200\\U0001F210-\\U0001F231\\U0001F240-\\U0001"
"F248\\U0002F800-\\U0002FA1D]"
, ""), errorCode);
skipSets[UNORM_NFKC].applyPattern(UnicodeString(
@ -1574,7 +1594,8 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627"
"\\u0648\\u064A-\\u065E\\u0670\\u0675-\\u0678\\u06C1\\u06D2"
"\\u06D5-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED"
"\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u0928\\u0930\\u0933"
"\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-"
"\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0928\\u0930\\u0933"
"\\u093C\\u094D\\u0951-\\u0954\\u0958-\\u095F\\u09BC\\u09BE"
"\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36\\u0A3C"
"\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B3E\\u0B47"
@ -1588,107 +1609,114 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9\\u0FC6"
"\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u10FC\\u1100-\\u1112"
"\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2"
"\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09"
"\\u1B0B\\u1B0D\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F"
"\\u1B42\\u1B44\\u1B6B-\\u1B73\\u1BAA\\u1C37\\u1D2C-\\u1D2E"
"\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-\\u1D6A\\u1D78\\u1D9B-"
"\\u1DE6\\u1DFE-\\u1E03\\u1E0A-\\u1E0F\\u1E12-\\u1E1B\\u1E20-"
"\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53\\u1E58-\\u1E7D\\u1E80-"
"\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E9B\\u1EA0-\\u1EF3\\u1EF6-"
"\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19\\u1F20-\\u1F31\\u1F38"
"\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50\\u1F51\\u1F59\\u1F60-"
"\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79\\u1F7B-\\u1F7D\\u1F80"
"\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98\\u1F99\\u1FA0\\u1FA1"
"\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB-\\u1FC1\\u1FC3\\u1FC6\\u1FC9"
"\\u1FCB-\\u1FCF\\u1FD3\\u1FDB\\u1FDD-\\u1FDF\\u1FE3\\u1FEB"
"\\u1FED-\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE\\u2000-"
"\\u200A\\u2011\\u2017\\u2024-\\u2026\\u202F\\u2033\\u2034\\u2036"
"\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057\\u205F\\u2070\\u2071"
"\\u2074-\\u208E\\u2090-\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1"
"\\u20E5-\\u20F0\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113"
"\\u2115\\u2116\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2126"
"\\u2128\\u212A-\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-"
"\\u2140\\u2145-\\u2149\\u2153-\\u217F\\u2190\\u2192\\u2194"
"\\u21D0\\u21D2\\u21D4\\u2203\\u2208\\u220B\\u2223\\u2225\\u222C"
"\\u222D\\u222F\\u2230\\u223C\\u2243\\u2245\\u2248\\u224D\\u2261"
"\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277\\u227A-\\u227D\\u2282"
"\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB"
"\\u22B2-\\u22B5\\u2329\\u232A\\u2460-\\u24EA\\u2A0C\\u2A74-"
"\\u2A76\\u2ADC\\u2C7C\\u2C7D\\u2D6F\\u2DE0-\\u2DFF\\u2E9F\\u2EF3"
"\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F\\u3036\\u3038-\\u303A"
"\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059"
"\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072"
"\\u3075\\u3078\\u307B\\u3099-\\u309D\\u309F\\u30A6\\u30AB\\u30AD"
"\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF"
"\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB"
"\\u30EF-\\u30F2\\u30FD\\u30FF\\u3131-\\u318E\\u3192-\\u319F"
"\\u3200-\\u321E\\u3220-\\u3243\\u3250-\\u327E\\u3280-\\u32FE"
"\\u3300-\\u33FF\\uA66F\\uA67C\\uA67D\\uA770\\uA806\\uA8C4\\uA92B"
"-\\uA92D\\uA953\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8"
"\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4"
"\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0"
"\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C"
"\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C\\uB098"
"\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178\\uB194"
"\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274\\uB290"
"\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370\\uB38C"
"\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C\\uB488"
"\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568\\uB584"
"\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664\\uB680"
"\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760\\uB77C"
"\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C\\uB878"
"\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958\\uB974"
"\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70"
"\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C"
"\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68"
"\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64"
"\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60"
"\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C"
"\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C\\uC058"
"\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138\\uC154"
"\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234\\uC250"
"\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C"
"\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448"
"\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544"
"\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624\\uC640"
"\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C"
"\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838"
"\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934"
"\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30"
"\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C"
"\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28"
"\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24"
"\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20"
"\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C"
"\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018"
"\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114"
"\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210"
"\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C"
"\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408"
"\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504"
"\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600"
"\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC"
"\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10\\uFA12"
"\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D"
"\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17"
"\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43"
"\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-"
"\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE20-\\uFE26\\uFE30-"
"\\uFE44\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B\\uFE70-"
"\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-\\uFFC7"
"\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6"
"\\uFFE8-\\uFFEE\\U000101FD\\U00010A0D\\U00010A0F\\U00010A38-"
"\\U00010A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
"D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
"\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0001"
"D400-\\U0001D454\\U0001D456-\\U0001D49C\\U0001D49E\\U0001D49F"
"\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4A9-\\U0001D4AC\\U0001D"
"4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C3\\U0001D4C5-"
"\\U0001D505\\U0001D507-\\U0001D50A\\U0001D50D-\\U0001D514\\U0001"
"D516-\\U0001D51C\\U0001D51E-\\U0001D539\\U0001D53B-\\U0001D53E"
"\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
"D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF"
"\\U0002F800-\\U0002FA1D]"
"\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75-"
"\\u1A7C\\u1A7F\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D\\u1B11\\u1B34"
"\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44\\u1B6B-\\u1B73"
"\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8"
"\\u1CED\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-"
"\\u1D6A\\u1D78\\u1D9B-\\u1DE6\\u1DFD-\\u1E03\\u1E0A-\\u1E0F"
"\\u1E12-\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53"
"\\u1E58-\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E9B"
"\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19"
"\\u1F20-\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50"
"\\u1F51\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79"
"\\u1F7B-\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98"
"\\u1F99\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB-\\u1FC1"
"\\u1FC3\\u1FC6\\u1FC9\\u1FCB-\\u1FCF\\u1FD3\\u1FDB\\u1FDD-"
"\\u1FDF\\u1FE3\\u1FEB\\u1FED-\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB"
"-\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-\\u2026\\u202F"
"\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057"
"\\u205F\\u2070\\u2071\\u2074-\\u208E\\u2090-\\u2094\\u20A8"
"\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2100-\\u2103\\u2105-"
"\\u2107\\u2109-\\u2113\\u2115\\u2116\\u2119-\\u211D\\u2120-"
"\\u2122\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2131"
"\\u2133-\\u2139\\u213B-\\u2140\\u2145-\\u2149\\u2150-\\u217F"
"\\u2189\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208"
"\\u220B\\u2223\\u2225\\u222C\\u222D\\u222F\\u2230\\u223C\\u2243"
"\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276"
"\\u2277\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292"
"\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5\\u2329\\u232A\\u2460"
"-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC\\u2C7C\\u2C7D\\u2CEF-"
"\\u2CF1\\u2D6F\\u2DE0-\\u2DFF\\u2E9F\\u2EF3\\u2F00-\\u2FD5"
"\\u3000\\u302A-\\u302F\\u3036\\u3038-\\u303A\\u3046\\u304B"
"\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059\\u305B\\u305D"
"\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072\\u3075\\u3078"
"\\u307B\\u3099-\\u309D\\u309F\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1"
"\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4"
"\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2"
"\\u30FD\\u30FF\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E"
"\\u3220-\\u3247\\u3250-\\u327E\\u3280-\\u32FE\\u3300-\\u33FF"
"\\uA66F\\uA67C\\uA67D\\uA6F0\\uA6F1\\uA770\\uA806\\uA8C4\\uA8E0-"
"\\uA8F1\\uA92B-\\uA92D\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-"
"\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uABED\\uAC00\\uAC1C"
"\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18"
"\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14"
"\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10"
"\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C"
"\\uB028\\uB044\\uB060\\uB07C\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108"
"\\uB124\\uB140\\uB15C\\uB178\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204"
"\\uB220\\uB23C\\uB258\\uB274\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300"
"\\uB31C\\uB338\\uB354\\uB370\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC"
"\\uB418\\uB434\\uB450\\uB46C\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8"
"\\uB514\\uB530\\uB54C\\uB568\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4"
"\\uB610\\uB62C\\uB648\\uB664\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0"
"\\uB70C\\uB728\\uB744\\uB760\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC"
"\\uB808\\uB824\\uB840\\uB85C\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8"
"\\uB904\\uB920\\uB93C\\uB958\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4"
"\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0"
"\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC"
"\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8"
"\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4"
"\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0"
"\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC"
"\\uBFE8\\uC004\\uC020\\uC03C\\uC058\\uC074\\uC090\\uC0AC\\uC0C8"
"\\uC0E4\\uC100\\uC11C\\uC138\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4"
"\\uC1E0\\uC1FC\\uC218\\uC234\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0"
"\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC"
"\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448\\uC464\\uC480\\uC49C\\uC4B8"
"\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544\\uC560\\uC57C\\uC598\\uC5B4"
"\\uC5D0\\uC5EC\\uC608\\uC624\\uC640\\uC65C\\uC678\\uC694\\uC6B0"
"\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C\\uC758\\uC774\\uC790\\uC7AC"
"\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838\\uC854\\uC870\\uC88C\\uC8A8"
"\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934\\uC950\\uC96C\\uC988\\uC9A4"
"\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0"
"\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C"
"\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98"
"\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94"
"\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90"
"\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C"
"\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018\\uD034\\uD050\\uD06C\\uD088"
"\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114\\uD130\\uD14C\\uD168\\uD184"
"\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210\\uD22C\\uD248\\uD264\\uD280"
"\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C\\uD328\\uD344\\uD360\\uD37C"
"\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408\\uD424\\uD440\\uD45C\\uD478"
"\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504\\uD520\\uD53C\\uD558\\uD574"
"\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600\\uD61C\\uD638\\uD654\\uD670"
"\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC\\uD718\\uD734\\uD750\\uD76C"
"\\uD788\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
"\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6D\\uFA70-"
"\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D-\\uFB36\\uFB38-"
"\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3"
"-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-"
"\\uFE19\\uFE20-\\uFE26\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-"
"\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC"
"\\uFF01-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7"
"\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010"
"A0D\\U00010A0F\\U00010A38-\\U00010A3A\\U00010A3F\\U00011099"
"\\U0001109B\\U000110A5\\U000110B9\\U000110BA\\U0001D15E-\\U0001D"
"169\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-"
"\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001"
"D242-\\U0001D244\\U0001D400-\\U0001D454\\U0001D456-\\U0001D49C"
"\\U0001D49E\\U0001D49F\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4"
"A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-"
"\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-\\U0001D50A\\U0001"
"D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-\\U0001D539"
"\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001"
"D54A-\\U0001D550\\U0001D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB"
"\\U0001D7CE-\\U0001D7FF\\U0001F100-\\U0001F10A\\U0001F110-"
"\\U0001F12E\\U0001F131\\U0001F13D\\U0001F13F\\U0001F142\\U0001F1"
"46\\U0001F14A-\\U0001F14E\\U0001F190\\U0001F200\\U0001F210-"
"\\U0001F231\\U0001F240-\\U0001F248\\U0002F800-\\U0002FA1D]"
, ""), errorCode);
}

View file

@ -158,10 +158,14 @@ void UCAConformanceTest::testConformance(UCollator *coll)
int32_t line = 0;
UChar b1[1024], b2[1024];
char lineB[1024];
UChar *buffer = b1, *oldB = NULL;
char lineB1[1024], lineB2[1024];
char *lineB = lineB1, *oldLineB = lineB2;
uint8_t sk1[1024], sk2[1024];
uint8_t *oldSk = NULL, *newSk = sk1;
int32_t resLen = 0, oldLen = 0;
int32_t buflen = 0, oldBlen = 0;
uint32_t first = 0;
@ -170,6 +174,8 @@ void UCAConformanceTest::testConformance(UCollator *coll)
while (fgets(lineB, 1024, testFile) != NULL) {
// remove trailing whitespace
u_rtrim(lineB);
offset = 0;
line++;
@ -177,6 +183,11 @@ void UCAConformanceTest::testConformance(UCollator *coll)
continue;
}
offset = u_parseString(lineB, buffer, 1024, &first, &status);
if(U_FAILURE(status)) {
errln("Error parsing line %ld (%s): %s\n",
(long)line, u_errorName(status), lineB);
status = U_ZERO_ERROR;
}
buflen = offset;
buffer[offset++] = 0;
@ -195,34 +206,46 @@ void UCAConformanceTest::testConformance(UCollator *coll)
if(((res&0x80000000) != (cmpres&0x80000000)) || (res == 0 && cmpres != 0) || (res != 0 && cmpres == 0)) {
errln("Difference between ucol_strcoll and sortkey compare on line %i", line);
logln("Data line %s", lineB);
errln(" Previous data line %s", oldLineB);
errln(" Current data line %s", lineB);
}
if(res > 0) {
errln("Line %i is not greater or equal than previous line", line);
logln("Data line %s", lineB);
errln(" Previous data line %s", oldLineB);
errln(" Current data line %s", lineB);
prettify(CollationKey(oldSk, oldLen), oldS);
prettify(CollationKey(newSk, resLen), newS);
logln("Keys: "+oldS+" and "+newS);
errln(" Previous key: "+oldS);
errln(" Current key: "+newS);
} else if(res == 0) { /* equal */
res = u_strcmpCodePointOrder(oldB, buffer);
if (res == 0) {
errln("Probable error in test file on line %i (comparing identical strings)", line);
logln("Data line %s", lineB);
errln(" Data line %s", lineB);
} else if (res > 0) {
errln("Sortkeys are identical, but code point comapare gives >0 on line %i", line);
logln("Data line %s", lineB);
errln(" Previous data line %s", oldLineB);
errln(" Current data line %s", lineB);
}
}
}
oldSk = newSk;
oldLen = resLen;
newSk = (newSk == sk1)?sk2:sk1;
// swap buffers
oldLineB = lineB;
oldB = buffer;
oldSk = newSk;
if(lineB == lineB1) {
lineB = lineB2;
buffer = b2;
newSk = sk2;
} else {
lineB = lineB1;
buffer = b1;
newSk = sk1;
}
oldLen = resLen;
oldBlen = buflen;
buffer = (buffer == b1)?b2:b1;
}
}

View file

@ -9,6 +9,7 @@
#include "unicode/uniset.h"
#include "unicode/putil.h"
#include "cstring.h"
#include "hash.h"
#include "uparse.h"
#include "ucdtest.h"
@ -16,10 +17,17 @@
UnicodeTest::UnicodeTest()
{
    // Allocate the table in which derivedCorePropsLineFn() records the
    // unknown property names it has already reported.
    UErrorCode status=U_ZERO_ERROR;
    U_NAMESPACE_QUALIFIER Hashtable *names=new U_NAMESPACE_QUALIFIER Hashtable(status);
    if(U_FAILURE(status)) {
        // Construction failed: discard the unusable table and keep NULL instead.
        delete names;
        names=NULL;
    }
    unknownPropertyNames=names;
}
// Frees the table of unknown property names allocated by the constructor.
// The pointer may be NULL if construction of the table failed; delete handles that.
UnicodeTest::~UnicodeTest()
{
delete unknownPropertyNames;
}
void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
@ -80,7 +88,14 @@ derivedCorePropsNames[]={
"Default_Ignorable_Code_Point",
"Grapheme_Extend",
"Grapheme_Link", /* Unicode 5 moves this property here from PropList.txt */
"Grapheme_Base"
"Grapheme_Base",
"Cased",
"Case_Ignorable",
"Changes_When_Lowercased",
"Changes_When_Uppercased",
"Changes_When_Titlecased",
"Changes_When_Casefolded",
"Changes_When_Casemapped"
};
static const UProperty
@ -96,9 +111,20 @@ derivedCorePropsIndex[]={
UCHAR_DEFAULT_IGNORABLE_CODE_POINT,
UCHAR_GRAPHEME_EXTEND,
UCHAR_GRAPHEME_LINK,
UCHAR_GRAPHEME_BASE
UCHAR_GRAPHEME_BASE,
UCHAR_CASED,
UCHAR_CASE_IGNORABLE,
UCHAR_CHANGES_WHEN_LOWERCASED,
UCHAR_CHANGES_WHEN_UPPERCASED,
UCHAR_CHANGES_WHEN_TITLECASED,
UCHAR_CHANGES_WHEN_CASEFOLDED,
UCHAR_CHANGES_WHEN_CASEMAPPED
};
// Number of u_hasBinaryProperty() mismatches reported so far for each property,
// indexed in parallel with derivedCorePropsIndex[]. The counts are file-static,
// so they accumulate across both the TRUE-property and FALSE-property passes.
static int32_t numErrors[LENGTHOF(derivedCorePropsIndex)]={ 0 };
// Once a property has this many reported mismatches, its remaining ranges are not checked.
enum { MAX_ERRORS=50 };
U_CFUNC void U_CALLCONV
derivedCorePropsLineFn(void *context,
char *fields[][2], int32_t /* fieldCount */,
@ -117,7 +143,13 @@ derivedCorePropsLineFn(void *context,
/* parse derived binary property name, ignore unknown names */
i=getTokenIndex(derivedCorePropsNames, LENGTHOF(derivedCorePropsNames), fields[1][0]);
if(i<0) {
me->errln("UnicodeTest warning: unknown property name '%s' in \n", fields[1][0]);
UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0]));
propName.trim();
if(me->unknownPropertyNames->find(propName)==NULL) {
UErrorCode errorCode=U_ZERO_ERROR;
me->unknownPropertyNames->puti(propName, 1, errorCode);
me->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt\n", fields[1][0]);
}
return;
}
@ -172,18 +204,17 @@ void UnicodeTest::TestAdditionalProperties() {
int32_t rangeCount, range;
uint32_t i;
UChar32 start, end;
int32_t noErrors = 0;
// test all TRUE properties
for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
rangeCount=derivedCoreProps[i].getRangeCount();
for(range=0; range<rangeCount; ++range) {
for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
start=derivedCoreProps[i].getRangeStart(range);
end=derivedCoreProps[i].getRangeEnd(range);
for(; start<=end; ++start) {
if(!u_hasBinaryProperty(start, derivedCorePropsIndex[i])) {
errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong\n", start, derivedCorePropsNames[i]);
if(noErrors++ > 100) {
if(++numErrors[i]>=MAX_ERRORS) {
errln("Too many errors, moving to the next test");
break;
}
@ -192,7 +223,6 @@ void UnicodeTest::TestAdditionalProperties() {
}
}
noErrors = 0;
// invert all properties
for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
derivedCoreProps[i].complement();
@ -201,13 +231,13 @@ void UnicodeTest::TestAdditionalProperties() {
// test all FALSE properties
for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
rangeCount=derivedCoreProps[i].getRangeCount();
for(range=0; range<rangeCount; ++range) {
for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
start=derivedCoreProps[i].getRangeStart(range);
end=derivedCoreProps[i].getRangeEnd(range);
for(; start<=end; ++start) {
if(u_hasBinaryProperty(start, derivedCorePropsIndex[i])) {
errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedCorePropsNames[i]);
if(noErrors++ > 100) {
if(++numErrors[i]>=MAX_ERRORS) {
errln("Too many errors, moving to the next test");
break;
}

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2008, International Business Machines Corporation and
* Copyright (c) 1997-2009, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -17,6 +17,12 @@ derivedCorePropsLineFn(void *context,
char *fields[][2], int32_t fieldCount,
UErrorCode *pErrorCode);
U_NAMESPACE_BEGIN
class Hashtable;
U_NAMESPACE_END
/**
* Test API and functionality of class Unicode
**/
@ -42,5 +48,6 @@ private:
UErrorCode *pErrorCode);
UnicodeSet derivedCoreProps[30];
U_NAMESPACE_QUALIFIER Hashtable *unknownPropertyNames;
};

View file

@ -1806,53 +1806,6 @@ UnicodeStringTest::TestUTF32() {
}
}
// TODO(markus): Temporary test code to see if all relevant compilers support templates.
/**
* Minimal owning pointer wrapper: holds a T* and deletes it in the destructor.
* Does not throw exceptions.
* Copying, assignment, comparison and heap allocation of the wrapper itself
* are disabled by declaring the corresponding members private.
*/
template<class T>
class /* U_COMMON_API */ LocalPointer {
public:
// Takes ownership of p (which may be NULL, the default).
explicit LocalPointer(T *p=NULL) : ptr(p) {}
// Deletes the object it owns.
~LocalPointer() {
delete ptr;
}
// NULL checks.
UBool isNull() const { return ptr==NULL; }
UBool isValid() const { return ptr!=NULL; }
// Access without ownership change.
// operator* and operator-> do not check for NULL; test isValid()/isNull() first.
T *getAlias() const { return ptr; }
T &operator*() const { return *ptr; }
T *operator->() const { return ptr; }
// Gives up ownership: returns the owned pointer and sets the internal pointer to NULL.
// The caller becomes responsible for deleting the returned object.
T *orphan() {
T *p=ptr;
ptr=NULL;
return p;
}
// Delete the object it owns and adopt (take ownership of) the one passed in.
void adoptInstead(T *p) {
delete ptr;
ptr=p;
}
private:
// The owned object, or NULL.
T *ptr;
// No comparison operators.
bool operator==(const LocalPointer &other);
bool operator!=(const LocalPointer &other);
// No ownership transfer: No copy constructor, no assignment operator.
LocalPointer(const LocalPointer &other);
void operator=(const LocalPointer &other);
// No heap allocation. Use only on the stack.
static void * U_EXPORT2 operator new(size_t size);
static void * U_EXPORT2 operator new[](size_t size);
#if U_HAVE_PLACEMENT_NEW
static void * U_EXPORT2 operator new(size_t, void *ptr);
#endif
};
void
UnicodeStringTest::TestUTF8() {
static const uint8_t utf8[] = {
@ -1919,10 +1872,4 @@ UnicodeStringTest::TestUTF8() {
errln("UnicodeString::toUTF8String() did not create the expected string.");
}
#endif
// TODO(markus): Temporary test code to see if all relevant compilers support templates.
LocalPointer<UnicodeString> lp(new UnicodeString("abc123"));
if(lp.isNull() || lp->length()!=6 || lp.getAlias()->length()!=6) {
errln("Trouble with LocalPointer.");
}
}

219558
icu4c/source/test/testdata/BidiTest.txt vendored Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,10 @@
# GraphemeBreakTest-5.1.0.txt
# Date: 2008-03-11, 02:19:22 GMT [MD]
# GraphemeBreakTest-5.2.0.txt
# Date: 2009-05-28, 20:37:56 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Grapheme Break Test
#

View file

@ -1,10 +1,10 @@
# SentenceBreakTest-5.1.0.txt
# Date: 2008-03-11, 02:19:26 GMT [MD]
# SentenceBreakTest-5.2.0.txt
# Date: 2009-05-28, 20:38:05 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Sentence Break Test
#

View file

@ -1,10 +1,10 @@
# WordBreakTest-5.1.0.txt
# Date: 2008-03-11, 02:19:28 GMT [MD]
# WordBreakTest-5.2.0.txt
# Date: 2009-05-28, 20:38:06 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Word Break Test
#

View file

@ -11,6 +11,33 @@
TestLog::~TestLog() {}
IcuTestErrorCode::~IcuTestErrorCode() {
    // Nothing to report if the error code is not in a failure state.
    if(!isFailure()) {
        return;
    }
    // Calling this from a destructor is safe because our handleFailure()
    // does not throw exceptions.
    handleFailure();
}
UBool IcuTestErrorCode::logIfFailureAndReset(const char *s) {
    // On failure, logs "<testName> <s> failure - <errorName>" via the test class.
    // The error code is reset in either case.
    // Returns TRUE if a failure was logged, FALSE otherwise.
    UBool failed=FALSE;
    if(isFailure()) {
        // Equivalent of: testClass.errln("%s %s failure - %s", testName, s, errorName());
        UnicodeString message(testName, -1, US_INV);
        message.append(0x20);
        message.append(UnicodeString(s, -1, US_INV));
        message.append(UNICODE_STRING_SIMPLE(" failure - "));
        message.append(UnicodeString(errorName(), -1, US_INV));
        testClass.errln(message);
        failed=TRUE;
    }
    // Reset only after the message has been built from the current error name.
    reset();
    return failed;
}
void IcuTestErrorCode::handleFailure() const {
// testClass.errln("%s failure - %s", testName, errorName());
UnicodeString msg(testName, -1, US_INV);
msg.append(UNICODE_STRING_SIMPLE(" failure - ")).append(UnicodeString(errorName(), -1, US_INV));
testClass.errln(msg);
}
TestDataModule *TestDataModule::getTestDataModule(const char* name, TestLog& log, UErrorCode &status)
{
if(U_FAILURE(status)) {

View file

@ -11,6 +11,7 @@
#ifndef U_TESTFW_TESTLOG
#define U_TESTFW_TESTLOG
#include "unicode/errorcode.h"
#include "unicode/unistr.h"
#include "unicode/testtype.h"
@ -27,5 +28,18 @@ public:
virtual const char* getTestDataPath(UErrorCode& err) = 0;
};
// ErrorCode subclass for test code: it is bound to a TestLog and a test name,
// which are used when a failure is reported.
class T_CTEST_EXPORT_API IcuTestErrorCode : public ErrorCode {
public:
// Stores a reference to callingTestClass and the callingTestName pointer itself
// (the string is not copied), so both must remain valid while this object is used.
IcuTestErrorCode(TestLog &callingTestClass, const char *callingTestName) :
testClass(callingTestClass), testName(callingTestName) {}
virtual ~IcuTestErrorCode();
// Returns TRUE if isFailure().
// s is extra text identifying what was being checked.
UBool logIfFailureAndReset(const char *s);
protected:
virtual void handleFailure() const;
private:
// Test object that receives the error messages.
TestLog &testClass;
// Name of the calling test; not owned.
const char *const testName;
};
#endif

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2004-2008, International Business Machines
* Copyright (C) 2004-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -96,7 +96,9 @@ propListBinaries={
static const Binary
derCorePropsNames[]={
{ "Lowercase", 0, UCASE_LOWER, UCASE_TYPE_MASK },
{ "Uppercase", 0, UCASE_UPPER, UCASE_TYPE_MASK }
{ "Uppercase", 0, UCASE_UPPER, UCASE_TYPE_MASK },
/* Unicode 5.2 adds Case_Ignorable as a public property. See comments in store.c. */
{ "Case_Ignorable", 1, U_MASK(UGENCASE_IS_MID_LETTER_SHIFT), U_MASK(UGENCASE_IS_MID_LETTER_SHIFT) }
};
static const Binaries
@ -233,7 +235,7 @@ main(int argc, char* argv[]) {
}
if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
/*
* Broken into chucks because the C89 standard says the minimum
* Broken into chunks because the C89 standard says the minimum
* required supported string length is 509 bytes.
*/
fprintf(stderr,

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2004-2008, International Business Machines
* Copyright (C) 2004-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -36,6 +36,9 @@ enum {
UNI_4_0,
UNI_4_0_1,
UNI_4_1,
UNI_5_1,
UNI_5_2,
UNI_6_0,
UNI_VER_COUNT
};
@ -87,6 +90,7 @@ typedef struct {
SpecialCasing *specialCasing;
CaseFolding *caseFolding;
uint8_t gc, cc;
UBool isCaseIgnorable;
} Props;
/* global flags */

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2004-2008, International Business Machines
* Copyright (C) 2004-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -46,10 +46,13 @@ the udata API for loading ICU data. Especially, a UDataInfo structure
precedes the actual data. It contains platform properties values and the
file format version.
The following is a description of format version 1.1 .
The following is a description of format version 1.2 .
Format version 1.1 adds data for case closure.
Format version 1.2 adds an exception bit for case-ignorable. Needed because
the Cased and Case_Ignorable properties are not disjoint.
The file contains the following structures:
const int32_t indexes[i0] with values i0, i1, ...:
@ -116,7 +119,9 @@ Bits
1 soft-dotted character
2 cc=230
3 other cc
11.. 9 reserved
11 case-ignorable (used when the character is cased or has another exception)
(new in formatVersion 1.2/ICU 4.4)
10.. 9 reserved
8 if set, then for each optional-value slot there are 2 uint16_t values
(high and low parts of 32-bit values)
instead of single ones
@ -230,7 +235,10 @@ unicodeVersions[]={
{ 3, 2, 0, 0 },
{ 4, 0, 0, 0 },
{ 4, 0, 1, 0 },
{ 4, 1, 0, 0 }
{ 4, 1, 0, 0 },
{ 5, 1, 0, 0 },
{ 5, 2, 0, 0 },
{ 6, 0, 0, 0 }
};
int32_t ucdVersion=UNI_4_1;
@ -288,7 +296,6 @@ setProps(Props *p) {
UErrorCode errorCode;
uint32_t value, oldValue;
int32_t delta;
UBool isCaseIgnorable;
/* get the non-UnicodeData.txt properties */
value=oldValue=upvec_getValue(pv, p->code, 0);
@ -346,51 +353,52 @@ setProps(Props *p) {
}
}
/* encode case-ignorable as delta==1 on uncased characters */
isCaseIgnorable=FALSE;
if((value&UCASE_TYPE_MASK)==UCASE_NONE) {
if(ucdVersion>=UNI_4_1) {
/*
* Unicode 4.1 and up: (D47a) Word_Break=MidLetter or Mn, Me, Cf, Lm, Sk
* Unicode 5.1 and up: Word_Break=(MidLetter or MidNumLet) or Mn, Me, Cf, Lm, Sk
* The UGENCASE_IS_MID_LETTER_SHIFT bit is set for both WB=MidLetter and WB=MidNumLet.
*/
if(
(U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
(upvec_getValue(pv, p->code, 1)&U_MASK(UGENCASE_IS_MID_LETTER_SHIFT))!=0
) {
isCaseIgnorable=TRUE;
}
} else {
/* before Unicode 4.1: Mn, Me, Cf, Lm, Sk or 0027 or 00AD or 2019 */
if(
(U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
p->code==0x27 || p->code==0xad || p->code==0x2019
) {
isCaseIgnorable=TRUE;
}
/*
* Encode case-ignorable as delta==1 on uncased characters,
* and with an exception bit on cased characters and characters with another exception.
*/
if(ucdVersion>=UNI_4_1) {
/*
* Unicode 4.1 & 5.0: (D47a) Word_Break=MidLetter or Mn, Me, Cf, Lm, Sk
* Unicode 5.1: Word_Break=(MidLetter or MidNumLet) or Mn, Me, Cf, Lm, Sk
* The UGENCASE_IS_MID_LETTER_SHIFT bit is set for both WB=MidLetter and WB=MidNumLet.
* Unicode 5.2: The definition (Unicode Standard Definition D121) is unchanged,
* but now Case_Ignorable is a public property
* with its values listed in DerivedCoreProperties.txt.
* gencase.c parses those values as well, just in case the definition changes
* in the future. gencase.c sets the UGENCASE_IS_MID_LETTER_SHIFT bit
* for each Case_Ignorable entry. (It never resets that bit.)
*/
if(
(U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
(upvec_getValue(pv, p->code, 1)&U_MASK(UGENCASE_IS_MID_LETTER_SHIFT))!=0
) {
p->isCaseIgnorable=TRUE;
}
} else {
/* before Unicode 4.1: Mn, Me, Cf, Lm, Sk or 0027 or 00AD or 2019 */
if(
(U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
p->code==0x27 || p->code==0xad || p->code==0x2019
) {
p->isCaseIgnorable=TRUE;
}
}
if(isCaseIgnorable && p->code!=0x307) {
/*
* We use one of the delta/exception bits, which works because we only
* store the case-ignorable flag for uncased characters.
* There is no delta for uncased characters (see checks above).
* If there is an exception for an uncased, case-ignorable character
* (although there should not be any case mappings if it's uncased)
* then we have a problem.
* There is one character which is case-ignorable but has an exception:
* U+0307 is uncased, Mn, has conditional special casing and
* is therefore handled in code instead.
*/
if(value&UCASE_EXCEPTION) {
fprintf(stderr, "gencase error: unable to encode case-ignorable for U+%04lx with exceptions\n",
(unsigned long)p->code);
exit(U_INTERNAL_PROGRAM_ERROR);
if(p->isCaseIgnorable) {
if((value&UCASE_TYPE_MASK)==UCASE_NONE) {
/*
* We use one of the delta/exception bits for
* the case-ignorable flag for uncased characters.
* There is no delta for uncased characters (see checks above).
*/
delta=1;
} else {
/*
* If the character is cased or has another exception,
* then we store the case-ignorable flag as an exception bit.
*/
value|=UCASE_EXCEPTION;
}
delta=1;
}
/* handle exceptions */
@ -822,21 +830,22 @@ static uint16_t
makeException(uint32_t value, Props *p) {
uint32_t slots[8];
uint32_t slotBits;
uint16_t excWord, excIndex, excTop, i, count, length, fullLengths;
uint16_t excWord, i, count, length, fullLengths;
UBool doubleSlots;
/* excIndex will be returned for storing in the trie word */
excIndex=exceptionsTop;
if(excIndex>=UCASE_MAX_EXCEPTIONS) {
/* exceptionsTop might be returned for storing in the trie word */
if(exceptionsTop>=UCASE_MAX_EXCEPTIONS) {
fprintf(stderr, "gencase error: too many exceptions words\n");
exit(U_BUFFER_OVERFLOW_ERROR);
}
excTop=excIndex+1; /* +1 for excWord which will be stored at excIndex */
/* copy and shift the soft-dotted bits */
excWord=((uint16_t)value&UCASE_DOT_MASK)<<UCASE_EXC_DOT_SHIFT;
if(p->isCaseIgnorable) {
excWord|=UCASE_EXC_CASE_IGNORABLE;
}
/* update maxFullLength */
if(p->specialCasing!=NULL) {
length=p->specialCasing->lowerCase[0];
@ -962,56 +971,73 @@ makeException(uint32_t value, Props *p) {
excWord|=U_MASK(UCASE_EXC_FULL_MAPPINGS);
}
/* write slots */
doubleSlots=(UBool)(slotBits>0xffff);
if(!doubleSlots) {
for(i=0; i<count; ++i) {
exceptions[excTop++]=(uint16_t)slots[i];
if(count==0) {
/* No optional slots: Try to share excWord entries. */
uint16_t excIndex;
for(excIndex=0; excIndex<exceptionsTop; ++excIndex) {
if(excWord==exceptions[excIndex]) {
return excIndex;
}
}
/* not found */
++exceptionsTop;
exceptions[excIndex]=excWord;
return excIndex;
} else {
excWord|=UCASE_EXC_DOUBLE_SLOTS;
for(i=0; i<count; ++i) {
exceptions[excTop++]=(uint16_t)(slots[i]>>16);
exceptions[excTop++]=(uint16_t)slots[i];
/* write slots */
uint16_t excIndex=exceptionsTop;
uint16_t excTop=excIndex+1; /* +1 for excWord which will be stored at excIndex */
doubleSlots=(UBool)(slotBits>0xffff);
if(!doubleSlots) {
for(i=0; i<count; ++i) {
exceptions[excTop++]=(uint16_t)slots[i];
}
} else {
excWord|=UCASE_EXC_DOUBLE_SLOTS;
for(i=0; i<count; ++i) {
exceptions[excTop++]=(uint16_t)(slots[i]>>16);
exceptions[excTop++]=(uint16_t)slots[i];
}
}
}
/* write the full case mapping strings */
if(p->specialCasing!=NULL) {
length=(uint16_t)p->specialCasing->lowerCase[0];
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->lowerCase+1, length);
excTop+=length;
}
if(p->caseFolding!=NULL) {
length=(uint16_t)p->caseFolding->full[0];
u_memcpy((UChar *)exceptions+excTop, p->caseFolding->full+1, length);
excTop+=length;
}
if(p->specialCasing!=NULL) {
length=(uint16_t)p->specialCasing->upperCase[0];
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->upperCase+1, length);
excTop+=length;
length=(uint16_t)p->specialCasing->titleCase[0];
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->titleCase+1, length);
excTop+=length;
}
/* write the closure data */
if(p->closure[0]!=0) {
UChar32 c;
for(i=0; i<LENGTHOF(p->closure) && (c=p->closure[i])!=0; ++i) {
U16_APPEND_UNSAFE((UChar *)exceptions, excTop, c);
/* write the full case mapping strings */
if(p->specialCasing!=NULL) {
length=(uint16_t)p->specialCasing->lowerCase[0];
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->lowerCase+1, length);
excTop+=length;
}
if(p->caseFolding!=NULL) {
length=(uint16_t)p->caseFolding->full[0];
u_memcpy((UChar *)exceptions+excTop, p->caseFolding->full+1, length);
excTop+=length;
}
if(p->specialCasing!=NULL) {
length=(uint16_t)p->specialCasing->upperCase[0];
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->upperCase+1, length);
excTop+=length;
length=(uint16_t)p->specialCasing->titleCase[0];
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->titleCase+1, length);
excTop+=length;
}
/* write the closure data */
if(p->closure[0]!=0) {
UChar32 c;
for(i=0; i<LENGTHOF(p->closure) && (c=p->closure[i])!=0; ++i) {
U16_APPEND_UNSAFE((UChar *)exceptions, excTop, c);
}
}
exceptionsTop=excTop;
/* write the main exceptions word */
exceptions[excIndex]=excWord;
return excIndex;
}
exceptionsTop=excTop;
/* write the main exceptions word */
exceptions[excIndex]=excWord;
return excIndex;
}
extern void

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2008, International Business Machines
* Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -164,6 +164,7 @@ enum {
UNI_4_1,
UNI_5_0,
UNI_5_1,
UNI_5_2,
UNI_VER_COUNT
};
@ -179,10 +180,11 @@ unicodeVersions[]={
{ 4, 0, 1, 0 },
{ 4, 1, 0, 0 },
{ 5, 0, 0, 0 },
{ 5, 1, 0, 0 }
{ 5, 1, 0, 0 },
{ 5, 2, 0, 0 }
};
static int32_t ucdVersion=UNI_5_1;
static int32_t ucdVersion=UNI_5_2;
static int32_t
findUnicodeVersion(const UVersionInfo version) {
@ -222,6 +224,19 @@ typedef struct Options {
UBool storeISOComments;
} Options;
/*
* Pair of code point and name alias.
* Try to keep sizeof(CpNameAlias) a multiple of 4 to avoid padding.
*/
typedef struct CpNameAlias {
uint32_t code;
char nameAlias[124];
} CpNameAlias;
static CpNameAlias cpNameAliases[50];
static uint32_t cpNameAliasesIndex=0, cpNameAliasesTop=0;
static uint8_t stringStore[STRING_STORE_SIZE],
groupStore[GROUP_STORE_SIZE],
lineLengths[LINES_PER_GROUP];
@ -258,6 +273,9 @@ static uint32_t tokenCount;
static void
init(void);
static void
parseNameAliases(const char *filename, Options *options);
static void
parseDB(const char *filename, Options *options);
@ -388,13 +406,15 @@ main(int argc, char* argv[]) {
* required supported string length is 509 bytes.
*/
fprintf(stderr,
"Usage: %s [-1[+|-]] [-v[+|-]] [-c[+|-]] filename\n"
"Usage: %s [-1[+|-]] [-v[+|-]] [-c[+|-]] [filename_ud [filename_na]]\n"
"\n"
"Read the UnicodeData.txt file and \n"
"create a binary file " DATA_NAME "." DATA_TYPE " with the character names\n"
"\n"
"\tfilename absolute path/filename for the Unicode database text file\n"
"\t\t(default: standard input)\n"
"\tfilename_ud absolute path/filename for the UnicodeData.txt file\n"
"\t (default: standard input)\n"
"\tfilename_na absolute path/filename for the NameAliases.txt file\n"
"\t (default: no name aliases)\n"
"\n",
argv[0]);
fprintf(stderr,
@ -429,6 +449,9 @@ main(int argc, char* argv[]) {
ucdVersion=findUnicodeVersion(version);
init();
if(argc>=3) {
parseNameAliases(argv[2], &moreOptions);
}
parseDB(argc>=2 ? argv[1] : "-", &moreOptions);
compress();
generateData(options[DESTDIR].value, &moreOptions);
@ -464,13 +487,68 @@ getName(char **pStart, char *limit) {
return (int16_t)(limit-start);
}
/*
 * Line callback used with u_parseDelimitedFile() for NameAliases.txt.
 * Field 0 holds the code point in hexadecimal, field 1 holds the name alias.
 * Each accepted entry is appended to cpNameAliases[] and the alias text is
 * then handed to parseName().
 * Every validation failure is fatal: a message is printed to stderr and
 * the tool exits with U_PARSE_ERROR.
 * The context and fieldCount arguments are not used.
 */
static void U_CALLCONV
nameAliasesLineFn(void *context,
                  char *fields[][2], int32_t fieldCount,
                  UErrorCode *pErrorCode) {
    /* code point seen on the previous call, for the ascending-order check */
    static uint32_t prevCode=0;

    CpNameAlias *entry;
    char *alias;
    uint32_t cp;
    int16_t aliasLength;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /* field 0: the character code */
    cp=uprv_strtoul(fields[0][0], NULL, 16);

    /* field 1: the alias; getName() receives the start pointer and returns the length */
    alias=fields[1][0];
    aliasLength=getName(&alias, fields[1][1]);

    /* the alias must be non-empty and leave room for the terminating NUL */
    if(aliasLength==0 || aliasLength>=sizeof(cpNameAliases[0].nameAlias)) {
        fprintf(stderr, "gennames: error - name alias %s empty or too long for code point U+%04lx\n",
                alias, (unsigned long)cp);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* aliases are only accepted for code points that pass U_IS_UNICODE_CHAR() */
    if(!U_IS_UNICODE_CHAR(cp)) {
        fprintf(stderr, "gennames: error - name alias for non-character code point U+%04lx\n",
                (unsigned long)cp);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* entries must be strictly ascending; U+0000 is exempt from the check */
    if(cp>0 && cp<=prevCode) {
        fprintf(stderr, "gennames: error - NameAliases entries out of order, U+%04lx after U+%04lx\n",
                (unsigned long)cp, (unsigned long)prevCode);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    prevCode=cp;

    /* the table has a fixed capacity */
    if(cpNameAliasesTop>=LENGTHOF(cpNameAliases)) {
        fprintf(stderr, "gennames: error - too many name aliases\n");
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* store the pair as a NUL-terminated string and advance the table top */
    entry=&cpNameAliases[cpNameAliasesTop++];
    entry->code=cp;
    uprv_memcpy(entry->nameAlias, alias, aliasLength);
    entry->nameAlias[aliasLength]=0;

    parseName(alias, aliasLength);
}
static void U_CALLCONV
lineFn(void *context,
char *fields[][2], int32_t fieldCount,
UErrorCode *pErrorCode) {
Options *storeOptions=(Options *)context;
char *names[3];
int16_t lengths[3]={ 0, 0, 0 };
char *names[4];
int16_t lengths[4]={ 0, 0, 0, 0 };
static uint32_t prevCode=0;
uint32_t code=0;
@ -532,23 +610,56 @@ lineFn(void *context,
parseName(names[1], lengths[1]);
parseName(names[2], lengths[2]);
if(cpNameAliasesIndex<cpNameAliasesTop && code>=cpNameAliases[cpNameAliasesIndex].code) {
if(code==cpNameAliases[cpNameAliasesIndex].code) {
names[3]=cpNameAliases[cpNameAliasesIndex].nameAlias;
lengths[3]=(int16_t)uprv_strlen(cpNameAliases[cpNameAliasesIndex].nameAlias);
++cpNameAliasesIndex;
} else {
fprintf(stderr, "gennames: error - NameAlias but no UnicodeData entry for U+%04lx\n",
(unsigned long)code);
*pErrorCode=U_PARSE_ERROR;
exit(U_PARSE_ERROR);
}
}
/*
* set the count argument to
* 1: only store regular names, or only store ISO 10646 comments
* 2: store regular and 1.0 names
* 3: store names and ISO 10646 comment
* 4: also store name alias
*
* addLine() will ignore empty trailing names
*/
if(storeOptions->storeNames) {
/* store names and comments as parsed according to storeOptions */
addLine(code, names, lengths, 3);
addLine(code, names, lengths, LENGTHOF(names));
} else {
/* store only ISO 10646 comments */
addLine(code, names+2, lengths+2, 1);
}
}
/*
 * Parse the NameAliases.txt file at filename, feeding each line to
 * nameAliasesLineFn().
 * Does nothing unless storeOptions->storeNames is set.
 * A parse failure is fatal: the error name is printed and the tool exits
 * with the error code.
 */
static void
parseNameAliases(const char *filename, Options *storeOptions) {
    char *fields[2][2];
    UErrorCode status=U_ZERO_ERROR;

    if(storeOptions->storeNames) {
        /* two fields per line: code point; name alias */
        u_parseDelimitedFile(filename, ';', fields, 2, nameAliasesLineFn, NULL, &status);
        if(U_FAILURE(status)) {
            fprintf(stderr, "gennames parse error: %s\n", u_errorName(status));
            exit(status);
        }

        if(!beQuiet) {
            printf("number of name aliases: %lu\n", (unsigned long)cpNameAliasesTop);
        }
    }
}
static void
parseDB(const char *filename, Options *storeOptions) {
char *fields[15][2];
@ -559,6 +670,11 @@ parseDB(const char *filename, Options *storeOptions) {
fprintf(stderr, "gennames parse error: %s\n", u_errorName(errorCode));
exit(errorCode);
}
if(cpNameAliasesIndex<cpNameAliasesTop) {
fprintf(stderr, "gennames: error - NameAlias but no UnicodeData entry for U+%04lx\n",
(unsigned long)cpNameAliases[cpNameAliasesIndex].code);
exit(U_PARSE_ERROR);
}
if(!beQuiet) {
printf("size of all names in the database: %lu\n",
@ -1099,6 +1215,11 @@ generateAlgorithmicData(UNewDataMemory *pData, Options *storeOptions) {
0, 5,
sizeof(AlgorithmicRange)+PREFIX_LENGTH_4
};
static AlgorithmicRange cjkExtC={
0x2a700, 0x2b734,
0, 5,
sizeof(AlgorithmicRange)+PREFIX_LENGTH_4
};
static char jamo[]=
"HANGUL SYLLABLE \0"
@ -1131,7 +1252,10 @@ generateAlgorithmicData(UNewDataMemory *pData, Options *storeOptions) {
size=0;
if(ucdVersion>=UNI_5_1) {
if(ucdVersion>=UNI_5_2) {
/* Unicode 5.2 and up has a longer CJK Unihan range than before */
cjk.rangeEnd=0x9FCB;
} else if(ucdVersion>=UNI_5_1) {
/* Unicode 5.1 and up has a longer CJK Unihan range than before */
cjk.rangeEnd=0x9FC3;
} else if(ucdVersion>=UNI_4_1) {
@ -1142,6 +1266,9 @@ generateAlgorithmicData(UNewDataMemory *pData, Options *storeOptions) {
/* number of ranges of algorithmic names */
if(!storeOptions->storeNames) {
countAlgRanges=0;
} else if(ucdVersion>=UNI_5_2) {
/* Unicode 5.2 and up has 5 ranges including CJK Extension C */
countAlgRanges=5;
} else if(ucdVersion>=UNI_3_1) {
/* Unicode 3.1 and up has 4 ranges including CJK Extension B */
countAlgRanges=4;
@ -1218,6 +1345,19 @@ generateAlgorithmicData(UNewDataMemory *pData, Options *storeOptions) {
}
}
/* range 4: cjk extension c */
if(countAlgRanges>=5) {
if(pData!=NULL) {
udata_writeBlock(pData, &cjkExtC, sizeof(AlgorithmicRange));
udata_writeString(pData, prefix, PREFIX_LENGTH);
if(PREFIX_LENGTH<PREFIX_LENGTH_4) {
udata_writePadding(pData, PREFIX_LENGTH_4-PREFIX_LENGTH);
}
} else {
size+=sizeof(AlgorithmicRange)+PREFIX_LENGTH_4;
}
}
return size;
}

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2008, International Business Machines
* Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -2006,9 +2006,6 @@ generateData(const char *dataDir, UBool csource) {
}
/* use UTrie2 */
dataInfo.formatVersion[0]=3;
dataInfo.formatVersion[2]=0;
dataInfo.formatVersion[3]=0;
normRuntimeTrie2=utrie2_fromUTrie(&normRuntimeTrie, 0, &errorCode);
if(fcdTrieSize>0) {
fcdRuntimeTrie2=utrie2_fromUTrie(&fcdRuntimeTrie, 0, &errorCode);
@ -2042,10 +2039,12 @@ generateData(const char *dataDir, UBool csource) {
f=usrc_create(dataDir, "unorm_props_data.c");
if(f!=NULL) {
/* unused
usrc_writeArray(f,
"static const UVersionInfo formatVersion={ ",
dataInfo.formatVersion, 8, 4,
" };\n\n");
*/
usrc_writeArray(f,
"static const UVersionInfo dataVersion={ ",
dataInfo.dataVersion, 8, 4,

View file

@ -1,5 +1,5 @@
########################################################################
# Copyright (c) 2006-2008, International Business Machines
# Copyright (c) 2006-2009, International Business Machines
# Corporation and others. All Rights Reserved.
########################################################################
# file name: SyntheticPropertyValueAliases.txt
@ -31,16 +31,13 @@ sc ; Cirt ; Cirt
sc ; Cyrs ; Cyrs
sc ; Egyd ; Egyd
sc ; Egyh ; Egyh
sc ; Egyp ; Egyp
sc ; Geok ; Geok
sc ; Hans ; Hans
sc ; Hant ; Hant
sc ; Hmng ; Hmng
sc ; Hung ; Hung
sc ; Inds ; Inds
sc ; Java ; Java
sc ; Jpan ; Jpan
sc ; Lana ; Lana
sc ; Latf ; Latf
sc ; Latg ; Latg
sc ; Lina ; Lina
@ -48,8 +45,6 @@ sc ; Mand ; Mand
sc ; Maya ; Maya
sc ; Mero ; Mero
sc ; Moon ; Moon
sc ; Mtei ; Mtei
sc ; Orkh ; Orkh
sc ; Perm ; Perm
sc ; Plrd ; Plrd
sc ; Roro ; Roro
@ -62,17 +57,12 @@ sc ; Teng ; Teng
sc ; Visp ; Visp
sc ; Zxxx ; Zxxx
sc ; Armi ; Armi
sc ; Avst ; Avst
sc ; Cakm ; Cakm
sc ; Kore ; Kore
sc ; Kthi ; Kthi
sc ; Mani ; Mani
sc ; Phli ; Phli
sc ; Phlp ; Phlp
sc ; Phlv ; Phlv
sc ; Prti ; Prti
sc ; Samr ; Samr
sc ; Tavt ; Tavt
sc ; Zmth ; Zmth
sc ; Zsym ; Zsym
sc ; Nkgb ; Nkgb

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
#!/bin/perl -w
#*******************************************************************
# COPYRIGHT:
# Copyright (c) 2002-2008, International Business Machines Corporation and
# Copyright (c) 2002-2009, International Business Machines Corporation and
# others. All Rights Reserved.
#*******************************************************************
@ -804,7 +804,7 @@ sub read_PropertyAliases {
if (/^\s*(.+?)\s*;/) {
my $short = $1;
my @fields = /;\s*([^\s;]+)/g;
if (@fields < 1 || @fields > 2) {
if (@fields < 1) {
my $number = @fields;
die "Error: Wrong number of fields ($number) in $filename at $_";
}

View file

@ -0,0 +1,169 @@
#!/usr/bin/python2.4
# Copyright (c) 2009 International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: ucdcopy.py
# encoding: US-ASCII
# tab size: 8 (not used)
# indentation:4
#
# created on: 2009aug04
# created by: Markus W. Scherer
#
# Copy Unicode Character Database (ucd) files from a tree
# of files downloaded from ftp://www.unicode.org/Public/5.2.0/
# to a folder like ICU's source/data/unidata/
# and modify some of the files to make them more compact.
#
# Invoke with two command-line parameters, for the source
# and destination folders.
import os
import os.path
import re
import shutil
import sys
_strip_re = re.compile("^([0-9a-fA-F]+.+?) *#.*")
_code_point_re = re.compile("\s*([0-9a-fA-F]+)\s*;")
def CopyAndStripWithOptionalMerge(s, t, do_merge):
in_file = open(s, "r")
out_file = open(t, "w")
first = -1 # First code point with first_data.
last = -1 # Last code point with first_data.
first_data = "" # Common data for code points [first..last].
for line in in_file:
match = _strip_re.match(line)
if match:
line = match.group(1)
else:
line = line.rstrip()
if do_merge:
match = _code_point_re.match(line)
if match:
c = int(match.group(1), 16)
data = line[match.end() - 1:]
else:
c = -1
data = ""
if last >= 0 and (c != (last + 1) or data != first_data):
# output the current range
if first == last:
out_file.write("%04X%s\n" % (first, first_data))
else:
out_file.write("%04X..%04X%s\n" % (first, last, first_data))
first = -1
last = -1
first_data = ""
if c < 0:
# no data on this line, output as is
out_file.write(line)
out_file.write("\n")
else:
# data on this line, store for possible range compaction
if last < 0:
# set as the first line in a possible range
first = c
last = c
first_data = data
else:
# must be c == (last + 1) and data == first_data
# because of previous conditions
# continue with the current range
last = c
else:
# Only strip, don't merge: just output the stripped line.
out_file.write(line)
out_file.write("\n")
if do_merge and last >= 0:
# output the last range in the file
if first == last:
out_file.write("%04X%s\n" % (first, first_data))
else:
out_file.write("%04X..%04X%s\n" % (first, last, first_data))
first = -1
last = -1
first_data = ""
in_file.close()
out_file.flush()
out_file.close()
def CopyAndStrip(s, t):
    """Copies file s to t, dropping the comments that follow data lines.

    Comments on other lines are kept, and no lines are merged.
    """
    CopyAndStripWithOptionalMerge(s, t, do_merge=False)
def CopyAndStripAndMerge(s, t):
    """Copies file s to t, stripping comments and merging adjacent lines.

    In addition to removing comments, lines for adjacent code points that
    carry identical per-code point data are combined into a single line
    that uses range syntax.
    """
    CopyAndStripWithOptionalMerge(s, t, do_merge=True)
# Maps the basename of each UCD file that gets copied (with any version
# suffix already removed) to the function that copies it.
# main() calls each function as f(source_path, dest_path) and ignores
# files whose basename is not listed here.
_unidata_files = {
# Simply copy these files.
"BidiMirroring.txt": shutil.copy,
"BidiTest.txt": shutil.copy,
"Blocks.txt": shutil.copy,
"CaseFolding.txt": shutil.copy,
"DerivedAge.txt": shutil.copy,
"DerivedBidiClass.txt": shutil.copy,
"DerivedJoiningGroup.txt": shutil.copy,
"DerivedJoiningType.txt": shutil.copy,
"DerivedNumericValues.txt": shutil.copy,
"NameAliases.txt": shutil.copy,
"NormalizationCorrections.txt": shutil.copy,
"PropertyAliases.txt": shutil.copy,
"PropertyValueAliases.txt": shutil.copy,
"SpecialCasing.txt": shutil.copy,
"UnicodeData.txt": shutil.copy,
# Copy these files and remove comments behind data lines but not in others.
"DerivedCoreProperties.txt": CopyAndStrip,
"DerivedNormalizationProps.txt": CopyAndStrip,
"GraphemeBreakProperty.txt": CopyAndStrip,
"NormalizationTest.txt": CopyAndStrip,
"PropList.txt": CopyAndStrip,
"Scripts.txt": CopyAndStrip,
"SentenceBreakProperty.txt": CopyAndStrip,
"WordBreakProperty.txt": CopyAndStrip,
# Also merge lines with adjacent code point ranges.
"EastAsianWidth.txt": CopyAndStripAndMerge,
"LineBreak.txt": CopyAndStripAndMerge
}
_file_version_re = re.compile("^([a-zA-Z0-9]+)" +
"-[0-9](?:\\.[0-9])*(?:d[0-9]+)?" +
"(\\.[a-z]+)$")
def main():
    """Copies the known UCD files from a source tree to a destination folder.

    Expects two command-line arguments: the root of the downloaded file tree
    and the destination folder. Walks the whole source tree, removes version
    suffixes from file names, and copies every file listed in _unidata_files
    with the function registered for it. Exits with status 1 if the arguments
    are missing or if two source files map to the same basename.
    """
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: %s source_root dest_root\n" % sys.argv[0])
        sys.exit(1)
    source_root = sys.argv[1]
    dest_root = sys.argv[2]
    # Collect all file paths first, then process them.
    source_files = []
    for root, _dirs, names in os.walk(source_root):
        for name in names:
            source_files.append(os.path.join(root, name))
    files_processed = set()
    for source_file in source_files:
        basename = os.path.basename(source_file)
        match = _file_version_re.match(basename)
        if match:
            # Drop the version suffix, e.g. "Name-5.2.0.txt" -> "Name.txt".
            basename = match.group(1) + match.group(2)
            print(basename)
        if basename in _unidata_files:
            if basename in files_processed:
                print("duplicate file basename %s!" % basename)
                sys.exit(1)
            files_processed.add(basename)
            dest_file = os.path.join(dest_root, basename)
            _unidata_files[basename](source_file, dest_file)


if __name__ == "__main__":
    main()

View file

@ -576,7 +576,6 @@ numericLineFn(void *context,
Props newProps={ 0 };
char *s, *numberLimit;
uint32_t start, end, value, oldProps32;
int32_t oldType;
char c;
UBool isFraction;
@ -618,16 +617,16 @@ numericLineFn(void *context,
/* parse numeric value */
s=(char *)u_skipWhitespace(fields[1][0]);
/* try large powers of 10 first, may otherwise overflow strtoul() */
if(0==uprv_strncmp(s, "10000000000", 11)) {
/* large powers of 10 are encoded in a special way, see store.c */
/* try large, single-significant-digit numbers, may otherwise overflow strtoul() */
if('1'<=s[0] && s[0]<='9' && s[1]=='0' && s[2]=='0') {
/* large integers are encoded in a special way, see store.c */
uint8_t exp=0;
value=s[0]-'0';
numberLimit=s;
while(*(++numberLimit)=='0') {
++exp;
}
value=1;
newProps.exponent=exp;
} else {
/* normal number parsing */
@ -648,18 +647,19 @@ numericLineFn(void *context,
* specific properties for single characters.
*/
/* set the new numeric type and value */
newProps.numericType=(uint8_t)U_NT_NUMERIC; /* assumed numeric type, see Unicode 4.0.1 comment */
/* set the new numeric value */
newProps.code=start;
newProps.numericValue=(int32_t)value; /* newly parsed numeric value */
/* the exponent may have been set above */
value=makeProps(&newProps);
for(; start<=end; ++start) {
uint32_t newProps32;
int32_t oldNtv;
oldProps32=getProps(start);
oldType=(int32_t)GET_NUMERIC_TYPE(oldProps32);
oldNtv=(int32_t)GET_NUMERIC_TYPE_VALUE(oldProps32);
if(isFraction) {
if(oldType!=0) {
if(UPROPS_NTV_FRACTION_START<=oldNtv && oldNtv<UPROPS_NTV_LARGE_START) {
/* this code point was already listed with its numeric value in UnicodeData.txt */
continue;
} else {
@ -672,26 +672,31 @@ numericLineFn(void *context,
* For simplicity, and because we only expect to set numeric values for Han characters,
* for now we only allow setting these values for Lo characters.
*/
if(oldType==0 && GET_CATEGORY(oldProps32)!=U_OTHER_LETTER) {
if(oldNtv==UPROPS_NTV_NONE && GET_CATEGORY(oldProps32)!=U_OTHER_LETTER) {
fprintf(stderr, "genprops error: new numeric value for a character other than Lo in DerivedNumericValues.txt at %s\n", fields[0][0]);
exit(U_PARSE_ERROR);
}
/* verify that we do not change an existing value (fractions were excluded above) */
if(oldType!=0) {
if(oldNtv!=UPROPS_NTV_NONE) {
/* the code point already has a value stored */
if((oldProps32&0xff00)!=(value&0xff00)) {
newProps.numericType=UPROPS_NTV_GET_TYPE(oldNtv);
newProps32=makeProps(&newProps);
if(oldNtv!=GET_NUMERIC_TYPE_VALUE(newProps32)) {
fprintf(stderr, "genprops error: new numeric value differs from old one for U+%04lx\n", (long)start);
exit(U_PARSE_ERROR);
}
/* same value, continue */
} else {
/* the code point is getting a new numeric value */
newProps.numericType=(uint8_t)U_NT_NUMERIC; /* assumed numeric type, see Unicode 4.0.1 comment */
newProps32=makeProps(&newProps);
if(beVerbose) {
printf("adding U+%04x numeric type %d value 0x%04x from %s\n", (int)start, U_NT_NUMERIC, (int)value, fields[0][0]);
printf("adding U+%04x numeric type %d encoded-numeric-type-value 0x%03x from %s\n",
(int)start, U_NT_NUMERIC, (int)GET_NUMERIC_TYPE_VALUE(newProps32), fields[0][0]);
}
addProps(start, value|GET_CATEGORY(oldProps32));
addProps(start, newProps32|GET_CATEGORY(oldProps32));
}
}
}

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2008, International Business Machines
* Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -41,19 +41,7 @@ the udata API for loading ICU data. Especially, a UDataInfo structure
precedes the actual data. It contains platform properties values and the
file format version.
The following is a description of format version 5 .
The format changes between version 3 and 4 because the properties related to
case mappings and bidi/shaping are pulled out into separate files
for modularization.
In order to reduce the need for code changes, some of the previous data
structures are omitted, rather than rearranging everything.
For details see "Changes in format version 4" below.
Format version 5 became necessary because the bit field for script codes
overflowed. Several bit fields got rearranged, and three (Script, Block,
Word_Break) got widened by one bit each.
The following is a description of format version 6 .
Data contents:
@ -75,7 +63,7 @@ Formally, the file contains the following structures:
const int32_t indexes[16] with values i0..i15:
i0 indicates the length of the main trie.
i0..i3 all have the same value in format version 4.0;
i0..i3 all have the same value in format versions 4.0 and higher;
the related props32[] and exceptions[] and uchars[] were used in format version 3
i0 propsIndex; -- 32-bit unit index to the table of 32-bit properties words
@ -95,7 +83,7 @@ Formally, the file contains the following structures:
PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
P, E, and U are not used (empty) in format version 4
P, E, and U are not used (empty) in format versions 4 and above
P const uint32_t props32[i1-i0];
E const uint32_t exceptions[i2-i1];
@ -130,45 +118,23 @@ and 0<=c<0x110000, the lookup is done like this:
Each 16-bit properties word contains:
0.. 4 general category
5.. 7 numeric type
non-digit numbers are stored with multiple types and pseudo-types
in order to facilitate compact encoding:
0 no numeric value (0)
1 decimal digit value (0..9)
2 digit value (0..9)
3 (U_NT_NUMERIC) normal non-digit numeric value 0..0xff
4 (internal type UPROPS_NT_FRACTION) fraction
5 (internal type UPROPS_NT_LARGE) large number >0xff
6..7 reserved
5 reserved
6..15 numeric type and value (ntv)
when returning the numeric type from a public API,
internal types must be turned into U_NT_NUMERIC
Encoding of numeric type and value in the 10-bit ntv field:
ntv type value
0 U_NT_NONE 0
1..10 U_NT_DECIMAL 0..9
11..20 U_NT_DIGIT 0..9
21..0x2ff U_NT_NUMERIC see below
0x300..0x3ff reserved
8..15 numeric value
encoding of fractions and large numbers see below
Fractions:
// n is the 8-bit numeric value from bits 8..15 of the trie word (shifted down)
int32_t num, den;
num=n>>3; // num=0..31
den=(n&7)+2; // den=2..9
if(num==0) {
num=-1; // num=-1 or 1..31
}
double result=(double)num/(double)den;
Large numbers:
// n is the 8-bit numeric value from bits 8..15 of the trie word (shifted down)
int32_t m, e;
m=n>>4; // m=0..15
e=(n&0xf);
if(m==0) {
m=1; // for large powers of 10
e+=18; // e=18..33
} else {
e+=2; // e=2..17
} // m==10..15 are reserved
double result=(double)m*10^e;
For U_NT_NUMERIC:
ntv value
21..0xaf integer 0..154
0xb0..0x1df fraction ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16
0x1e0..0x2ff large int ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
(only one significant decimal digit)
--- Additional properties (new in format version 2.1) ---
@ -225,11 +191,21 @@ The indexes[] values for the omitted structures are still filled in
--- Changes in format version 5 ---
Rearranged bit fields in the second trie (AT) because the script code field
overflowed. Old code would have seen nonsensically low values for new, higher
script codes.
Format version 5 became necessary because the bit field for script codes
overflowed. The changes are incompatible because
old code would have seen nonsensically low values for new, higher script codes.
Rearranged bit fields in the second trie (AT) and widened three (Script, Block,
Word_Break) by one bit each.
Modified bit fields in icu/source/common/uprops.h
--- Changes in format version 6 ---
Format version 6 became necessary because Unicode 5.2 adds fractions with
denominators 9, 10 and 16, and it was easier to redesign the encoding of numeric
types and values rather than add another variant to the previous format.
----------------------------------------------------------------------------- */
/* UDataInfo cf. udata.h */
@ -243,7 +219,7 @@ static UDataInfo dataInfo={
0,
{ 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */
{ 5, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
{ 6, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
{ 5, 1, 0, 0 } /* dataVersion */
};
@ -275,19 +251,12 @@ exitStore() {
exitAdditionalProperties();
}
static uint32_t printNumericTypeValueError(Props *p) {
fprintf(stderr, "genprops error: unable to encode numeric type & value %d %ld/%lu E%d\n",
(int)p->numericType, (long)p->numericValue, (unsigned long)p->denominator, p->exponent);
exit(U_ILLEGAL_ARGUMENT_ERROR);
return 0;
}
/* store a character's properties ------------------------------------------- */
extern uint32_t
makeProps(Props *p) {
uint32_t den;
int32_t type, value, exp;
int32_t type, value, exp, ntv;
/* encode numeric type & value */
type=p->numericType;
@ -295,74 +264,67 @@ makeProps(Props *p) {
den=p->denominator;
exp=p->exponent;
if(den!=0) {
/* fraction */
if( type!=U_NT_NUMERIC ||
value<-1 || value==0 || value>UPROPS_FRACTION_MAX_NUM ||
den<UPROPS_FRACTION_MIN_DEN || UPROPS_FRACTION_MAX_DEN<den ||
exp!=0
) {
return printNumericTypeValueError(p);
ntv=-1; /* the numeric type and value cannot be encoded if ntv remains -1 */
switch(type) {
case U_NT_NONE:
if(value==0 && den==0 && exp==0) {
ntv=UPROPS_NTV_NONE;
}
type=UPROPS_NT_FRACTION;
if(value==-1) {
value=0;
break;
case U_NT_DECIMAL:
if(0<=value && value<=9 && den==0 && exp==0) {
ntv=UPROPS_NTV_DECIMAL_START+value;
}
den-=UPROPS_FRACTION_DEN_OFFSET;
value=(value<<UPROPS_FRACTION_NUM_SHIFT)|den;
} else if(exp!=0) {
/* very large value */
if( type!=U_NT_NUMERIC ||
value<1 || 9<value ||
exp<UPROPS_LARGE_MIN_EXP || UPROPS_LARGE_MAX_EXP_EXTRA<exp
) {
return printNumericTypeValueError(p);
break;
case U_NT_DIGIT:
if(0<=value && value<=9 && den==0 && exp==0) {
ntv=UPROPS_NTV_DIGIT_START+value;
}
type=UPROPS_NT_LARGE;
if(exp<=UPROPS_LARGE_MAX_EXP) {
/* 1..9 * 10^(2..17) */
exp-=UPROPS_LARGE_EXP_OFFSET;
} else {
/* 1 * 10^(18..33) */
if(value!=1) {
return printNumericTypeValueError(p);
break;
case U_NT_NUMERIC:
if(den==0) {
if(exp==2 && (value*100)<=UPROPS_NTV_MAX_SMALL_INT) {
/* small integer parsed like a large one */
ntv=UPROPS_NTV_NUMERIC_START+value*100;
} else if(exp==0 && value>=0) {
if(value<=UPROPS_NTV_MAX_SMALL_INT) {
/* small integer */
ntv=UPROPS_NTV_NUMERIC_START+value;
} else {
/* large integer parsed like a small one */
/* split the value into mantissa and exponent, base 10 */
int32_t mant=value;
while((mant%10)==0) {
mant/=10;
++exp;
}
if(mant<=9) {
ntv=((mant+14)<<5)+(exp-2);
}
}
} else if(2<=exp && exp<=33 && 1<=value && value<=9) {
/* large, single-significant-digit integer */
ntv=((value+14)<<5)+(exp-2);
}
} else if(exp==0) {
if(-1<=value && value<=17 && 1<=den && den<=16) {
/* fraction */
ntv=((value+12)<<4)+(den-1);
}
value=0;
exp-=UPROPS_LARGE_EXP_OFFSET_EXTRA;
}
value=(value<<UPROPS_LARGE_MANT_SHIFT)|exp;
} else if(value>UPROPS_MAX_SMALL_NUMBER) {
/* large value */
if(type!=U_NT_NUMERIC) {
return printNumericTypeValueError(p);
}
type=UPROPS_NT_LARGE;
/* split the value into mantissa and exponent, base 10 */
while((value%10)==0) {
value/=10;
++exp;
}
if(value>9) {
return printNumericTypeValueError(p);
}
exp-=UPROPS_LARGE_EXP_OFFSET;
value=(value<<UPROPS_LARGE_MANT_SHIFT)|exp;
} else if(value<0) {
/* unable to encode negative values, other than fractions -1/x */
return printNumericTypeValueError(p);
/* } else normal value=0..0xff { */
default:
break;
}
if(ntv<0) {
fprintf(stderr, "genprops error: unable to encode numeric type %d & value %ld/%lu E%d\n",
(int)type, (long)value, (unsigned long)den, exp);
exit(U_ILLEGAL_ARGUMENT_ERROR);
}
/* encode the properties */
return
(uint32_t)p->generalCategory |
((uint32_t)type<<UPROPS_NUMERIC_TYPE_SHIFT) |
((uint32_t)value<<UPROPS_NUMERIC_VALUE_SHIFT);
(ntv<<UPROPS_NUMERIC_TYPE_VALUE_SHIFT);
}
extern void
@ -443,9 +405,6 @@ generateData(const char *dataDir, UBool csource) {
}
/* use UTrie2 */
dataInfo.formatVersion[0]=6;
dataInfo.formatVersion[2]=0;
dataInfo.formatVersion[3]=0;
trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(
@ -473,10 +432,12 @@ generateData(const char *dataDir, UBool csource) {
f=usrc_create(dataDir, "uchar_props_data.c");
if(f!=NULL) {
/* unused
usrc_writeArray(f,
"static const UVersionInfo formatVersion={",
dataInfo.formatVersion, 8, 4,
"};\n\n");
*/
usrc_writeArray(f,
"static const UVersionInfo dataVersion={",
dataInfo.dataVersion, 8, 4,

View file

@ -0,0 +1,53 @@
#!/usr/bin/python
# Copyright (C) 2009, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: genteststub.py
# encoding: US-ASCII
# tab size: 8 (not used)
# indentation:2
__author__ = "Claire Ho"
import sys

# Input file used when no file name is given on the command line.
DEFAULT_INPUT = "CollationTest_NON_IGNORABLE_SHORT.txt"
# One test case out of every SAMPLE_INTERVAL is copied into the stub file.
SAMPLE_INTERVAL = 10


def select_stub_lines(lines, step=SAMPLE_INTERVAL):
  """Yield the lines (without line terminators) that belong in the stub file.

  lines: iterable of text lines, e.g. an open file.
  step:  keep the first test case and then every step-th one (must be >= 1).

  Rules:
  - A line with '#' in column 0 is a header line; it is kept (right-trimmed).
  - On any other line a trailing '# ...' comment and trailing whitespace are
    removed; lines that are empty afterwards are ignored.
  - The first remaining line (test case) is kept, then every step-th one.
  - The very last test case is always kept, even when it does not fall on a
    step boundary and even when blank or comment lines follow it.
  """
  pending = None        # most recent test case that has not been emitted yet
  since_emit = step     # forces the first test case to be emitted
  for raw in lines:
    pos = raw.find("#")
    if pos == 0:
      # header line: copy it through
      yield raw.rstrip()
      continue
    if pos > 0:
      raw = raw[:pos]
    case = raw.rstrip()
    if not case:
      continue
    if since_emit == step:
      yield case
      since_emit = 0
      pending = None
    else:
      pending = case
    since_emit += 1
  if pending is not None:
    yield pending       # write the last case


def main(argv):
  """Command-line entry point.

  argv[1] (optional): input file name, default DEFAULT_INPUT.
  argv[2] (optional): output file name; derived from the input name if omitted.
  """
  # print(<one string>) behaves the same under Python 2 and Python 3.
  print("command: python genteststub.py <inputFileName.txt> <outputFileName.txt>")
  print("if the fileName.txt is omitted, the default data file is CollationTest_NON_IGNORABLE_SHORT.txt")
  if len(argv) >= 2:
    fname = argv[1]
  else:
    fname = DEFAULT_INPUT
  openfile = open(fname, 'r')
  try:
    # output file name
    ext = fname.find(".txt")
    if len(argv) >= 3:
      wFname = argv[2]
    elif ext > 0:
      # NOTE(review): the slice keeps the '.', so "foo.txt" maps to
      # "foo._STUB.txt". Kept unchanged because existing users may rely on
      # the generated name.
      wFname = fname[:ext+1] + "_STUB.txt"
    else:
      wFname = "out.txt"
    wrfile = open(wFname, 'w')
    try:
      print("Reading file: "+fname+" ...")
      print("Writing file: "+wFname+" ...")
      for out in select_stub_lines(openfile):
        wrfile.write(out + "\n")
    finally:
      wrfile.close()
  finally:
    openfile.close()


if __name__ == "__main__":
  main(sys.argv)

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2000-2008, International Business Machines
* Copyright (C) 2000-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -519,6 +519,7 @@ UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UE
//consts->UCA_NEXT_TOP_VALUE = theValue<<24 | 0x030303;
//}
} else if (vt[cnt].what_to_do == READCE) { /* vt[cnt].what_to_do == READCE */
// TODO: combine & clean up the two CE parsers
pointer = strchr(buffer+vtLen, '[');
if(pointer) {
pointer++;
@ -705,6 +706,29 @@ UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UE
pointer++;
}
}
// Check for valid bytes in CE weights.
// TODO: Tighten this so that it allows 03 & 04 in intermediate bytes
// but not in final bytes.
// See http://bugs.icu-project.org/trac/ticket/7167
for (i = 0; i < (int32_t)CEindex; ++i) {
uint32_t value = element->CEs[i];
uint8_t bytes[4] = {
(uint8_t)(value >> 24),
(uint8_t)(value >> 16),
(uint8_t)(value >> 8),
(uint8_t)(value & UCOL_NEW_TERTIARYORDERMASK)
};
for (int j = 0; j < 4; ++j) {
uint8_t maxByte =
(isContinuation(value) || j == 1) ?
UCOL_BYTE_FIRST_TAILORED :
UCOL_BYTE_COMMON;
if (0 != bytes[j] && bytes[j] < maxByte) {
fprintf(stderr, "Warning: invalid UCA weight byte %02X for %s\n", bytes[j], buffer);
// TODO: return NULL;
}
}
}
if(U_FAILURE(*status)) {
fprintf(stderr, "problem putting stuff in hash table %s\n", u_errorName(*status));
@ -786,6 +810,14 @@ void writeOutData(UCATableHeader *data,
}
}
enum {
/*
 * Maximum number of UCA contractions we can store.
 * This is the number of rows of the contractionCEs table declared in
 * write_uca_table(); the code that fills contractionCEs compares the running
 * count against this limit and exits with an error message once it is reached.
 * May need to be increased for a new Unicode version.
 */
MAX_UCA_CONTRACTION_CES=2048
};
static int32_t
write_uca_table(const char *filename,
const char *outputDir,
@ -817,8 +849,8 @@ write_uca_table(const char *filename,
return 0;
}
uprv_memset(opts, 0, sizeof(UColOptionSet));
UChar contractionCEs[512][3];
uprv_memset(contractionCEs, 0, 512*3*sizeof(UChar));
UChar contractionCEs[MAX_UCA_CONTRACTION_CES][3];
uprv_memset(contractionCEs, 0, sizeof(contractionCEs));
uint32_t noOfContractions = 0;
UCAConstants consts;
uprv_memset(&consts, 0, sizeof(consts));
@ -948,6 +980,13 @@ struct {
if(UTF_IS_LEAD(element->cPoints[0]) && UTF_IS_TRAIL(element->cPoints[1]) && element->cSize == 2) {
surrogateCount++;
} else {
if(noOfContractions>=MAX_UCA_CONTRACTION_CES) {
fprintf(stderr,
"\nMore than %d contractions. Please increase MAX_UCA_CONTRACTION_CES in genuca.cpp. "
"Exiting...\n",
(int)MAX_UCA_CONTRACTION_CES);
exit(*status);
}
contractionCEs[noOfContractions][0] = element->cPoints[0];
contractionCEs[noOfContractions][1] = element->cPoints[1];
if(element->cSize > 2) { // the third one
@ -967,6 +1006,13 @@ struct {
// contractionCEs[1]: '\0' to differentiate with contractions.
// contractionCEs[2]: prefix char
if (element->prefixSize>0) {
if(noOfContractions>=MAX_UCA_CONTRACTION_CES) {
fprintf(stderr,
"\nMore than %d contractions. Please increase MAX_UCA_CONTRACTION_CES in genuca.cpp. "
"Exiting...\n",
(int)MAX_UCA_CONTRACTION_CES);
exit(*status);
}
contractionCEs[noOfContractions][0]=element->cPoints[0];
contractionCEs[noOfContractions][1]='\0';
contractionCEs[noOfContractions][2]=element->prefixChars[0];

View file

@ -91,7 +91,7 @@ uprops_swap(const UDataSwapper *ds,
pInfo->dataFormat[1]==0x50 &&
pInfo->dataFormat[2]==0x72 &&
pInfo->dataFormat[3]==0x6f &&
(pInfo->formatVersion[0]==3 || pInfo->formatVersion[0]==4 || pInfo->formatVersion[0]==5) &&
(3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=6) &&
pInfo->formatVersion[2]==UTRIE_SHIFT &&
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
)) {

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2000-2007, International Business Machines
* Copyright (C) 2000-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -28,14 +28,26 @@
#include <stdio.h>
/*
 * Is c a whitespace character? Matches space, tab, CR and LF only.
 * The argument is evaluated up to four times, so it must not have side effects.
 */
#define IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
U_CAPI const char * U_EXPORT2
u_skipWhitespace(const char *s) {
while(*s==' ' || *s=='\t') {
while(IS_INV_WHITESPACE(*s)) {
++s;
}
return s;
}
/*
 * Remove trailing whitespace (space, tab, CR, LF) from s in place by
 * overwriting it with NUL bytes.
 * Returns a pointer to the terminating NUL of the trimmed string.
 */
U_CAPI char * U_EXPORT2
u_rtrim(char *s) {
    char *limit=uprv_strchr(s, 0); /* points at the terminating NUL */
    for(; limit>s; --limit) {
        if(!IS_INV_WHITESPACE(limit[-1])) {
            break; /* last remaining character is not whitespace */
        }
        limit[-1]=0;
    }
    return limit;
}
/*
* If the string starts with # @missing: then return the pointer to the
* following non-whitespace character.
@ -90,12 +102,8 @@ u_parseDelimitedFile(const char *filename, char delimiter,
}
while(T_FileStream_readLine(file, line, sizeof(line))!=NULL) {
length=(int32_t)uprv_strlen(line);
/* remove trailing newline characters */
while(length>0 && (line[length-1]=='\r' || line[length-1]=='\n')) {
line[--length]=0;
}
length=(int32_t)(u_rtrim(line)-line);
/*
* detect a line with # @missing:
@ -118,7 +126,7 @@ u_parseDelimitedFile(const char *filename, char delimiter,
limit=uprv_strchr(start, '#');
if(limit!=NULL) {
/* get white space before the pound sign */
while(limit>start && (*(limit-1)==' ' || *(limit-1)=='\t')) {
while(limit>start && IS_INV_WHITESPACE(*(limit-1))) {
--limit;
}
@ -202,7 +210,7 @@ u_parseCodePoints(const char *s,
/* read one code point */
value=(uint32_t)uprv_strtoul(s, &end, 16);
if(end<=s || (*end!=' ' && *end!='\t' && *end!=';' && *end!=0) || value>=0x110000) {
if(end<=s || (!IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {
*pErrorCode=U_PARSE_ERROR;
return 0;
}
@ -261,7 +269,7 @@ u_parseString(const char *s,
/* read one code point */
value=(uint32_t)uprv_strtoul(s, &end, 16);
if(end<=s || (*end!=' ' && *end!='\t' && *end!=';' && *end!=0) || value>=0x110000) {
if(end<=s || (!IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {
*pErrorCode=U_PARSE_ERROR;
return 0;
}
@ -307,7 +315,7 @@ u_parseCodePointRange(const char *s,
/* read the start code point */
value=(uint32_t)uprv_strtoul(s, &end, 16);
if(end<=s || (*end!=' ' && *end!='\t' && *end!='.' && *end!=';') || value>=0x110000) {
if(end<=s || (!IS_INV_WHITESPACE(*end) && *end!='.' && *end!=';' && *end!=0) || value>=0x110000) {
*pErrorCode=U_PARSE_ERROR;
return 0;
}
@ -327,7 +335,7 @@ u_parseCodePointRange(const char *s,
/* read the end code point */
value=(uint32_t)uprv_strtoul(s, &end, 16);
if(end<=s || (*end!=' ' && *end!='\t' && *end!=';') || value>=0x110000) {
if(end<=s || (!IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {
*pErrorCode=U_PARSE_ERROR;
return 0;
}

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2000-2004, International Business Machines
* Copyright (C) 2000-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -34,6 +34,15 @@ U_CDECL_BEGIN
U_CAPI const char * U_EXPORT2
u_skipWhitespace(const char *s);
/**
 * Trim whitespace (including line endings) from the end of the string.
 * The string is modified in place: each trailing space, tab, CR or LF
 * is overwritten with a NUL terminator.
 *
 * @param s Pointer to the NUL-terminated string; must be writable.
 * @return Pointer to the new end of the string, that is, to its terminating NUL
 *         (equal to s if the string is empty or consists only of whitespace).
 */
U_CAPI char * U_EXPORT2
u_rtrim(char *s);
/** Function type for u_parseDelimitedFile(). */
typedef void U_CALLCONV
UParseLineFn(void *context,