mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-7264 merge Unicode 6.0 into trunk from branches/markus/uni60 -r 28339:28657
X-SVN-Rev: 28661
This commit is contained in:
parent
3c7ba0c2e4
commit
b5e1330176
87 changed files with 21109 additions and 17841 deletions
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2009, International Business Machines
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -145,11 +145,7 @@ ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode)
|
|||
uprv_memset(pBiDi, 0, sizeof(UBiDi));
|
||||
|
||||
/* get BiDi properties */
|
||||
pBiDi->bdp=ubidi_getSingleton(pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
uprv_free(pBiDi);
|
||||
return NULL;
|
||||
}
|
||||
pBiDi->bdp=ubidi_getSingleton();
|
||||
|
||||
/* allocate memory for arrays as requested */
|
||||
if(maxLength>0) {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004-2008, International Business Machines
|
||||
* Copyright (C) 2004-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -38,286 +38,16 @@ struct UBiDiProps {
|
|||
uint8_t formatVersion[4];
|
||||
};
|
||||
|
||||
/* data loading etc. -------------------------------------------------------- */
|
||||
|
||||
#if UBIDI_HARDCODE_DATA
|
||||
|
||||
/* ubidi_props_data.c is machine-generated by genbidi --csource */
|
||||
#include "ubidi_props_data.c"
|
||||
|
||||
#else
|
||||
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void *context,
|
||||
const char *type, const char *name,
|
||||
const UDataInfo *pInfo) {
|
||||
if(
|
||||
pInfo->size>=20 &&
|
||||
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
|
||||
pInfo->charsetFamily==U_CHARSET_FAMILY &&
|
||||
pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */
|
||||
pInfo->dataFormat[1]==UBIDI_FMT_1 &&
|
||||
pInfo->dataFormat[2]==UBIDI_FMT_2 &&
|
||||
pInfo->dataFormat[3]==UBIDI_FMT_3 &&
|
||||
pInfo->formatVersion[0]==1 &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
|
||||
) {
|
||||
UBiDiProps *bdp=(UBiDiProps *)context;
|
||||
uprv_memcpy(bdp->formatVersion, pInfo->formatVersion, 4);
|
||||
return TRUE;
|
||||
} else {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static UBiDiProps *
|
||||
ubidi_openData(UBiDiProps *bdpProto,
|
||||
const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
|
||||
UBiDiProps *bdp;
|
||||
int32_t size;
|
||||
|
||||
bdpProto->indexes=(const int32_t *)bin;
|
||||
if( (length>=0 && length<16*4) ||
|
||||
bdpProto->indexes[UBIDI_IX_INDEX_TOP]<16
|
||||
) {
|
||||
/* length or indexes[] too short for minimum indexes[] length of 16 */
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
size=bdpProto->indexes[UBIDI_IX_INDEX_TOP]*4;
|
||||
if(length>=0) {
|
||||
if(length>=size && length>=bdpProto->indexes[UBIDI_IX_LENGTH]) {
|
||||
length-=size;
|
||||
} else {
|
||||
/* length too short for indexes[] or for the whole data length */
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
bin+=size;
|
||||
/* from here on, assume that the sizes of the items fit into the total length */
|
||||
|
||||
/* unserialize the trie, after indexes[] */
|
||||
size=bdpProto->indexes[UBIDI_IX_TRIE_SIZE];
|
||||
utrie_unserialize(&bdpProto->trie, bin, size, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
bin+=size;
|
||||
|
||||
/* get mirrors[] */
|
||||
size=4*bdpProto->indexes[UBIDI_IX_MIRROR_LENGTH];
|
||||
bdpProto->mirrors=(const uint32_t *)bin;
|
||||
bin+=size;
|
||||
|
||||
/* get jgArray[] */
|
||||
size=bdpProto->indexes[UBIDI_IX_JG_LIMIT]-bdpProto->indexes[UBIDI_IX_JG_START];
|
||||
bdpProto->jgArray=bin;
|
||||
bin+=size;
|
||||
|
||||
/* allocate, copy, and return the new UBiDiProps */
|
||||
bdp=(UBiDiProps *)uprv_malloc(sizeof(UBiDiProps));
|
||||
if(bdp==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
} else {
|
||||
uprv_memcpy(bdp, bdpProto, sizeof(UBiDiProps));
|
||||
return bdp;
|
||||
}
|
||||
}
|
||||
|
||||
U_CFUNC UBiDiProps *
|
||||
ubidi_openProps(UErrorCode *pErrorCode) {
|
||||
UBiDiProps bdpProto={ NULL }, *bdp;
|
||||
|
||||
bdpProto.mem=udata_openChoice(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, isAcceptable, &bdpProto, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bdp=ubidi_openData(
|
||||
&bdpProto,
|
||||
udata_getMemory(bdpProto.mem),
|
||||
udata_getLength(bdpProto.mem),
|
||||
pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
udata_close(bdpProto.mem);
|
||||
return NULL;
|
||||
} else {
|
||||
return bdp;
|
||||
}
|
||||
}
|
||||
|
||||
U_CFUNC UBiDiProps *
|
||||
ubidi_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
|
||||
UBiDiProps bdpProto={ NULL };
|
||||
const DataHeader *hdr;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
if(bin==NULL) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* check the header */
|
||||
if(length>=0 && length<20) {
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
hdr=(const DataHeader *)bin;
|
||||
if(
|
||||
!(hdr->dataHeader.magic1==0xda && hdr->dataHeader.magic2==0x27 &&
|
||||
hdr->info.isBigEndian==U_IS_BIG_ENDIAN &&
|
||||
isAcceptable(&bdpProto, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &hdr->info))
|
||||
) {
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bin+=hdr->dataHeader.headerSize;
|
||||
if(length>=0) {
|
||||
length-=hdr->dataHeader.headerSize;
|
||||
}
|
||||
return ubidi_openData(&bdpProto, bin, length, pErrorCode);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
U_CFUNC void
|
||||
ubidi_closeProps(UBiDiProps *bdp) {
|
||||
if(bdp!=NULL) {
|
||||
#if !UBIDI_HARDCODE_DATA
|
||||
udata_close(bdp->mem);
|
||||
#endif
|
||||
uprv_free(bdp);
|
||||
}
|
||||
}
|
||||
|
||||
/* UBiDiProps singleton ----------------------------------------------------- */
|
||||
|
||||
#if !UBIDI_HARDCODE_DATA
|
||||
static UBiDiProps *gBdpDummy=NULL;
|
||||
static UBiDiProps *gBdp=NULL;
|
||||
static UErrorCode gErrorCode=U_ZERO_ERROR;
|
||||
static int8_t gHaveData=0;
|
||||
|
||||
static UBool U_CALLCONV
|
||||
ubidi_cleanup(void) {
|
||||
ubidi_closeProps(gBdpDummy);
|
||||
gBdpDummy=NULL;
|
||||
ubidi_closeProps(gBdp);
|
||||
gBdp=NULL;
|
||||
gErrorCode=U_ZERO_ERROR;
|
||||
gHaveData=0;
|
||||
return TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
U_CFUNC const UBiDiProps *
|
||||
ubidi_getSingleton(UErrorCode *pErrorCode) {
|
||||
#if UBIDI_HARDCODE_DATA
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
ubidi_getSingleton() {
|
||||
return &ubidi_props_singleton;
|
||||
#else
|
||||
int8_t haveData;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
UMTX_CHECK(NULL, gHaveData, haveData);
|
||||
|
||||
if(haveData>0) {
|
||||
/* data was loaded */
|
||||
return gBdp;
|
||||
} else if(haveData<0) {
|
||||
/* data loading failed */
|
||||
*pErrorCode=gErrorCode;
|
||||
return NULL;
|
||||
} else /* haveData==0 */ {
|
||||
/* load the data */
|
||||
UBiDiProps *bdp=ubidi_openProps(pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
gHaveData=-1;
|
||||
gErrorCode=*pErrorCode;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* set the static variables */
|
||||
umtx_lock(NULL);
|
||||
if(gBdp==NULL) {
|
||||
gBdp=bdp;
|
||||
bdp=NULL;
|
||||
gHaveData=1;
|
||||
ucln_common_registerCleanup(UCLN_COMMON_UBIDI, ubidi_cleanup);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
||||
ubidi_closeProps(bdp);
|
||||
return gBdp;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !UBIDI_HARDCODE_DATA
|
||||
U_CAPI const UBiDiProps *
|
||||
ubidi_getDummy(UErrorCode *pErrorCode) {
|
||||
UBiDiProps *bdp;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
UMTX_CHECK(NULL, gBdpDummy, bdp);
|
||||
|
||||
if(bdp!=NULL) {
|
||||
/* the dummy object was already created */
|
||||
return bdp;
|
||||
} else /* bdp==NULL */ {
|
||||
/* create the dummy object */
|
||||
int32_t *indexes;
|
||||
|
||||
bdp=(UBiDiProps *)uprv_malloc(sizeof(UBiDiProps)+UBIDI_IX_TOP*4+UTRIE_DUMMY_SIZE);
|
||||
if(bdp==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
uprv_memset(bdp, 0, sizeof(UBiDiProps)+UBIDI_IX_TOP*4);
|
||||
|
||||
bdp->indexes=indexes=(int32_t *)(bdp+1);
|
||||
indexes[UBIDI_IX_INDEX_TOP]=UBIDI_IX_TOP;
|
||||
|
||||
indexes[UBIDI_IX_TRIE_SIZE]=
|
||||
utrie_unserializeDummy(&bdp->trie, indexes+UBIDI_IX_TOP, UTRIE_DUMMY_SIZE, 0, 0, TRUE, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
uprv_free(bdp);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bdp->formatVersion[0]=1;
|
||||
bdp->formatVersion[2]=UTRIE_SHIFT;
|
||||
bdp->formatVersion[3]=UTRIE_INDEX_SHIFT;
|
||||
|
||||
/* set the static variables */
|
||||
umtx_lock(NULL);
|
||||
if(gBdpDummy==NULL) {
|
||||
gBdpDummy=bdp;
|
||||
bdp=NULL;
|
||||
ucln_common_registerCleanup(UCLN_COMMON_UBIDI, ubidi_cleanup);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
||||
uprv_free(bdp);
|
||||
return gBdpDummy;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* set of property starts for UnicodeSet ------------------------------------ */
|
||||
|
||||
static UBool U_CALLCONV
|
||||
|
@ -476,29 +206,15 @@ ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) {
|
|||
|
||||
U_CFUNC UCharDirection
|
||||
u_charDirection(UChar32 c) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
|
||||
if(bdp!=NULL) {
|
||||
return ubidi_getClass(bdp, c);
|
||||
} else {
|
||||
return U_LEFT_TO_RIGHT;
|
||||
}
|
||||
return ubidi_getClass(&ubidi_props_singleton, c);
|
||||
}
|
||||
|
||||
U_CFUNC UBool
|
||||
u_isMirrored(UChar32 c) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
|
||||
return (UBool)(bdp!=NULL && ubidi_isMirrored(bdp, c));
|
||||
return ubidi_isMirrored(&ubidi_props_singleton, c);
|
||||
}
|
||||
|
||||
U_CFUNC UChar32
|
||||
u_charMirror(UChar32 c) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
|
||||
if(bdp!=NULL) {
|
||||
return ubidi_getMirror(bdp, c);
|
||||
} else {
|
||||
return c;
|
||||
}
|
||||
return ubidi_getMirror(&ubidi_props_singleton, c);
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004-2008, International Business Machines
|
||||
* Copyright (C) 2004-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -24,8 +24,6 @@
|
|||
#include "uset_imp.h"
|
||||
#include "udataswp.h"
|
||||
|
||||
#define UBIDI_HARDCODE_DATA 1
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/* library API -------------------------------------------------------------- */
|
||||
|
@ -33,28 +31,8 @@ U_CDECL_BEGIN
|
|||
struct UBiDiProps;
|
||||
typedef struct UBiDiProps UBiDiProps;
|
||||
|
||||
U_CFUNC UBiDiProps *
|
||||
ubidi_openProps(UErrorCode *pErrorCode);
|
||||
|
||||
U_CFUNC UBiDiProps *
|
||||
ubidi_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode);
|
||||
|
||||
U_CFUNC void
|
||||
ubidi_closeProps(UBiDiProps *bdp);
|
||||
|
||||
|
||||
U_CFUNC const UBiDiProps *
|
||||
ubidi_getSingleton(UErrorCode *pErrorCode);
|
||||
|
||||
#if !UBIDI_HARDCODE_DATA
|
||||
/**
|
||||
* Get a singleton dummy object, one that works with no real data.
|
||||
* This can be used when the real data is not available.
|
||||
* Using the dummy can reduce checks for available data after an initial failure.
|
||||
*/
|
||||
U_CAPI const UBiDiProps *
|
||||
ubidi_getDummy(UErrorCode *pErrorCode);
|
||||
#endif
|
||||
ubidi_getSingleton(void);
|
||||
|
||||
U_CAPI int32_t
|
||||
ubidi_swap(const UDataSwapper *ds,
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004-2009, International Business Machines
|
||||
* Copyright (C) 2004-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -39,291 +39,16 @@ struct UCaseProps {
|
|||
uint8_t formatVersion[4];
|
||||
};
|
||||
|
||||
/* data loading etc. -------------------------------------------------------- */
|
||||
|
||||
#if UCASE_HARDCODE_DATA
|
||||
|
||||
/* ucase_props_data.c is machine-generated by gencase --csource */
|
||||
#include "ucase_props_data.c"
|
||||
|
||||
#else
|
||||
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void *context,
|
||||
const char *type, const char *name,
|
||||
const UDataInfo *pInfo) {
|
||||
if(
|
||||
pInfo->size>=20 &&
|
||||
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
|
||||
pInfo->charsetFamily==U_CHARSET_FAMILY &&
|
||||
pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */
|
||||
pInfo->dataFormat[1]==UCASE_FMT_1 &&
|
||||
pInfo->dataFormat[2]==UCASE_FMT_2 &&
|
||||
pInfo->dataFormat[3]==UCASE_FMT_3 &&
|
||||
pInfo->formatVersion[0]==1 &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
|
||||
) {
|
||||
UCaseProps *csp=(UCaseProps *)context;
|
||||
uprv_memcpy(csp->formatVersion, pInfo->formatVersion, 4);
|
||||
return TRUE;
|
||||
} else {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static UCaseProps *
|
||||
ucase_openData(UCaseProps *cspProto,
|
||||
const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
|
||||
UCaseProps *csp;
|
||||
int32_t size;
|
||||
|
||||
cspProto->indexes=(const int32_t *)bin;
|
||||
if( (length>=0 && length<16*4) ||
|
||||
cspProto->indexes[UCASE_IX_INDEX_TOP]<16
|
||||
) {
|
||||
/* length or indexes[] too short for minimum indexes[] length of 16 */
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
size=cspProto->indexes[UCASE_IX_INDEX_TOP]*4;
|
||||
if(length>=0) {
|
||||
if(length>=size && length>=cspProto->indexes[UCASE_IX_LENGTH]) {
|
||||
length-=size;
|
||||
} else {
|
||||
/* length too short for indexes[] or for the whole data length */
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
bin+=size;
|
||||
/* from here on, assume that the sizes of the items fit into the total length */
|
||||
|
||||
/* unserialize the trie, after indexes[] */
|
||||
size=cspProto->indexes[UCASE_IX_TRIE_SIZE];
|
||||
utrie_unserialize(&cspProto->trie, bin, size, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
bin+=size;
|
||||
|
||||
/* get exceptions[] */
|
||||
size=2*cspProto->indexes[UCASE_IX_EXC_LENGTH];
|
||||
cspProto->exceptions=(const uint16_t *)bin;
|
||||
bin+=size;
|
||||
|
||||
/* get unfold[] */
|
||||
size=2*cspProto->indexes[UCASE_IX_UNFOLD_LENGTH];
|
||||
if(size!=0) {
|
||||
cspProto->unfold=(const UChar *)bin;
|
||||
bin+=size;
|
||||
} else {
|
||||
cspProto->unfold=NULL;
|
||||
}
|
||||
|
||||
/* allocate, copy, and return the new UCaseProps */
|
||||
csp=(UCaseProps *)uprv_malloc(sizeof(UCaseProps));
|
||||
if(csp==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
} else {
|
||||
uprv_memcpy(csp, cspProto, sizeof(UCaseProps));
|
||||
return csp;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI UCaseProps * U_EXPORT2
|
||||
ucase_open(UErrorCode *pErrorCode) {
|
||||
UCaseProps cspProto={ NULL }, *csp;
|
||||
|
||||
cspProto.mem=udata_openChoice(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, isAcceptable, &cspProto, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
csp=ucase_openData(
|
||||
&cspProto,
|
||||
udata_getMemory(cspProto.mem),
|
||||
udata_getLength(cspProto.mem),
|
||||
pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
udata_close(cspProto.mem);
|
||||
return NULL;
|
||||
} else {
|
||||
return csp;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI UCaseProps * U_EXPORT2
|
||||
ucase_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
|
||||
UCaseProps cspProto={ NULL };
|
||||
const DataHeader *hdr;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
if(bin==NULL) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* check the header */
|
||||
if(length>=0 && length<20) {
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
hdr=(const DataHeader *)bin;
|
||||
if(
|
||||
!(hdr->dataHeader.magic1==0xda && hdr->dataHeader.magic2==0x27 &&
|
||||
hdr->info.isBigEndian==U_IS_BIG_ENDIAN &&
|
||||
isAcceptable(&cspProto, UCASE_DATA_TYPE, UCASE_DATA_NAME, &hdr->info))
|
||||
) {
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bin+=hdr->dataHeader.headerSize;
|
||||
if(length>=0) {
|
||||
length-=hdr->dataHeader.headerSize;
|
||||
}
|
||||
return ucase_openData(&cspProto, bin, length, pErrorCode);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucase_close(UCaseProps *csp) {
|
||||
if(csp!=NULL) {
|
||||
#if !UCASE_HARDCODE_DATA
|
||||
udata_close(csp->mem);
|
||||
#endif
|
||||
uprv_free(csp);
|
||||
}
|
||||
}
|
||||
|
||||
/* UCaseProps singleton ----------------------------------------------------- */
|
||||
|
||||
#if !UCASE_HARDCODE_DATA
|
||||
static UCaseProps *gCsp=NULL;
|
||||
static UCaseProps *gCspDummy=NULL;
|
||||
static UErrorCode gErrorCode=U_ZERO_ERROR;
|
||||
static int8_t gHaveData=0;
|
||||
#endif
|
||||
|
||||
#if !UCASE_HARDCODE_DATA
|
||||
static UBool U_CALLCONV ucase_cleanup(void) {
|
||||
ucase_close(gCsp);
|
||||
gCsp=NULL;
|
||||
ucase_close(gCspDummy);
|
||||
gCspDummy=NULL;
|
||||
gErrorCode=U_ZERO_ERROR;
|
||||
gHaveData=0;
|
||||
return TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
U_CAPI const UCaseProps * U_EXPORT2
|
||||
ucase_getSingleton(UErrorCode *pErrorCode) {
|
||||
#if UCASE_HARDCODE_DATA
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
ucase_getSingleton() {
|
||||
return &ucase_props_singleton;
|
||||
#else
|
||||
int8_t haveData;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
UMTX_CHECK(NULL, gHaveData, haveData);
|
||||
|
||||
if(haveData>0) {
|
||||
/* data was loaded */
|
||||
return gCsp;
|
||||
} else if(haveData<0) {
|
||||
/* data loading failed */
|
||||
*pErrorCode=gErrorCode;
|
||||
return NULL;
|
||||
} else /* haveData==0 */ {
|
||||
/* load the data */
|
||||
UCaseProps *csp=ucase_open(pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
gHaveData=-1;
|
||||
gErrorCode=*pErrorCode;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* set the static variables */
|
||||
umtx_lock(NULL);
|
||||
if(gCsp==NULL) {
|
||||
gCsp=csp;
|
||||
csp=NULL;
|
||||
gHaveData=1;
|
||||
ucln_common_registerCleanup(UCLN_COMMON_UCASE, ucase_cleanup);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
||||
ucase_close(csp);
|
||||
return gCsp;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !UCASE_HARDCODE_DATA
|
||||
U_CAPI const UCaseProps * U_EXPORT2
|
||||
ucase_getDummy(UErrorCode *pErrorCode) {
|
||||
UCaseProps *csp;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
UMTX_CHECK(NULL, gCspDummy, csp);
|
||||
|
||||
if(csp!=NULL) {
|
||||
/* the dummy object was already created */
|
||||
return csp;
|
||||
} else /* csp==NULL */ {
|
||||
/* create the dummy object */
|
||||
int32_t *indexes;
|
||||
|
||||
csp=(UCaseProps *)uprv_malloc(sizeof(UCaseProps)+UCASE_IX_TOP*4+UTRIE_DUMMY_SIZE);
|
||||
if(csp==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
uprv_memset(csp, 0, sizeof(UCaseProps)+UCASE_IX_TOP*4);
|
||||
|
||||
csp->indexes=indexes=(int32_t *)(csp+1);
|
||||
indexes[UCASE_IX_INDEX_TOP]=UCASE_IX_TOP;
|
||||
|
||||
indexes[UCASE_IX_TRIE_SIZE]=
|
||||
utrie_unserializeDummy(&csp->trie, indexes+UCASE_IX_TOP, UTRIE_DUMMY_SIZE, 0, 0, TRUE, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
uprv_free(csp);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
csp->formatVersion[0]=1;
|
||||
csp->formatVersion[2]=UTRIE_SHIFT;
|
||||
csp->formatVersion[3]=UTRIE_INDEX_SHIFT;
|
||||
|
||||
/* set the static variables */
|
||||
umtx_lock(NULL);
|
||||
if(gCspDummy==NULL) {
|
||||
gCspDummy=csp;
|
||||
csp=NULL;
|
||||
ucln_common_registerCleanup(UCLN_COMMON_UCASE, ucase_cleanup);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
||||
uprv_free(csp);
|
||||
return gCspDummy;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* set of property starts for UnicodeSet ------------------------------------ */
|
||||
|
||||
static UBool U_CALLCONV
|
||||
|
@ -1475,69 +1200,7 @@ ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
|
|||
|
||||
/* case mapping properties API ---------------------------------------------- */
|
||||
|
||||
/* get the UCaseProps singleton, or else its dummy, once and for all */
|
||||
#if !UCASE_HARDCODE_DATA
|
||||
static const UCaseProps *
|
||||
getCaseProps() {
|
||||
/*
|
||||
* This lazy initialization with double-checked locking (without mutex protection for
|
||||
* the initial check) is transiently unsafe under certain circumstances.
|
||||
* Check the readme and use u_init() if necessary.
|
||||
*/
|
||||
|
||||
/* the initial check is performed by the GET_CASE_PROPS() macro */
|
||||
const UCaseProps *csp;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
|
||||
csp=ucase_getSingleton(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
csp=ucase_getDummy(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return csp;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* In ICU 3.0, most Unicode properties were loaded from uprops.icu.
|
||||
* ICU 3.2 adds ucase.icu for case mapping properties.
|
||||
* ICU 3.4 adds ubidi.icu for bidi/shaping properties and
|
||||
* removes case/bidi/shaping properties from uprops.icu.
|
||||
*
|
||||
* Loading of uprops.icu was never mutex-protected and required u_init()
|
||||
* for thread safety.
|
||||
* In order to maintain performance for all such properties,
|
||||
* ucase.icu and ubidi.icu are loaded lazily, without mutexing.
|
||||
* u_init() will try to load them for thread safety,
|
||||
* but u_init() will not fail if they are missing.
|
||||
*
|
||||
* uchar.c maintains a tri-state flag for (not loaded/loaded/failed to load)
|
||||
* and an error code for load failure.
|
||||
* Instead, here we try to load at most once.
|
||||
* If it works, we use the resulting singleton object.
|
||||
* If it fails, then we get a dummy object, which always works unless
|
||||
* we are seriously out of memory.
|
||||
* After the first try, we have a never-changing pointer to either the
|
||||
* real singleton or the dummy.
|
||||
*
|
||||
* This method is used in Unicode properties APIs (uchar.h) that
|
||||
* do not have a service object and also do not have an error code parameter.
|
||||
* Other API implementations get the singleton themselves
|
||||
* (with mutexing), store it in the service object, and report errors.
|
||||
*
|
||||
* TODO: Remove this support for non-hardcoded data. u_init() is publicly
|
||||
* advertised as not being required for thread safety, so we cannot
|
||||
* revert to unsafe data loading.
|
||||
*/
|
||||
#if !UCASE_HARDCODE_DATA
|
||||
#define GET_CASE_PROPS() (gCsp!=NULL ? gCsp : getCaseProps())
|
||||
#else
|
||||
#define GET_CASE_PROPS() &ucase_props_singleton
|
||||
#endif
|
||||
|
||||
/* public API (see uchar.h) */
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004-2009, International Business Machines
|
||||
* Copyright (C) 2004-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -31,31 +31,8 @@ U_CDECL_BEGIN
|
|||
struct UCaseProps;
|
||||
typedef struct UCaseProps UCaseProps;
|
||||
|
||||
U_CAPI UCaseProps * U_EXPORT2
|
||||
ucase_open(UErrorCode *pErrorCode);
|
||||
|
||||
U_CAPI UCaseProps * U_EXPORT2
|
||||
ucase_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode);
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucase_close(UCaseProps *csp);
|
||||
|
||||
|
||||
U_CAPI const UCaseProps * U_EXPORT2
|
||||
ucase_getSingleton(UErrorCode *pErrorCode);
|
||||
|
||||
#define UCASE_HARDCODE_DATA 1
|
||||
|
||||
#if !UCASE_HARDCODE_DATA
|
||||
/**
|
||||
* Get a singleton dummy object, one that works with no real data.
|
||||
* This can be used when the real data is not available.
|
||||
* Using the dummy can reduce checks for available data after an initial failure.
|
||||
*/
|
||||
U_CAPI const UCaseProps * U_EXPORT2
|
||||
ucase_getDummy(UErrorCode *pErrorCode);
|
||||
#endif
|
||||
|
||||
ucase_getSingleton(void);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucase_swap(const UDataSwapper *ds,
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2005-2009, International Business Machines
|
||||
* Copyright (C) 2005-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -45,7 +45,7 @@ ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
|
|||
}
|
||||
uprv_memset(csm, 0, sizeof(UCaseMap));
|
||||
|
||||
csm->csp=ucase_getSingleton(pErrorCode);
|
||||
csm->csp=ucase_getSingleton();
|
||||
ucasemap_setLocale(csm, locale, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
uprv_free(csm);
|
||||
|
|
|
@ -33,228 +33,19 @@
|
|||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
/* dynamically loaded Unicode character properties -------------------------- */
|
||||
|
||||
#define UCHAR_HARDCODE_DATA 1
|
||||
|
||||
#if UCHAR_HARDCODE_DATA
|
||||
|
||||
/* uchar_props_data.c is machine-generated by genprops --csource */
|
||||
#include "uchar_props_data.c"
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* loaded uprops.icu -
|
||||
* for a description of the file format, see icu/source/tools/genprops/store.c
|
||||
*/
|
||||
static const char DATA_NAME[] = "uprops";
|
||||
static const char DATA_TYPE[] = "icu";
|
||||
|
||||
static UDataMemory *propsData=NULL;
|
||||
static UErrorCode dataErrorCode=U_ZERO_ERROR;
|
||||
|
||||
static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
|
||||
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
|
||||
|
||||
static UTrie propsTrie={ 0 }, propsVectorsTrie={ 0 };
|
||||
static const uint32_t *pData32=NULL, *propsVectors=NULL;
|
||||
static int32_t countPropsVectors=0, propsVectorsColumns=0;
|
||||
|
||||
static int8_t havePropsData=0; /* == 0 -> Data has not been loaded.
|
||||
* < 0 -> Error occurred attempting to load data.
|
||||
* > 0 -> Data has been successfully loaded.
|
||||
*/
|
||||
|
||||
/* index values loaded from uprops.icu */
|
||||
static int32_t indexes[UPROPS_INDEX_COUNT];
|
||||
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void *context,
|
||||
const char *type, const char *name,
|
||||
const UDataInfo *pInfo) {
|
||||
if(
|
||||
pInfo->size>=20 &&
|
||||
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
|
||||
pInfo->charsetFamily==U_CHARSET_FAMILY &&
|
||||
pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */
|
||||
pInfo->dataFormat[1]==0x50 &&
|
||||
pInfo->dataFormat[2]==0x72 &&
|
||||
pInfo->dataFormat[3]==0x6f &&
|
||||
pInfo->formatVersion[0]==4 &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
|
||||
) {
|
||||
uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
|
||||
uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
|
||||
return TRUE;
|
||||
} else {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static UBool U_CALLCONV uchar_cleanup(void)
|
||||
{
|
||||
if (propsData) {
|
||||
udata_close(propsData);
|
||||
propsData=NULL;
|
||||
}
|
||||
pData32=NULL;
|
||||
propsVectors=NULL;
|
||||
countPropsVectors=0;
|
||||
uprv_memset(dataVersion, 0, U_MAX_VERSION_LENGTH);
|
||||
dataErrorCode=U_ZERO_ERROR;
|
||||
havePropsData=0;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
struct UCharProps {
|
||||
UDataMemory *propsData;
|
||||
UTrie propsTrie, propsVectorsTrie;
|
||||
const uint32_t *pData32;
|
||||
};
|
||||
typedef struct UCharProps UCharProps;
|
||||
|
||||
/* open uprops.icu */
|
||||
static void
|
||||
_openProps(UCharProps *ucp, UErrorCode *pErrorCode) {
|
||||
const uint32_t *p;
|
||||
int32_t length;
|
||||
|
||||
ucp->propsData=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
ucp->pData32=p=(const uint32_t *)udata_getMemory(ucp->propsData);
|
||||
|
||||
/* unserialize the trie; it is directly after the int32_t indexes[UPROPS_INDEX_COUNT] */
|
||||
length=(int32_t)p[UPROPS_PROPS32_INDEX]*4;
|
||||
length=utrie_unserialize(&ucp->propsTrie, (const uint8_t *)(p+UPROPS_INDEX_COUNT), length-64, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* unserialize the properties vectors trie */
|
||||
length=(int32_t)(p[UPROPS_ADDITIONAL_VECTORS_INDEX]-p[UPROPS_ADDITIONAL_TRIE_INDEX])*4;
|
||||
if(length>0) {
|
||||
length=utrie_unserialize(&ucp->propsVectorsTrie, (const uint8_t *)(p+p[UPROPS_ADDITIONAL_TRIE_INDEX]), length, pErrorCode);
|
||||
}
|
||||
if(length<=0 || U_FAILURE(*pErrorCode)) {
|
||||
/*
|
||||
* length==0:
|
||||
* Allow the properties vectors trie to be missing -
|
||||
* also requires propsVectorsColumns=indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]
|
||||
* to be zero so that this trie is never accessed.
|
||||
*/
|
||||
uprv_memset(&ucp->propsVectorsTrie, 0, sizeof(ucp->propsVectorsTrie));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !UCHAR_HARDCODE_DATA
|
||||
static int8_t
|
||||
uprv_loadPropsData(UErrorCode *pErrorCode) {
|
||||
/* load Unicode character properties data from file if necessary */
|
||||
|
||||
/*
|
||||
* This lazy initialization with double-checked locking (without mutex protection for
|
||||
* havePropsData==0) is transiently unsafe under certain circumstances.
|
||||
* Check the readme and use u_init() if necessary.
|
||||
*/
|
||||
if(havePropsData==0) {
|
||||
UCharProps ucp={ NULL };
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return havePropsData;
|
||||
}
|
||||
|
||||
/* open the data outside the mutex block */
|
||||
_openProps(&ucp, pErrorCode);
|
||||
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
/* in the mutex block, set the data for this process */
|
||||
umtx_lock(NULL);
|
||||
if(propsData==NULL) {
|
||||
propsData=ucp.propsData;
|
||||
ucp.propsData=NULL;
|
||||
pData32=ucp.pData32;
|
||||
ucp.pData32=NULL;
|
||||
uprv_memcpy(&propsTrie, &ucp.propsTrie, sizeof(propsTrie));
|
||||
uprv_memcpy(&propsVectorsTrie, &ucp.propsVectorsTrie, sizeof(propsVectorsTrie));
|
||||
}
|
||||
|
||||
/* initialize some variables */
|
||||
uprv_memcpy(indexes, pData32, sizeof(indexes));
|
||||
|
||||
/* additional properties */
|
||||
if(indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]!=0) {
|
||||
propsVectors=pData32+indexes[UPROPS_ADDITIONAL_VECTORS_INDEX];
|
||||
countPropsVectors=indexes[UPROPS_RESERVED_INDEX]-indexes[UPROPS_ADDITIONAL_VECTORS_INDEX];
|
||||
propsVectorsColumns=indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX];
|
||||
}
|
||||
|
||||
havePropsData=1;
|
||||
umtx_unlock(NULL);
|
||||
} else {
|
||||
dataErrorCode=*pErrorCode;
|
||||
havePropsData=-1;
|
||||
}
|
||||
ucln_common_registerCleanup(UCLN_COMMON_UCHAR, uchar_cleanup);
|
||||
|
||||
/* if a different thread set it first, then close the extra data */
|
||||
udata_close(ucp.propsData); /* NULL if it was set correctly */
|
||||
}
|
||||
|
||||
return havePropsData;
|
||||
}
|
||||
|
||||
static int8_t
|
||||
loadPropsData(void) {
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
int8_t retVal = uprv_loadPropsData(&errorCode);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* constants and macros for access to the data ------------------------------ */
|
||||
|
||||
/* getting a uint32_t properties word from the data */
|
||||
/*
 * GET_PROPS(c, result) stores the 16-bit properties trie value for code point c
 * into result. Two variants, selected by UCHAR_HARDCODE_DATA.
 */
#if UCHAR_HARDCODE_DATA
|
||||
|
||||
/* Unconditional lookup in propsTrie. NOTE: the expansion already ends with ';'. */
#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c));
|
||||
|
||||
#else
|
||||
|
||||
/* Non-zero if the data is available; calls loadPropsData() unless havePropsData is already positive. */
#define HAVE_DATA (havePropsData>0 || loadPropsData()>0)
|
||||
/* Lookup that does not check HAVE_DATA first. NOTE: the expansion already ends with ';'. */
#define GET_PROPS_UNSAFE(c, result) \
|
||||
UTRIE_GET16(&propsTrie, c, result);
|
||||
/*
 * Checked lookup: result is set to 0 when HAVE_DATA is false.
 * NOTE(review): expands to a bare if/else statement, not an expression.
 */
#define GET_PROPS(c, result) \
|
||||
if(HAVE_DATA) { \
|
||||
GET_PROPS_UNSAFE(c, result); \
|
||||
} else { \
|
||||
(result)=0; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
U_CFUNC UBool
|
||||
uprv_haveProperties(UErrorCode *pErrorCode) {
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
#if !UCHAR_HARDCODE_DATA
|
||||
if(havePropsData==0) {
|
||||
uprv_loadPropsData(pErrorCode);
|
||||
}
|
||||
if(havePropsData<0) {
|
||||
*pErrorCode=dataErrorCode;
|
||||
return FALSE;
|
||||
}
|
||||
#endif
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
@ -291,11 +82,7 @@ U_CAPI void U_EXPORT2
|
|||
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
|
||||
struct _EnumTypeCallback callback;
|
||||
|
||||
if(enumRange==NULL
|
||||
#if !UCHAR_HARDCODE_DATA
|
||||
|| !HAVE_DATA
|
||||
#endif
|
||||
) {
|
||||
if(enumRange==NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -706,9 +493,6 @@ u_getUnicodeProperties(UChar32 c, int32_t column) {
|
|||
GET_PROPS(c, props);
|
||||
return props;
|
||||
} else if(
|
||||
#if !UCHAR_HARDCODE_DATA
|
||||
!HAVE_DATA || countPropsVectors==0 ||
|
||||
#endif
|
||||
column<0 || column>=propsVectorsColumns
|
||||
) {
|
||||
return 0;
|
||||
|
@ -720,22 +504,14 @@ u_getUnicodeProperties(UChar32 c, int32_t column) {
|
|||
|
||||
U_CFUNC int32_t
|
||||
uprv_getMaxValues(int32_t column) {
|
||||
#if !UCHAR_HARDCODE_DATA
|
||||
if(HAVE_DATA) {
|
||||
#endif
|
||||
switch(column) {
|
||||
case 0:
|
||||
return indexes[UPROPS_MAX_VALUES_INDEX];
|
||||
case 2:
|
||||
return indexes[UPROPS_MAX_VALUES_2_INDEX];
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
#if !UCHAR_HARDCODE_DATA
|
||||
} else {
|
||||
switch(column) {
|
||||
case 0:
|
||||
return indexes[UPROPS_MAX_VALUES_INDEX];
|
||||
case 2:
|
||||
return indexes[UPROPS_MAX_VALUES_2_INDEX];
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
|
@ -750,6 +526,7 @@ u_charAge(UChar32 c, UVersionInfo versionArray) {
|
|||
|
||||
U_CAPI UScriptCode U_EXPORT2
|
||||
uscript_getScript(UChar32 c, UErrorCode *pErrorCode) {
|
||||
uint32_t scriptX;
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return USCRIPT_INVALID_CODE;
|
||||
}
|
||||
|
@ -757,8 +534,81 @@ uscript_getScript(UChar32 c, UErrorCode *pErrorCode) {
|
|||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return USCRIPT_INVALID_CODE;
|
||||
}
|
||||
scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
|
||||
if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
|
||||
return (UScriptCode)scriptX;
|
||||
} else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) {
|
||||
return USCRIPT_COMMON;
|
||||
} else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) {
|
||||
return USCRIPT_INHERITED;
|
||||
} else {
|
||||
return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK];
|
||||
}
|
||||
}
|
||||
|
||||
return (UScriptCode)(u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_MASK);
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
uscript_hasScript(UChar32 c, UScriptCode sc) {
|
||||
UScriptCode script;
|
||||
const uint16_t *scx;
|
||||
uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
|
||||
if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
|
||||
return sc==(UScriptCode)scriptX;
|
||||
}
|
||||
|
||||
scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
|
||||
if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) {
|
||||
script=USCRIPT_COMMON;
|
||||
} else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) {
|
||||
script=USCRIPT_INHERITED;
|
||||
} else {
|
||||
script=(UScriptCode)scx[0];
|
||||
scx=scriptExtensions+scx[1];
|
||||
}
|
||||
if(sc==script) {
|
||||
return TRUE;
|
||||
}
|
||||
while(sc>*scx) {
|
||||
++scx;
|
||||
}
|
||||
return sc==(*scx&0x7fff);
|
||||
}
|
||||
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
uscript_getScriptExtensions(UChar32 c,
|
||||
UScriptCode *scripts, int32_t capacity,
|
||||
UErrorCode *pErrorCode) {
|
||||
uint32_t scriptX;
|
||||
int32_t length;
|
||||
const uint16_t *scx;
|
||||
uint16_t sx;
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
if(capacity<0 || (capacity>0 && scripts==NULL)) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
|
||||
if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
length=0;
|
||||
scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
|
||||
if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
|
||||
scx=scriptExtensions+scx[1];
|
||||
}
|
||||
do {
|
||||
sx=*scx++;
|
||||
if(length<capacity) {
|
||||
scripts[length]=sx&0x7fff;
|
||||
}
|
||||
++length;
|
||||
} while(sx<0x8000);
|
||||
if(length>capacity) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
U_CAPI UBlockCode U_EXPORT2
|
||||
|
@ -784,13 +634,6 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
|
|||
return;
|
||||
}
|
||||
|
||||
#if !UCHAR_HARDCODE_DATA
|
||||
if(!HAVE_DATA) {
|
||||
*pErrorCode=dataErrorCode;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* add the start code point of each same-value range of the main trie */
|
||||
utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
|
||||
|
||||
|
@ -851,13 +694,6 @@ upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
|
|||
return;
|
||||
}
|
||||
|
||||
#if !UCHAR_HARDCODE_DATA
|
||||
if(!HAVE_DATA) {
|
||||
*pErrorCode=dataErrorCode;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* add the start code point of each same-value range of the properties vectors trie */
|
||||
if(propsVectorsColumns>0) {
|
||||
/* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -46,9 +46,6 @@ typedef enum ECleanupCommonType {
|
|||
UCLN_COMMON_UNAMES,
|
||||
UCLN_COMMON_PNAME,
|
||||
UCLN_COMMON_UPROPS,
|
||||
UCLN_COMMON_UBIDI,
|
||||
UCLN_COMMON_UCASE,
|
||||
UCLN_COMMON_UCHAR,
|
||||
UCLN_COMMON_UCNV,
|
||||
UCLN_COMMON_UCNV_IO,
|
||||
UCLN_COMMON_UDATA,
|
||||
|
|
|
@ -39,7 +39,7 @@ U_CDECL_BEGIN
|
|||
* @see u_getUnicodeVersion
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_UNICODE_VERSION "5.2"
|
||||
#define U_UNICODE_VERSION "6.0"
|
||||
|
||||
/**
|
||||
* \file
|
||||
|
@ -392,21 +392,21 @@ typedef enum UProperty {
|
|||
See the uchar.h file documentation.
|
||||
@stable ICU 3.4 */
|
||||
UCHAR_POSIX_XDIGIT=48,
|
||||
/** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @draft ICU 4.4 */
|
||||
/** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */
|
||||
UCHAR_CASED=49,
|
||||
/** Binary property Case_Ignorable. Used in context-sensitive case mappings. @draft ICU 4.4 */
|
||||
/** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */
|
||||
UCHAR_CASE_IGNORABLE=50,
|
||||
/** Binary property Changes_When_Lowercased. @draft ICU 4.4 */
|
||||
/** Binary property Changes_When_Lowercased. @stable ICU 4.4 */
|
||||
UCHAR_CHANGES_WHEN_LOWERCASED=51,
|
||||
/** Binary property Changes_When_Uppercased. @draft ICU 4.4 */
|
||||
/** Binary property Changes_When_Uppercased. @stable ICU 4.4 */
|
||||
UCHAR_CHANGES_WHEN_UPPERCASED=52,
|
||||
/** Binary property Changes_When_Titlecased. @draft ICU 4.4 */
|
||||
/** Binary property Changes_When_Titlecased. @stable ICU 4.4 */
|
||||
UCHAR_CHANGES_WHEN_TITLECASED=53,
|
||||
/** Binary property Changes_When_Casefolded. @draft ICU 4.4 */
|
||||
/** Binary property Changes_When_Casefolded. @stable ICU 4.4 */
|
||||
UCHAR_CHANGES_WHEN_CASEFOLDED=54,
|
||||
/** Binary property Changes_When_Casemapped. @draft ICU 4.4 */
|
||||
/** Binary property Changes_When_Casemapped. @stable ICU 4.4 */
|
||||
UCHAR_CHANGES_WHEN_CASEMAPPED=55,
|
||||
/** Binary property Changes_When_NFKC_Casefolded. @draft ICU 4.4 */
|
||||
/** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
|
||||
UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
|
||||
/** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
|
||||
UCHAR_BINARY_LIMIT=57,
|
||||
|
@ -560,6 +560,20 @@ typedef enum UProperty {
|
|||
/** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
|
||||
UCHAR_STRING_LIMIT=0x400D,
|
||||
|
||||
/** Provisional property Script_Extensions (new in Unicode 6.0).
|
||||
As a provisional property, it may be modified or removed
|
||||
in future versions of the Unicode Standard, and thus in ICU.
|
||||
Some characters are commonly used in multiple scripts.
|
||||
For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
|
||||
@draft ICU 4.6 */
|
||||
UCHAR_SCRIPT_EXTENSIONS=0x7000,
|
||||
/** First constant for Unicode properties with unusual value types. @draft ICU 4.6 */
|
||||
UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
|
||||
/** One more than the last constant for Unicode properties with unusual value types.
|
||||
* @draft ICU 4.6 */
|
||||
UCHAR_OTHER_PROPERTY_LIMIT=0x7001,
|
||||
|
||||
/** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
|
||||
UCHAR_INVALID_CODE = -1
|
||||
} UProperty;
|
||||
|
@ -1287,61 +1301,88 @@ enum UBlockCode {
|
|||
|
||||
/* New blocks in Unicode 5.2 */
|
||||
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_SAMARITAN = 172, /*[0800]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_TAI_THAM = 174, /*[1A20]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_LISU = 176, /*[A4D0]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_BAMUM = 177, /*[A6A0]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_JAVANESE = 181, /*[A980]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_TAI_VIET = 183, /*[AA80]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_AVESTAN = 188, /*[10B00]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_KAITHI = 193, /*[11080]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/
|
||||
/** @draft ICU 4.4 */
|
||||
/** @stable ICU 4.4 */
|
||||
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/
|
||||
|
||||
/* New blocks in Unicode 6.0 */
|
||||
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_MANDAIC = 198, /*[0840]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_BATAK = 199, /*[1BC0]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_BRAHMI = 201, /*[11000]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_EMOTICONS = 206, /*[1F600]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
|
||||
/** @stable ICU 4.6 */
|
||||
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/
|
||||
|
||||
/** @stable ICU 2.0 */
|
||||
UBLOCK_COUNT = 198,
|
||||
UBLOCK_COUNT = 210,
|
||||
|
||||
/** @stable ICU 2.0 */
|
||||
UBLOCK_INVALID_CODE=-1
|
||||
|
@ -1386,7 +1427,7 @@ typedef enum UCharNameChoice {
|
|||
U_UNICODE_CHAR_NAME,
|
||||
U_UNICODE_10_CHAR_NAME,
|
||||
U_EXTENDED_CHAR_NAME,
|
||||
U_CHAR_NAME_ALIAS, /**< Corrected name from NameAliases.txt. @draft ICU 4.4 */
|
||||
U_CHAR_NAME_ALIAS, /**< Corrected name from NameAliases.txt. @stable ICU 4.4 */
|
||||
U_CHAR_NAME_CHOICE_COUNT
|
||||
} UCharNameChoice;
|
||||
|
||||
|
@ -1474,7 +1515,8 @@ typedef enum UJoiningGroup {
|
|||
U_JG_GAF,
|
||||
U_JG_GAMAL,
|
||||
U_JG_HAH,
|
||||
U_JG_HAMZA_ON_HEH_GOAL,
|
||||
U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */
|
||||
U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
|
||||
U_JG_HE,
|
||||
U_JG_HEH,
|
||||
U_JG_HEH_GOAL,
|
||||
|
@ -1515,8 +1557,8 @@ typedef enum UJoiningGroup {
|
|||
U_JG_KHAPH, /**< @stable ICU 2.6 */
|
||||
U_JG_ZHAIN, /**< @stable ICU 2.6 */
|
||||
U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */
|
||||
U_JG_FARSI_YEH, /**< @draft ICU 4.4 */
|
||||
U_JG_NYA, /**< @draft ICU 4.4 */
|
||||
U_JG_FARSI_YEH, /**< @stable ICU 4.4 */
|
||||
U_JG_NYA, /**< @stable ICU 4.4 */
|
||||
U_JG_COUNT
|
||||
} UJoiningGroup;
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-2009, International Business Machines
|
||||
* Copyright (C) 1997-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
|
@ -45,7 +45,7 @@
|
|||
*/
|
||||
typedef enum UScriptCode {
|
||||
USCRIPT_INVALID_CODE = -1,
|
||||
USCRIPT_COMMON = 0 , /* Zyyy */
|
||||
USCRIPT_COMMON = 0, /* Zyyy */
|
||||
USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
|
||||
USCRIPT_ARABIC = 2, /* Arab */
|
||||
USCRIPT_ARMENIAN = 3, /* Armn */
|
||||
|
@ -107,7 +107,7 @@ typedef enum UScriptCode {
|
|||
|
||||
/** New script code in Unicode 4.0.1 @stable ICU 3.0 */
|
||||
USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
|
||||
|
||||
|
||||
/* New scripts in Unicode 4.1 @stable ICU 3.4 */
|
||||
USCRIPT_BUGINESE = 55, /* Bugi */
|
||||
USCRIPT_GLAGOLITIC = 56, /* Glag */
|
||||
|
@ -140,9 +140,15 @@ typedef enum UScriptCode {
|
|||
USCRIPT_LATIN_GAELIC = 81, /* Latg */
|
||||
USCRIPT_LEPCHA = 82, /* Lepc */
|
||||
USCRIPT_LINEAR_A = 83, /* Lina */
|
||||
USCRIPT_MANDAEAN = 84, /* Mand */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_MANDAIC = 84, /* Mand */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
|
||||
USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
|
||||
USCRIPT_MEROITIC = 86, /* Mero */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
|
||||
USCRIPT_NKO = 87, /* Nkoo */
|
||||
USCRIPT_ORKHON = 88, /* Orkh */
|
||||
USCRIPT_OLD_PERMIC = 89, /* Perm */
|
||||
|
@ -191,14 +197,29 @@ typedef enum UScriptCode {
|
|||
USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
|
||||
USCRIPT_SYMBOLS = 129,/* Zsym */
|
||||
|
||||
/* New script codes from ISO 15924 @draft ICU 4.4 */
|
||||
/* New script codes from ISO 15924 @stable ICU 4.4 */
|
||||
USCRIPT_BAMUM = 130,/* Bamu */
|
||||
USCRIPT_LISU = 131,/* Lisu */
|
||||
USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
|
||||
USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */
|
||||
|
||||
/* Private use codes from Qaaa - Qabx are not supported*/
|
||||
USCRIPT_CODE_LIMIT = 134
|
||||
/* New script codes from ISO 15924 @stable ICU 4.6 */
|
||||
USCRIPT_BASSA_VAH = 134,/* Bass */
|
||||
USCRIPT_DUPLOYAN_SHORTAND = 135,/* Dupl */
|
||||
USCRIPT_ELBASAN = 136,/* Elba */
|
||||
USCRIPT_GRANTHA = 137,/* Gran */
|
||||
USCRIPT_KPELLE = 138,/* Kpel */
|
||||
USCRIPT_LOMA = 139,/* Loma */
|
||||
USCRIPT_MENDE = 140,/* Mend */
|
||||
USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */
|
||||
USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */
|
||||
USCRIPT_NABATAEAN = 143,/* Nbat */
|
||||
USCRIPT_PALMYRENE = 144,/* Palm */
|
||||
USCRIPT_SINDHI = 145,/* Sind */
|
||||
USCRIPT_WARANG_CITI = 146,/* Wara */
|
||||
|
||||
/* Private use codes from Qaaa - Qabx are not supported */
|
||||
USCRIPT_CODE_LIMIT = 147
|
||||
} UScriptCode;
|
||||
|
||||
/**
|
||||
|
@ -244,7 +265,7 @@ uscript_getName(UScriptCode scriptCode);
|
|||
U_STABLE const char* U_EXPORT2
|
||||
uscript_getShortName(UScriptCode scriptCode);
|
||||
|
||||
/**
|
||||
/**
|
||||
* Gets the script code associated with the given codepoint.
|
||||
* Returns USCRIPT_MALAYALAM given 0x0D02
|
||||
* @param codepoint UChar32 codepoint
|
||||
|
@ -255,6 +276,51 @@ uscript_getShortName(UScriptCode scriptCode);
|
|||
U_STABLE UScriptCode U_EXPORT2
|
||||
uscript_getScript(UChar32 codepoint, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Is code point c used in script sc?
|
||||
* That is, does code point c have the Script property value sc,
|
||||
* or do code point c's Script_Extensions include script code sc?
|
||||
*
|
||||
* Some characters are commonly used in multiple scripts.
|
||||
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
*
|
||||
* The Script_Extensions property is provisional. It may be modified or removed
|
||||
* in future versions of the Unicode Standard, and thus in ICU.
|
||||
* @param c code point
|
||||
* @param sc script code
|
||||
* @return TRUE if Script(c)==sc or sc is in Script_Extensions(c)
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
uscript_hasScript(UChar32 c, UScriptCode sc);
|
||||
|
||||
/**
|
||||
* Writes code point c's Script_Extensions as a list of UScriptCode values
|
||||
* to the output scripts array.
|
||||
*
|
||||
* Some characters are commonly used in multiple scripts.
|
||||
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
*
|
||||
* If there are more than capacity script codes to be written, then
|
||||
* U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
|
||||
* (Usual ICU buffer handling behavior.)
|
||||
*
|
||||
* The Script_Extensions property is provisional. It may be modified or removed
|
||||
* in future versions of the Unicode Standard, and thus in ICU.
|
||||
* @param c code point
|
||||
* @param scripts output script code array
|
||||
* @param capacity capacity of the scripts array
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return number of script codes in c's Script_Extensions,
|
||||
* written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
|
||||
* @draft ICU 4.6
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
uscript_getScriptExtensions(UChar32 c,
|
||||
UScriptCode *scripts, int32_t capacity,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
@ -210,7 +210,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
|
|||
if(U_SUCCESS(status)) {
|
||||
impl->addPropertyStarts(&sa, status);
|
||||
}
|
||||
ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
|
||||
ucase_addPropertyStarts(ucase_getSingleton(), &sa, &status);
|
||||
break;
|
||||
}
|
||||
case UPROPS_SRC_NFC: {
|
||||
|
@ -243,10 +243,10 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
|
|||
}
|
||||
#endif
|
||||
case UPROPS_SRC_CASE:
|
||||
ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
|
||||
ucase_addPropertyStarts(ucase_getSingleton(), &sa, &status);
|
||||
break;
|
||||
case UPROPS_SRC_BIDI:
|
||||
ubidi_addPropertyStarts(ubidi_getSingleton(&status), &sa, &status);
|
||||
ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status);
|
||||
break;
|
||||
default:
|
||||
status = U_INTERNAL_PROGRAM_ERROR;
|
||||
|
@ -929,9 +929,10 @@ static UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
|
|||
}
|
||||
|
||||
static UBool versionFilter(UChar32 ch, void* context) {
|
||||
UVersionInfo v, none = { 0, 0, 0, 0};
|
||||
UVersionInfo* version = (UVersionInfo*)context;
|
||||
static const UVersionInfo none = { 0, 0, 0, 0 };
|
||||
UVersionInfo v;
|
||||
u_charAge(ch, v);
|
||||
UVersionInfo* version = (UVersionInfo*)context;
|
||||
return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0;
|
||||
}
|
||||
|
||||
|
@ -945,6 +946,9 @@ static UBool intPropertyFilter(UChar32 ch, void* context) {
|
|||
return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value;
|
||||
}
|
||||
|
||||
// Filter callback: context points to a UScriptCode; accepts ch if
// uscript_hasScript() reports that ch is used in that script.
static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
    const UScriptCode *wanted = (const UScriptCode*)context;
    return uscript_hasScript(ch, *wanted);
}
|
||||
|
||||
/**
|
||||
* Generic filter-based scanning code for UCD property UnicodeSets.
|
||||
|
@ -953,20 +957,17 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
|
|||
void* context,
|
||||
int32_t src,
|
||||
UErrorCode &status) {
|
||||
// Walk through all Unicode characters, noting the start
|
||||
if (U_FAILURE(status)) return;
|
||||
|
||||
// Logically, walk through all Unicode characters, noting the start
|
||||
// and end of each range for which filter.contain(c) is
|
||||
// true. Add each range to a set.
|
||||
//
|
||||
// To improve performance, use the INCLUSIONS set, which
|
||||
// To improve performance, use an inclusions set which
|
||||
// encodes information about character ranges that are known
|
||||
// to have identical properties. INCLUSIONS contains
|
||||
// only the first characters of such ranges.
|
||||
//
|
||||
// TODO Where possible, instead of scanning over code points,
|
||||
// use internal property data to initialize UnicodeSets for
|
||||
// those properties. Scanning code points is slow.
|
||||
if (U_FAILURE(status)) return;
|
||||
|
||||
// to have identical properties.
|
||||
// getInclusions(src) contains exactly the first characters of
|
||||
// same-value ranges for the given properties "source".
|
||||
const UnicodeSet* inclusions = getInclusions(src, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
|
@ -1034,6 +1035,9 @@ UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec)
|
|||
|
||||
if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
|
||||
applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec);
|
||||
} else if (prop == UCHAR_SCRIPT_EXTENSIONS) {
|
||||
UScriptCode script = (UScriptCode)value;
|
||||
applyFilter(scriptExtensionsFilter, &script, UPROPS_SRC_PROPSVEC, ec);
|
||||
} else {
|
||||
IntPropertyContext c = {prop, value};
|
||||
applyFilter(intPropertyFilter, &c, uprops_getSource(prop), ec);
|
||||
|
@ -1146,6 +1150,13 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
|
|||
return *this;
|
||||
}
|
||||
break;
|
||||
case UCHAR_SCRIPT_EXTENSIONS:
|
||||
v = u_getPropertyValueEnum(UCHAR_SCRIPT, vname.data());
|
||||
if (v == UCHAR_INVALID_CODE) {
|
||||
FAIL(ec);
|
||||
}
|
||||
// fall through to calling applyIntPropertyValue()
|
||||
break;
|
||||
default:
|
||||
// p is a non-binary, non-enumerated property that we
|
||||
// don't support (yet).
|
||||
|
@ -1183,7 +1194,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
applyIntPropertyValue(p, v, ec);
|
||||
if(invert) {
|
||||
complement();
|
||||
|
@ -1395,9 +1406,8 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
|
|||
return *this;
|
||||
}
|
||||
if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const UCaseProps *csp = ucase_getSingleton(&status);
|
||||
if (U_SUCCESS(status)) {
|
||||
const UCaseProps *csp = ucase_getSingleton();
|
||||
{
|
||||
UnicodeSet foldSet(*this);
|
||||
UnicodeString str;
|
||||
USetAdder sa = {
|
||||
|
@ -1460,6 +1470,7 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
|
|||
} else {
|
||||
Locale root("");
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
BreakIterator *bi = BreakIterator::createWordInstance(root, status);
|
||||
#endif
|
||||
if (U_SUCCESS(status)) {
|
||||
|
|
|
@ -99,14 +99,7 @@ UnicodeString::caseMap(BreakIterator *titleIter,
|
|||
return *this;
|
||||
}
|
||||
|
||||
UErrorCode errorCode;
|
||||
|
||||
errorCode = U_ZERO_ERROR;
|
||||
const UCaseProps *csp=ucase_getSingleton(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
setToBogus();
|
||||
return *this;
|
||||
}
|
||||
const UCaseProps *csp=ucase_getSingleton();
|
||||
|
||||
// We need to allocate a new buffer for the internal string case mapping function.
|
||||
// This is very similar to how doReplace() keeps the old array pointer
|
||||
|
@ -138,6 +131,7 @@ UnicodeString::caseMap(BreakIterator *titleIter,
|
|||
}
|
||||
|
||||
// Case-map, and if the result is too long, then reallocate and repeat.
|
||||
UErrorCode errorCode;
|
||||
int32_t newLength;
|
||||
do {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
|
|
|
@ -184,7 +184,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
|
|||
nfcImpl=NULL;
|
||||
}
|
||||
if((options&U_COMPARE_IGNORE_CASE)!=0) {
|
||||
csp=ucase_getSingleton(pErrorCode);
|
||||
csp=ucase_getSingleton();
|
||||
} else {
|
||||
csp=NULL;
|
||||
}
|
||||
|
|
|
@ -38,125 +38,231 @@
|
|||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/* cleanup ------------------------------------------------------------------ */
|
||||
|
||||
static const UBiDiProps *gBdp=NULL;
|
||||
|
||||
/*
 * Cleanup callback, registered by getBiDiProps() under UCLN_COMMON_UPROPS.
 * Forgets the cached UBiDiProps pointer and always returns TRUE.
 */
static UBool U_CALLCONV uprops_cleanup(void) {
|
||||
gBdp=NULL;  /* only the pointer is reset; nothing is released here */
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* bidi/shaping properties API ---------------------------------------------- */
|
||||
|
||||
/* get the UBiDiProps singleton, or else its dummy, once and for all */
|
||||
static const UBiDiProps *
|
||||
getBiDiProps() {
|
||||
/*
|
||||
* This lazy intialization with double-checked locking (without mutex protection for
|
||||
* the initial check) is transiently unsafe under certain circumstances.
|
||||
* Check the readme and use u_init() if necessary.
|
||||
*/
|
||||
|
||||
/* the initial check is performed by the GET_BIDI_PROPS() macro */
|
||||
const UBiDiProps *bdp;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
|
||||
bdp=ubidi_getSingleton(&errorCode);
|
||||
#if !UBIDI_HARDCODE_DATA
|
||||
if(U_FAILURE(errorCode)) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
bdp=ubidi_getDummy(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
umtx_lock(NULL);
|
||||
if(gBdp==NULL) {
|
||||
gBdp=bdp;
|
||||
ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
||||
return gBdp;
|
||||
}
|
||||
|
||||
/* see comment for GET_CASE_PROPS() */
|
||||
#define GET_BIDI_PROPS() (gBdp!=NULL ? gBdp : getBiDiProps())
|
||||
#define GET_BIDI_PROPS() ubidi_getSingleton()
|
||||
|
||||
/* general properties API functions ----------------------------------------- */
|
||||
|
||||
static const struct {
|
||||
int32_t column;
|
||||
struct BinaryProperty;
|
||||
|
||||
typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which);
|
||||
|
||||
struct BinaryProperty {
|
||||
int32_t column; // SRC_PROPSVEC column, or "source" if mask==0
|
||||
uint32_t mask;
|
||||
} binProps[UCHAR_BINARY_LIMIT]={
|
||||
BinaryPropertyContains *contains;
|
||||
};
|
||||
|
||||
// Default test for systematic, directly stored properties:
// c has the property if any bit of prop.mask is set in column prop.column
// of its properties vector.
static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) {
    uint32_t selectedBits=u_getUnicodeProperties(c, prop.column)&prop.mask;
    return selectedBits!=0;
}
|
||||
|
||||
// Delegates the binary property "which" to ucase_hasBinaryProperty();
// the table entry itself is not consulted.
static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
    UBool hasProperty=ucase_hasBinaryProperty(c, which);
    return hasProperty;
}
|
||||
|
||||
// Bidi_Control: answered by ubidi_isBidiControl() on the bidi properties
// object obtained via GET_BIDI_PROPS().
static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const UBiDiProps *bidiProps=GET_BIDI_PROPS();
    return ubidi_isBidiControl(bidiProps, c);
}
|
||||
|
||||
// Bidi_Mirrored: answered by ubidi_isMirrored() on the bidi properties
// object obtained via GET_BIDI_PROPS().
static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const UBiDiProps *bidiProps=GET_BIDI_PROPS();
    return ubidi_isMirrored(bidiProps, c);
}
|
||||
|
||||
// Join_Control: answered by ubidi_isJoinControl() on the bidi properties
// object obtained via GET_BIDI_PROPS().
static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const UBiDiProps *bidiProps=GET_BIDI_PROPS();
    return ubidi_isJoinControl(bidiProps, c);
}
|
||||
|
||||
// Full_Composition_Exclusion, which by definition is the same as NFC_QC=No.
// Returns FALSE when normalization is configured out or the NFC
// implementation cannot be obtained.
static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
#if UCONFIG_NO_NORMALIZATION
    return FALSE;
#else
    UErrorCode status=U_ZERO_ERROR;
    const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(status);
    if(U_FAILURE(status)) {
        return FALSE;
    }
    // look up c's norm16 value and test it for "composition: no"
    return nfcImpl->isCompNo(nfcImpl->getNorm16(c)) ? TRUE : FALSE;
#endif
}
|
||||
|
||||
// UCHAR_NF*_INERT properties
|
||||
static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
|
||||
#if UCONFIG_NO_NORMALIZATION
|
||||
return FALSE;
|
||||
#else
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const Normalizer2 *norm2=Normalizer2Factory::getInstance(
|
||||
(UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
|
||||
return U_SUCCESS(errorCode) && norm2->isInert(c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Changes_When_Casefolded.
// Takes the decomposition of c reported by the NFC Normalizer2 instance
// (or c itself if it has none) and tests whether case folding changes it.
// Returns FALSE for negative c without a decomposition, when normalization
// is configured out, or when a required service fails.
static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
#if UCONFIG_NO_NORMALIZATION
    return FALSE;
#else
    UnicodeString decomposition;
    UErrorCode status=U_ZERO_ERROR;
    const Normalizer2 *nfc=Normalizer2Factory::getNFCInstance(status);
    if(U_FAILURE(status)) {
        return FALSE;
    }

    if(nfc->getDecomposition(c, decomposition)) {
        // c has a decomposition: reduce it to a single code point if it is
        // exactly one, otherwise mark it with U_SENTINEL
        const int32_t units=decomposition.length();
        UChar32 single=U_SENTINEL;
        if(units==1) {
            single=decomposition[0];  // single BMP code point
        } else if(units<=U16_MAX_LENGTH) {
            const UChar32 first=decomposition.char32At(0);
            if(units==U16_LENGTH(first)) {
                single=first;  // single supplementary code point
            }
        }
        c=single;
    } else if(c<0) {
        return FALSE;  // protect against bad input
    }

    if(c<0) {
        // the decomposition has several code points: fold the whole string
        // and compare it with the unfolded one
        UChar folded[2*UCASE_MAX_STRING_LENGTH];  // large but stack-friendly guess
        int32_t foldedLength=u_strFoldCase(folded, LENGTHOF(folded),
                                           decomposition.getBuffer(), decomposition.length(),
                                           U_FOLD_CASE_DEFAULT, &status);
        if(U_FAILURE(status)) {
            return FALSE;
        }
        return (UBool)(0!=u_strCompare(decomposition.getBuffer(), decomposition.length(),
                                       folded, foldedLength, FALSE));
    }

    // single code point: a non-negative result from ucase_toFullFolding()
    // is treated as "c changes"
    const UCaseProps *caseProps=ucase_getSingleton();
    const UChar *unusedMapping;
    return (UBool)(ucase_toFullFolding(caseProps, c, &unusedMapping, U_FOLD_CASE_DEFAULT)>=0);
#endif
}
|
||||
|
||||
// contains() callback for UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED:
// composes the one-code-point string c with the NFKC_CF data and
// reports whether the result differs from the input.
// Always FALSE when normalization is configured out or on any error.
static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
#if !UCONFIG_NO_NORMALIZATION
    UErrorCode status=U_ZERO_ERROR;
    const Normalizer2Impl *nfkcCf=Normalizer2Factory::getNFKC_CFImpl(status);
    if(U_SUCCESS(status)) {
        UnicodeString input(c);
        UnicodeString mapped;
        {
            // Keep the ReorderingBuffer in its own block: its destructor has to
            // release mapped's buffer before mapped is compared below.
            ReorderingBuffer sink(*nfkcCf, mapped);
            // A small destination capacity is enough for NFKC_CF(c).
            if(sink.init(5, status)) {
                const UChar *start=input.getBuffer();
                const UChar *limit=start+input.length();
                nfkcCf->compose(start, limit, FALSE, TRUE, sink, status);
            }
        }
        return U_SUCCESS(status) && mapped!=input;
    }
    return FALSE;
#else
    return FALSE;
#endif
}
|
||||
|
||||
// contains() callback for the segment-starter property; needs the NFC data
// together with its canonical-iterator data.
// Always FALSE when normalization is configured out or the data is unavailable.
static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
#if !UCONFIG_NO_NORMALIZATION
    UErrorCode status=U_ZERO_ERROR;
    const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(status);
    if(U_SUCCESS(status) && nfcImpl->ensureCanonIterData(status)) {
        return nfcImpl->isCanonSegmentStarter(c)!=0;
    }
    return FALSE;
#else
    return FALSE;
#endif
}
|
||||
|
||||
// contains() callback: delegates to u_isalnumPOSIX().
static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const UBool alnum=u_isalnumPOSIX(c);
    return alnum;
}
|
||||
|
||||
// contains() callback: delegates to u_isblank().
static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const UBool blank=u_isblank(c);
    return blank;
}
|
||||
|
||||
// contains() callback: delegates to u_isgraphPOSIX().
static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const UBool graph=u_isgraphPOSIX(c);
    return graph;
}
|
||||
|
||||
// contains() callback: delegates to u_isprintPOSIX().
static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const UBool printable=u_isprintPOSIX(c);
    return printable;
}
|
||||
|
||||
// contains() callback: delegates to u_isxdigit().
static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const UBool hexDigit=u_isxdigit(c);
    return hexDigit;
}
|
||||
|
||||
static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
|
||||
/*
|
||||
* column and mask values for binary properties from u_getUnicodeProperties().
|
||||
* Must be in order of corresponding UProperty,
|
||||
* and there must be exactly one entry per binary UProperty.
|
||||
*
|
||||
* Properties with mask 0 are handled in code.
|
||||
* Properties with mask==0 and contains==NULL are handled in code.
|
||||
* For them, column is the UPropertySource value.
|
||||
*/
|
||||
{ 1, U_MASK(UPROPS_ALPHABETIC) },
|
||||
{ 1, U_MASK(UPROPS_ASCII_HEX_DIGIT) },
|
||||
{ UPROPS_SRC_BIDI, 0 }, /* UCHAR_BIDI_CONTROL */
|
||||
{ UPROPS_SRC_BIDI, 0 }, /* UCHAR_BIDI_MIRRORED */
|
||||
{ 1, U_MASK(UPROPS_DASH) },
|
||||
{ 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT) },
|
||||
{ 1, U_MASK(UPROPS_DEPRECATED) },
|
||||
{ 1, U_MASK(UPROPS_DIACRITIC) },
|
||||
{ 1, U_MASK(UPROPS_EXTENDER) },
|
||||
{ UPROPS_SRC_NFC, 0 }, /* UCHAR_FULL_COMPOSITION_EXCLUSION */
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_BASE) },
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_EXTEND) },
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_LINK) },
|
||||
{ 1, U_MASK(UPROPS_HEX_DIGIT) },
|
||||
{ 1, U_MASK(UPROPS_HYPHEN) },
|
||||
{ 1, U_MASK(UPROPS_ID_CONTINUE) },
|
||||
{ 1, U_MASK(UPROPS_ID_START) },
|
||||
{ 1, U_MASK(UPROPS_IDEOGRAPHIC) },
|
||||
{ 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR) },
|
||||
{ 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR) },
|
||||
{ UPROPS_SRC_BIDI, 0 }, /* UCHAR_JOIN_CONTROL */
|
||||
{ 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION) },
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_LOWERCASE */
|
||||
{ 1, U_MASK(UPROPS_MATH) },
|
||||
{ 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT) },
|
||||
{ 1, U_MASK(UPROPS_QUOTATION_MARK) },
|
||||
{ 1, U_MASK(UPROPS_RADICAL) },
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_SOFT_DOTTED */
|
||||
{ 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION) },
|
||||
{ 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH) },
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_UPPERCASE */
|
||||
{ 1, U_MASK(UPROPS_WHITE_SPACE) },
|
||||
{ 1, U_MASK(UPROPS_XID_CONTINUE) },
|
||||
{ 1, U_MASK(UPROPS_XID_START) },
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CASE_SENSITIVE */
|
||||
{ 1, U_MASK(UPROPS_S_TERM) },
|
||||
{ 1, U_MASK(UPROPS_VARIATION_SELECTOR) },
|
||||
{ UPROPS_SRC_NFC, 0 }, /* UCHAR_NFD_INERT */
|
||||
{ UPROPS_SRC_NFKC, 0 }, /* UCHAR_NFKD_INERT */
|
||||
{ UPROPS_SRC_NFC, 0 }, /* UCHAR_NFC_INERT */
|
||||
{ UPROPS_SRC_NFKC, 0 }, /* UCHAR_NFKC_INERT */
|
||||
{ UPROPS_SRC_NFC_CANON_ITER, 0 }, /* UCHAR_SEGMENT_STARTER */
|
||||
{ 1, U_MASK(UPROPS_PATTERN_SYNTAX) },
|
||||
{ 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE) },
|
||||
{ UPROPS_SRC_CHAR_AND_PROPSVEC, 0 }, /* UCHAR_POSIX_ALNUM */
|
||||
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_BLANK */
|
||||
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_GRAPH */
|
||||
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_PRINT */
|
||||
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_XDIGIT */
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CASED */
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CASE_IGNORABLE */
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CHANGES_WHEN_LOWERCASED */
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CHANGES_WHEN_UPPERCASED */
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CHANGES_WHEN_TITLECASED */
|
||||
{ UPROPS_SRC_CASE_AND_NORM, 0 }, /* UCHAR_CHANGES_WHEN_CASEFOLDED */
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CHANGES_WHEN_CASEMAPPED */
|
||||
{ UPROPS_SRC_NFKC_CF, 0 } /* UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED */
|
||||
{ 1, U_MASK(UPROPS_ALPHABETIC), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains },
|
||||
{ UPROPS_SRC_BIDI, 0, isBidiControl },
|
||||
{ UPROPS_SRC_BIDI, 0, isMirrored },
|
||||
{ 1, U_MASK(UPROPS_DASH), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_DEPRECATED), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_DIACRITIC), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_EXTENDER), defaultContains },
|
||||
{ UPROPS_SRC_NFC, 0, hasFullCompositionExclusion },
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_HYPHEN), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_ID_START), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains },
|
||||
{ UPROPS_SRC_BIDI, 0, isJoinControl },
|
||||
{ 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains },
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE
|
||||
{ 1, U_MASK(UPROPS_MATH), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_RADICAL), defaultContains },
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED
|
||||
{ 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains },
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE
|
||||
{ 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_XID_START), defaultContains },
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE
|
||||
{ 1, U_MASK(UPROPS_S_TERM), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains },
|
||||
{ UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT
|
||||
{ UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT
|
||||
{ UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT
|
||||
{ UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT
|
||||
{ UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter },
|
||||
{ 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains },
|
||||
{ 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains },
|
||||
{ UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum },
|
||||
{ UPROPS_SRC_CHAR, 0, isPOSIX_blank },
|
||||
{ UPROPS_SRC_CHAR, 0, isPOSIX_graph },
|
||||
{ UPROPS_SRC_CHAR, 0, isPOSIX_print },
|
||||
{ UPROPS_SRC_CHAR, 0, isPOSIX_xdigit },
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED
|
||||
{ UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded },
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED
|
||||
{ UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded }
|
||||
};
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
|
@ -164,158 +270,11 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
|
|||
/* c is range-checked in the functions that are called from here */
|
||||
if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
|
||||
/* not a known binary property */
|
||||
return FALSE;
|
||||
} else {
|
||||
uint32_t mask=binProps[which].mask;
|
||||
int32_t column=binProps[which].column;
|
||||
if(mask!=0) {
|
||||
/* systematic, directly stored properties */
|
||||
return (u_getUnicodeProperties(c, column)&mask)!=0;
|
||||
} else {
|
||||
if(column==UPROPS_SRC_CASE) {
|
||||
return ucase_hasBinaryProperty(c, which);
|
||||
} else if(column==UPROPS_SRC_NFC) {
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
switch(which) {
|
||||
case UCHAR_FULL_COMPOSITION_EXCLUSION: {
|
||||
// By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
|
||||
const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
|
||||
return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c));
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
// UCHAR_NF[CD]_INERT properties
|
||||
const Normalizer2 *norm2=Normalizer2Factory::getInstance(
|
||||
(UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
|
||||
return U_SUCCESS(errorCode) && norm2->isInert(c);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} else if(column==UPROPS_SRC_NFKC) {
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
// UCHAR_NFK[CD]_INERT properties
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const Normalizer2 *norm2=Normalizer2Factory::getInstance(
|
||||
(UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
|
||||
return U_SUCCESS(errorCode) && norm2->isInert(c);
|
||||
#endif
|
||||
} else if(column==UPROPS_SRC_NFKC_CF) {
|
||||
// currently only for UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
UnicodeString src(c);
|
||||
UnicodeString dest;
|
||||
{
|
||||
// The ReorderingBuffer must be in a block because its destructor
|
||||
// needs to release dest's buffer before we look at its contents.
|
||||
ReorderingBuffer buffer(*kcf, dest);
|
||||
// Small destCapacity for NFKC_CF(c).
|
||||
if(buffer.init(5, errorCode)) {
|
||||
const UChar *srcArray=src.getBuffer();
|
||||
kcf->compose(srcArray, srcArray+src.length(), FALSE,
|
||||
TRUE, buffer, errorCode);
|
||||
}
|
||||
}
|
||||
return U_SUCCESS(errorCode) && dest!=src;
|
||||
}
|
||||
#endif
|
||||
} else if(column==UPROPS_SRC_NFC_CANON_ITER) {
|
||||
/* normalization properties from nfc.nrm canonical iterator data */
|
||||
// UCHAR_SEGMENT_STARTER
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
|
||||
return
|
||||
U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) &&
|
||||
impl->isCanonSegmentStarter(c);
|
||||
#endif
|
||||
} else if(column==UPROPS_SRC_BIDI) {
|
||||
/* bidi/shaping properties */
|
||||
const UBiDiProps *bdp=GET_BIDI_PROPS();
|
||||
if(bdp!=NULL) {
|
||||
switch(which) {
|
||||
case UCHAR_BIDI_MIRRORED:
|
||||
return ubidi_isMirrored(bdp, c);
|
||||
case UCHAR_BIDI_CONTROL:
|
||||
return ubidi_isBidiControl(bdp, c);
|
||||
case UCHAR_JOIN_CONTROL:
|
||||
return ubidi_isJoinControl(bdp, c);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* else return FALSE below */
|
||||
} else if(column==UPROPS_SRC_CHAR) {
|
||||
switch(which) {
|
||||
case UCHAR_POSIX_BLANK:
|
||||
return u_isblank(c);
|
||||
case UCHAR_POSIX_GRAPH:
|
||||
return u_isgraphPOSIX(c);
|
||||
case UCHAR_POSIX_PRINT:
|
||||
return u_isprintPOSIX(c);
|
||||
case UCHAR_POSIX_XDIGIT:
|
||||
return u_isxdigit(c);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else if(column==UPROPS_SRC_CHAR_AND_PROPSVEC) {
|
||||
switch(which) {
|
||||
case UCHAR_POSIX_ALNUM:
|
||||
return u_isalnumPOSIX(c);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else if(column==UPROPS_SRC_CASE_AND_NORM) {
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
UnicodeString nfd;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
switch(which) {
|
||||
case UCHAR_CHANGES_WHEN_CASEFOLDED:
|
||||
if(nfcNorm2->getDecomposition(c, nfd)) {
|
||||
/* c has a decomposition */
|
||||
if(nfd.length()==1) {
|
||||
c=nfd[0]; /* single BMP code point */
|
||||
} else if(nfd.length()<=U16_MAX_LENGTH &&
|
||||
nfd.length()==U16_LENGTH(c=nfd.char32At(0))
|
||||
) {
|
||||
/* single supplementary code point */
|
||||
} else {
|
||||
c=U_SENTINEL;
|
||||
}
|
||||
} else if(c<0) {
|
||||
return FALSE; /* protect against bad input */
|
||||
}
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if(c>=0) {
|
||||
/* single code point */
|
||||
const UCaseProps *csp=ucase_getSingleton(&errorCode);
|
||||
const UChar *resultString;
|
||||
return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
|
||||
} else {
|
||||
/* guess some large but stack-friendly capacity */
|
||||
UChar dest[2*UCASE_MAX_STRING_LENGTH];
|
||||
int32_t destLength;
|
||||
destLength=u_strFoldCase(dest, LENGTHOF(dest),
|
||||
nfd.getBuffer(), nfd.length(),
|
||||
U_FOLD_CASE_DEFAULT, &errorCode);
|
||||
return (UBool)(U_SUCCESS(errorCode) &&
|
||||
0!=u_strCompare(nfd.getBuffer(), nfd.length(),
|
||||
dest, destLength, FALSE));
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
const BinaryProperty &prop=binProps[which];
|
||||
return prop.contains(prop, c, which);
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
@ -344,6 +303,70 @@ getFCD16(UChar32 c) {
|
|||
|
||||
#endif
|
||||
|
||||
struct IntProperty;
|
||||
|
||||
typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which);
|
||||
typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which);
|
||||
|
||||
struct IntProperty {
|
||||
int32_t column; // SRC_PROPSVEC column, or "source" if mask==0
|
||||
uint32_t mask;
|
||||
int32_t shift; // =maxValue if getMaxValueFromShift() is used
|
||||
IntPropertyGetValue *getValue;
|
||||
IntPropertyGetMaxValue *getMaxValue;
|
||||
};
|
||||
|
||||
// getValue() for systematic, directly stored properties:
// mask the properties word from prop.column, then shift it down.
static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) {
    const int32_t maskedBits=(int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask);
    return maskedBits>>prop.shift;
}
|
||||
|
||||
// getMaxValue() for directly stored properties: the same mask and shift as
// defaultGetValue(), applied to the maximum-values word of prop.column.
static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) {
    const uint32_t mask=prop.mask;
    const int32_t shift=prop.shift;
    return (uprv_getMaxValues(prop.column)&mask)>>shift;
}
|
||||
|
||||
// getMaxValue() for table rows that store the maximum directly in the shift field.
static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) {
    const int32_t maxValue=prop.shift;
    return maxValue;
}
|
||||
|
||||
// getValue() callback: the result of u_charDirection() as an int32_t.
static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const int32_t bidiClass=(int32_t)u_charDirection(c);
    return bidiClass;
}
|
||||
|
||||
// getMaxValue() callback for properties whose maximum is supplied by the bidi properties.
static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
    const int32_t maxValue=ubidi_getMaxValue(GET_BIDI_PROPS(), which);
    return maxValue;
}
|
||||
|
||||
// getValue() callback: u_getCombiningClass(c), or 0 when normalization is configured out.
static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
#if !UCONFIG_NO_NORMALIZATION
    const int32_t combiningClass=u_getCombiningClass(c);
    return combiningClass;
#else
    return 0;
#endif
}
|
||||
|
||||
// getValue() callback: the result of u_charType() as an int32_t.
static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const int32_t category=(int32_t)u_charType(c);
    return category;
}
|
||||
|
||||
// getValue() callback: joining group of c from the bidi properties.
static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const int32_t joiningGroup=ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
    return joiningGroup;
}
|
||||
|
||||
// getValue() callback: joining type of c from the bidi properties.
static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const int32_t joiningType=ubidi_getJoiningType(GET_BIDI_PROPS(), c);
    return joiningType;
}
|
||||
|
||||
// getValue() callback: extracts the numeric-type-and-value field from the
// main properties word (column -1) and maps it to the numeric type.
static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const int32_t numericTypeValue=
        (int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1));
    return UPROPS_NTV_GET_TYPE(numericTypeValue);
}
|
||||
|
||||
// getValue() callback: the result of uscript_getScript() as an int32_t.
// The error code set by that call is not inspected.
static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    UErrorCode status=U_ZERO_ERROR;
    const int32_t script=(int32_t)uscript_getScript(c, &status);
    return script;
}
|
||||
|
||||
/*
|
||||
* Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
|
||||
* Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
|
||||
|
@ -365,79 +388,90 @@ static const UHangulSyllableType gcbToHst[]={
|
|||
*/
|
||||
};
|
||||
|
||||
// getValue() callback: derives the Hangul syllable type from the
// Grapheme_Cluster_Break value via gcbToHst[] (see the comments on that table).
// Values beyond the end of the table map to U_HST_NOT_APPLICABLE.
static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    const int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
    if(gcb>=LENGTHOF(gcbToHst)) {
        return U_HST_NOT_APPLICABLE;
    }
    return gcbToHst[gcb];
}
|
||||
|
||||
// Shared getValue() callback for the UCHAR_NF*_QUICK_CHECK properties;
// returns 0 when normalization is configured out.
static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) {
#if !UCONFIG_NO_NORMALIZATION
    // The offset of "which" from UCHAR_NFD_QUICK_CHECK, added to UNORM_NFD,
    // selects the normalization mode to query.
    const UNormalizationMode mode=(UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD);
    return (int32_t)unorm_getQuickCheck(c, mode);
#else
    return 0;
#endif
}
|
||||
|
||||
// getValue() callback: the getFCD16(c) value shifted right by 8 bits;
// returns 0 when normalization is configured out.
static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
#if !UCONFIG_NO_NORMALIZATION
    const int32_t leadClass=getFCD16(c)>>8;
    return leadClass;
#else
    return 0;
#endif
}
|
||||
|
||||
// getValue() callback: the low 8 bits of the getFCD16(c) value;
// returns 0 when normalization is configured out.
static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
#if !UCONFIG_NO_NORMALIZATION
    const int32_t trailClass=getFCD16(c)&0xff;
    return trailClass;
#else
    return 0;
#endif
}
|
||||
|
||||
/*
 * Dispatch table for the int-value properties, indexed by which-UCHAR_INT_START.
 * Rows must be in the order of the corresponding UProperty constants,
 * with exactly one row per int UProperty.
 *
 * Row layout: column, mask, shift, getValue, getMaxValue.
 * - mask!=0: the value is stored in properties-vector word "column".
 * - mask==0: "column" is the UPropertySource value, and the getter computes the value.
 * - Rows using getMaxValueFromShift keep their maximum value in the shift field.
 */
static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
    // UCHAR_BIDI_CLASS
    { UPROPS_SRC_BIDI, 0, 0,
      getBiDiClass, biDiGetMaxValue },
    // UCHAR_BLOCK
    { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT,
      defaultGetValue, defaultGetMaxValue },
    // UCHAR_CANONICAL_COMBINING_CLASS
    { UPROPS_SRC_NFC, 0, 0xff,
      getCombiningClass, getMaxValueFromShift },
    // UCHAR_DECOMPOSITION_TYPE
    { 2, UPROPS_DT_MASK, 0,
      defaultGetValue, defaultGetMaxValue },
    // UCHAR_EAST_ASIAN_WIDTH
    { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT,
      defaultGetValue, defaultGetMaxValue },
    // UCHAR_GENERAL_CATEGORY
    { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,
      getGeneralCategory, getMaxValueFromShift },
    // UCHAR_JOINING_GROUP
    { UPROPS_SRC_BIDI, 0, 0,
      getJoiningGroup, biDiGetMaxValue },
    // UCHAR_JOINING_TYPE
    { UPROPS_SRC_BIDI, 0, 0,
      getJoiningType, biDiGetMaxValue },
    // UCHAR_LINE_BREAK
    { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT,
      defaultGetValue, defaultGetMaxValue },
    // UCHAR_NUMERIC_TYPE
    { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1,
      getNumericType, getMaxValueFromShift },
    // UCHAR_SCRIPT
    { 0, UPROPS_SCRIPT_MASK, 0,
      getScript, defaultGetMaxValue },
    // UCHAR_HANGUL_SYLLABLE_TYPE
    { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1,
      getHangulSyllableType, getMaxValueFromShift },
    // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
    { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES,
      getNormQuickCheck, getMaxValueFromShift },
    // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
    { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES,
      getNormQuickCheck, getMaxValueFromShift },
    // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE
    { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE,
      getNormQuickCheck, getMaxValueFromShift },
    // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE
    { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE,
      getNormQuickCheck, getMaxValueFromShift },
    // UCHAR_LEAD_CANONICAL_COMBINING_CLASS
    { UPROPS_SRC_NFC, 0, 0xff,
      getLeadCombiningClass, getMaxValueFromShift },
    // UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
    { UPROPS_SRC_NFC, 0, 0xff,
      getTrailCombiningClass, getMaxValueFromShift },
    // UCHAR_GRAPHEME_CLUSTER_BREAK
    { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT,
      defaultGetValue, defaultGetMaxValue },
    // UCHAR_SENTENCE_BREAK
    { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT,
      defaultGetValue, defaultGetMaxValue },
    // UCHAR_WORD_BREAK
    { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT,
      defaultGetValue, defaultGetMaxValue }
};
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_getIntPropertyValue(UChar32 c, UProperty which) {
|
||||
UErrorCode errorCode;
|
||||
|
||||
if(which<UCHAR_BINARY_START) {
|
||||
return 0; /* undefined */
|
||||
} else if(which<UCHAR_BINARY_LIMIT) {
|
||||
return (int32_t)u_hasBinaryProperty(c, which);
|
||||
} else if(which<UCHAR_INT_START) {
|
||||
return 0; /* undefined */
|
||||
if(which<UCHAR_INT_START) {
|
||||
if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
|
||||
const BinaryProperty &prop=binProps[which];
|
||||
return prop.contains(prop, c, which);
|
||||
}
|
||||
} else if(which<UCHAR_INT_LIMIT) {
|
||||
switch(which) {
|
||||
case UCHAR_BIDI_CLASS:
|
||||
return (int32_t)u_charDirection(c);
|
||||
case UCHAR_BLOCK:
|
||||
return (int32_t)ublock_getCode(c);
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
case UCHAR_CANONICAL_COMBINING_CLASS:
|
||||
return u_getCombiningClass(c);
|
||||
#endif
|
||||
case UCHAR_DECOMPOSITION_TYPE:
|
||||
return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK);
|
||||
case UCHAR_EAST_ASIAN_WIDTH:
|
||||
return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT;
|
||||
case UCHAR_GENERAL_CATEGORY:
|
||||
return (int32_t)u_charType(c);
|
||||
case UCHAR_JOINING_GROUP:
|
||||
return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
|
||||
case UCHAR_JOINING_TYPE:
|
||||
return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
|
||||
case UCHAR_LINE_BREAK:
|
||||
return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT;
|
||||
case UCHAR_NUMERIC_TYPE: {
|
||||
int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1));
|
||||
return UPROPS_NTV_GET_TYPE(ntv);
|
||||
}
|
||||
case UCHAR_SCRIPT:
|
||||
errorCode=U_ZERO_ERROR;
|
||||
return (int32_t)uscript_getScript(c, &errorCode);
|
||||
case UCHAR_HANGUL_SYLLABLE_TYPE: {
|
||||
/* see comments on gcbToHst[] above */
|
||||
int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
|
||||
if(gcb<LENGTHOF(gcbToHst)) {
|
||||
return gcbToHst[gcb];
|
||||
} else {
|
||||
return U_HST_NOT_APPLICABLE;
|
||||
}
|
||||
}
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
case UCHAR_NFD_QUICK_CHECK:
|
||||
case UCHAR_NFKD_QUICK_CHECK:
|
||||
case UCHAR_NFC_QUICK_CHECK:
|
||||
case UCHAR_NFKC_QUICK_CHECK:
|
||||
return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
|
||||
case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
|
||||
return getFCD16(c)>>8;
|
||||
case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
|
||||
return getFCD16(c)&0xff;
|
||||
#endif
|
||||
case UCHAR_GRAPHEME_CLUSTER_BREAK:
|
||||
return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
|
||||
case UCHAR_SENTENCE_BREAK:
|
||||
return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT;
|
||||
case UCHAR_WORD_BREAK:
|
||||
return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT;
|
||||
default:
|
||||
return 0; /* undefined */
|
||||
}
|
||||
const IntProperty &prop=intProps[which-UCHAR_INT_START];
|
||||
return prop.getValue(prop, c, which);
|
||||
} else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
|
||||
return U_MASK(u_charType(c));
|
||||
} else {
|
||||
return 0; /* undefined */
|
||||
}
|
||||
return 0; // undefined
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
|
@ -447,104 +481,36 @@ u_getIntPropertyMinValue(UProperty /*which*/) {
|
|||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_getIntPropertyMaxValue(UProperty which) {
|
||||
if(which<UCHAR_BINARY_START) {
|
||||
return -1; /* undefined */
|
||||
} else if(which<UCHAR_BINARY_LIMIT) {
|
||||
return 1; /* maximum TRUE for all binary properties */
|
||||
} else if(which<UCHAR_INT_START) {
|
||||
return -1; /* undefined */
|
||||
} else if(which<UCHAR_INT_LIMIT) {
|
||||
switch(which) {
|
||||
case UCHAR_BIDI_CLASS:
|
||||
case UCHAR_JOINING_GROUP:
|
||||
case UCHAR_JOINING_TYPE:
|
||||
return ubidi_getMaxValue(GET_BIDI_PROPS(), which);
|
||||
case UCHAR_BLOCK:
|
||||
return (uprv_getMaxValues(0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT;
|
||||
case UCHAR_CANONICAL_COMBINING_CLASS:
|
||||
case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
|
||||
case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
|
||||
return 0xff; /* TODO do we need to be more precise, getting the actual maximum? */
|
||||
case UCHAR_DECOMPOSITION_TYPE:
|
||||
return uprv_getMaxValues(2)&UPROPS_DT_MASK;
|
||||
case UCHAR_EAST_ASIAN_WIDTH:
|
||||
return (uprv_getMaxValues(0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT;
|
||||
case UCHAR_GENERAL_CATEGORY:
|
||||
return (int32_t)U_CHAR_CATEGORY_COUNT-1;
|
||||
case UCHAR_LINE_BREAK:
|
||||
return (uprv_getMaxValues(UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT;
|
||||
case UCHAR_NUMERIC_TYPE:
|
||||
return (int32_t)U_NT_COUNT-1;
|
||||
case UCHAR_SCRIPT:
|
||||
return uprv_getMaxValues(0)&UPROPS_SCRIPT_MASK;
|
||||
case UCHAR_HANGUL_SYLLABLE_TYPE:
|
||||
return (int32_t)U_HST_COUNT-1;
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
case UCHAR_NFD_QUICK_CHECK:
|
||||
case UCHAR_NFKD_QUICK_CHECK:
|
||||
return (int32_t)UNORM_YES; /* these are never "maybe", only "no" or "yes" */
|
||||
case UCHAR_NFC_QUICK_CHECK:
|
||||
case UCHAR_NFKC_QUICK_CHECK:
|
||||
return (int32_t)UNORM_MAYBE;
|
||||
#endif
|
||||
case UCHAR_GRAPHEME_CLUSTER_BREAK:
|
||||
return (uprv_getMaxValues(2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
|
||||
case UCHAR_SENTENCE_BREAK:
|
||||
return (uprv_getMaxValues(2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT;
|
||||
case UCHAR_WORD_BREAK:
|
||||
return (uprv_getMaxValues(2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT;
|
||||
default:
|
||||
return -1; /* undefined */
|
||||
if(which<UCHAR_INT_START) {
|
||||
if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
|
||||
return 1; // maximum TRUE for all binary properties
|
||||
}
|
||||
} else {
|
||||
return -1; /* undefined */
|
||||
} else if(which<UCHAR_INT_LIMIT) {
|
||||
const IntProperty &prop=intProps[which-UCHAR_INT_START];
|
||||
return prop.getMaxValue(prop, which);
|
||||
}
|
||||
return -1; // undefined
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: Simplify, similar to binProps[].
|
||||
* Use an array of column/source, mask, shift values to drive returning simple
|
||||
* properties and their sources.
|
||||
*
|
||||
* TODO: Split the single propsvec into one per column, and have
|
||||
* upropsvec_addPropertyStarts() pass a trie value function that gets the
|
||||
* desired column's values.
|
||||
*/
|
||||
U_CFUNC UPropertySource U_EXPORT2
|
||||
uprops_getSource(UProperty which) {
|
||||
if(which<UCHAR_BINARY_START) {
|
||||
return UPROPS_SRC_NONE; /* undefined */
|
||||
} else if(which<UCHAR_BINARY_LIMIT) {
|
||||
if(binProps[which].mask!=0) {
|
||||
const BinaryProperty &prop=binProps[which];
|
||||
if(prop.mask!=0) {
|
||||
return UPROPS_SRC_PROPSVEC;
|
||||
} else {
|
||||
return (UPropertySource)binProps[which].column;
|
||||
return (UPropertySource)prop.column;
|
||||
}
|
||||
} else if(which<UCHAR_INT_START) {
|
||||
return UPROPS_SRC_NONE; /* undefined */
|
||||
} else if(which<UCHAR_INT_LIMIT) {
|
||||
switch(which) {
|
||||
case UCHAR_GENERAL_CATEGORY:
|
||||
case UCHAR_NUMERIC_TYPE:
|
||||
return UPROPS_SRC_CHAR;
|
||||
|
||||
case UCHAR_CANONICAL_COMBINING_CLASS:
|
||||
case UCHAR_NFD_QUICK_CHECK:
|
||||
case UCHAR_NFC_QUICK_CHECK:
|
||||
case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
|
||||
case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
|
||||
return UPROPS_SRC_NFC;
|
||||
case UCHAR_NFKD_QUICK_CHECK:
|
||||
case UCHAR_NFKC_QUICK_CHECK:
|
||||
return UPROPS_SRC_NFKC;
|
||||
|
||||
case UCHAR_BIDI_CLASS:
|
||||
case UCHAR_JOINING_GROUP:
|
||||
case UCHAR_JOINING_TYPE:
|
||||
return UPROPS_SRC_BIDI;
|
||||
|
||||
default:
|
||||
const IntProperty &prop=intProps[which-UCHAR_INT_START];
|
||||
if(prop.mask!=0) {
|
||||
return UPROPS_SRC_PROPSVEC;
|
||||
} else {
|
||||
return (UPropertySource)prop.column;
|
||||
}
|
||||
} else if(which<UCHAR_STRING_START) {
|
||||
switch(which) {
|
||||
|
@ -582,7 +548,12 @@ uprops_getSource(UProperty which) {
|
|||
return UPROPS_SRC_NONE;
|
||||
}
|
||||
} else {
|
||||
return UPROPS_SRC_NONE; /* undefined */
|
||||
switch(which) {
|
||||
case UCHAR_SCRIPT_EXTENSIONS:
|
||||
return UPROPS_SRC_PROPSVEC;
|
||||
default:
|
||||
return UPROPS_SRC_NONE; /* undefined */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -604,7 +575,7 @@ u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *p
|
|||
// case folding and NFKC.)
|
||||
// For the derivation, see Unicode's DerivedNormalizationProps.txt.
|
||||
const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode);
|
||||
const UCaseProps *csp=ucase_getSingleton(pErrorCode);
|
||||
const UCaseProps *csp=ucase_getSingleton();
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -637,114 +608,3 @@ u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *p
|
|||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------
|
||||
* Inclusions list
|
||||
*----------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* Return a set of characters for property enumeration.
|
||||
* The set implicitly contains 0x110000 as well, which is one more than the highest
|
||||
* Unicode code point.
|
||||
*
|
||||
* This set is used as an ordered list - its code points are ordered, and
|
||||
* consecutive code points (in Unicode code point order) in the set define a range.
|
||||
* For each two consecutive characters (start, limit) in the set,
|
||||
* all of the UCD/normalization and related properties for
|
||||
* all code points start..limit-1 are all the same,
|
||||
* except for character names and ISO comments.
|
||||
*
|
||||
* All Unicode code points U+0000..U+10ffff are covered by these ranges.
|
||||
* The ranges define a partition of the Unicode code space.
|
||||
* ICU uses the inclusions set to enumerate properties for generating
|
||||
* UnicodeSets containing all code points that have a certain property value.
|
||||
*
|
||||
* The Inclusion List is generated from the UCD. It is generated
|
||||
* by enumerating the data tries, and code points for hardcoded properties
|
||||
* are added as well.
|
||||
*
|
||||
* --------------------------------------------------------------------------
|
||||
*
|
||||
* The following are ideas for getting properties-unique code point ranges,
|
||||
* with possible optimizations beyond the current implementation.
|
||||
* These optimizations would require more code and be more fragile.
|
||||
* The current implementation generates one single list (set) for all properties.
|
||||
*
|
||||
* To enumerate properties efficiently, one needs to know ranges of
|
||||
* repetitive values, so that the value of only each start code point
|
||||
* can be applied to the whole range.
|
||||
* This information is in principle available in the uprops.icu/unorm.icu data.
|
||||
*
|
||||
* There are two obstacles:
|
||||
*
|
||||
* 1. Some properties are computed from multiple data structures,
|
||||
* making it necessary to get repetitive ranges by intersecting
|
||||
* ranges from multiple tries.
|
||||
*
|
||||
* 2. It is not economical to write code for getting repetitive ranges
|
||||
* that are precise for each of some 50 properties.
|
||||
*
|
||||
* Compromise ideas:
|
||||
*
|
||||
* - Get ranges per trie, not per individual property.
|
||||
* Each range contains the same values for a whole group of properties.
|
||||
* This would generate currently five range sets, two for uprops.icu tries
|
||||
* and three for unorm.icu tries.
|
||||
*
|
||||
* - Combine sets of ranges for multiple tries to get sufficient sets
|
||||
* for properties, e.g., the uprops.icu main and auxiliary tries
|
||||
* for all non-normalization properties.
|
||||
*
|
||||
* Ideas for representing ranges and combining them:
|
||||
*
|
||||
* - A UnicodeSet could hold just the start code points of ranges.
|
||||
* Multiple sets are easily combined by or-ing them together.
|
||||
*
|
||||
* - Alternatively, a UnicodeSet could hold each even-numbered range.
|
||||
* All ranges could be enumerated by using each start code point
|
||||
* (for the even-numbered ranges) as well as each limit (end+1) code point
|
||||
* (for the odd-numbered ranges).
|
||||
* It should be possible to combine two such sets by xor-ing them,
|
||||
* but no more than two.
|
||||
*
|
||||
* The second way to represent ranges may(?!) yield smaller UnicodeSet arrays,
|
||||
* but the first one is certainly simpler and applicable for combining more than
|
||||
* two range sets.
|
||||
*
|
||||
* It is possible to combine all range sets for all uprops/unorm tries into one
|
||||
* set that can be used for all properties.
|
||||
* As an optimization, there could be less-combined range sets for certain
|
||||
* groups of properties.
|
||||
* The relationship of which less-combined range set to use for which property
|
||||
* depends on the implementation of the properties and must be hardcoded
|
||||
* - somewhat error-prone and higher maintenance but can be tested easily
|
||||
* by building property sets "the simple way" in test code.
|
||||
*
|
||||
* ---
|
||||
*
|
||||
* Do not use a UnicodeSet pattern because that causes infinite recursion;
|
||||
* UnicodeSet depends on the inclusions set.
|
||||
*
|
||||
* ---
|
||||
*
|
||||
* uprv_getInclusions() is commented out starting 2004-sep-13 because
|
||||
* uniset_props.cpp now calls the uxyz_addPropertyStarts() directly,
|
||||
* and only for the relevant property source.
|
||||
*/
|
||||
#if 0
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
unorm_addPropertyStarts(sa, pErrorCode);
|
||||
#endif
|
||||
uchar_addPropertyStarts(sa, pErrorCode);
|
||||
ucase_addPropertyStarts(ucase_getSingleton(pErrorCode), sa, pErrorCode);
|
||||
ubidi_addPropertyStarts(ubidi_getSingleton(pErrorCode), sa, pErrorCode);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -35,7 +35,13 @@ enum {
|
|||
UPROPS_ADDITIONAL_VECTORS_INDEX,
|
||||
UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX,
|
||||
|
||||
UPROPS_RESERVED_INDEX, /* 6 */
|
||||
UPROPS_SCRIPT_EXTENSIONS_INDEX,
|
||||
|
||||
UPROPS_RESERVED_INDEX_7,
|
||||
UPROPS_RESERVED_INDEX_8,
|
||||
|
||||
/* size of the data file (number of 32-bit units after the header) */
|
||||
UPROPS_DATA_TOP_INDEX,
|
||||
|
||||
/* maximum values for code values in vector word 0 */
|
||||
UPROPS_MAX_VALUES_INDEX=10,
|
||||
|
@ -83,16 +89,25 @@ enum {
|
|||
* Properties in vector word 0
|
||||
* Bits
|
||||
* 31..24 DerivedAge version major/minor one nibble each
|
||||
* 23..20 reserved
|
||||
* 23..22 3..1: Bits 7..0 = Script_Extensions index
|
||||
* 3: Script value from Script_Extensions
|
||||
* 2: Script=Inherited
|
||||
* 1: Script=Common
|
||||
* 0: Script=bits 7..0
|
||||
* 21..20 reserved
|
||||
* 19..17 East Asian Width
|
||||
* 16.. 8 UBlockCode
|
||||
* 7.. 0 UScriptCode
|
||||
* 7.. 0 UScriptCode, or index to Script_Extensions
|
||||
*/
|
||||
|
||||
/* derived age: one nibble each for major and minor version numbers */
|
||||
#define UPROPS_AGE_MASK 0xff000000
|
||||
#define UPROPS_AGE_SHIFT 24
|
||||
|
||||
/* Script_Extensions: mask includes Script */
|
||||
#define UPROPS_SCRIPT_X_MASK 0x00c000ff
|
||||
#define UPROPS_SCRIPT_X_SHIFT 22
|
||||
|
||||
#define UPROPS_EA_MASK 0x000e0000
|
||||
#define UPROPS_EA_SHIFT 17
|
||||
|
||||
|
@ -101,6 +116,11 @@ enum {
|
|||
|
||||
#define UPROPS_SCRIPT_MASK 0x000000ff
|
||||
|
||||
/* UPROPS_SCRIPT_X_WITH_COMMON must be the lowest value that involves Script_Extensions. */
|
||||
#define UPROPS_SCRIPT_X_WITH_COMMON 0x400000
|
||||
#define UPROPS_SCRIPT_X_WITH_INHERITED 0x800000
|
||||
#define UPROPS_SCRIPT_X_WITH_OTHER 0xc00000
|
||||
|
||||
/*
|
||||
* Properties in vector word 1
|
||||
* Each bit encodes one binary property.
|
||||
|
@ -160,7 +180,6 @@ enum {
|
|||
*/
|
||||
#define UPROPS_LB_MASK 0x03f00000
|
||||
#define UPROPS_LB_SHIFT 20
|
||||
#define UPROPS_LB_VWORD 2
|
||||
|
||||
#define UPROPS_SB_MASK 0x000f8000
|
||||
#define UPROPS_SB_SHIFT 15
|
||||
|
|
|
@ -316,17 +316,10 @@ _shapeToArabicDigitsWithContext(UChar *s, int32_t length,
|
|||
UChar digitBase,
|
||||
UBool isLogical, UBool lastStrongWasAL) {
|
||||
const UBiDiProps *bdp;
|
||||
UErrorCode errorCode;
|
||||
|
||||
int32_t i;
|
||||
UChar c;
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
bdp=ubidi_getSingleton(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
bdp=ubidi_getSingleton();
|
||||
digitBase-=0x30;
|
||||
|
||||
/* the iteration direction depends on the type of input */
|
||||
|
|
|
@ -348,17 +348,13 @@ usprep_getProfile(const char* path,
|
|||
if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/* get the options */
|
||||
newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
|
||||
newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
|
||||
|
||||
if(newProfile->checkBiDi) {
|
||||
newProfile->bdp = ubidi_getSingleton(status);
|
||||
if(U_FAILURE(*status)) {
|
||||
usprep_unload(newProfile.getAlias());
|
||||
return NULL;
|
||||
}
|
||||
newProfile->bdp = ubidi_getSingleton();
|
||||
}
|
||||
|
||||
LocalMemory<UStringPrepKey> key;
|
||||
|
|
|
@ -191,10 +191,7 @@ setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode)
|
|||
static U_INLINE void
|
||||
setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
|
||||
if(csm->csp==NULL) {
|
||||
csm->csp=ucase_getSingleton(pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
csm->csp=ucase_getSingleton();
|
||||
}
|
||||
if(locale!=NULL && locale[0]==0) {
|
||||
csm->locale[0]=0;
|
||||
|
@ -622,7 +619,7 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
|
|||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseMap csm={ NULL };
|
||||
csm.csp=ucase_getSingleton(pErrorCode);
|
||||
csm.csp=ucase_getSingleton();
|
||||
csm.options=options;
|
||||
return caseMap(&csm,
|
||||
dest, destCapacity,
|
||||
|
@ -680,7 +677,7 @@ u_strcmpFold(const UChar *s1, int32_t length1,
|
|||
* assume that at least the option U_COMPARE_IGNORE_CASE is set
|
||||
* otherwise this function would have to behave exactly as uprv_strCompare()
|
||||
*/
|
||||
csp=ucase_getSingleton(pErrorCode);
|
||||
csp=ucase_getSingleton();
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -583,21 +583,18 @@ utext_caseCompare(UText *s1, int32_t length1,
|
|||
/* current code points */
|
||||
UChar32 c1, c2;
|
||||
uint8_t cLength1, cLength2;
|
||||
|
||||
|
||||
/* argument checking */
|
||||
if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
if(s1==NULL || s2==NULL) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
csp=ucase_getSingleton(pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
csp=ucase_getSingleton();
|
||||
|
||||
/* for variable-length strings */
|
||||
if(length1 < 0) {
|
||||
length1 = INT32_MIN;
|
||||
|
@ -709,21 +706,18 @@ utext_caseCompareNativeLimit(UText *s1, int64_t limit1,
|
|||
|
||||
/* native indexes into s1 and s2 */
|
||||
int64_t index1, index2;
|
||||
|
||||
|
||||
/* argument checking */
|
||||
if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
if(s1==NULL || s2==NULL) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
csp=ucase_getSingleton(pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
csp=ucase_getSingleton();
|
||||
|
||||
/* initialize */
|
||||
index1 = (limit1 >= 0 ? UTEXT_GETNATIVEINDEX(s1) : 0);
|
||||
index2 = (limit2 >= 0 ? UTEXT_GETNATIVEINDEX(s2) : 0);
|
||||
|
|
|
@ -482,6 +482,9 @@ utrie2_swap(const UDataSwapper *ds,
|
|||
return size;
|
||||
}
|
||||
|
||||
// utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c
|
||||
// to avoid a dependency from utrie2.cpp on utrie.c.
|
||||
|
||||
/* enumeration -------------------------------------------------------------- */
|
||||
|
||||
#define MIN(a, b) ((a)<(b) ? (a) : (b))
|
||||
|
|
|
@ -349,6 +349,15 @@ utrie2_swap(const UDataSwapper *ds,
|
|||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Swap a serialized UTrie or UTrie2.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrie2_swapAnyVersion(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Build a UTrie2 (version 2) from a UTrie (version 1).
|
||||
* Enumerates all values in the UTrie and builds a UTrie2 with the same values.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2009, International Business Machines
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -31,7 +31,7 @@
|
|||
#include "utrie2.h"
|
||||
#include "utrie2_impl.h"
|
||||
|
||||
#include "utrie.h" /* for utrie2_fromUTrie() */
|
||||
#include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
|
@ -1445,3 +1445,25 @@ utrie2_serialize(UTrie2 *trie,
|
|||
}
|
||||
return trie->length;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is here to avoid a dependency from utrie2.cpp on utrie.c.
|
||||
* This file already depends on utrie.c.
|
||||
* Otherwise, this should be in utrie2.cpp right after utrie2_swap().
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrie2_swapAnyVersion(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode) {
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
switch(utrie2_getVersion(inData, length, TRUE)) {
|
||||
case 1:
|
||||
return utrie_swap(ds, inData, length, outData, pErrorCode);
|
||||
case 2:
|
||||
return utrie2_swap(ds, inData, length, outData, pErrorCode);
|
||||
default:
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -654,6 +654,14 @@ UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart
|
|||
return length;
|
||||
}
|
||||
|
||||
// Some non-ASCII characters are equivalent to sequences with
|
||||
// non-LDH ASCII characters. To find them:
|
||||
// grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt)
|
||||
static inline UBool
isNonASCIIDisallowedSTD3Valid(UChar32 c) {
    // Exactly three code points qualify: U+2260 on its own,
    // and the adjacent pair U+226E..U+226F.
    const UBool isU2260=(c==0x2260);
    const UBool isU226EOr226F=(0x226E<=c && c<=0x226F);
    return isU2260 || isU226EOr226F;
}
|
||||
|
||||
// Replace the label in dest with the label string, if the label was modified.
|
||||
// If &label==&dest then the label was modified in-place and labelLength
|
||||
// is the new label length, different from label.length().
|
||||
|
@ -778,9 +786,11 @@ UTS46::processLabel(UnicodeString &dest,
|
|||
}
|
||||
} else {
|
||||
oredChars|=c;
|
||||
if(c==0xfffd) {
|
||||
if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) {
|
||||
info.labelErrors|=UIDNA_ERROR_DISALLOWED;
|
||||
*s=0xfffd;
|
||||
} else if(c==0xfffd) {
|
||||
info.labelErrors|=UIDNA_ERROR_DISALLOWED;
|
||||
++s;
|
||||
}
|
||||
}
|
||||
++s;
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -13,7 +13,7 @@
|
|||
U_ICUDATA_NAME=icudt45
|
||||
##############################################################################
|
||||
U_ICUDATA_ENDIAN_SUFFIX=l
|
||||
UNICODE_VERSION=5.2
|
||||
UNICODE_VERSION=6.0
|
||||
ICU_LIB_TARGET=$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll
|
||||
|
||||
# ICUMAKE
|
||||
|
|
|
@ -1,18 +1,19 @@
|
|||
# BidiMirroring-5.2.0.txt
|
||||
# Date: 2009-05-22, 12:44:00 PDT [KW]
|
||||
# BidiMirroring-6.0.0.txt
|
||||
# Date: 2010-06-21, 12:09:00 PDT [KW]
|
||||
#
|
||||
# Bidi_Mirroring_Glyph Property
|
||||
#
|
||||
# This file is an informative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# This data file lists characters that have the mirrored property
|
||||
# where there is another Unicode character that typically has a glyph
|
||||
# This data file lists characters that have the Bidi_Mirrored=True property
|
||||
# value, for which there is another Unicode character that typically has a glyph
|
||||
# that is the mirror image of the original character's glyph.
|
||||
# The repertoire covered by the file is Unicode 5.2.0.
|
||||
#
|
||||
# The repertoire covered by the file is Unicode 6.0.0.
|
||||
#
|
||||
# The file contains a list of lines with mappings from one code point
|
||||
# to another one for character-based mirroring.
|
||||
|
@ -25,14 +26,22 @@
|
|||
# variable-length hexadecimal value with 4 to 6 digits.
|
||||
# A comment indicates where the characters are "BEST FIT" mirroring.
|
||||
#
|
||||
# Code points with the "mirrored" property but no appropriate mirrors are
|
||||
# Code points for which Bidi_Mirrored=True, but for which no appropriate
|
||||
# characters exist with mirrored glyphs, are
|
||||
# listed as comments at the end of the file.
|
||||
#
|
||||
# Formally, the default value of the Bidi_Mirroring_Glyph property
|
||||
# for each code point is the code point itself, unless a mapping to
|
||||
# some other character is specified in this data file. When a code
|
||||
# point has the default value for the Bidi_Mirroring_Glyph property,
|
||||
# that means that no other character exists whose glyph is suitable
|
||||
# for character-based mirroring.
|
||||
#
|
||||
# For information on bidi mirroring, see UAX #9: Bidirectional Algorithm,
|
||||
# at http://www.unicode.org/unicode/reports/tr9/
|
||||
#
|
||||
# This file was originally created by Markus Scherer.
|
||||
# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, and 5.2 by Ken Whistler.
|
||||
# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler.
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
|
@ -464,8 +473,8 @@ FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET
|
|||
# 22FF; Z NOTATION BAG MEMBERSHIP
|
||||
# 2320; TOP HALF INTEGRAL
|
||||
# 2321; BOTTOM HALF INTEGRAL
|
||||
# 27CC; LONG DIVISION
|
||||
# 27C0; THREE DIMENSIONAL ANGLE
|
||||
# 27CC; LONG DIVISION
|
||||
# 27D3; LOWER RIGHT CORNER WITH DOT
|
||||
# 27D4; UPPER LEFT CORNER WITH DOT
|
||||
# 27DC; LEFT MULTIMAP
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# Blocks-5.2.0.txt
|
||||
# Date: 2009-05-19, 16:21:00 PDT [KW]
|
||||
# Blocks-6.0.0.txt
|
||||
# Date: 2010-06-04, 11:12:00 PDT [KW]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -45,6 +45,7 @@
|
|||
0780..07BF; Thaana
|
||||
07C0..07FF; NKo
|
||||
0800..083F; Samaritan
|
||||
0840..085F; Mandaic
|
||||
0900..097F; Devanagari
|
||||
0980..09FF; Bengali
|
||||
0A00..0A7F; Gurmukhi
|
||||
|
@ -82,6 +83,7 @@
|
|||
1A20..1AAF; Tai Tham
|
||||
1B00..1B7F; Balinese
|
||||
1B80..1BBF; Sundanese
|
||||
1BC0..1BFF; Batak
|
||||
1C00..1C4F; Lepcha
|
||||
1C50..1C7F; Ol Chiki
|
||||
1CD0..1CFF; Vedic Extensions
|
||||
|
@ -159,6 +161,7 @@ A980..A9DF; Javanese
|
|||
AA00..AA5F; Cham
|
||||
AA60..AA7F; Myanmar Extended-A
|
||||
AA80..AADF; Tai Viet
|
||||
AB00..AB2F; Ethiopic Extended-A
|
||||
ABC0..ABFF; Meetei Mayek
|
||||
AC00..D7AF; Hangul Syllables
|
||||
D7B0..D7FF; Hangul Jamo Extended-B
|
||||
|
@ -203,10 +206,13 @@ FFF0..FFFF; Specials
|
|||
10B60..10B7F; Inscriptional Pahlavi
|
||||
10C00..10C4F; Old Turkic
|
||||
10E60..10E7F; Rumi Numeral Symbols
|
||||
11000..1107F; Brahmi
|
||||
11080..110CF; Kaithi
|
||||
12000..123FF; Cuneiform
|
||||
12400..1247F; Cuneiform Numbers and Punctuation
|
||||
13000..1342F; Egyptian Hieroglyphs
|
||||
16800..16A3F; Bamum Supplement
|
||||
1B000..1B0FF; Kana Supplement
|
||||
1D000..1D0FF; Byzantine Musical Symbols
|
||||
1D100..1D1FF; Musical Symbols
|
||||
1D200..1D24F; Ancient Greek Musical Notation
|
||||
|
@ -215,10 +221,16 @@ FFF0..FFFF; Specials
|
|||
1D400..1D7FF; Mathematical Alphanumeric Symbols
|
||||
1F000..1F02F; Mahjong Tiles
|
||||
1F030..1F09F; Domino Tiles
|
||||
1F0A0..1F0FF; Playing Cards
|
||||
1F100..1F1FF; Enclosed Alphanumeric Supplement
|
||||
1F200..1F2FF; Enclosed Ideographic Supplement
|
||||
1F300..1F5FF; Miscellaneous Symbols And Pictographs
|
||||
1F600..1F64F; Emoticons
|
||||
1F680..1F6FF; Transport And Map Symbols
|
||||
1F700..1F77F; Alchemical Symbols
|
||||
20000..2A6DF; CJK Unified Ideographs Extension B
|
||||
2A700..2B73F; CJK Unified Ideographs Extension C
|
||||
2B740..2B81F; CJK Unified Ideographs Extension D
|
||||
2F800..2FA1F; CJK Compatibility Ideographs Supplement
|
||||
E0000..E007F; Tags
|
||||
E0100..E01EF; Variation Selectors Supplement
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# CaseFolding-5.2.0.txt
|
||||
# Date: 2009-05-28, 23:02:34 GMT [MD]
|
||||
# CaseFolding-6.0.0.txt
|
||||
# Date: 2010-05-18, 00:48:57 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -496,6 +496,7 @@
|
|||
0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
|
||||
0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
|
||||
0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
|
||||
0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
|
||||
0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
|
||||
0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
|
||||
0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
|
||||
|
@ -1057,6 +1058,7 @@ A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
|
|||
A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS
|
||||
A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
|
||||
A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN
|
||||
A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE
|
||||
A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE
|
||||
A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL
|
||||
A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM
|
||||
|
@ -1122,6 +1124,13 @@ A782; C; A783; # LATIN CAPITAL LETTER INSULAR R
|
|||
A784; C; A785; # LATIN CAPITAL LETTER INSULAR S
|
||||
A786; C; A787; # LATIN CAPITAL LETTER INSULAR T
|
||||
A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO
|
||||
A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H
|
||||
A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER
|
||||
A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
|
||||
A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
|
||||
A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
|
||||
A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
|
||||
A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
|
||||
FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
|
||||
FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
|
||||
FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# DerivedAge-5.2.0.txt
|
||||
# Date: 2009-09-17, 22:52:52 GMT [MD]
|
||||
# DerivedAge-6.0.0.txt
|
||||
# Date: 2010-08-19, 00:47:58 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -1061,4 +1061,117 @@ FA6B..FA6D ; 5.2 # [3] CJK COMPATIBILITY IDEOGRAPH-FA6B..CJK COMPATIBILITY
|
|||
|
||||
# Total code points: 6648
|
||||
|
||||
# ================================================
|
||||
|
||||
# Newly assigned in Unicode 6.0.0 (Scheduled September, 2010)
|
||||
|
||||
0526..0527 ; 6.0 # [2] CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
|
||||
0620 ; 6.0 # ARABIC LETTER KASHMIRI YEH
|
||||
065F ; 6.0 # ARABIC WAVY HAMZA BELOW
|
||||
0840..085B ; 6.0 # [28] MANDAIC LETTER HALQA..MANDAIC GEMINATION MARK
|
||||
085E ; 6.0 # MANDAIC PUNCTUATION
|
||||
093A..093B ; 6.0 # [2] DEVANAGARI VOWEL SIGN OE..DEVANAGARI VOWEL SIGN OOE
|
||||
094F ; 6.0 # DEVANAGARI VOWEL SIGN AW
|
||||
0956..0957 ; 6.0 # [2] DEVANAGARI VOWEL SIGN UE..DEVANAGARI VOWEL SIGN UUE
|
||||
0973..0977 ; 6.0 # [5] DEVANAGARI LETTER OE..DEVANAGARI LETTER UUE
|
||||
0B72..0B77 ; 6.0 # [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
|
||||
0D29 ; 6.0 # MALAYALAM LETTER NNNA
|
||||
0D3A ; 6.0 # MALAYALAM LETTER TTTA
|
||||
0D4E ; 6.0 # MALAYALAM LETTER DOT REPH
|
||||
0F8C..0F8F ; 6.0 # [4] TIBETAN SIGN INVERTED MCHU CAN..TIBETAN SUBJOINED SIGN INVERTED MCHU CAN
|
||||
0FD9..0FDA ; 6.0 # [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS
|
||||
135D..135E ; 6.0 # [2] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING VOWEL LENGTH MARK
|
||||
1BC0..1BF3 ; 6.0 # [52] BATAK LETTER A..BATAK PANONGONAN
|
||||
1BFC..1BFF ; 6.0 # [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
|
||||
1DFC ; 6.0 # COMBINING DOUBLE INVERTED BREVE BELOW
|
||||
2095..209C ; 6.0 # [8] LATIN SUBSCRIPT SMALL LETTER H..LATIN SUBSCRIPT SMALL LETTER T
|
||||
20B9 ; 6.0 # INDIAN RUPEE SIGN
|
||||
23E9..23F3 ; 6.0 # [11] BLACK RIGHT-POINTING DOUBLE TRIANGLE..HOURGLASS WITH FLOWING SAND
|
||||
26CE ; 6.0 # OPHIUCHUS
|
||||
26E2 ; 6.0 # ASTRONOMICAL SYMBOL FOR URANUS
|
||||
26E4..26E7 ; 6.0 # [4] PENTAGRAM..INVERTED PENTAGRAM
|
||||
2705 ; 6.0 # WHITE HEAVY CHECK MARK
|
||||
270A..270B ; 6.0 # [2] RAISED FIST..RAISED HAND
|
||||
2728 ; 6.0 # SPARKLES
|
||||
274C ; 6.0 # CROSS MARK
|
||||
274E ; 6.0 # NEGATIVE SQUARED CROSS MARK
|
||||
2753..2755 ; 6.0 # [3] BLACK QUESTION MARK ORNAMENT..WHITE EXCLAMATION MARK ORNAMENT
|
||||
275F..2760 ; 6.0 # [2] HEAVY LOW SINGLE COMMA QUOTATION MARK ORNAMENT..HEAVY LOW DOUBLE COMMA QUOTATION MARK ORNAMENT
|
||||
2795..2797 ; 6.0 # [3] HEAVY PLUS SIGN..HEAVY DIVISION SIGN
|
||||
27B0 ; 6.0 # CURLY LOOP
|
||||
27BF ; 6.0 # DOUBLE CURLY LOOP
|
||||
27CE..27CF ; 6.0 # [2] SQUARED LOGICAL AND..SQUARED LOGICAL OR
|
||||
2D70 ; 6.0 # TIFINAGH SEPARATOR MARK
|
||||
2D7F ; 6.0 # TIFINAGH CONSONANT JOINER
|
||||
31B8..31BA ; 6.0 # [3] BOPOMOFO LETTER GH..BOPOMOFO LETTER ZY
|
||||
A660..A661 ; 6.0 # [2] CYRILLIC CAPITAL LETTER REVERSED TSE..CYRILLIC SMALL LETTER REVERSED TSE
|
||||
A78D..A78E ; 6.0 # [2] LATIN CAPITAL LETTER TURNED H..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||||
A790..A791 ; 6.0 # [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER
|
||||
A7A0..A7A9 ; 6.0 # [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE
|
||||
A7FA ; 6.0 # LATIN LETTER SMALL CAPITAL TURNED M
|
||||
AB01..AB06 ; 6.0 # [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO
|
||||
AB09..AB0E ; 6.0 # [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO
|
||||
AB11..AB16 ; 6.0 # [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
|
||||
AB20..AB26 ; 6.0 # [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
|
||||
AB28..AB2E ; 6.0 # [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
|
||||
FBB2..FBC1 ; 6.0 # [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
|
||||
11000..1104D ; 6.0 # [78] BRAHMI SIGN CANDRABINDU..BRAHMI PUNCTUATION LOTUS
|
||||
11052..1106F ; 6.0 # [30] BRAHMI NUMBER ONE..BRAHMI DIGIT NINE
|
||||
16800..16A38 ; 6.0 # [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
|
||||
1B000..1B001 ; 6.0 # [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE
|
||||
1F0A0..1F0AE ; 6.0 # [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
|
||||
1F0B1..1F0BE ; 6.0 # [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS
|
||||
1F0C1..1F0CF ; 6.0 # [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
|
||||
1F0D1..1F0DF ; 6.0 # [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER
|
||||
1F130 ; 6.0 # SQUARED LATIN CAPITAL LETTER A
|
||||
1F132..1F13C ; 6.0 # [11] SQUARED LATIN CAPITAL LETTER C..SQUARED LATIN CAPITAL LETTER M
|
||||
1F13E ; 6.0 # SQUARED LATIN CAPITAL LETTER O
|
||||
1F140..1F141 ; 6.0 # [2] SQUARED LATIN CAPITAL LETTER Q..SQUARED LATIN CAPITAL LETTER R
|
||||
1F143..1F145 ; 6.0 # [3] SQUARED LATIN CAPITAL LETTER T..SQUARED LATIN CAPITAL LETTER V
|
||||
1F147..1F149 ; 6.0 # [3] SQUARED LATIN CAPITAL LETTER X..SQUARED LATIN CAPITAL LETTER Z
|
||||
1F14F..1F156 ; 6.0 # [8] SQUARED WC..NEGATIVE CIRCLED LATIN CAPITAL LETTER G
|
||||
1F158..1F15E ; 6.0 # [7] NEGATIVE CIRCLED LATIN CAPITAL LETTER I..NEGATIVE CIRCLED LATIN CAPITAL LETTER O
|
||||
1F160..1F169 ; 6.0 # [10] NEGATIVE CIRCLED LATIN CAPITAL LETTER Q..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
|
||||
1F170..1F178 ; 6.0 # [9] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER I
|
||||
1F17A ; 6.0 # NEGATIVE SQUARED LATIN CAPITAL LETTER K
|
||||
1F17D..1F17E ; 6.0 # [2] NEGATIVE SQUARED LATIN CAPITAL LETTER N..NEGATIVE SQUARED LATIN CAPITAL LETTER O
|
||||
1F180..1F189 ; 6.0 # [10] NEGATIVE SQUARED LATIN CAPITAL LETTER Q..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
|
||||
1F18E..1F18F ; 6.0 # [2] NEGATIVE SQUARED AB..NEGATIVE SQUARED WC
|
||||
1F191..1F19A ; 6.0 # [10] SQUARED CL..SQUARED VS
|
||||
1F1E6..1F1FF ; 6.0 # [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
|
||||
1F201..1F202 ; 6.0 # [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
|
||||
1F232..1F23A ; 6.0 # [9] SQUARED CJK UNIFIED IDEOGRAPH-7981..SQUARED CJK UNIFIED IDEOGRAPH-55B6
|
||||
1F250..1F251 ; 6.0 # [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
|
||||
1F300..1F320 ; 6.0 # [33] CYCLONE..SHOOTING STAR
|
||||
1F330..1F335 ; 6.0 # [6] CHESTNUT..CACTUS
|
||||
1F337..1F37C ; 6.0 # [70] TULIP..BABY BOTTLE
|
||||
1F380..1F393 ; 6.0 # [20] RIBBON..GRADUATION CAP
|
||||
1F3A0..1F3C4 ; 6.0 # [37] CAROUSEL HORSE..SURFER
|
||||
1F3C6..1F3CA ; 6.0 # [5] TROPHY..SWIMMER
|
||||
1F3E0..1F3F0 ; 6.0 # [17] HOUSE BUILDING..EUROPEAN CASTLE
|
||||
1F400..1F43E ; 6.0 # [63] RAT..PAW PRINTS
|
||||
1F440 ; 6.0 # EYES
|
||||
1F442..1F4F7 ; 6.0 # [182] EAR..CAMERA
|
||||
1F4F9..1F4FC ; 6.0 # [4] VIDEO CAMERA..VIDEOCASSETTE
|
||||
1F500..1F53D ; 6.0 # [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE
|
||||
1F550..1F567 ; 6.0 # [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
|
||||
1F5FB..1F5FF ; 6.0 # [5] MOUNT FUJI..MOYAI
|
||||
1F601..1F610 ; 6.0 # [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE
|
||||
1F612..1F614 ; 6.0 # [3] UNAMUSED FACE..PENSIVE FACE
|
||||
1F616 ; 6.0 # CONFOUNDED FACE
|
||||
1F618 ; 6.0 # FACE THROWING A KISS
|
||||
1F61A ; 6.0 # KISSING FACE WITH CLOSED EYES
|
||||
1F61C..1F61E ; 6.0 # [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE
|
||||
1F620..1F625 ; 6.0 # [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE
|
||||
1F628..1F62B ; 6.0 # [4] FEARFUL FACE..TIRED FACE
|
||||
1F62D ; 6.0 # LOUDLY CRYING FACE
|
||||
1F630..1F633 ; 6.0 # [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE
|
||||
1F635..1F640 ; 6.0 # [12] DIZZY FACE..WEARY CAT FACE
|
||||
1F645..1F64F ; 6.0 # [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS
|
||||
1F680..1F6C5 ; 6.0 # [70] ROCKET..LEFT LUGGAGE
|
||||
1F700..1F773 ; 6.0 # [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||||
2B740..2B81D ; 6.0 # [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
|
||||
# Total code points: 2088
|
||||
|
||||
# EOF
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# DerivedBidiClass-5.2.0.txt
|
||||
# Date: 2009-08-26, 00:50:45 GMT [MD]
|
||||
# DerivedBidiClass-6.0.0.txt
|
||||
# Date: 2010-08-19, 00:48:03 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -81,7 +81,7 @@
|
|||
03A3..03F5 ; L # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL
|
||||
03F7..0481 ; L # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
|
||||
0482 ; L # So CYRILLIC THOUSANDS SIGN
|
||||
048A..0525 ; L # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
|
||||
048A..0527 ; L # L& [158] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
|
||||
0531..0556 ; L # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
|
||||
0559 ; L # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
|
||||
055A..055F ; L # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
|
||||
|
@ -89,17 +89,18 @@
|
|||
0589 ; L # Po ARMENIAN FULL STOP
|
||||
0903 ; L # Mc DEVANAGARI SIGN VISARGA
|
||||
0904..0939 ; L # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
|
||||
093B ; L # Mc DEVANAGARI VOWEL SIGN OOE
|
||||
093D ; L # Lo DEVANAGARI SIGN AVAGRAHA
|
||||
093E..0940 ; L # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
|
||||
0949..094C ; L # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
|
||||
094E ; L # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
|
||||
094E..094F ; L # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
|
||||
0950 ; L # Lo DEVANAGARI OM
|
||||
0958..0961 ; L # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
|
||||
0964..0965 ; L # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
0966..096F ; L # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
|
||||
0970 ; L # Po DEVANAGARI ABBREVIATION SIGN
|
||||
0971 ; L # Lm DEVANAGARI SIGN HIGH SPACING DOT
|
||||
0972 ; L # Lo DEVANAGARI LETTER CANDRA A
|
||||
0972..0977 ; L # Lo [6] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER UUE
|
||||
0979..097F ; L # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
|
||||
0982..0983 ; L # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
|
||||
0985..098C ; L # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
|
||||
|
@ -165,6 +166,7 @@
|
|||
0B66..0B6F ; L # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
|
||||
0B70 ; L # So ORIYA ISSHAR
|
||||
0B71 ; L # Lo ORIYA LETTER WA
|
||||
0B72..0B77 ; L # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
|
||||
0B83 ; L # Lo TAMIL SIGN VISARGA
|
||||
0B85..0B8A ; L # Lo [6] TAMIL LETTER A..TAMIL LETTER UU
|
||||
0B8E..0B90 ; L # Lo [3] TAMIL LETTER E..TAMIL LETTER AI
|
||||
|
@ -212,15 +214,16 @@
|
|||
0CDE ; L # Lo KANNADA LETTER FA
|
||||
0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
|
||||
0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
0D02..0D03 ; L # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D05..0D0C ; L # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
|
||||
0D0E..0D10 ; L # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
|
||||
0D12..0D28 ; L # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
|
||||
0D2A..0D39 ; L # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
|
||||
0D12..0D3A ; L # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
|
||||
0D3D ; L # Lo MALAYALAM SIGN AVAGRAHA
|
||||
0D3E..0D40 ; L # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
|
||||
0D46..0D48 ; L # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
|
||||
0D4A..0D4C ; L # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
|
||||
0D4E ; L # Lo MALAYALAM LETTER DOT REPH
|
||||
0D57 ; L # Mc MALAYALAM AU LENGTH MARK
|
||||
0D60..0D61 ; L # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
|
||||
0D66..0D6F ; L # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
|
||||
|
@ -277,12 +280,13 @@
|
|||
0F49..0F6C ; L # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA
|
||||
0F7F ; L # Mc TIBETAN SIGN RNAM BCAD
|
||||
0F85 ; L # Po TIBETAN MARK PALUTA
|
||||
0F88..0F8B ; L # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
|
||||
0F88..0F8C ; L # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN
|
||||
0FBE..0FC5 ; L # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
|
||||
0FC7..0FCC ; L # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
|
||||
0FCE..0FCF ; L # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
|
||||
0FD0..0FD4 ; L # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
|
||||
0FD5..0FD8 ; L # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
|
||||
0FD9..0FDA ; L # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS
|
||||
1000..102A ; L # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU
|
||||
102B..102C ; L # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
|
||||
1031 ; L # Mc MYANMAR VOWEL SIGN E
|
||||
|
@ -375,7 +379,8 @@
|
|||
19B0..19C0 ; L # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
|
||||
19C1..19C7 ; L # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
|
||||
19C8..19C9 ; L # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
|
||||
19D0..19DA ; L # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
|
||||
19D0..19D9 ; L # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
|
||||
19DA ; L # No NEW TAI LUE THAM DIGIT ONE
|
||||
1A00..1A16 ; L # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
|
||||
1A19..1A1B ; L # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
|
||||
1A1E..1A1F ; L # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
|
||||
|
@ -408,6 +413,12 @@
|
|||
1BAA ; L # Mc SUNDANESE SIGN PAMAAEH
|
||||
1BAE..1BAF ; L # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
|
||||
1BB0..1BB9 ; L # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
|
||||
1BC0..1BE5 ; L # Lo [38] BATAK LETTER A..BATAK LETTER U
|
||||
1BE7 ; L # Mc BATAK VOWEL SIGN E
|
||||
1BEA..1BEC ; L # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
|
||||
1BEE ; L # Mc BATAK VOWEL SIGN U
|
||||
1BF2..1BF3 ; L # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN
|
||||
1BFC..1BFF ; L # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
|
||||
1C00..1C23 ; L # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A
|
||||
1C24..1C2B ; L # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
|
||||
1C34..1C35 ; L # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
|
||||
|
@ -451,7 +462,7 @@
|
|||
200E ; L # Cf LEFT-TO-RIGHT MARK
|
||||
2071 ; L # Lm SUPERSCRIPT LATIN SMALL LETTER I
|
||||
207F ; L # Lm SUPERSCRIPT LATIN SMALL LETTER N
|
||||
2090..2094 ; L # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
|
||||
2090..209C ; L # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
|
||||
2102 ; L # L& DOUBLE-STRUCK CAPITAL C
|
||||
2107 ; L # L& EULER CONSTANT
|
||||
210A..2113 ; L # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
|
||||
|
@ -485,6 +496,7 @@
|
|||
2D00..2D25 ; L # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
|
||||
2D30..2D65 ; L # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
|
||||
2D6F ; L # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
|
||||
2D70 ; L # Po TIFINAGH SEPARATOR MARK
|
||||
2D80..2D96 ; L # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
|
||||
2DA0..2DA6 ; L # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
|
||||
2DA8..2DAE ; L # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
|
||||
|
@ -513,7 +525,7 @@
|
|||
3190..3191 ; L # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
|
||||
3192..3195 ; L # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
|
||||
3196..319F ; L # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
|
||||
31A0..31B7 ; L # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
|
||||
31A0..31BA ; L # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
|
||||
31F0..31FF ; L # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
|
||||
3200..321C ; L # So [29] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED HANGUL CIEUC U
|
||||
3220..3229 ; L # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
|
||||
|
@ -540,8 +552,7 @@ A60C ; L # Lm VAI SYLLABLE LENGTHENER
|
|||
A610..A61F ; L # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
|
||||
A620..A629 ; L # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
|
||||
A62A..A62B ; L # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
|
||||
A640..A65F ; L # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN
|
||||
A662..A66D ; L # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
|
||||
A640..A66D ; L # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
|
||||
A66E ; L # Lo CYRILLIC LETTER MULTIOCULAR O
|
||||
A680..A697 ; L # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
|
||||
A6A0..A6E5 ; L # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
|
||||
|
@ -551,7 +562,10 @@ A722..A76F ; L # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMAL
|
|||
A770 ; L # Lm MODIFIER LETTER US
|
||||
A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
|
||||
A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
|
||||
A78B..A78C ; L # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO
|
||||
A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||||
A790..A791 ; L # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER
|
||||
A7A0..A7A9 ; L # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE
|
||||
A7FA ; L # L& LATIN LETTER SMALL CAPITAL TURNED M
|
||||
A7FB..A801 ; L # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I
|
||||
A803..A805 ; L # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
|
||||
A807..A80A ; L # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
|
||||
|
@ -608,6 +622,11 @@ AAC2 ; L # Lo TAI VIET TONE MAI SONG
|
|||
AADB..AADC ; L # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG
|
||||
AADD ; L # Lm TAI VIET SYMBOL SAM
|
||||
AADE..AADF ; L # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
|
||||
AB01..AB06 ; L # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO
|
||||
AB09..AB0E ; L # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO
|
||||
AB11..AB16 ; L # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
|
||||
AB20..AB26 ; L # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
|
||||
AB28..AB2E ; L # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
|
||||
ABC0..ABE2 ; L # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
|
||||
ABE3..ABE4 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
|
||||
ABE6..ABE7 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
|
||||
|
@ -664,6 +683,11 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER
|
|||
10400..1044F ; L # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
|
||||
10450..1049D ; L # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
|
||||
104A0..104A9 ; L # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
|
||||
11000 ; L # Mc BRAHMI SIGN CANDRABINDU
|
||||
11002 ; L # Mc BRAHMI SIGN VISARGA
|
||||
11003..11037 ; L # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
|
||||
11047..1104D ; L # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
|
||||
11066..1106F ; L # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
|
||||
11082 ; L # Mc KAITHI SIGN VISARGA
|
||||
11083..110AF ; L # Lo [45] KAITHI LETTER A..KAITHI LETTER HA
|
||||
110B0..110B2 ; L # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
|
||||
|
@ -675,6 +699,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER
|
|||
12400..12462 ; L # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
|
||||
12470..12473 ; L # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
|
||||
13000..1342E ; L # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
|
||||
16800..16A38 ; L # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
|
||||
1B000..1B001 ; L # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE
|
||||
1D000..1D0F5 ; L # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
|
||||
1D100..1D126 ; L # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
|
||||
1D129..1D164 ; L # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
|
||||
|
@ -721,30 +747,23 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER
|
|||
1D7AA..1D7C2 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
|
||||
1D7C4..1D7CB ; L # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
|
||||
1F110..1F12E ; L # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
|
||||
1F131 ; L # So SQUARED LATIN CAPITAL LETTER B
|
||||
1F13D ; L # So SQUARED LATIN CAPITAL LETTER N
|
||||
1F13F ; L # So SQUARED LATIN CAPITAL LETTER P
|
||||
1F142 ; L # So SQUARED LATIN CAPITAL LETTER S
|
||||
1F146 ; L # So SQUARED LATIN CAPITAL LETTER W
|
||||
1F14A..1F14E ; L # So [5] SQUARED HV..SQUARED PPV
|
||||
1F157 ; L # So NEGATIVE CIRCLED LATIN CAPITAL LETTER H
|
||||
1F15F ; L # So NEGATIVE CIRCLED LATIN CAPITAL LETTER P
|
||||
1F179 ; L # So NEGATIVE SQUARED LATIN CAPITAL LETTER J
|
||||
1F17B..1F17C ; L # So [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
|
||||
1F17F ; L # So NEGATIVE SQUARED LATIN CAPITAL LETTER P
|
||||
1F18A..1F18D ; L # So [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
|
||||
1F190 ; L # So SQUARE DJ
|
||||
1F200 ; L # So SQUARE HIRAGANA HOKA
|
||||
1F210..1F231 ; L # So [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
|
||||
1F130..1F169 ; L # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
|
||||
1F170..1F19A ; L # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
|
||||
1F1E6..1F202 ; L # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA
|
||||
1F210..1F23A ; L # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
|
||||
1F240..1F248 ; L # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
|
||||
1F250..1F251 ; L # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
|
||||
1F48C ; L # So LOVE LETTER
|
||||
1F524 ; L # So INPUT SYMBOL FOR LATIN LETTERS
|
||||
20000..2A6D6 ; L # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
2A700..2B734 ; L # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; L # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2F800..2FA1D ; L # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD>
|
||||
100000..10FFFD; L # Co [65534] <private-use-100000>..<private-use-10FFFD>
|
||||
|
||||
# The above property value applies to 861492 code points not listed here.
|
||||
# Total code points: 1099541
|
||||
# The above property value applies to 859451 code points not listed here.
|
||||
# Total code points: 1098619
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -772,7 +791,11 @@ F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD>
|
|||
0828 ; R # Lm SAMARITAN MODIFIER LETTER I
|
||||
082E..082F ; R # Cn [2] <reserved-082E>..<reserved-082F>
|
||||
0830..083E ; R # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
|
||||
083F..08FF ; R # Cn [193] <reserved-083F>..<reserved-08FF>
|
||||
083F ; R # Cn <reserved-083F>
|
||||
0840..0858 ; R # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
|
||||
085C..085D ; R # Cn [2] <reserved-085C>..<reserved-085D>
|
||||
085E ; R # Po MANDAIC PUNCTUATION
|
||||
085F..08FF ; R # Cn [161] <reserved-085F>..<reserved-08FF>
|
||||
200F ; R # Cf RIGHT-TO-LEFT MARK
|
||||
FB1D ; R # Lo HEBREW LETTER YOD WITH HIRIQ
|
||||
FB1F..FB28 ; R # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
|
||||
|
@ -841,7 +864,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL
|
|||
10E7F..10FFF ; R # Cn [385] <reserved-10E7F>..<reserved-10FFF>
|
||||
1E800..1EFFF ; R # Cn [2048] <reserved-1E800>..<reserved-1EFFF>
|
||||
|
||||
# Total code points: 4441
|
||||
# Total code points: 4438
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -897,7 +920,7 @@ FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS
|
|||
0E3F ; ET # Sc THAI CURRENCY SYMBOL BAHT
|
||||
17DB ; ET # Sc KHMER CURRENCY SYMBOL RIEL
|
||||
2030..2034 ; ET # Po [5] PER MILLE SIGN..TRIPLE PRIME
|
||||
20A0..20B8 ; ET # Sc [25] EURO-CURRENCY SIGN..TENGE SIGN
|
||||
20A0..20B9 ; ET # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN
|
||||
212E ; ET # So ESTIMATED SYMBOL
|
||||
2213 ; ET # Sm MINUS-OR-PLUS SIGN
|
||||
A838 ; ET # Sc NORTH INDIC RUPEE MARK
|
||||
|
@ -911,7 +934,7 @@ FF05 ; ET # Po FULLWIDTH PERCENT SIGN
|
|||
FFE0..FFE1 ; ET # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
|
||||
FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
|
||||
|
||||
# Total code points: 63
|
||||
# Total code points: 64
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -921,9 +944,10 @@ FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
|
|||
0660..0669 ; AN # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
|
||||
066B..066C ; AN # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR
|
||||
06DD ; AN # Cf ARABIC END OF AYAH
|
||||
070F ; AN # Cf SYRIAC ABBREVIATION MARK
|
||||
10E60..10E7E ; AN # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
|
||||
|
||||
# Total code points: 48
|
||||
# Total code points: 49
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1038,13 +1062,13 @@ FF1A ; CS # Po FULLWIDTH COLON
|
|||
058A ; ON # Pd ARMENIAN HYPHEN
|
||||
0606..0607 ; ON # Sm [2] ARABIC-INDIC CUBE ROOT..ARABIC-INDIC FOURTH ROOT
|
||||
060E..060F ; ON # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
|
||||
06DE ; ON # So ARABIC START OF RUB EL HIZB
|
||||
06E9 ; ON # So ARABIC PLACE OF SAJDAH
|
||||
07F6 ; ON # So NKO SYMBOL OO DENNEN
|
||||
07F7..07F9 ; ON # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
|
||||
0BF3..0BF8 ; ON # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN
|
||||
0BFA ; ON # So TAMIL NUMBER SIGN
|
||||
0C78..0C7E ; ON # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
|
||||
0CF1..0CF2 ; ON # So [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
0F3A ; ON # Ps TIBETAN MARK GUG RTAGS GYON
|
||||
0F3B ; ON # Pe TIBETAN MARK GUG RTAGS GYAS
|
||||
0F3C ; ON # Ps TIBETAN MARK ANG KHANG GYON
|
||||
|
@ -1059,8 +1083,7 @@ FF1A ; CS # Po FULLWIDTH COLON
|
|||
1807..180A ; ON # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
|
||||
1940 ; ON # So LIMBU SIGN LOO
|
||||
1944..1945 ; ON # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
|
||||
19DE..19DF ; ON # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
|
||||
19E0..19FF ; ON # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC
|
||||
19DE..19FF ; ON # So [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC
|
||||
1FBD ; ON # Sk GREEK KORONIS
|
||||
1FBF..1FC1 ; ON # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
|
||||
1FCD..1FCF ; ON # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
|
||||
|
@ -1100,7 +1123,8 @@ FF1A ; CS # Po FULLWIDTH COLON
|
|||
2103..2106 ; ON # So [4] DEGREE CELSIUS..CADA UNA
|
||||
2108..2109 ; ON # So [2] SCRUPLE..DEGREE FAHRENHEIT
|
||||
2114 ; ON # So L B BAR SYMBOL
|
||||
2116..2118 ; ON # So [3] NUMERO SIGN..SCRIPT CAPITAL P
|
||||
2116..2117 ; ON # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT
|
||||
2118 ; ON # Sm SCRIPT CAPITAL P
|
||||
211E..2123 ; ON # So [6] PRESCRIPTION TAKE..VERSICLE
|
||||
2125 ; ON # So OUNCE SIGN
|
||||
2127 ; ON # So INVERTED OHM SIGN
|
||||
|
@ -1147,7 +1171,7 @@ FF1A ; CS # Po FULLWIDTH COLON
|
|||
239B..23B3 ; ON # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
|
||||
23B4..23DB ; ON # So [40] TOP SQUARE BRACKET..FUSE
|
||||
23DC..23E1 ; ON # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
|
||||
23E2..23E8 ; ON # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
|
||||
23E2..23F3 ; ON # So [18] WHITE TRAPEZIUM..HOURGLASS WITH FLOWING SAND
|
||||
2400..2426 ; ON # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
2440..244A ; ON # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
|
||||
2460..2487 ; ON # No [40] CIRCLED DIGIT ONE..PARENTHESIZED NUMBER TWENTY
|
||||
|
@ -1161,18 +1185,8 @@ FF1A ; CS # Po FULLWIDTH COLON
|
|||
2600..266E ; ON # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
|
||||
266F ; ON # Sm MUSIC SHARP SIGN
|
||||
2670..26AB ; ON # So [60] WEST SYRIAC CROSS..MEDIUM BLACK CIRCLE
|
||||
26AD..26CD ; ON # So [33] MARRIAGE SYMBOL..DISABLED CAR
|
||||
26CF..26E1 ; ON # So [19] PICK..RESTRICTED LEFT ENTRY-2
|
||||
26E3 ; ON # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
|
||||
26E8..26FF ; ON # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
|
||||
2701..2704 ; ON # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
|
||||
2706..2709 ; ON # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
|
||||
270C..2727 ; ON # So [28] VICTORY HAND..WHITE FOUR POINTED STAR
|
||||
2729..274B ; ON # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
|
||||
274D ; ON # So SHADOWED WHITE CIRCLE
|
||||
274F..2752 ; ON # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
|
||||
2756..275E ; ON # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
|
||||
2761..2767 ; ON # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
|
||||
26AD..26FF ; ON # So [83] MARRIAGE SYMBOL..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
|
||||
2701..2767 ; ON # So [103] UPPER BLADE SCISSORS..ROTATED FLORAL HEART BULLET
|
||||
2768 ; ON # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
|
||||
2769 ; ON # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
|
||||
276A ; ON # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
|
||||
|
@ -1188,15 +1202,13 @@ FF1A ; CS # Po FULLWIDTH COLON
|
|||
2774 ; ON # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
|
||||
2775 ; ON # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
|
||||
2776..2793 ; ON # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
|
||||
2794 ; ON # So HEAVY WIDE-HEADED RIGHTWARDS ARROW
|
||||
2798..27AF ; ON # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
|
||||
27B1..27BE ; ON # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
|
||||
2794..27BF ; ON # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP
|
||||
27C0..27C4 ; ON # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
|
||||
27C5 ; ON # Ps LEFT S-SHAPED BAG DELIMITER
|
||||
27C6 ; ON # Pe RIGHT S-SHAPED BAG DELIMITER
|
||||
27C7..27CA ; ON # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE
|
||||
27CC ; ON # Sm LONG DIVISION
|
||||
27D0..27E5 ; ON # Sm [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK
|
||||
27CE..27E5 ; ON # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK
|
||||
27E6 ; ON # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
|
||||
27E7 ; ON # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
|
||||
27E8 ; ON # Ps MATHEMATICAL LEFT ANGLE BRACKET
|
||||
|
@ -1424,6 +1436,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE
|
|||
10190..1019B ; ON # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
|
||||
1091F ; ON # Po PHOENICIAN WORD SEPARATOR
|
||||
10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
|
||||
11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
|
||||
1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
|
||||
1D245 ; ON # So GREEK MUSICAL LEIMMA
|
||||
1D300..1D356 ; ON # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
|
||||
|
@ -1434,8 +1447,42 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE
|
|||
1D7C3 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
|
||||
1F000..1F02B ; ON # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
|
||||
1F030..1F093 ; ON # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
|
||||
1F0A0..1F0AE ; ON # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
|
||||
1F0B1..1F0BE ; ON # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS
|
||||
1F0C1..1F0CF ; ON # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
|
||||
1F0D1..1F0DF ; ON # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER
|
||||
1F300..1F320 ; ON # So [33] CYCLONE..SHOOTING STAR
|
||||
1F330..1F335 ; ON # So [6] CHESTNUT..CACTUS
|
||||
1F337..1F37C ; ON # So [70] TULIP..BABY BOTTLE
|
||||
1F380..1F393 ; ON # So [20] RIBBON..GRADUATION CAP
|
||||
1F3A0..1F3C4 ; ON # So [37] CAROUSEL HORSE..SURFER
|
||||
1F3C6..1F3CA ; ON # So [5] TROPHY..SWIMMER
|
||||
1F3E0..1F3F0 ; ON # So [17] HOUSE BUILDING..EUROPEAN CASTLE
|
||||
1F400..1F43E ; ON # So [63] RAT..PAW PRINTS
|
||||
1F440 ; ON # So EYES
|
||||
1F442..1F48B ; ON # So [74] EAR..KISS MARK
|
||||
1F48D..1F4F7 ; ON # So [107] RING..CAMERA
|
||||
1F4F9..1F4FC ; ON # So [4] VIDEO CAMERA..VIDEOCASSETTE
|
||||
1F500..1F523 ; ON # So [36] TWISTED RIGHTWARDS ARROWS..INPUT SYMBOL FOR SYMBOLS
|
||||
1F525..1F53D ; ON # So [25] FIRE..DOWN-POINTING SMALL RED TRIANGLE
|
||||
1F550..1F567 ; ON # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
|
||||
1F5FB..1F5FF ; ON # So [5] MOUNT FUJI..MOYAI
|
||||
1F601..1F610 ; ON # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE
|
||||
1F612..1F614 ; ON # So [3] UNAMUSED FACE..PENSIVE FACE
|
||||
1F616 ; ON # So CONFOUNDED FACE
|
||||
1F618 ; ON # So FACE THROWING A KISS
|
||||
1F61A ; ON # So KISSING FACE WITH CLOSED EYES
|
||||
1F61C..1F61E ; ON # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE
|
||||
1F620..1F625 ; ON # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE
|
||||
1F628..1F62B ; ON # So [4] FEARFUL FACE..TIRED FACE
|
||||
1F62D ; ON # So LOUDLY CRYING FACE
|
||||
1F630..1F633 ; ON # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE
|
||||
1F635..1F640 ; ON # So [12] DIZZY FACE..WEARY CAT FACE
|
||||
1F645..1F64F ; ON # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS
|
||||
1F680..1F6C5 ; ON # So [70] ROCKET..LEFT LUGGAGE
|
||||
1F700..1F773 ; ON # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||||
|
||||
# Total code points: 3523
|
||||
# Total code points: 4412
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1446,7 +1493,6 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE
|
|||
007F..0084 ; BN # Cc [6] <control-007F>..<control-0084>
|
||||
0086..009F ; BN # Cc [26] <control-0086>..<control-009F>
|
||||
00AD ; BN # Cf SOFT HYPHEN
|
||||
070F ; BN # Cf SYRIAC ABBREVIATION MARK
|
||||
200B..200D ; BN # Cf [3] ZERO WIDTH SPACE..ZERO WIDTH JOINER
|
||||
2060..2064 ; BN # Cf [5] WORD JOINER..INVISIBLE PLUS
|
||||
2065..2069 ; BN # Cn [5] <reserved-2065>..<reserved-2069>
|
||||
|
@ -1478,7 +1524,7 @@ EFFFE..EFFFF ; BN # Cn [2] <noncharacter-EFFFE>..<noncharacter-EFFFF>
|
|||
FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
|
||||
10FFFE..10FFFF; BN # Cn [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
|
||||
|
||||
# Total code points: 4016
|
||||
# Total code points: 4015
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1493,10 +1539,9 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
|
|||
05C4..05C5 ; NSM # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
|
||||
05C7 ; NSM # Mn HEBREW POINT QAMATS QATAN
|
||||
0610..061A ; NSM # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
064B..065E ; NSM # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
|
||||
064B..065F ; NSM # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
|
||||
0670 ; NSM # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
06D6..06DC ; NSM # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
|
||||
06DE ; NSM # Me ARABIC START OF RUB EL HIZB
|
||||
06DF..06E4 ; NSM # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
|
||||
06E7..06E8 ; NSM # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
|
||||
06EA..06ED ; NSM # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
|
||||
|
@ -1508,11 +1553,13 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
|
|||
081B..0823 ; NSM # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
|
||||
0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
||||
0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
|
||||
0859..085B ; NSM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
|
||||
0900..0902 ; NSM # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
093A ; NSM # Mn DEVANAGARI VOWEL SIGN OE
|
||||
093C ; NSM # Mn DEVANAGARI SIGN NUKTA
|
||||
0941..0948 ; NSM # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
||||
094D ; NSM # Mn DEVANAGARI SIGN VIRAMA
|
||||
0951..0955 ; NSM # Mn [5] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN CANDRA LONG E
|
||||
0951..0957 ; NSM # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
|
||||
0962..0963 ; NSM # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
|
||||
0981 ; NSM # Mn BENGALI SIGN CANDRABINDU
|
||||
09BC ; NSM # Mn BENGALI SIGN NUKTA
|
||||
|
@ -1571,7 +1618,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
|
|||
0F71..0F7E ; NSM # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
|
||||
0F80..0F84 ; NSM # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
|
||||
0F86..0F87 ; NSM # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
|
||||
0F90..0F97 ; NSM # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
|
||||
0F8D..0F97 ; NSM # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
|
||||
0F99..0FBC ; NSM # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
|
||||
0FC6 ; NSM # Mn TIBETAN SYMBOL PADMA GDAN
|
||||
102D..1030 ; NSM # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
|
||||
|
@ -1585,7 +1632,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
|
|||
1085..1086 ; NSM # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
|
||||
108D ; NSM # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
|
||||
109D ; NSM # Mn MYANMAR VOWEL SIGN AITON AI
|
||||
135F ; NSM # Mn ETHIOPIC COMBINING GEMINATION MARK
|
||||
135D..135F ; NSM # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
|
||||
1712..1714 ; NSM # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
|
||||
1732..1734 ; NSM # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
|
||||
1752..1753 ; NSM # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
|
||||
|
@ -1617,6 +1664,10 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
|
|||
1B80..1B81 ; NSM # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
|
||||
1BA2..1BA5 ; NSM # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
|
||||
1BA8..1BA9 ; NSM # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
|
||||
1BE6 ; NSM # Mn BATAK SIGN TOMPI
|
||||
1BE8..1BE9 ; NSM # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
|
||||
1BED ; NSM # Mn BATAK VOWEL SIGN KARO O
|
||||
1BEF..1BF1 ; NSM # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
|
||||
1C2C..1C33 ; NSM # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
|
||||
1C36..1C37 ; NSM # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
|
||||
1CD0..1CD2 ; NSM # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
|
||||
|
@ -1624,13 +1675,14 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
|
|||
1CE2..1CE8 ; NSM # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; NSM # Mn VEDIC SIGN TIRYAK
|
||||
1DC0..1DE6 ; NSM # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
|
||||
1DFD..1DFF ; NSM # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DFC..1DFF ; NSM # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
20D0..20DC ; NSM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20DD..20E0 ; NSM # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
|
||||
20E1 ; NSM # Mn COMBINING LEFT RIGHT ARROW ABOVE
|
||||
20E2..20E4 ; NSM # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
|
||||
20E5..20F0 ; NSM # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
|
||||
2CEF..2CF1 ; NSM # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
|
||||
2D7F ; NSM # Mn TIFINAGH CONSONANT JOINER
|
||||
2DE0..2DFF ; NSM # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
302A..302F ; NSM # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
3099..309A ; NSM # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
|
@ -1672,6 +1724,8 @@ FE20..FE26 ; NSM # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOININ
|
|||
10A0C..10A0F ; NSM # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
|
||||
10A38..10A3A ; NSM # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
|
||||
10A3F ; NSM # Mn KHAROSHTHI VIRAMA
|
||||
11001 ; NSM # Mn BRAHMI SIGN ANUSVARA
|
||||
11038..11046 ; NSM # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
|
||||
11080..11081 ; NSM # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
|
||||
110B3..110B6 ; NSM # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
|
||||
110B9..110BA ; NSM # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
|
||||
|
@ -1682,7 +1736,7 @@ FE20..FE26 ; NSM # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOININ
|
|||
1D242..1D244 ; NSM # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
|
||||
E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 1173
|
||||
# Total code points: 1209
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1695,11 +1749,9 @@ E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
|||
061B ; AL # Po ARABIC SEMICOLON
|
||||
061C..061D ; AL # Cn [2] <reserved-061C>..<reserved-061D>
|
||||
061E..061F ; AL # Po [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
|
||||
0620 ; AL # Cn <reserved-0620>
|
||||
0621..063F ; AL # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0620..063F ; AL # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0640 ; AL # Lm ARABIC TATWEEL
|
||||
0641..064A ; AL # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
|
||||
065F ; AL # Cn <reserved-065F>
|
||||
066D ; AL # Po ARABIC FIVE POINTED STAR
|
||||
066E..066F ; AL # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
|
||||
0671..06D3 ; AL # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
|
||||
|
@ -1719,7 +1771,8 @@ E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
|||
07B1 ; AL # Lo THAANA LETTER NAA
|
||||
07B2..07BF ; AL # Cn [14] <reserved-07B2>..<reserved-07BF>
|
||||
FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
|
||||
FBB2..FBD2 ; AL # Cn [33] <reserved-FBB2>..<reserved-FBD2>
|
||||
FBB2..FBC1 ; AL # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
|
||||
FBC2..FBD2 ; AL # Cn [17] <reserved-FBC2>..<reserved-FBD2>
|
||||
FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
|
||||
FD40..FD4F ; AL # Cn [16] <reserved-FD40>..<reserved-FD4F>
|
||||
FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
|
||||
|
@ -1734,7 +1787,7 @@ FE75 ; AL # Cn <reserved-FE75>
|
|||
FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
|
||||
FEFD..FEFE ; AL # Cn [2] <reserved-FEFD>..<reserved-FEFE>
|
||||
|
||||
# Total code points: 1116
|
||||
# Total code points: 1115
|
||||
|
||||
# ================================================
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,8 +1,8 @@
|
|||
# DerivedJoiningGroup-5.2.0.txt
|
||||
# Date: 2009-05-22, 18:51:25 GMT [MD]
|
||||
# DerivedJoiningGroup-6.0.0.txt
|
||||
# Date: 2010-07-17, 22:46:14 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -126,7 +126,7 @@
|
|||
|
||||
# ================================================
|
||||
|
||||
06C3 ; Hamza_On_Heh_Goal # Lo ARABIC LETTER TEH MARBUTA GOAL
|
||||
06C3 ; Teh_Marbuta_Goal # Lo ARABIC LETTER TEH MARBUTA GOAL
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
|
@ -343,13 +343,14 @@
|
|||
|
||||
# ================================================
|
||||
|
||||
0620 ; Yeh # Lo ARABIC LETTER KASHMIRI YEH
|
||||
0626 ; Yeh # Lo ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0649..064A ; Yeh # Lo [2] ARABIC LETTER ALEF MAKSURA..ARABIC LETTER YEH
|
||||
0678 ; Yeh # Lo ARABIC LETTER HIGH HAMZA YEH
|
||||
06D0..06D1 ; Yeh # Lo [2] ARABIC LETTER E..ARABIC LETTER YEH WITH THREE DOTS BELOW
|
||||
0777 ; Yeh # Lo ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW
|
||||
|
||||
# Total code points: 7
|
||||
# Total code points: 8
|
||||
|
||||
# ================================================
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# DerivedJoiningType-5.2.0.txt
|
||||
# Date: 2009-05-28, 20:37:39 GMT [MD]
|
||||
# DerivedJoiningType-6.0.0.txt
|
||||
# Date: 2010-08-19, 00:48:10 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -29,6 +29,7 @@
|
|||
|
||||
# Joining_Type=Dual_Joining
|
||||
|
||||
0620 ; D # Lo ARABIC LETTER KASHMIRI YEH
|
||||
0626 ; D # Lo ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0628 ; D # Lo ARABIC LETTER BEH
|
||||
062A..062E ; D # Lo [5] ARABIC LETTER TEH..ARABIC LETTER KHAH
|
||||
|
@ -58,7 +59,7 @@
|
|||
077A..077F ; D # Lo [6] ARABIC LETTER YEH BARREE WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER KAF WITH TWO DOTS ABOVE
|
||||
07CA..07EA ; D # Lo [33] NKO LETTER A..NKO LETTER JONA RA
|
||||
|
||||
# Total code points: 188
|
||||
# Total code points: 189
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -109,10 +110,9 @@
|
|||
05C4..05C5 ; T # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
|
||||
05C7 ; T # Mn HEBREW POINT QAMATS QATAN
|
||||
0610..061A ; T # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
064B..065E ; T # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
|
||||
064B..065F ; T # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
|
||||
0670 ; T # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
06D6..06DC ; T # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
|
||||
06DE ; T # Me ARABIC START OF RUB EL HIZB
|
||||
06DF..06E4 ; T # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
|
||||
06E7..06E8 ; T # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
|
||||
06EA..06ED ; T # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
|
||||
|
@ -125,11 +125,13 @@
|
|||
081B..0823 ; T # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
|
||||
0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
||||
0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
|
||||
0859..085B ; T # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
|
||||
0900..0902 ; T # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
093A ; T # Mn DEVANAGARI VOWEL SIGN OE
|
||||
093C ; T # Mn DEVANAGARI SIGN NUKTA
|
||||
0941..0948 ; T # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
||||
094D ; T # Mn DEVANAGARI SIGN VIRAMA
|
||||
0951..0955 ; T # Mn [5] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN CANDRA LONG E
|
||||
0951..0957 ; T # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
|
||||
0962..0963 ; T # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
|
||||
0981 ; T # Mn BENGALI SIGN CANDRABINDU
|
||||
09BC ; T # Mn BENGALI SIGN NUKTA
|
||||
|
@ -190,7 +192,7 @@
|
|||
0F71..0F7E ; T # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
|
||||
0F80..0F84 ; T # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
|
||||
0F86..0F87 ; T # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
|
||||
0F90..0F97 ; T # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
|
||||
0F8D..0F97 ; T # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
|
||||
0F99..0FBC ; T # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
|
||||
0FC6 ; T # Mn TIBETAN SYMBOL PADMA GDAN
|
||||
102D..1030 ; T # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
|
||||
|
@ -204,7 +206,7 @@
|
|||
1085..1086 ; T # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
|
||||
108D ; T # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
|
||||
109D ; T # Mn MYANMAR VOWEL SIGN AITON AI
|
||||
135F ; T # Mn ETHIOPIC COMBINING GEMINATION MARK
|
||||
135D..135F ; T # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
|
||||
1712..1714 ; T # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
|
||||
1732..1734 ; T # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
|
||||
1752..1753 ; T # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
|
||||
|
@ -237,6 +239,10 @@
|
|||
1B80..1B81 ; T # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
|
||||
1BA2..1BA5 ; T # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
|
||||
1BA8..1BA9 ; T # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
|
||||
1BE6 ; T # Mn BATAK SIGN TOMPI
|
||||
1BE8..1BE9 ; T # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
|
||||
1BED ; T # Mn BATAK VOWEL SIGN KARO O
|
||||
1BEF..1BF1 ; T # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
|
||||
1C2C..1C33 ; T # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
|
||||
1C36..1C37 ; T # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
|
||||
1CD0..1CD2 ; T # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
|
||||
|
@ -244,7 +250,7 @@
|
|||
1CE2..1CE8 ; T # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; T # Mn VEDIC SIGN TIRYAK
|
||||
1DC0..1DE6 ; T # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
|
||||
1DFD..1DFF ; T # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DFC..1DFF ; T # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200B ; T # Cf ZERO WIDTH SPACE
|
||||
200E..200F ; T # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
|
||||
202A..202E ; T # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
|
||||
|
@ -256,6 +262,7 @@
|
|||
20E2..20E4 ; T # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
|
||||
20E5..20F0 ; T # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
|
||||
2CEF..2CF1 ; T # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
|
||||
2D7F ; T # Mn TIFINAGH CONSONANT JOINER
|
||||
2DE0..2DFF ; T # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
302A..302F ; T # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
3099..309A ; T # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
|
@ -299,6 +306,8 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI
|
|||
10A0C..10A0F ; T # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
|
||||
10A38..10A3A ; T # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
|
||||
10A3F ; T # Mn KHAROSHTHI VIRAMA
|
||||
11001 ; T # Mn BRAHMI SIGN ANUSVARA
|
||||
11038..11046 ; T # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
|
||||
11080..11081 ; T # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
|
||||
110B3..110B6 ; T # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
|
||||
110B9..110BA ; T # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
|
||||
|
@ -313,6 +322,6 @@ E0001 ; T # Cf LANGUAGE TAG
|
|||
E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG
|
||||
E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 1308
|
||||
# Total code points: 1344
|
||||
|
||||
# EOF
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
# DerivedNormalizationProps-5.2.0.txt
|
||||
# Date: 2009-08-26, 18:18:50 GMT [MD]
|
||||
# DerivedNormalizationProps-6.0.0.txt
|
||||
# Date: 2010-05-20, 15:14:12 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
# ================================================
|
||||
|
||||
# Derived Property: FC_NFKC_Closure
|
||||
# Derived Property: FC_NFKC_Closure (DEPRECATED as of Unicode 6.0.0)
|
||||
# Generated from computing: b = NFKC(Fold(a)); c = NFKC(Fold(b));
|
||||
# Then if (c != b) add the mapping from a to c to the set of
|
||||
# mappings that constitute the FC_NFKC_Closure list
|
||||
|
@ -611,19 +611,41 @@
|
|||
1F12C ; FC_NFKC; 0072
|
||||
1F12D ; FC_NFKC; 0063 0064
|
||||
1F12E ; FC_NFKC; 0077 007A
|
||||
1F130 ; FC_NFKC; 0061
|
||||
1F131 ; FC_NFKC; 0062
|
||||
1F132 ; FC_NFKC; 0063
|
||||
1F133 ; FC_NFKC; 0064
|
||||
1F134 ; FC_NFKC; 0065
|
||||
1F135 ; FC_NFKC; 0066
|
||||
1F136 ; FC_NFKC; 0067
|
||||
1F137 ; FC_NFKC; 0068
|
||||
1F138 ; FC_NFKC; 0069
|
||||
1F139 ; FC_NFKC; 006A
|
||||
1F13A ; FC_NFKC; 006B
|
||||
1F13B ; FC_NFKC; 006C
|
||||
1F13C ; FC_NFKC; 006D
|
||||
1F13D ; FC_NFKC; 006E
|
||||
1F13E ; FC_NFKC; 006F
|
||||
1F13F ; FC_NFKC; 0070
|
||||
1F140 ; FC_NFKC; 0071
|
||||
1F141 ; FC_NFKC; 0072
|
||||
1F142 ; FC_NFKC; 0073
|
||||
1F143 ; FC_NFKC; 0074
|
||||
1F144 ; FC_NFKC; 0075
|
||||
1F145 ; FC_NFKC; 0076
|
||||
1F146 ; FC_NFKC; 0077
|
||||
1F147 ; FC_NFKC; 0078
|
||||
1F148 ; FC_NFKC; 0079
|
||||
1F149 ; FC_NFKC; 007A
|
||||
1F14A ; FC_NFKC; 0068 0076
|
||||
1F14B ; FC_NFKC; 006D 0076
|
||||
1F14C ; FC_NFKC; 0073 0064
|
||||
1F14D ; FC_NFKC; 0073 0073
|
||||
1F14E ; FC_NFKC; 0070 0070 0076
|
||||
1F14F ; FC_NFKC; 0077 0063
|
||||
1F190 ; FC_NFKC; 0064 006A
|
||||
|
||||
# Total code points: 608
|
||||
# Total code points: 630
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1299,7 +1321,7 @@ FB46..FB4E ; NFC_QC; N
|
|||
208A..208C ; NFKD_QC; N
|
||||
208D ; NFKD_QC; N
|
||||
208E ; NFKD_QC; N
|
||||
2090..2094 ; NFKD_QC; N
|
||||
2090..209C ; NFKD_QC; N
|
||||
20A8 ; NFKD_QC; N
|
||||
2100..2101 ; NFKD_QC; N
|
||||
2102 ; NFKD_QC; N
|
||||
|
@ -1603,19 +1625,15 @@ FFED..FFEE ; NFKD_QC; N
|
|||
1D7CE..1D7FF ; NFKD_QC; N
|
||||
1F100..1F10A ; NFKD_QC; N
|
||||
1F110..1F12E ; NFKD_QC; N
|
||||
1F131 ; NFKD_QC; N
|
||||
1F13D ; NFKD_QC; N
|
||||
1F13F ; NFKD_QC; N
|
||||
1F142 ; NFKD_QC; N
|
||||
1F146 ; NFKD_QC; N
|
||||
1F14A..1F14E ; NFKD_QC; N
|
||||
1F130..1F14F ; NFKD_QC; N
|
||||
1F190 ; NFKD_QC; N
|
||||
1F200 ; NFKD_QC; N
|
||||
1F210..1F231 ; NFKD_QC; N
|
||||
1F200..1F202 ; NFKD_QC; N
|
||||
1F210..1F23A ; NFKD_QC; N
|
||||
1F240..1F248 ; NFKD_QC; N
|
||||
1F250..1F251 ; NFKD_QC; N
|
||||
2F800..2FA1D ; NFKD_QC; N
|
||||
|
||||
# Total code points: 16688
|
||||
# Total code points: 16731
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1745,7 +1763,7 @@ FFED..FFEE ; NFKD_QC; N
|
|||
208A..208C ; NFKC_QC; N
|
||||
208D ; NFKC_QC; N
|
||||
208E ; NFKC_QC; N
|
||||
2090..2094 ; NFKC_QC; N
|
||||
2090..209C ; NFKC_QC; N
|
||||
20A8 ; NFKC_QC; N
|
||||
2100..2101 ; NFKC_QC; N
|
||||
2102 ; NFKC_QC; N
|
||||
|
@ -1976,19 +1994,15 @@ FFED..FFEE ; NFKC_QC; N
|
|||
1D7CE..1D7FF ; NFKC_QC; N
|
||||
1F100..1F10A ; NFKC_QC; N
|
||||
1F110..1F12E ; NFKC_QC; N
|
||||
1F131 ; NFKC_QC; N
|
||||
1F13D ; NFKC_QC; N
|
||||
1F13F ; NFKC_QC; N
|
||||
1F142 ; NFKC_QC; N
|
||||
1F146 ; NFKC_QC; N
|
||||
1F14A..1F14E ; NFKC_QC; N
|
||||
1F130..1F14F ; NFKC_QC; N
|
||||
1F190 ; NFKC_QC; N
|
||||
1F200 ; NFKC_QC; N
|
||||
1F210..1F231 ; NFKC_QC; N
|
||||
1F200..1F202 ; NFKC_QC; N
|
||||
1F210..1F23A ; NFKC_QC; N
|
||||
1F240..1F248 ; NFKC_QC; N
|
||||
1F250..1F251 ; NFKC_QC; N
|
||||
2F800..2FA1D ; NFKC_QC; N
|
||||
|
||||
# Total code points: 4597
|
||||
# Total code points: 4640
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2034,7 +2048,7 @@ FFED..FFEE ; NFKC_QC; N
|
|||
|
||||
# ================================================
|
||||
|
||||
# Derived Property: Expands_On_NFD
|
||||
# Derived Property: Expands_On_NFD (DEPRECATED as of Unicode 6.0.0)
|
||||
# Generated according to UAX #15.
|
||||
# Characters whose normalized length is not one.
|
||||
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
|
||||
|
@ -2262,7 +2276,7 @@ FB46..FB4E ; Expands_On_NFD
|
|||
|
||||
# ================================================
|
||||
|
||||
# Derived Property: Expands_On_NFC
|
||||
# Derived Property: Expands_On_NFC (DEPRECATED as of Unicode 6.0.0)
|
||||
# Generated according to UAX #15.
|
||||
# Characters whose normalized length is not one.
|
||||
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
|
||||
|
@ -2309,7 +2323,7 @@ FB46..FB4E ; Expands_On_NFC
|
|||
|
||||
# ================================================
|
||||
|
||||
# Derived Property: Expands_On_NFKD
|
||||
# Derived Property: Expands_On_NFKD (DEPRECATED as of Unicode 6.0.0)
|
||||
# Generated according to UAX #15.
|
||||
# Characters whose normalized length is not one.
|
||||
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
|
||||
|
@ -2608,17 +2622,17 @@ FFE3 ; Expands_On_NFKD
|
|||
1F100..1F10A ; Expands_On_NFKD
|
||||
1F110..1F12A ; Expands_On_NFKD
|
||||
1F12D..1F12E ; Expands_On_NFKD
|
||||
1F14A..1F14E ; Expands_On_NFKD
|
||||
1F14A..1F14F ; Expands_On_NFKD
|
||||
1F190 ; Expands_On_NFKD
|
||||
1F200 ; Expands_On_NFKD
|
||||
1F200..1F201 ; Expands_On_NFKD
|
||||
1F213 ; Expands_On_NFKD
|
||||
1F240..1F248 ; Expands_On_NFKD
|
||||
|
||||
# Total code points: 13374
|
||||
# Total code points: 13376
|
||||
|
||||
# ================================================
|
||||
|
||||
# Derived Property: Expands_On_NFKC
|
||||
# Derived Property: Expands_On_NFKC (DEPRECATED as of Unicode 6.0.0)
|
||||
# Generated according to UAX #15.
|
||||
# Characters whose normalized length is not one.
|
||||
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
|
||||
|
@ -2746,12 +2760,12 @@ FFE3 ; Expands_On_NFKC
|
|||
1F100..1F10A ; Expands_On_NFKC
|
||||
1F110..1F12A ; Expands_On_NFKC
|
||||
1F12D..1F12E ; Expands_On_NFKC
|
||||
1F14A..1F14E ; Expands_On_NFKC
|
||||
1F14A..1F14F ; Expands_On_NFKC
|
||||
1F190 ; Expands_On_NFKC
|
||||
1F200 ; Expands_On_NFKC
|
||||
1F200..1F201 ; Expands_On_NFKC
|
||||
1F240..1F248 ; Expands_On_NFKC
|
||||
|
||||
# Total code points: 1231
|
||||
# Total code points: 1233
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -3251,6 +3265,7 @@ FFE3 ; Expands_On_NFKC
|
|||
0520 ; NFKC_CF; 0521
|
||||
0522 ; NFKC_CF; 0523
|
||||
0524 ; NFKC_CF; 0525
|
||||
0526 ; NFKC_CF; 0527
|
||||
0531 ; NFKC_CF; 0561
|
||||
0532 ; NFKC_CF; 0562
|
||||
0533 ; NFKC_CF; 0563
|
||||
|
@ -3817,6 +3832,14 @@ FFE3 ; Expands_On_NFKC
|
|||
2092 ; NFKC_CF; 006F
|
||||
2093 ; NFKC_CF; 0078
|
||||
2094 ; NFKC_CF; 0259
|
||||
2095 ; NFKC_CF; 0068
|
||||
2096 ; NFKC_CF; 006B
|
||||
2097 ; NFKC_CF; 006C
|
||||
2098 ; NFKC_CF; 006D
|
||||
2099 ; NFKC_CF; 006E
|
||||
209A ; NFKC_CF; 0070
|
||||
209B ; NFKC_CF; 0073
|
||||
209C ; NFKC_CF; 0074
|
||||
20A8 ; NFKC_CF; 0072 0073
|
||||
2100 ; NFKC_CF; 0061 002F 0063
|
||||
2101 ; NFKC_CF; 0061 002F 0073
|
||||
|
@ -5032,6 +5055,7 @@ A658 ; NFKC_CF; A659
|
|||
A65A ; NFKC_CF; A65B
|
||||
A65C ; NFKC_CF; A65D
|
||||
A65E ; NFKC_CF; A65F
|
||||
A660 ; NFKC_CF; A661
|
||||
A662 ; NFKC_CF; A663
|
||||
A664 ; NFKC_CF; A665
|
||||
A666 ; NFKC_CF; A667
|
||||
|
@ -5098,6 +5122,13 @@ A782 ; NFKC_CF; A783
|
|||
A784 ; NFKC_CF; A785
|
||||
A786 ; NFKC_CF; A787
|
||||
A78B ; NFKC_CF; A78C
|
||||
A78D ; NFKC_CF; 0265
|
||||
A790 ; NFKC_CF; A791
|
||||
A7A0 ; NFKC_CF; A7A1
|
||||
A7A2 ; NFKC_CF; A7A3
|
||||
A7A4 ; NFKC_CF; A7A5
|
||||
A7A6 ; NFKC_CF; A7A7
|
||||
A7A8 ; NFKC_CF; A7A9
|
||||
F900 ; NFKC_CF; 8C48
|
||||
F901 ; NFKC_CF; 66F4
|
||||
F902 ; NFKC_CF; 8ECA
|
||||
|
@ -7518,18 +7549,42 @@ FFF0..FFF8 ; NFKC_CF;
|
|||
1F12C ; NFKC_CF; 0072
|
||||
1F12D ; NFKC_CF; 0063 0064
|
||||
1F12E ; NFKC_CF; 0077 007A
|
||||
1F130 ; NFKC_CF; 0061
|
||||
1F131 ; NFKC_CF; 0062
|
||||
1F132 ; NFKC_CF; 0063
|
||||
1F133 ; NFKC_CF; 0064
|
||||
1F134 ; NFKC_CF; 0065
|
||||
1F135 ; NFKC_CF; 0066
|
||||
1F136 ; NFKC_CF; 0067
|
||||
1F137 ; NFKC_CF; 0068
|
||||
1F138 ; NFKC_CF; 0069
|
||||
1F139 ; NFKC_CF; 006A
|
||||
1F13A ; NFKC_CF; 006B
|
||||
1F13B ; NFKC_CF; 006C
|
||||
1F13C ; NFKC_CF; 006D
|
||||
1F13D ; NFKC_CF; 006E
|
||||
1F13E ; NFKC_CF; 006F
|
||||
1F13F ; NFKC_CF; 0070
|
||||
1F140 ; NFKC_CF; 0071
|
||||
1F141 ; NFKC_CF; 0072
|
||||
1F142 ; NFKC_CF; 0073
|
||||
1F143 ; NFKC_CF; 0074
|
||||
1F144 ; NFKC_CF; 0075
|
||||
1F145 ; NFKC_CF; 0076
|
||||
1F146 ; NFKC_CF; 0077
|
||||
1F147 ; NFKC_CF; 0078
|
||||
1F148 ; NFKC_CF; 0079
|
||||
1F149 ; NFKC_CF; 007A
|
||||
1F14A ; NFKC_CF; 0068 0076
|
||||
1F14B ; NFKC_CF; 006D 0076
|
||||
1F14C ; NFKC_CF; 0073 0064
|
||||
1F14D ; NFKC_CF; 0073 0073
|
||||
1F14E ; NFKC_CF; 0070 0070 0076
|
||||
1F14F ; NFKC_CF; 0077 0063
|
||||
1F190 ; NFKC_CF; 0064 006A
|
||||
1F200 ; NFKC_CF; 307B 304B
|
||||
1F201 ; NFKC_CF; 30B3 30B3
|
||||
1F202 ; NFKC_CF; 30B5
|
||||
1F210 ; NFKC_CF; 624B
|
||||
1F211 ; NFKC_CF; 5B57
|
||||
1F212 ; NFKC_CF; 53CC
|
||||
|
@ -7564,6 +7619,15 @@ FFF0..FFF8 ; NFKC_CF;
|
|||
1F22F ; NFKC_CF; 6307
|
||||
1F230 ; NFKC_CF; 8D70
|
||||
1F231 ; NFKC_CF; 6253
|
||||
1F232 ; NFKC_CF; 7981
|
||||
1F233 ; NFKC_CF; 7A7A
|
||||
1F234 ; NFKC_CF; 5408
|
||||
1F235 ; NFKC_CF; 6E80
|
||||
1F236 ; NFKC_CF; 6709
|
||||
1F237 ; NFKC_CF; 6708
|
||||
1F238 ; NFKC_CF; 7533
|
||||
1F239 ; NFKC_CF; 5272
|
||||
1F23A ; NFKC_CF; 55B6
|
||||
1F240 ; NFKC_CF; 3014 672C 3015
|
||||
1F241 ; NFKC_CF; 3014 4E09 3015
|
||||
1F242 ; NFKC_CF; 3014 4E8C 3015
|
||||
|
@ -7573,6 +7637,8 @@ FFF0..FFF8 ; NFKC_CF;
|
|||
1F246 ; NFKC_CF; 3014 76D7 3015
|
||||
1F247 ; NFKC_CF; 3014 52DD 3015
|
||||
1F248 ; NFKC_CF; 3014 6557 3015
|
||||
1F250 ; NFKC_CF; 5F97
|
||||
1F251 ; NFKC_CF; 53EF
|
||||
2F800 ; NFKC_CF; 4E3D
|
||||
2F801 ; NFKC_CF; 4E38
|
||||
2F802 ; NFKC_CF; 4E41
|
||||
|
@ -8113,7 +8179,7 @@ E0080..E00FF ; NFKC_CF;
|
|||
E0100..E01EF ; NFKC_CF;
|
||||
E01F0..E0FFF ; NFKC_CF;
|
||||
|
||||
# Total code points: 9740
|
||||
# Total code points: 9792
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -8405,6 +8471,7 @@ E01F0..E0FFF ; NFKC_CF;
|
|||
0520 ; Changes_When_NFKC_Casefolded
|
||||
0522 ; Changes_When_NFKC_Casefolded
|
||||
0524 ; Changes_When_NFKC_Casefolded
|
||||
0526 ; Changes_When_NFKC_Casefolded
|
||||
0531..0556 ; Changes_When_NFKC_Casefolded
|
||||
0587 ; Changes_When_NFKC_Casefolded
|
||||
0675..0678 ; Changes_When_NFKC_Casefolded
|
||||
|
@ -8635,7 +8702,7 @@ E01F0..E0FFF ; NFKC_CF;
|
|||
208A..208C ; Changes_When_NFKC_Casefolded
|
||||
208D ; Changes_When_NFKC_Casefolded
|
||||
208E ; Changes_When_NFKC_Casefolded
|
||||
2090..2094 ; Changes_When_NFKC_Casefolded
|
||||
2090..209C ; Changes_When_NFKC_Casefolded
|
||||
20A8 ; Changes_When_NFKC_Casefolded
|
||||
2100..2101 ; Changes_When_NFKC_Casefolded
|
||||
2102 ; Changes_When_NFKC_Casefolded
|
||||
|
@ -8776,6 +8843,7 @@ A658 ; Changes_When_NFKC_Casefolded
|
|||
A65A ; Changes_When_NFKC_Casefolded
|
||||
A65C ; Changes_When_NFKC_Casefolded
|
||||
A65E ; Changes_When_NFKC_Casefolded
|
||||
A660 ; Changes_When_NFKC_Casefolded
|
||||
A662 ; Changes_When_NFKC_Casefolded
|
||||
A664 ; Changes_When_NFKC_Casefolded
|
||||
A666 ; Changes_When_NFKC_Casefolded
|
||||
|
@ -8841,6 +8909,13 @@ A782 ; Changes_When_NFKC_Casefolded
|
|||
A784 ; Changes_When_NFKC_Casefolded
|
||||
A786 ; Changes_When_NFKC_Casefolded
|
||||
A78B ; Changes_When_NFKC_Casefolded
|
||||
A78D ; Changes_When_NFKC_Casefolded
|
||||
A790 ; Changes_When_NFKC_Casefolded
|
||||
A7A0 ; Changes_When_NFKC_Casefolded
|
||||
A7A2 ; Changes_When_NFKC_Casefolded
|
||||
A7A4 ; Changes_When_NFKC_Casefolded
|
||||
A7A6 ; Changes_When_NFKC_Casefolded
|
||||
A7A8 ; Changes_When_NFKC_Casefolded
|
||||
F900..FA0D ; Changes_When_NFKC_Casefolded
|
||||
FA10 ; Changes_When_NFKC_Casefolded
|
||||
FA12 ; Changes_When_NFKC_Casefolded
|
||||
|
@ -9012,16 +9087,12 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded
|
|||
1D7CE..1D7FF ; Changes_When_NFKC_Casefolded
|
||||
1F100..1F10A ; Changes_When_NFKC_Casefolded
|
||||
1F110..1F12E ; Changes_When_NFKC_Casefolded
|
||||
1F131 ; Changes_When_NFKC_Casefolded
|
||||
1F13D ; Changes_When_NFKC_Casefolded
|
||||
1F13F ; Changes_When_NFKC_Casefolded
|
||||
1F142 ; Changes_When_NFKC_Casefolded
|
||||
1F146 ; Changes_When_NFKC_Casefolded
|
||||
1F14A..1F14E ; Changes_When_NFKC_Casefolded
|
||||
1F130..1F14F ; Changes_When_NFKC_Casefolded
|
||||
1F190 ; Changes_When_NFKC_Casefolded
|
||||
1F200 ; Changes_When_NFKC_Casefolded
|
||||
1F210..1F231 ; Changes_When_NFKC_Casefolded
|
||||
1F200..1F202 ; Changes_When_NFKC_Casefolded
|
||||
1F210..1F23A ; Changes_When_NFKC_Casefolded
|
||||
1F240..1F248 ; Changes_When_NFKC_Casefolded
|
||||
1F250..1F251 ; Changes_When_NFKC_Casefolded
|
||||
2F800..2FA1D ; Changes_When_NFKC_Casefolded
|
||||
E0000 ; Changes_When_NFKC_Casefolded
|
||||
E0001 ; Changes_When_NFKC_Casefolded
|
||||
|
@ -9031,6 +9102,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded
|
|||
E0100..E01EF ; Changes_When_NFKC_Casefolded
|
||||
E01F0..E0FFF ; Changes_When_NFKC_Casefolded
|
||||
|
||||
# Total code points: 9740
|
||||
# Total code points: 9792
|
||||
|
||||
# EOF
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# DerivedNumericValues-5.2.0.txt
|
||||
# Date: 2009-08-22, 04:58:28 GMT [MD]
|
||||
# DerivedNumericValues-6.0.0.txt
|
||||
# Date: 2010-08-19, 00:48:14 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -76,6 +76,7 @@ F9B2 ; 0.0 ; ; 0 # Lo CJK COMPATIBILITY IDEOGRAPH-F9B2
|
|||
FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
|
||||
1018A ; 0.0 ; ; 0 # No GREEK ZERO SIGN
|
||||
104A0 ; 0.0 ; ; 0 # Nd OSMANYA DIGIT ZERO
|
||||
11066 ; 0.0 ; ; 0 # Nd BRAHMI DIGIT ZERO
|
||||
1D7CE ; 0.0 ; ; 0 # Nd MATHEMATICAL BOLD DIGIT ZERO
|
||||
1D7D8 ; 0.0 ; ; 0 # Nd MATHEMATICAL DOUBLE-STRUCK DIGIT ZERO
|
||||
1D7E2 ; 0.0 ; ; 0 # Nd MATHEMATICAL SANS-SERIF DIGIT ZERO
|
||||
|
@ -83,14 +84,15 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
|
|||
1D7F6 ; 0.0 ; ; 0 # Nd MATHEMATICAL MONOSPACE DIGIT ZERO
|
||||
1F100..1F101 ; 0.0 ; ; 0 # No [2] DIGIT ZERO FULL STOP..DIGIT ZERO COMMA
|
||||
|
||||
# Total code points: 55
|
||||
# Total code points: 56
|
||||
|
||||
# ================================================
|
||||
|
||||
09F4 ; 0.0625 ; ; 1/16 # No BENGALI CURRENCY NUMERATOR ONE
|
||||
0B75 ; 0.0625 ; ; 1/16 # No ORIYA FRACTION ONE SIXTEENTH
|
||||
A833 ; 0.0625 ; ; 1/16 # No NORTH INDIC FRACTION ONE SIXTEENTH
|
||||
|
||||
# Total code points: 2
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -107,11 +109,12 @@ A833 ; 0.0625 ; ; 1/16 # No NORTH INDIC FRACTION ONE SIXTEENTH
|
|||
# ================================================
|
||||
|
||||
09F5 ; 0.125 ; ; 1/8 # No BENGALI CURRENCY NUMERATOR TWO
|
||||
0B76 ; 0.125 ; ; 1/8 # No ORIYA FRACTION ONE EIGHTH
|
||||
215B ; 0.125 ; ; 1/8 # No VULGAR FRACTION ONE EIGHTH
|
||||
A834 ; 0.125 ; ; 1/8 # No NORTH INDIC FRACTION ONE EIGHTH
|
||||
1245F ; 0.125 ; ; 1/8 # Nl CUNEIFORM NUMERIC SIGN ONE EIGHTH ASH
|
||||
|
||||
# Total code points: 4
|
||||
# Total code points: 5
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -129,9 +132,10 @@ A834 ; 0.125 ; ; 1/8 # No NORTH INDIC FRACTION ONE EIGHTH
|
|||
# ================================================
|
||||
|
||||
09F6 ; 0.1875 ; ; 3/16 # No BENGALI CURRENCY NUMERATOR THREE
|
||||
0B77 ; 0.1875 ; ; 3/16 # No ORIYA FRACTION THREE SIXTEENTHS
|
||||
A835 ; 0.1875 ; ; 3/16 # No NORTH INDIC FRACTION THREE SIXTEENTHS
|
||||
|
||||
# Total code points: 2
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -143,6 +147,7 @@ A835 ; 0.1875 ; ; 3/16 # No NORTH INDIC FRACTION THREE SIXTEENTHS
|
|||
|
||||
00BC ; 0.25 ; ; 1/4 # No VULGAR FRACTION ONE QUARTER
|
||||
09F7 ; 0.25 ; ; 1/4 # No BENGALI CURRENCY NUMERATOR FOUR
|
||||
0B72 ; 0.25 ; ; 1/4 # No ORIYA FRACTION ONE QUARTER
|
||||
0D73 ; 0.25 ; ; 1/4 # No MALAYALAM FRACTION ONE QUARTER
|
||||
A830 ; 0.25 ; ; 1/4 # No NORTH INDIC FRACTION ONE QUARTER
|
||||
10140 ; 0.25 ; ; 1/4 # Nl GREEK ACROPHONIC ATTIC ONE QUARTER
|
||||
|
@ -150,7 +155,7 @@ A830 ; 0.25 ; ; 1/4 # No NORTH INDIC FRACTION ONE QUARTER
|
|||
12460 ; 0.25 ; ; 1/4 # Nl CUNEIFORM NUMERIC SIGN ONE QUARTER ASH
|
||||
12462 ; 0.25 ; ; 1/4 # Nl CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
|
||||
|
||||
# Total code points: 8
|
||||
# Total code points: 9
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -176,6 +181,7 @@ A830 ; 0.25 ; ; 1/4 # No NORTH INDIC FRACTION ONE QUARTER
|
|||
# ================================================
|
||||
|
||||
00BD ; 0.5 ; ; 1/2 # No VULGAR FRACTION ONE HALF
|
||||
0B73 ; 0.5 ; ; 1/2 # No ORIYA FRACTION ONE HALF
|
||||
0D74 ; 0.5 ; ; 1/2 # No MALAYALAM FRACTION ONE HALF
|
||||
0F2A ; 0.5 ; ; 1/2 # No TIBETAN DIGIT HALF ONE
|
||||
2CFD ; 0.5 ; ; 1/2 # No COPTIC FRACTION ONE HALF
|
||||
|
@ -184,7 +190,7 @@ A831 ; 0.5 ; ; 1/2 # No NORTH INDIC FRACTION ONE HALF
|
|||
10175..10176 ; 0.5 ; ; 1/2 # No [2] GREEK ONE HALF SIGN..GREEK ONE HALF SIGN ALTERNATE FORM
|
||||
10E7B ; 0.5 ; ; 1/2 # No RUMI FRACTION ONE HALF
|
||||
|
||||
# Total code points: 9
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -212,11 +218,12 @@ A831 ; 0.5 ; ; 1/2 # No NORTH INDIC FRACTION ONE HALF
|
|||
|
||||
00BE ; 0.75 ; ; 3/4 # No VULGAR FRACTION THREE QUARTERS
|
||||
09F8 ; 0.75 ; ; 3/4 # No BENGALI CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
0B74 ; 0.75 ; ; 3/4 # No ORIYA FRACTION THREE QUARTERS
|
||||
0D75 ; 0.75 ; ; 3/4 # No MALAYALAM FRACTION THREE QUARTERS
|
||||
A832 ; 0.75 ; ; 3/4 # No NORTH INDIC FRACTION THREE QUARTERS
|
||||
10178 ; 0.75 ; ; 3/4 # No GREEK THREE QUARTERS SIGN
|
||||
|
||||
# Total code points: 5
|
||||
# Total code points: 6
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -266,7 +273,7 @@ A832 ; 0.75 ; ; 3/4 # No NORTH INDIC FRACTION THREE QUARTERS
|
|||
1811 ; 1.0 ; ; 1 # Nd MONGOLIAN DIGIT ONE
|
||||
1947 ; 1.0 ; ; 1 # Nd LIMBU DIGIT ONE
|
||||
19D1 ; 1.0 ; ; 1 # Nd NEW TAI LUE DIGIT ONE
|
||||
19DA ; 1.0 ; ; 1 # Nd NEW TAI LUE THAM DIGIT ONE
|
||||
19DA ; 1.0 ; ; 1 # No NEW TAI LUE THAM DIGIT ONE
|
||||
1A81 ; 1.0 ; ; 1 # Nd TAI THAM HORA DIGIT ONE
|
||||
1A91 ; 1.0 ; ; 1 # Nd TAI THAM THAM DIGIT ONE
|
||||
1B51 ; 1.0 ; ; 1 # Nd BALINESE DIGIT ONE
|
||||
|
@ -314,6 +321,8 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE
|
|||
10B58 ; 1.0 ; ; 1 # No INSCRIPTIONAL PARTHIAN NUMBER ONE
|
||||
10B78 ; 1.0 ; ; 1 # No INSCRIPTIONAL PAHLAVI NUMBER ONE
|
||||
10E60 ; 1.0 ; ; 1 # No RUMI DIGIT ONE
|
||||
11052 ; 1.0 ; ; 1 # No BRAHMI NUMBER ONE
|
||||
11067 ; 1.0 ; ; 1 # Nd BRAHMI DIGIT ONE
|
||||
12415 ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESH2
|
||||
1241E ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESHU
|
||||
1242C ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE SHARU
|
||||
|
@ -329,7 +338,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE
|
|||
1F102 ; 1.0 ; ; 1 # No DIGIT ONE COMMA
|
||||
2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A
|
||||
|
||||
# Total code points: 91
|
||||
# Total code points: 93
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -413,6 +422,8 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO
|
|||
10B59 ; 2.0 ; ; 2 # No INSCRIPTIONAL PARTHIAN NUMBER TWO
|
||||
10B79 ; 2.0 ; ; 2 # No INSCRIPTIONAL PAHLAVI NUMBER TWO
|
||||
10E61 ; 2.0 ; ; 2 # No RUMI DIGIT TWO
|
||||
11053 ; 2.0 ; ; 2 # No BRAHMI NUMBER TWO
|
||||
11068 ; 2.0 ; ; 2 # Nd BRAHMI DIGIT TWO
|
||||
12400 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO ASH
|
||||
12416 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO GESH2
|
||||
1241F ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO GESHU
|
||||
|
@ -431,7 +442,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO
|
|||
1F103 ; 2.0 ; ; 2 # No DIGIT TWO COMMA
|
||||
22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390
|
||||
|
||||
# Total code points: 94
|
||||
# Total code points: 96
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -509,6 +520,8 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE
|
|||
10B5A ; 3.0 ; ; 3 # No INSCRIPTIONAL PARTHIAN NUMBER THREE
|
||||
10B7A ; 3.0 ; ; 3 # No INSCRIPTIONAL PAHLAVI NUMBER THREE
|
||||
10E62 ; 3.0 ; ; 3 # No RUMI DIGIT THREE
|
||||
11054 ; 3.0 ; ; 3 # No BRAHMI NUMBER THREE
|
||||
11069 ; 3.0 ; ; 3 # Nd BRAHMI DIGIT THREE
|
||||
12401 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE ASH
|
||||
12408 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE DISH
|
||||
12417 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE GESH2
|
||||
|
@ -531,7 +544,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE
|
|||
22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998
|
||||
23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B
|
||||
|
||||
# Total code points: 96
|
||||
# Total code points: 98
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -603,6 +616,8 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR
|
|||
10B5B ; 4.0 ; ; 4 # No INSCRIPTIONAL PARTHIAN NUMBER FOUR
|
||||
10B7B ; 4.0 ; ; 4 # No INSCRIPTIONAL PAHLAVI NUMBER FOUR
|
||||
10E63 ; 4.0 ; ; 4 # No RUMI DIGIT FOUR
|
||||
11055 ; 4.0 ; ; 4 # No BRAHMI NUMBER FOUR
|
||||
1106A ; 4.0 ; ; 4 # Nd BRAHMI DIGIT FOUR
|
||||
12402 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR ASH
|
||||
12409 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR DISH
|
||||
1240F ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR U
|
||||
|
@ -625,7 +640,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR
|
|||
200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2
|
||||
2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D
|
||||
|
||||
# Total code points: 87
|
||||
# Total code points: 89
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -700,6 +715,8 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE
|
|||
10321 ; 5.0 ; ; 5 # No OLD ITALIC NUMERAL FIVE
|
||||
104A5 ; 5.0 ; ; 5 # Nd OSMANYA DIGIT FIVE
|
||||
10E64 ; 5.0 ; ; 5 # No RUMI DIGIT FIVE
|
||||
11056 ; 5.0 ; ; 5 # No BRAHMI NUMBER FIVE
|
||||
1106B ; 5.0 ; ; 5 # Nd BRAHMI DIGIT FIVE
|
||||
12403 ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE ASH
|
||||
1240A ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE DISH
|
||||
12410 ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE U
|
||||
|
@ -719,7 +736,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE
|
|||
1F106 ; 5.0 ; ; 5 # No DIGIT FIVE COMMA
|
||||
20121 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121
|
||||
|
||||
# Total code points: 84
|
||||
# Total code points: 86
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -790,6 +807,8 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX
|
|||
1010C ; 6.0 ; ; 6 # No AEGEAN NUMBER SIX
|
||||
104A6 ; 6.0 ; ; 6 # Nd OSMANYA DIGIT SIX
|
||||
10E65 ; 6.0 ; ; 6 # No RUMI DIGIT SIX
|
||||
11057 ; 6.0 ; ; 6 # No BRAHMI NUMBER SIX
|
||||
1106C ; 6.0 ; ; 6 # Nd BRAHMI DIGIT SIX
|
||||
12404 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX ASH
|
||||
1240B ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX DISH
|
||||
12411 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX U
|
||||
|
@ -806,7 +825,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX
|
|||
1F107 ; 6.0 ; ; 6 # No DIGIT SIX COMMA
|
||||
20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA
|
||||
|
||||
# Total code points: 76
|
||||
# Total code points: 78
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -875,6 +894,8 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN
|
|||
1010D ; 7.0 ; ; 7 # No AEGEAN NUMBER SEVEN
|
||||
104A7 ; 7.0 ; ; 7 # Nd OSMANYA DIGIT SEVEN
|
||||
10E66 ; 7.0 ; ; 7 # No RUMI DIGIT SEVEN
|
||||
11058 ; 7.0 ; ; 7 # No BRAHMI NUMBER SEVEN
|
||||
1106D ; 7.0 ; ; 7 # Nd BRAHMI DIGIT SEVEN
|
||||
12405 ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN ASH
|
||||
1240C ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN DISH
|
||||
12412 ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN U
|
||||
|
@ -890,7 +911,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN
|
|||
1F108 ; 7.0 ; ; 7 # No DIGIT SEVEN COMMA
|
||||
20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001
|
||||
|
||||
# Total code points: 75
|
||||
# Total code points: 77
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -957,6 +978,8 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT
|
|||
1010E ; 8.0 ; ; 8 # No AEGEAN NUMBER EIGHT
|
||||
104A8 ; 8.0 ; ; 8 # Nd OSMANYA DIGIT EIGHT
|
||||
10E67 ; 8.0 ; ; 8 # No RUMI DIGIT EIGHT
|
||||
11059 ; 8.0 ; ; 8 # No BRAHMI NUMBER EIGHT
|
||||
1106E ; 8.0 ; ; 8 # Nd BRAHMI DIGIT EIGHT
|
||||
12406 ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT ASH
|
||||
1240D ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT DISH
|
||||
12413 ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT U
|
||||
|
@ -971,7 +994,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT
|
|||
1D7FE ; 8.0 ; ; 8 # Nd MATHEMATICAL MONOSPACE DIGIT EIGHT
|
||||
1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA
|
||||
|
||||
# Total code points: 71
|
||||
# Total code points: 73
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1039,6 +1062,8 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE
|
|||
1010F ; 9.0 ; ; 9 # No AEGEAN NUMBER NINE
|
||||
104A9 ; 9.0 ; ; 9 # Nd OSMANYA DIGIT NINE
|
||||
10E68 ; 9.0 ; ; 9 # No RUMI DIGIT NINE
|
||||
1105A ; 9.0 ; ; 9 # No BRAHMI NUMBER NINE
|
||||
1106F ; 9.0 ; ; 9 # Nd BRAHMI DIGIT NINE
|
||||
12407 ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE ASH
|
||||
1240E ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE DISH
|
||||
12414 ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE U
|
||||
|
@ -1054,7 +1079,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE
|
|||
1F10A ; 9.0 ; ; 9 # No DIGIT NINE COMMA
|
||||
2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890
|
||||
|
||||
# Total code points: 75
|
||||
# Total code points: 77
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1091,9 +1116,10 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
|
|||
10B5C ; 10.0 ; ; 10 # No INSCRIPTIONAL PARTHIAN NUMBER TEN
|
||||
10B7C ; 10.0 ; ; 10 # No INSCRIPTIONAL PAHLAVI NUMBER TEN
|
||||
10E69 ; 10.0 ; ; 10 # No RUMI NUMBER TEN
|
||||
1105B ; 10.0 ; ; 10 # No BRAHMI NUMBER TEN
|
||||
1D369 ; 10.0 ; ; 10 # No COUNTING ROD TENS DIGIT ONE
|
||||
|
||||
# Total code points: 38
|
||||
# Total code points: 39
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1202,9 +1228,10 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
|
|||
10B5D ; 20.0 ; ; 20 # No INSCRIPTIONAL PARTHIAN NUMBER TWENTY
|
||||
10B7D ; 20.0 ; ; 20 # No INSCRIPTIONAL PAHLAVI NUMBER TWENTY
|
||||
10E6A ; 20.0 ; ; 20 # No RUMI NUMBER TWENTY
|
||||
1105C ; 20.0 ; ; 20 # No BRAHMI NUMBER TWENTY
|
||||
1D36A ; 20.0 ; ; 20 # No COUNTING ROD TENS DIGIT TWO
|
||||
|
||||
# Total code points: 17
|
||||
# Total code points: 18
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1269,10 +1296,11 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
|
|||
10112 ; 30.0 ; ; 30 # No AEGEAN NUMBER THIRTY
|
||||
10165 ; 30.0 ; ; 30 # Nl GREEK ACROPHONIC THESPIAN THIRTY
|
||||
10E6B ; 30.0 ; ; 30 # No RUMI NUMBER THIRTY
|
||||
1105D ; 30.0 ; ; 30 # No BRAHMI NUMBER THIRTY
|
||||
1D36B ; 30.0 ; ; 30 # No COUNTING ROD TENS DIGIT THREE
|
||||
20983 ; 30.0 ; ; 30 # Lo CJK UNIFIED IDEOGRAPH-20983
|
||||
|
||||
# Total code points: 9
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1335,11 +1363,12 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
|
|||
534C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-534C
|
||||
10113 ; 40.0 ; ; 40 # No AEGEAN NUMBER FORTY
|
||||
10E6C ; 40.0 ; ; 40 # No RUMI NUMBER FORTY
|
||||
1105E ; 40.0 ; ; 40 # No BRAHMI NUMBER FORTY
|
||||
1D36C ; 40.0 ; ; 40 # No COUNTING ROD TENS DIGIT FOUR
|
||||
2098C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-2098C
|
||||
2099C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-2099C
|
||||
|
||||
# Total code points: 8
|
||||
# Total code points: 9
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1411,36 +1440,40 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
|
|||
10323 ; 50.0 ; ; 50 # No OLD ITALIC NUMERAL FIFTY
|
||||
10A7E ; 50.0 ; ; 50 # No OLD SOUTH ARABIAN NUMBER FIFTY
|
||||
10E6D ; 50.0 ; ; 50 # No RUMI NUMBER FIFTY
|
||||
1105F ; 50.0 ; ; 50 # No BRAHMI NUMBER FIFTY
|
||||
1D36D ; 50.0 ; ; 50 # No COUNTING ROD TENS DIGIT FIVE
|
||||
|
||||
# Total code points: 18
|
||||
# Total code points: 19
|
||||
|
||||
# ================================================
|
||||
|
||||
1377 ; 60.0 ; ; 60 # No ETHIOPIC NUMBER SIXTY
|
||||
10115 ; 60.0 ; ; 60 # No AEGEAN NUMBER SIXTY
|
||||
10E6E ; 60.0 ; ; 60 # No RUMI NUMBER SIXTY
|
||||
11060 ; 60.0 ; ; 60 # No BRAHMI NUMBER SIXTY
|
||||
1D36E ; 60.0 ; ; 60 # No COUNTING ROD TENS DIGIT SIX
|
||||
|
||||
# Total code points: 4
|
||||
# Total code points: 5
|
||||
|
||||
# ================================================
|
||||
|
||||
1378 ; 70.0 ; ; 70 # No ETHIOPIC NUMBER SEVENTY
|
||||
10116 ; 70.0 ; ; 70 # No AEGEAN NUMBER SEVENTY
|
||||
10E6F ; 70.0 ; ; 70 # No RUMI NUMBER SEVENTY
|
||||
11061 ; 70.0 ; ; 70 # No BRAHMI NUMBER SEVENTY
|
||||
1D36F ; 70.0 ; ; 70 # No COUNTING ROD TENS DIGIT SEVEN
|
||||
|
||||
# Total code points: 4
|
||||
# Total code points: 5
|
||||
|
||||
# ================================================
|
||||
|
||||
1379 ; 80.0 ; ; 80 # No ETHIOPIC NUMBER EIGHTY
|
||||
10117 ; 80.0 ; ; 80 # No AEGEAN NUMBER EIGHTY
|
||||
10E70 ; 80.0 ; ; 80 # No RUMI NUMBER EIGHTY
|
||||
11062 ; 80.0 ; ; 80 # No BRAHMI NUMBER EIGHTY
|
||||
1D370 ; 80.0 ; ; 80 # No COUNTING ROD TENS DIGIT EIGHT
|
||||
|
||||
# Total code points: 4
|
||||
# Total code points: 5
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1448,9 +1481,10 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
|
|||
10118 ; 90.0 ; ; 90 # No AEGEAN NUMBER NINETY
|
||||
10341 ; 90.0 ; ; 90 # Nl GOTHIC LETTER NINETY
|
||||
10E71 ; 90.0 ; ; 90 # No RUMI NUMBER NINETY
|
||||
11063 ; 90.0 ; ; 90 # No BRAHMI NUMBER NINETY
|
||||
1D371 ; 90.0 ; ; 90 # No COUNTING ROD TENS DIGIT NINE
|
||||
|
||||
# Total code points: 5
|
||||
# Total code points: 6
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1473,8 +1507,9 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
|
|||
10B5E ; 100.0 ; ; 100 # No INSCRIPTIONAL PARTHIAN NUMBER ONE HUNDRED
|
||||
10B7E ; 100.0 ; ; 100 # No INSCRIPTIONAL PAHLAVI NUMBER ONE HUNDRED
|
||||
10E72 ; 100.0 ; ; 100 # No RUMI NUMBER ONE HUNDRED
|
||||
11064 ; 100.0 ; ; 100 # No BRAHMI NUMBER ONE HUNDRED
|
||||
|
||||
# Total code points: 19
|
||||
# Total code points: 20
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1557,8 +1592,9 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
|
|||
10A47 ; 1000.0 ; ; 1000 # No KHAROSHTHI NUMBER ONE THOUSAND
|
||||
10B5F ; 1000.0 ; ; 1000 # No INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
|
||||
10B7F ; 1000.0 ; ; 1000 # No INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
|
||||
11065 ; 1000.0 ; ; 1000 # No BRAHMI NUMBER ONE THOUSAND
|
||||
|
||||
# Total code points: 16
|
||||
# Total code points: 17
|
||||
|
||||
# ================================================
|
||||
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
# EastAsianWidth-5.2.0.txt
|
||||
# Date: 2009-06-09, 17:47:00 PDT [KW]
|
||||
# EastAsianWidth-6.0.0.txt
|
||||
# Date: 2010-08-17, 12:17:00 PDT [KW]
|
||||
#
|
||||
# East Asian Width Properties
|
||||
#
|
||||
# This file is an informative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# The format is two fields separated by a semicolon.
|
||||
|
@ -22,6 +22,7 @@
|
|||
# CJK Compatibility Ideographs: U+F900..U+FAFF
|
||||
# CJK Unified Ideographs Extension B: U+20000..U+2A6DF
|
||||
# CJK Unified Ideographs Extension C: U+2A700..U+2B73F
|
||||
# CJK Unified Ideographs Extension D: U+2B740..U+2B81F
|
||||
# CJK Compatibility Ideographs Supplement: U+2F800..U+2FA1F
|
||||
# and any other reserved code points on
|
||||
# Planes 2 and 3: U+20000..U+2FFFD
|
||||
|
@ -163,7 +164,7 @@
|
|||
0410..044F;A
|
||||
0450;N
|
||||
0451;A
|
||||
0452..0525;N
|
||||
0452..0527;N
|
||||
0531..0556;N
|
||||
0559..055F;N
|
||||
0561..0587;N
|
||||
|
@ -173,18 +174,15 @@
|
|||
05F0..05F4;N
|
||||
0600..0603;N
|
||||
0606..061B;N
|
||||
061E..061F;N
|
||||
0621..065E;N
|
||||
0660..070D;N
|
||||
061E..070D;N
|
||||
070F..074A;N
|
||||
074D..07B1;N
|
||||
07C0..07FA;N
|
||||
0800..082D;N
|
||||
0830..083E;N
|
||||
0900..0939;N
|
||||
093C..094E;N
|
||||
0950..0955;N
|
||||
0958..0972;N
|
||||
0840..085B;N
|
||||
085E;N
|
||||
0900..0977;N
|
||||
0979..097F;N
|
||||
0981..0983;N
|
||||
0985..098C;N
|
||||
|
@ -243,7 +241,7 @@
|
|||
0B56..0B57;N
|
||||
0B5C..0B5D;N
|
||||
0B5F..0B63;N
|
||||
0B66..0B71;N
|
||||
0B66..0B77;N
|
||||
0B82..0B83;N
|
||||
0B85..0B8A;N
|
||||
0B8E..0B90;N
|
||||
|
@ -291,11 +289,10 @@
|
|||
0D02..0D03;N
|
||||
0D05..0D0C;N
|
||||
0D0E..0D10;N
|
||||
0D12..0D28;N
|
||||
0D2A..0D39;N
|
||||
0D12..0D3A;N
|
||||
0D3D..0D44;N
|
||||
0D46..0D48;N
|
||||
0D4A..0D4D;N
|
||||
0D4A..0D4E;N
|
||||
0D57;N
|
||||
0D60..0D63;N
|
||||
0D66..0D75;N
|
||||
|
@ -333,11 +330,10 @@
|
|||
0EDC..0EDD;N
|
||||
0F00..0F47;N
|
||||
0F49..0F6C;N
|
||||
0F71..0F8B;N
|
||||
0F90..0F97;N
|
||||
0F71..0F97;N
|
||||
0F99..0FBC;N
|
||||
0FBE..0FCC;N
|
||||
0FCE..0FD8;N
|
||||
0FCE..0FDA;N
|
||||
1000..10C5;N
|
||||
10D0..10FC;N
|
||||
1100..115F;W
|
||||
|
@ -361,7 +357,7 @@
|
|||
12D8..1310;N
|
||||
1312..1315;N
|
||||
1318..135A;N
|
||||
135F..137C;N
|
||||
135D..137C;N
|
||||
1380..1399;N
|
||||
13A0..13F4;N
|
||||
1400..169C;N
|
||||
|
@ -400,12 +396,13 @@
|
|||
1B50..1B7C;N
|
||||
1B80..1BAA;N
|
||||
1BAE..1BB9;N
|
||||
1C00..1C37;N
|
||||
1BC0..1BF3;N
|
||||
1BFC..1C37;N
|
||||
1C3B..1C49;N
|
||||
1C4D..1C7F;N
|
||||
1CD0..1CF2;N
|
||||
1D00..1DE6;N
|
||||
1DFD..1F15;N
|
||||
1DFC..1F15;N
|
||||
1F18..1F1D;N
|
||||
1F20..1F45;N
|
||||
1F48..1F4D;N
|
||||
|
@ -451,12 +448,12 @@
|
|||
2080;N
|
||||
2081..2084;A
|
||||
2085..208E;N
|
||||
2090..2094;N
|
||||
2090..209C;N
|
||||
20A0..20A8;N
|
||||
20A9;H
|
||||
20AA..20AB;N
|
||||
20AC;A
|
||||
20AD..20B8;N
|
||||
20AD..20B9;N
|
||||
20D0..20F0;N
|
||||
2100..2102;N
|
||||
2103;A
|
||||
|
@ -553,7 +550,7 @@
|
|||
2312;A
|
||||
2313..2328;N
|
||||
2329..232A;W
|
||||
232B..23E8;N
|
||||
232B..23F3;N
|
||||
2400..2426;N
|
||||
2440..244A;N
|
||||
2460..24E9;A
|
||||
|
@ -619,28 +616,21 @@
|
|||
26BE..26BF;A
|
||||
26C0..26C3;N
|
||||
26C4..26CD;A
|
||||
26CE;N
|
||||
26CF..26E1;A
|
||||
26E2;N
|
||||
26E3;A
|
||||
26E4..26E7;N
|
||||
26E8..26FF;A
|
||||
2701..2704;N
|
||||
2706..2709;N
|
||||
270C..2727;N
|
||||
2729..273C;N
|
||||
2701..273C;N
|
||||
273D;A
|
||||
273E..274B;N
|
||||
274D;N
|
||||
274F..2752;N
|
||||
2756;N
|
||||
273E..2756;N
|
||||
2757;A
|
||||
2758..275E;N
|
||||
2761..2775;N
|
||||
2758..2775;N
|
||||
2776..277F;A
|
||||
2780..2794;N
|
||||
2798..27AF;N
|
||||
27B1..27BE;N
|
||||
27C0..27CA;N
|
||||
2780..27CA;N
|
||||
27CC;N
|
||||
27D0..27E5;N
|
||||
27CE..27E5;N
|
||||
27E6..27ED;Na
|
||||
27EE..2984;N
|
||||
2985..2986;Na
|
||||
|
@ -652,8 +642,8 @@
|
|||
2C60..2CF1;N
|
||||
2CF9..2D25;N
|
||||
2D30..2D65;N
|
||||
2D6F;N
|
||||
2D80..2D96;N
|
||||
2D6F..2D70;N
|
||||
2D7F..2D96;N
|
||||
2DA0..2DA6;N
|
||||
2DA8..2DAE;N
|
||||
2DB0..2DB6;N
|
||||
|
@ -674,7 +664,7 @@
|
|||
3099..30FF;W
|
||||
3105..312D;W
|
||||
3131..318E;W
|
||||
3190..31B7;W
|
||||
3190..31BA;W
|
||||
31C0..31E3;W
|
||||
31F0..321E;W
|
||||
3220..3247;W
|
||||
|
@ -689,12 +679,13 @@
|
|||
A000..A48C;W
|
||||
A490..A4C6;W
|
||||
A4D0..A62B;N
|
||||
A640..A65F;N
|
||||
A662..A673;N
|
||||
A640..A673;N
|
||||
A67C..A697;N
|
||||
A6A0..A6F7;N
|
||||
A700..A78C;N
|
||||
A7FB..A82B;N
|
||||
A700..A78E;N
|
||||
A790..A791;N
|
||||
A7A0..A7A9;N
|
||||
A7FA..A82B;N
|
||||
A830..A839;N
|
||||
A840..A877;N
|
||||
A880..A8C4;N
|
||||
|
@ -712,6 +703,11 @@ AA50..AA59;N
|
|||
AA5C..AA7B;N
|
||||
AA80..AAC2;N
|
||||
AADB..AADF;N
|
||||
AB01..AB06;N
|
||||
AB09..AB0E;N
|
||||
AB11..AB16;N
|
||||
AB20..AB26;N
|
||||
AB28..AB2E;N
|
||||
ABC0..ABED;N
|
||||
ABF0..ABF9;N
|
||||
AC00..D7A3;W
|
||||
|
@ -734,7 +730,7 @@ FB38..FB3C;N
|
|||
FB3E;N
|
||||
FB40..FB41;N
|
||||
FB43..FB44;N
|
||||
FB46..FBB1;N
|
||||
FB46..FBC1;N
|
||||
FBD3..FD3F;N
|
||||
FD50..FD8F;N
|
||||
FD92..FDC7;N
|
||||
|
@ -805,11 +801,15 @@ FFFD;A
|
|||
10B78..10B7F;N
|
||||
10C00..10C48;N
|
||||
10E60..10E7E;N
|
||||
11000..1104D;N
|
||||
11052..1106F;N
|
||||
11080..110C1;N
|
||||
12000..1236E;N
|
||||
12400..12462;N
|
||||
12470..12473;N
|
||||
13000..1342E;N
|
||||
16800..16A38;N
|
||||
1B000..1B001;W
|
||||
1D000..1D0F5;N
|
||||
1D100..1D126;N
|
||||
1D129..1D1DD;N
|
||||
|
@ -839,29 +839,54 @@ FFFD;A
|
|||
1D7CE..1D7FF;N
|
||||
1F000..1F02B;N
|
||||
1F030..1F093;N
|
||||
1F0A0..1F0AE;N
|
||||
1F0B1..1F0BE;N
|
||||
1F0C1..1F0CF;N
|
||||
1F0D1..1F0DF;N
|
||||
1F100..1F10A;A
|
||||
1F110..1F12D;A
|
||||
1F12E;N
|
||||
1F131;A
|
||||
1F13D;A
|
||||
1F13F;A
|
||||
1F142;A
|
||||
1F146;A
|
||||
1F14A..1F14E;A
|
||||
1F157;A
|
||||
1F15F;A
|
||||
1F179;A
|
||||
1F17B..1F17C;A
|
||||
1F17F;A
|
||||
1F18A..1F18D;A
|
||||
1F190;A
|
||||
1F200;W
|
||||
1F210..1F231;W
|
||||
1F130..1F169;A
|
||||
1F170..1F19A;A
|
||||
1F1E6..1F1FF;N
|
||||
1F200..1F202;W
|
||||
1F210..1F23A;W
|
||||
1F240..1F248;W
|
||||
1F250..1F251;W
|
||||
1F300..1F320;N
|
||||
1F330..1F335;N
|
||||
1F337..1F37C;N
|
||||
1F380..1F393;N
|
||||
1F3A0..1F3C4;N
|
||||
1F3C6..1F3CA;N
|
||||
1F3E0..1F3F0;N
|
||||
1F400..1F43E;N
|
||||
1F440;N
|
||||
1F442..1F4F7;N
|
||||
1F4F9..1F4FC;N
|
||||
1F500..1F53D;N
|
||||
1F550..1F567;N
|
||||
1F5FB..1F5FF;N
|
||||
1F601..1F610;N
|
||||
1F612..1F614;N
|
||||
1F616;N
|
||||
1F618;N
|
||||
1F61A;N
|
||||
1F61C..1F61E;N
|
||||
1F620..1F625;N
|
||||
1F628..1F62B;N
|
||||
1F62D;N
|
||||
1F630..1F633;N
|
||||
1F635..1F640;N
|
||||
1F645..1F64F;N
|
||||
1F680..1F6C5;N
|
||||
1F700..1F773;N
|
||||
20000..2A6D6;W
|
||||
2A6D7..2A6FF;W
|
||||
2A700..2B734;W
|
||||
2B735..2F7FF;W
|
||||
2B735..2F73F;W
|
||||
2B740..2B81D;W
|
||||
2B81E..2F7FF;W
|
||||
2F800..2FA1D;W
|
||||
2FA1E..2FFFD;W
|
||||
30000..3FFFD;W
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
# Fractional UCA Table, generated from standard UCA
|
||||
# 2009-10-24, 00:18:26 GMT [MD]
|
||||
# VERSION: UCA=5.2.0, UCD=5.2.0
|
||||
|
||||
# Markus 2010-08-18: This is really the 5.2 version, but for testing with
|
||||
# UCD 6.0 I need the version numbers here to be 6.0 as well.
|
||||
# VERSION: UCA=6.0.0, UCD=6.0.0
|
||||
|
||||
# Generated processed version, as described in ICU design document.
|
||||
# NOTES
|
||||
|
@ -17,7 +20,7 @@
|
|||
# - Differs from previous version in that MAX value was introduced at 1F.
|
||||
# All tertiary values are shifted down by 1, filling the gap at 7!
|
||||
|
||||
[UCA version = 5.2.0]
|
||||
[UCA version = 6.0.0]
|
||||
|
||||
0000; [,,]
|
||||
0001; [,,]
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# GraphemeBreakProperty-5.2.0.txt
|
||||
# Date: 2009-06-09, 21:40:09 GMT [MD]
|
||||
# GraphemeBreakProperty-6.0.0.txt
|
||||
# Date: 2010-09-01, 18:48:17 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -65,10 +65,9 @@ E0020..E007F ; Control
|
|||
05C4..05C5 ; Extend
|
||||
05C7 ; Extend
|
||||
0610..061A ; Extend
|
||||
064B..065E ; Extend
|
||||
064B..065F ; Extend
|
||||
0670 ; Extend
|
||||
06D6..06DC ; Extend
|
||||
06DE ; Extend
|
||||
06DF..06E4 ; Extend
|
||||
06E7..06E8 ; Extend
|
||||
06EA..06ED ; Extend
|
||||
|
@ -80,11 +79,13 @@ E0020..E007F ; Control
|
|||
081B..0823 ; Extend
|
||||
0825..0827 ; Extend
|
||||
0829..082D ; Extend
|
||||
0859..085B ; Extend
|
||||
0900..0902 ; Extend
|
||||
093A ; Extend
|
||||
093C ; Extend
|
||||
0941..0948 ; Extend
|
||||
094D ; Extend
|
||||
0951..0955 ; Extend
|
||||
0951..0957 ; Extend
|
||||
0962..0963 ; Extend
|
||||
0981 ; Extend
|
||||
09BC ; Extend
|
||||
|
@ -143,15 +144,10 @@ E0020..E007F ; Control
|
|||
0DD2..0DD4 ; Extend
|
||||
0DD6 ; Extend
|
||||
0DDF ; Extend
|
||||
0E30 ; Extend
|
||||
0E31 ; Extend
|
||||
0E32..0E33 ; Extend
|
||||
0E34..0E3A ; Extend
|
||||
0E45 ; Extend
|
||||
0E47..0E4E ; Extend
|
||||
0EB0 ; Extend
|
||||
0EB1 ; Extend
|
||||
0EB2..0EB3 ; Extend
|
||||
0EB4..0EB9 ; Extend
|
||||
0EBB..0EBC ; Extend
|
||||
0EC8..0ECD ; Extend
|
||||
|
@ -162,7 +158,7 @@ E0020..E007F ; Control
|
|||
0F71..0F7E ; Extend
|
||||
0F80..0F84 ; Extend
|
||||
0F86..0F87 ; Extend
|
||||
0F90..0F97 ; Extend
|
||||
0F8D..0F97 ; Extend
|
||||
0F99..0FBC ; Extend
|
||||
0FC6 ; Extend
|
||||
102D..1030 ; Extend
|
||||
|
@ -176,7 +172,7 @@ E0020..E007F ; Control
|
|||
1085..1086 ; Extend
|
||||
108D ; Extend
|
||||
109D ; Extend
|
||||
135F ; Extend
|
||||
135D..135F ; Extend
|
||||
1712..1714 ; Extend
|
||||
1732..1734 ; Extend
|
||||
1752..1753 ; Extend
|
||||
|
@ -208,6 +204,10 @@ E0020..E007F ; Control
|
|||
1B80..1B81 ; Extend
|
||||
1BA2..1BA5 ; Extend
|
||||
1BA8..1BA9 ; Extend
|
||||
1BE6 ; Extend
|
||||
1BE8..1BE9 ; Extend
|
||||
1BED ; Extend
|
||||
1BEF..1BF1 ; Extend
|
||||
1C2C..1C33 ; Extend
|
||||
1C36..1C37 ; Extend
|
||||
1CD0..1CD2 ; Extend
|
||||
|
@ -215,7 +215,7 @@ E0020..E007F ; Control
|
|||
1CE2..1CE8 ; Extend
|
||||
1CED ; Extend
|
||||
1DC0..1DE6 ; Extend
|
||||
1DFD..1DFF ; Extend
|
||||
1DFC..1DFF ; Extend
|
||||
200C..200D ; Extend
|
||||
20D0..20DC ; Extend
|
||||
20DD..20E0 ; Extend
|
||||
|
@ -223,6 +223,7 @@ E0020..E007F ; Control
|
|||
20E2..20E4 ; Extend
|
||||
20E5..20F0 ; Extend
|
||||
2CEF..2CF1 ; Extend
|
||||
2D7F ; Extend
|
||||
2DE0..2DFF ; Extend
|
||||
302A..302F ; Extend
|
||||
3099..309A ; Extend
|
||||
|
@ -265,6 +266,8 @@ FF9E..FF9F ; Extend
|
|||
10A0C..10A0F ; Extend
|
||||
10A38..10A3A ; Extend
|
||||
10A3F ; Extend
|
||||
11001 ; Extend
|
||||
11038..11046 ; Extend
|
||||
11080..11081 ; Extend
|
||||
110B3..110B6 ; Extend
|
||||
110B9..110BA ; Extend
|
||||
|
@ -277,7 +280,7 @@ FF9E..FF9F ; Extend
|
|||
1D242..1D244 ; Extend
|
||||
E0100..E01EF ; Extend
|
||||
|
||||
# Total code points: 1205
|
||||
# Total code points: 1234
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -292,9 +295,10 @@ AABB..AABC ; Prepend
|
|||
# ================================================
|
||||
|
||||
0903 ; SpacingMark
|
||||
093B ; SpacingMark
|
||||
093E..0940 ; SpacingMark
|
||||
0949..094C ; SpacingMark
|
||||
094E ; SpacingMark
|
||||
094E..094F ; SpacingMark
|
||||
0982..0983 ; SpacingMark
|
||||
09BF..09C0 ; SpacingMark
|
||||
09C7..09C8 ; SpacingMark
|
||||
|
@ -329,6 +333,11 @@ AABB..AABC ; Prepend
|
|||
0DD0..0DD1 ; SpacingMark
|
||||
0DD8..0DDE ; SpacingMark
|
||||
0DF2..0DF3 ; SpacingMark
|
||||
0E30 ; SpacingMark
|
||||
0E32..0E33 ; SpacingMark
|
||||
0E45 ; SpacingMark
|
||||
0EB0 ; SpacingMark
|
||||
0EB2..0EB3 ; SpacingMark
|
||||
0F3E..0F3F ; SpacingMark
|
||||
0F7F ; SpacingMark
|
||||
102B..102C ; SpacingMark
|
||||
|
@ -366,6 +375,10 @@ AABB..AABC ; Prepend
|
|||
1BA1 ; SpacingMark
|
||||
1BA6..1BA7 ; SpacingMark
|
||||
1BAA ; SpacingMark
|
||||
1BE7 ; SpacingMark
|
||||
1BEA..1BEC ; SpacingMark
|
||||
1BEE ; SpacingMark
|
||||
1BF2..1BF3 ; SpacingMark
|
||||
1C24..1C2B ; SpacingMark
|
||||
1C34..1C35 ; SpacingMark
|
||||
1CE1 ; SpacingMark
|
||||
|
@ -387,13 +400,15 @@ ABE3..ABE4 ; SpacingMark
|
|||
ABE6..ABE7 ; SpacingMark
|
||||
ABE9..ABEA ; SpacingMark
|
||||
ABEC ; SpacingMark
|
||||
11000 ; SpacingMark
|
||||
11002 ; SpacingMark
|
||||
11082 ; SpacingMark
|
||||
110B0..110B2 ; SpacingMark
|
||||
110B7..110B8 ; SpacingMark
|
||||
1D166 ; SpacingMark
|
||||
1D16D ; SpacingMark
|
||||
|
||||
# Total code points: 257
|
||||
# Total code points: 275
|
||||
|
||||
# ================================================
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# LineBreak-5.2.0.txt
|
||||
# Date: 2009-08-17, 12:21:00 PDT [KW]
|
||||
# LineBreak-6.0.0.txt
|
||||
# Date: 2010-08-18, 17:25:00 PDT [KW]
|
||||
#
|
||||
# Line Break Properties
|
||||
#
|
||||
|
@ -7,7 +7,7 @@
|
|||
# Unicode Character Database.
|
||||
# It contains both normative and informative data.
|
||||
#
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# The format is two fields separated by a semicolon.
|
||||
|
@ -29,6 +29,7 @@
|
|||
# CJK Compatibility Ideographs: U+F900..U+FAFF
|
||||
# CJK Unified Ideographs Extension B: U+20000..U+2A6DF
|
||||
# CJK Unified Ideographs Extension C: U+2A700..U+2B73F
|
||||
# CJK Unified Ideographs Extension D: U+2B740..U+2B81F
|
||||
# CJK Compatibility Ideographs Supplement: U+2F800..U+2FA1F
|
||||
# and any other reserved code points on
|
||||
# Planes 2 and 3: U+20000..U+2FFFD
|
||||
|
@ -133,7 +134,7 @@
|
|||
038E..03A1;AL
|
||||
03A3..0482;AL
|
||||
0483..0489;CM
|
||||
048A..0525;AL
|
||||
048A..0527;AL
|
||||
0531..0556;AL
|
||||
0559..055F;AL
|
||||
0561..0587;AL
|
||||
|
@ -158,8 +159,8 @@
|
|||
0610..061A;CM
|
||||
061B;EX
|
||||
061E..061F;EX
|
||||
0621..064A;AL
|
||||
064B..065E;CM
|
||||
0620..064A;AL
|
||||
064B..065F;CM
|
||||
0660..0669;NU
|
||||
066A;PO
|
||||
066B..066C;NU
|
||||
|
@ -169,8 +170,8 @@
|
|||
06D4;EX
|
||||
06D5;AL
|
||||
06D6..06DC;CM
|
||||
06DD;AL
|
||||
06DE..06E4;CM
|
||||
06DD..06DE;AL
|
||||
06DF..06E4;CM
|
||||
06E5..06E6;AL
|
||||
06E7..06E8;CM
|
||||
06E9;AL
|
||||
|
@ -201,18 +202,21 @@
|
|||
0828;AL
|
||||
0829..082D;CM
|
||||
0830..083E;AL
|
||||
0840..0858;AL
|
||||
0859..085B;CM
|
||||
085E;AL
|
||||
0900..0903;CM
|
||||
0904..0939;AL
|
||||
093C;CM
|
||||
093A..093C;CM
|
||||
093D;AL
|
||||
093E..094E;CM
|
||||
093E..094F;CM
|
||||
0950;AL
|
||||
0951..0955;CM
|
||||
0951..0957;CM
|
||||
0958..0961;AL
|
||||
0962..0963;CM
|
||||
0964..0965;BA
|
||||
0966..096F;NU
|
||||
0970..0972;AL
|
||||
0970..0977;AL
|
||||
0979..097F;AL
|
||||
0981..0983;CM
|
||||
0985..098C;AL
|
||||
|
@ -291,7 +295,7 @@
|
|||
0B5F..0B61;AL
|
||||
0B62..0B63;CM
|
||||
0B66..0B6F;NU
|
||||
0B70..0B71;AL
|
||||
0B70..0B77;AL
|
||||
0B82;CM
|
||||
0B83;AL
|
||||
0B85..0B8A;AL
|
||||
|
@ -348,12 +352,12 @@
|
|||
0D02..0D03;CM
|
||||
0D05..0D0C;AL
|
||||
0D0E..0D10;AL
|
||||
0D12..0D28;AL
|
||||
0D2A..0D39;AL
|
||||
0D12..0D3A;AL
|
||||
0D3D;AL
|
||||
0D3E..0D44;CM
|
||||
0D46..0D48;CM
|
||||
0D4A..0D4D;CM
|
||||
0D4E;AL
|
||||
0D57;CM
|
||||
0D60..0D61;AL
|
||||
0D62..0D63;CM
|
||||
|
@ -432,8 +436,8 @@
|
|||
0F80..0F84;CM
|
||||
0F85;BA
|
||||
0F86..0F87;CM
|
||||
0F88..0F8B;AL
|
||||
0F90..0F97;CM
|
||||
0F88..0F8C;AL
|
||||
0F8D..0F97;CM
|
||||
0F99..0FBC;CM
|
||||
0FBE..0FBF;BA
|
||||
0FC0..0FC5;AL
|
||||
|
@ -444,6 +448,7 @@
|
|||
0FD2;BA
|
||||
0FD3;BB
|
||||
0FD4..0FD8;AL
|
||||
0FD9..0FDA;GL
|
||||
1000..103F;SA
|
||||
1040..1049;NU
|
||||
104A..104B;BA
|
||||
|
@ -472,7 +477,7 @@
|
|||
12D8..1310;AL
|
||||
1312..1315;AL
|
||||
1318..135A;AL
|
||||
135F;CM
|
||||
135D..135F;CM
|
||||
1360;AL
|
||||
1361;BA
|
||||
1362..137C;AL
|
||||
|
@ -534,7 +539,8 @@
|
|||
1970..1974;SA
|
||||
1980..19AB;SA
|
||||
19B0..19C9;SA
|
||||
19D0..19DA;NU
|
||||
19D0..19D9;NU
|
||||
19DA;SA
|
||||
19DE..19DF;SA
|
||||
19E0..1A16;AL
|
||||
1A17..1A1B;CM
|
||||
|
@ -561,7 +567,9 @@
|
|||
1BA1..1BAA;CM
|
||||
1BAE..1BAF;AL
|
||||
1BB0..1BB9;NU
|
||||
1C00..1C23;AL
|
||||
1BC0..1BE5;AL
|
||||
1BE6..1BF3;CM
|
||||
1BFC..1C23;AL
|
||||
1C24..1C37;CM
|
||||
1C3B..1C3F;BA
|
||||
1C40..1C49;NU
|
||||
|
@ -578,7 +586,7 @@
|
|||
1CF2;CM
|
||||
1D00..1DBF;AL
|
||||
1DC0..1DE6;CM
|
||||
1DFD..1DFF;CM
|
||||
1DFC..1DFF;CM
|
||||
1E00..1F15;AL
|
||||
1F18..1F1D;AL
|
||||
1F20..1F45;AL
|
||||
|
@ -650,12 +658,12 @@
|
|||
2085..208C;AL
|
||||
208D;OP
|
||||
208E;CL
|
||||
2090..2094;AL
|
||||
2090..209C;AL
|
||||
20A0..20A6;PR
|
||||
20A7;PO
|
||||
20A8..20B5;PR
|
||||
20B6;PO
|
||||
20B7..20B8;PR
|
||||
20B7..20B9;PR
|
||||
20D0..20F0;CM
|
||||
2100..2102;AL
|
||||
2103;PO
|
||||
|
@ -750,7 +758,7 @@
|
|||
2313..2328;AL
|
||||
2329;OP
|
||||
232A;CL
|
||||
232B..23E8;AL
|
||||
232B..23F3;AL
|
||||
2400..2426;AL
|
||||
2440..244A;AL
|
||||
2460..24FE;AI
|
||||
|
@ -816,20 +824,17 @@
|
|||
26BE..26BF;AI
|
||||
26C0..26C3;AL
|
||||
26C4..26CD;AI
|
||||
26CE;AL
|
||||
26CF..26E1;AI
|
||||
26E2;AL
|
||||
26E3;AI
|
||||
26E4..26E7;AL
|
||||
26E8..26FF;AI
|
||||
2701..2704;AL
|
||||
2706..2709;AL
|
||||
270C..2727;AL
|
||||
2729..274B;AL
|
||||
274D;AL
|
||||
274F..2752;AL
|
||||
2756;AL
|
||||
2701..2756;AL
|
||||
2757;AI
|
||||
2758..275A;AL
|
||||
275B..275E;QU
|
||||
2761;AL
|
||||
275F..2761;AL
|
||||
2762..2763;EX
|
||||
2764..2767;AL
|
||||
2768;OP
|
||||
|
@ -847,15 +852,12 @@
|
|||
2774;OP
|
||||
2775;CL
|
||||
2776..2793;AI
|
||||
2794;AL
|
||||
2798..27AF;AL
|
||||
27B1..27BE;AL
|
||||
27C0..27C4;AL
|
||||
2794..27C4;AL
|
||||
27C5;OP
|
||||
27C6;CL
|
||||
27C7..27CA;AL
|
||||
27CC;AL
|
||||
27D0..27E5;AL
|
||||
27CE..27E5;AL
|
||||
27E6;OP
|
||||
27E7;CL
|
||||
27E8;OP
|
||||
|
@ -912,6 +914,8 @@
|
|||
2D00..2D25;AL
|
||||
2D30..2D65;AL
|
||||
2D6F;AL
|
||||
2D70;BA
|
||||
2D7F;CM
|
||||
2D80..2D96;AL
|
||||
2DA0..2DA6;AL
|
||||
2DA8..2DAE;AL
|
||||
|
@ -1030,7 +1034,7 @@
|
|||
30FF;ID
|
||||
3105..312D;ID
|
||||
3131..318E;ID
|
||||
3190..31B7;ID
|
||||
3190..31BA;ID
|
||||
31C0..31E3;ID
|
||||
31F0..31FF;NS
|
||||
3200..321E;ID
|
||||
|
@ -1056,8 +1060,7 @@ A60F;BA
|
|||
A610..A61F;AL
|
||||
A620..A629;NU
|
||||
A62A..A62B;AL
|
||||
A640..A65F;AL
|
||||
A662..A66E;AL
|
||||
A640..A66E;AL
|
||||
A66F..A672;CM
|
||||
A673;AL
|
||||
A67C..A67D;CM
|
||||
|
@ -1066,8 +1069,10 @@ A6A0..A6EF;AL
|
|||
A6F0..A6F1;CM
|
||||
A6F2;AL
|
||||
A6F3..A6F7;BA
|
||||
A700..A78C;AL
|
||||
A7FB..A801;AL
|
||||
A700..A78E;AL
|
||||
A790..A791;AL
|
||||
A7A0..A7A9;AL
|
||||
A7FA..A801;AL
|
||||
A802;CM
|
||||
A803..A805;AL
|
||||
A806;CM
|
||||
|
@ -1118,6 +1123,11 @@ AA5D..AA5F;BA
|
|||
AA60..AA7B;SA
|
||||
AA80..AAC2;SA
|
||||
AADB..AADF;SA
|
||||
AB01..AB06;AL
|
||||
AB09..AB0E;AL
|
||||
AB11..AB16;AL
|
||||
AB20..AB26;AL
|
||||
AB28..AB2E;AL
|
||||
ABC0..ABE2;AL
|
||||
ABE3..ABEA;CM
|
||||
ABEB;BA
|
||||
|
@ -1942,7 +1952,7 @@ FB38..FB3C;AL
|
|||
FB3E;AL
|
||||
FB40..FB41;AL
|
||||
FB43..FB44;AL
|
||||
FB46..FBB1;AL
|
||||
FB46..FBC1;AL
|
||||
FBD3..FD3D;AL
|
||||
FD3E;OP
|
||||
FD3F;CL
|
||||
|
@ -2105,6 +2115,13 @@ FFFD;AI
|
|||
10B78..10B7F;AL
|
||||
10C00..10C48;AL
|
||||
10E60..10E7E;AL
|
||||
11000..11002;CM
|
||||
11003..11037;AL
|
||||
11038..11046;CM
|
||||
11047..11048;BA
|
||||
11049..1104D;AL
|
||||
11052..11065;AL
|
||||
11066..1106F;NU
|
||||
11080..11082;CM
|
||||
11083..110AF;AL
|
||||
110B0..110BA;CM
|
||||
|
@ -2127,6 +2144,8 @@ FFFD;AI
|
|||
13379;OP
|
||||
1337A..1337B;CL
|
||||
1337C..1342E;AL
|
||||
16800..16A38;AL
|
||||
1B000..1B001;ID
|
||||
1D000..1D0F5;AL
|
||||
1D100..1D126;AL
|
||||
1D129..1D164;AL
|
||||
|
@ -2166,29 +2185,54 @@ FFFD;AI
|
|||
1D7CE..1D7FF;NU
|
||||
1F000..1F02B;AL
|
||||
1F030..1F093;AL
|
||||
1F0A0..1F0AE;AL
|
||||
1F0B1..1F0BE;AL
|
||||
1F0C1..1F0CF;AL
|
||||
1F0D1..1F0DF;AL
|
||||
1F100..1F10A;AI
|
||||
1F110..1F12D;AI
|
||||
1F12E;AL
|
||||
1F131;AI
|
||||
1F13D;AI
|
||||
1F13F;AI
|
||||
1F142;AI
|
||||
1F146;AI
|
||||
1F14A..1F14E;AI
|
||||
1F157;AI
|
||||
1F15F;AI
|
||||
1F179;AI
|
||||
1F17B..1F17C;AI
|
||||
1F17F;AI
|
||||
1F18A..1F18D;AI
|
||||
1F190;AI
|
||||
1F200;ID
|
||||
1F210..1F231;ID
|
||||
1F130..1F169;AI
|
||||
1F170..1F19A;AI
|
||||
1F1E6..1F1FF;AL
|
||||
1F200..1F202;ID
|
||||
1F210..1F23A;ID
|
||||
1F240..1F248;ID
|
||||
1F250..1F251;ID
|
||||
1F300..1F320;AL
|
||||
1F330..1F335;AL
|
||||
1F337..1F37C;AL
|
||||
1F380..1F393;AL
|
||||
1F3A0..1F3C4;AL
|
||||
1F3C6..1F3CA;AL
|
||||
1F3E0..1F3F0;AL
|
||||
1F400..1F43E;AL
|
||||
1F440;AL
|
||||
1F442..1F4F7;AL
|
||||
1F4F9..1F4FC;AL
|
||||
1F500..1F53D;AL
|
||||
1F550..1F567;AL
|
||||
1F5FB..1F5FF;AL
|
||||
1F601..1F610;AL
|
||||
1F612..1F614;AL
|
||||
1F616;AL
|
||||
1F618;AL
|
||||
1F61A;AL
|
||||
1F61C..1F61E;AL
|
||||
1F620..1F625;AL
|
||||
1F628..1F62B;AL
|
||||
1F62D;AL
|
||||
1F630..1F633;AL
|
||||
1F635..1F640;AL
|
||||
1F645..1F64F;AL
|
||||
1F680..1F6C5;AL
|
||||
1F700..1F773;AL
|
||||
20000..2A6D6;ID
|
||||
2A6D7..2A6FF;ID
|
||||
2A700..2B734;ID
|
||||
2B735..2F7FF;ID
|
||||
2B735..2B73F;ID
|
||||
2B740..2B81D;ID
|
||||
2B81E..2F7FF;ID
|
||||
2F800..2FA1D;ID
|
||||
2FA1E..2FFFD;ID
|
||||
30000..3FFFD;ID
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
# NameAliases-5.2.0.txt
|
||||
# Date: 2009-05-22, 13:05:00 PDT [KW]
|
||||
# NameAliases-6.0.0.txt
|
||||
# Date: 2010-05-10, 11:58:00 PDT [KW]
|
||||
#
|
||||
# This file is a normative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# Copyright (c) 2005-2009 Unicode, Inc.
|
||||
# Copyright (c) 2005-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# This file defines the formal name aliases for Unicode characters.
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
# NormalizationCorrections-5.2.0.txt
|
||||
# Date: 2009-05-22, 13:54:00 PDT [KW]
|
||||
# NormalizationCorrections-6.0.0.txt
|
||||
# Date: 2010-05-19, 11:21:00 PDT [KW]
|
||||
#
|
||||
# This file is a normative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# The normalization stabilization policy of the Unicode
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# NormalizationTest-5.2.0.txt
|
||||
# Date: 2009-08-22, 04:58:39 GMT [MD]
|
||||
# NormalizationTest-6.0.0.txt
|
||||
# Date: 2010-05-18, 00:49:30 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -1196,6 +1196,14 @@
|
|||
2092;2092;2092;006F;006F;
|
||||
2093;2093;2093;0078;0078;
|
||||
2094;2094;2094;0259;0259;
|
||||
2095;2095;2095;0068;0068;
|
||||
2096;2096;2096;006B;006B;
|
||||
2097;2097;2097;006C;006C;
|
||||
2098;2098;2098;006D;006D;
|
||||
2099;2099;2099;006E;006E;
|
||||
209A;209A;209A;0070;0070;
|
||||
209B;209B;209B;0073;0073;
|
||||
209C;209C;209C;0074;0074;
|
||||
20A8;20A8;20A8;0052 0073;0052 0073;
|
||||
2100;2100;2100;0061 002F 0063;0061 002F 0063;
|
||||
2101;2101;2101;0061 002F 0073;0061 002F 0073;
|
||||
|
@ -16155,18 +16163,42 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
1F12C;1F12C;1F12C;0052;0052;
|
||||
1F12D;1F12D;1F12D;0043 0044;0043 0044;
|
||||
1F12E;1F12E;1F12E;0057 005A;0057 005A;
|
||||
1F130;1F130;1F130;0041;0041;
|
||||
1F131;1F131;1F131;0042;0042;
|
||||
1F132;1F132;1F132;0043;0043;
|
||||
1F133;1F133;1F133;0044;0044;
|
||||
1F134;1F134;1F134;0045;0045;
|
||||
1F135;1F135;1F135;0046;0046;
|
||||
1F136;1F136;1F136;0047;0047;
|
||||
1F137;1F137;1F137;0048;0048;
|
||||
1F138;1F138;1F138;0049;0049;
|
||||
1F139;1F139;1F139;004A;004A;
|
||||
1F13A;1F13A;1F13A;004B;004B;
|
||||
1F13B;1F13B;1F13B;004C;004C;
|
||||
1F13C;1F13C;1F13C;004D;004D;
|
||||
1F13D;1F13D;1F13D;004E;004E;
|
||||
1F13E;1F13E;1F13E;004F;004F;
|
||||
1F13F;1F13F;1F13F;0050;0050;
|
||||
1F140;1F140;1F140;0051;0051;
|
||||
1F141;1F141;1F141;0052;0052;
|
||||
1F142;1F142;1F142;0053;0053;
|
||||
1F143;1F143;1F143;0054;0054;
|
||||
1F144;1F144;1F144;0055;0055;
|
||||
1F145;1F145;1F145;0056;0056;
|
||||
1F146;1F146;1F146;0057;0057;
|
||||
1F147;1F147;1F147;0058;0058;
|
||||
1F148;1F148;1F148;0059;0059;
|
||||
1F149;1F149;1F149;005A;005A;
|
||||
1F14A;1F14A;1F14A;0048 0056;0048 0056;
|
||||
1F14B;1F14B;1F14B;004D 0056;004D 0056;
|
||||
1F14C;1F14C;1F14C;0053 0044;0053 0044;
|
||||
1F14D;1F14D;1F14D;0053 0053;0053 0053;
|
||||
1F14E;1F14E;1F14E;0050 0050 0056;0050 0050 0056;
|
||||
1F14F;1F14F;1F14F;0057 0043;0057 0043;
|
||||
1F190;1F190;1F190;0044 004A;0044 004A;
|
||||
1F200;1F200;1F200;307B 304B;307B 304B;
|
||||
1F201;1F201;1F201;30B3 30B3;30B3 30B3;
|
||||
1F202;1F202;1F202;30B5;30B5;
|
||||
1F210;1F210;1F210;624B;624B;
|
||||
1F211;1F211;1F211;5B57;5B57;
|
||||
1F212;1F212;1F212;53CC;53CC;
|
||||
|
@ -16201,6 +16233,15 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
1F22F;1F22F;1F22F;6307;6307;
|
||||
1F230;1F230;1F230;8D70;8D70;
|
||||
1F231;1F231;1F231;6253;6253;
|
||||
1F232;1F232;1F232;7981;7981;
|
||||
1F233;1F233;1F233;7A7A;7A7A;
|
||||
1F234;1F234;1F234;5408;5408;
|
||||
1F235;1F235;1F235;6E80;6E80;
|
||||
1F236;1F236;1F236;6709;6709;
|
||||
1F237;1F237;1F237;6708;6708;
|
||||
1F238;1F238;1F238;7533;7533;
|
||||
1F239;1F239;1F239;5272;5272;
|
||||
1F23A;1F23A;1F23A;55B6;55B6;
|
||||
1F240;1F240;1F240;3014 672C 3015;3014 672C 3015;
|
||||
1F241;1F241;1F241;3014 4E09 3015;3014 4E09 3015;
|
||||
1F242;1F242;1F242;3014 4E8C 3015;3014 4E8C 3015;
|
||||
|
@ -16210,6 +16251,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
1F246;1F246;1F246;3014 76D7 3015;3014 76D7 3015;
|
||||
1F247;1F247;1F247;3014 52DD 3015;3014 52DD 3015;
|
||||
1F248;1F248;1F248;3014 6557 3015;3014 6557 3015;
|
||||
1F250;1F250;1F250;5F97;5F97;
|
||||
1F251;1F251;1F251;53EF;53EF;
|
||||
2F800;4E3D;4E3D;4E3D;4E3D;
|
||||
2F801;4E38;4E38;4E38;4E38;
|
||||
2F802;4E41;4E41;4E41;4E41;
|
||||
|
@ -17151,6 +17194,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 065D 0315 0300 05AE 0062;0061 05AE 065D 0300 0315 0062;0061 05AE 065D 0300 0315 0062;0061 05AE 065D 0300 0315 0062;0061 05AE 065D 0300 0315 0062;
|
||||
0061 0315 0300 05AE 065E 0062;00E0 05AE 065E 0315 0062;0061 05AE 0300 065E 0315 0062;00E0 05AE 065E 0315 0062;0061 05AE 0300 065E 0315 0062;
|
||||
0061 065E 0315 0300 05AE 0062;0061 05AE 065E 0300 0315 0062;0061 05AE 065E 0300 0315 0062;0061 05AE 065E 0300 0315 0062;0061 05AE 065E 0300 0315 0062;
|
||||
0061 059A 0316 302A 065F 0062;0061 302A 0316 065F 059A 0062;0061 302A 0316 065F 059A 0062;0061 302A 0316 065F 059A 0062;0061 302A 0316 065F 059A 0062;
|
||||
0061 065F 059A 0316 302A 0062;0061 302A 065F 0316 059A 0062;0061 302A 065F 0316 059A 0062;0061 302A 065F 0316 059A 0062;0061 302A 065F 0316 059A 0062;
|
||||
0061 0711 0670 0652 0670 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;
|
||||
0061 0670 0711 0670 0652 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;
|
||||
0061 0315 0300 05AE 06D6 0062;00E0 05AE 06D6 0315 0062;0061 05AE 0300 06D6 0315 0062;00E0 05AE 06D6 0315 0062;0061 05AE 0300 06D6 0315 0062;
|
||||
|
@ -17307,6 +17352,12 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 082C 0315 0300 05AE 0062;0061 05AE 082C 0300 0315 0062;0061 05AE 082C 0300 0315 0062;0061 05AE 082C 0300 0315 0062;0061 05AE 082C 0300 0315 0062;
|
||||
0061 0315 0300 05AE 082D 0062;00E0 05AE 082D 0315 0062;0061 05AE 0300 082D 0315 0062;00E0 05AE 082D 0315 0062;0061 05AE 0300 082D 0315 0062;
|
||||
0061 082D 0315 0300 05AE 0062;0061 05AE 082D 0300 0315 0062;0061 05AE 082D 0300 0315 0062;0061 05AE 082D 0300 0315 0062;0061 05AE 082D 0300 0315 0062;
|
||||
0061 059A 0316 302A 0859 0062;0061 302A 0316 0859 059A 0062;0061 302A 0316 0859 059A 0062;0061 302A 0316 0859 059A 0062;0061 302A 0316 0859 059A 0062;
|
||||
0061 0859 059A 0316 302A 0062;0061 302A 0859 0316 059A 0062;0061 302A 0859 0316 059A 0062;0061 302A 0859 0316 059A 0062;0061 302A 0859 0316 059A 0062;
|
||||
0061 059A 0316 302A 085A 0062;0061 302A 0316 085A 059A 0062;0061 302A 0316 085A 059A 0062;0061 302A 0316 085A 059A 0062;0061 302A 0316 085A 059A 0062;
|
||||
0061 085A 059A 0316 302A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;
|
||||
0061 059A 0316 302A 085B 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;
|
||||
0061 085B 059A 0316 302A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;
|
||||
0061 3099 093C 0334 093C 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;
|
||||
0061 093C 3099 093C 0334 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;0061 0334 093C 093C 3099 0062;
|
||||
0061 05B0 094D 3099 094D 0062;0061 3099 094D 094D 05B0 0062;0061 3099 094D 094D 05B0 0062;0061 3099 094D 094D 05B0 0062;0061 3099 094D 094D 05B0 0062;
|
||||
|
@ -17423,6 +17474,10 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 103A 05B0 094D 3099 0062;0061 3099 103A 094D 05B0 0062;0061 3099 103A 094D 05B0 0062;0061 3099 103A 094D 05B0 0062;0061 3099 103A 094D 05B0 0062;
|
||||
0061 059A 0316 302A 108D 0062;0061 302A 0316 108D 059A 0062;0061 302A 0316 108D 059A 0062;0061 302A 0316 108D 059A 0062;0061 302A 0316 108D 059A 0062;
|
||||
0061 108D 059A 0316 302A 0062;0061 302A 108D 0316 059A 0062;0061 302A 108D 0316 059A 0062;0061 302A 108D 0316 059A 0062;0061 302A 108D 0316 059A 0062;
|
||||
0061 0315 0300 05AE 135D 0062;00E0 05AE 135D 0315 0062;0061 05AE 0300 135D 0315 0062;00E0 05AE 135D 0315 0062;0061 05AE 0300 135D 0315 0062;
|
||||
0061 135D 0315 0300 05AE 0062;0061 05AE 135D 0300 0315 0062;0061 05AE 135D 0300 0315 0062;0061 05AE 135D 0300 0315 0062;0061 05AE 135D 0300 0315 0062;
|
||||
0061 0315 0300 05AE 135E 0062;00E0 05AE 135E 0315 0062;0061 05AE 0300 135E 0315 0062;00E0 05AE 135E 0315 0062;0061 05AE 0300 135E 0315 0062;
|
||||
0061 135E 0315 0300 05AE 0062;0061 05AE 135E 0300 0315 0062;0061 05AE 135E 0300 0315 0062;0061 05AE 135E 0300 0315 0062;0061 05AE 135E 0300 0315 0062;
|
||||
0061 0315 0300 05AE 135F 0062;00E0 05AE 135F 0315 0062;0061 05AE 0300 135F 0315 0062;00E0 05AE 135F 0315 0062;0061 05AE 0300 135F 0315 0062;
|
||||
0061 135F 0315 0300 05AE 0062;0061 05AE 135F 0300 0315 0062;0061 05AE 135F 0300 0315 0062;0061 05AE 135F 0300 0315 0062;0061 05AE 135F 0300 0315 0062;
|
||||
0061 05B0 094D 3099 1714 0062;0061 3099 094D 1714 05B0 0062;0061 3099 094D 1714 05B0 0062;0061 3099 094D 1714 05B0 0062;0061 3099 094D 1714 05B0 0062;
|
||||
|
@ -17489,6 +17544,12 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 1B73 0315 0300 05AE 0062;0061 05AE 1B73 0300 0315 0062;0061 05AE 1B73 0300 0315 0062;0061 05AE 1B73 0300 0315 0062;0061 05AE 1B73 0300 0315 0062;
|
||||
0061 05B0 094D 3099 1BAA 0062;0061 3099 094D 1BAA 05B0 0062;0061 3099 094D 1BAA 05B0 0062;0061 3099 094D 1BAA 05B0 0062;0061 3099 094D 1BAA 05B0 0062;
|
||||
0061 1BAA 05B0 094D 3099 0062;0061 3099 1BAA 094D 05B0 0062;0061 3099 1BAA 094D 05B0 0062;0061 3099 1BAA 094D 05B0 0062;0061 3099 1BAA 094D 05B0 0062;
|
||||
0061 3099 093C 0334 1BE6 0062;0061 0334 093C 1BE6 3099 0062;0061 0334 093C 1BE6 3099 0062;0061 0334 093C 1BE6 3099 0062;0061 0334 093C 1BE6 3099 0062;
|
||||
0061 1BE6 3099 093C 0334 0062;0061 0334 1BE6 093C 3099 0062;0061 0334 1BE6 093C 3099 0062;0061 0334 1BE6 093C 3099 0062;0061 0334 1BE6 093C 3099 0062;
|
||||
0061 05B0 094D 3099 1BF2 0062;0061 3099 094D 1BF2 05B0 0062;0061 3099 094D 1BF2 05B0 0062;0061 3099 094D 1BF2 05B0 0062;0061 3099 094D 1BF2 05B0 0062;
|
||||
0061 1BF2 05B0 094D 3099 0062;0061 3099 1BF2 094D 05B0 0062;0061 3099 1BF2 094D 05B0 0062;0061 3099 1BF2 094D 05B0 0062;0061 3099 1BF2 094D 05B0 0062;
|
||||
0061 05B0 094D 3099 1BF3 0062;0061 3099 094D 1BF3 05B0 0062;0061 3099 094D 1BF3 05B0 0062;0061 3099 094D 1BF3 05B0 0062;0061 3099 094D 1BF3 05B0 0062;
|
||||
0061 1BF3 05B0 094D 3099 0062;0061 3099 1BF3 094D 05B0 0062;0061 3099 1BF3 094D 05B0 0062;0061 3099 1BF3 094D 05B0 0062;0061 3099 1BF3 094D 05B0 0062;
|
||||
0061 3099 093C 0334 1C37 0062;0061 0334 093C 1C37 3099 0062;0061 0334 093C 1C37 3099 0062;0061 0334 093C 1C37 3099 0062;0061 0334 093C 1C37 3099 0062;
|
||||
0061 1C37 3099 093C 0334 0062;0061 0334 1C37 093C 3099 0062;0061 0334 1C37 093C 3099 0062;0061 0334 1C37 093C 3099 0062;0061 0334 1C37 093C 3099 0062;
|
||||
0061 0315 0300 05AE 1CD0 0062;00E0 05AE 1CD0 0315 0062;0061 05AE 0300 1CD0 0315 0062;00E0 05AE 1CD0 0315 0062;0061 05AE 0300 1CD0 0315 0062;
|
||||
|
@ -17617,6 +17678,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 1DE5 0315 0300 05AE 0062;0061 05AE 1DE5 0300 0315 0062;0061 05AE 1DE5 0300 0315 0062;0061 05AE 1DE5 0300 0315 0062;0061 05AE 1DE5 0300 0315 0062;
|
||||
0061 0315 0300 05AE 1DE6 0062;00E0 05AE 1DE6 0315 0062;0061 05AE 0300 1DE6 0315 0062;00E0 05AE 1DE6 0315 0062;0061 05AE 0300 1DE6 0315 0062;
|
||||
0061 1DE6 0315 0300 05AE 0062;0061 05AE 1DE6 0300 0315 0062;0061 05AE 1DE6 0300 0315 0062;0061 05AE 1DE6 0300 0315 0062;0061 05AE 1DE6 0300 0315 0062;
|
||||
0061 035D 035C 0315 1DFC 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;0061 0315 035C 1DFC 035D 0062;
|
||||
0061 1DFC 035D 035C 0315 0062;0061 0315 1DFC 035C 035D 0062;0061 0315 1DFC 035C 035D 0062;0061 0315 1DFC 035C 035D 0062;0061 0315 1DFC 035C 035D 0062;
|
||||
0061 059A 0316 302A 1DFD 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;0061 302A 0316 1DFD 059A 0062;
|
||||
0061 1DFD 059A 0316 302A 0062;0061 302A 1DFD 0316 059A 0062;0061 302A 1DFD 0316 059A 0062;0061 302A 1DFD 0316 059A 0062;0061 302A 1DFD 0316 059A 0062;
|
||||
0061 0315 0300 05AE 1DFE 0062;00E0 05AE 1DFE 0315 0062;0061 05AE 0300 1DFE 0315 0062;00E0 05AE 1DFE 0315 0062;0061 05AE 0300 1DFE 0315 0062;
|
||||
|
@ -17681,6 +17744,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 2CF0 0315 0300 05AE 0062;0061 05AE 2CF0 0300 0315 0062;0061 05AE 2CF0 0300 0315 0062;0061 05AE 2CF0 0300 0315 0062;0061 05AE 2CF0 0300 0315 0062;
|
||||
0061 0315 0300 05AE 2CF1 0062;00E0 05AE 2CF1 0315 0062;0061 05AE 0300 2CF1 0315 0062;00E0 05AE 2CF1 0315 0062;0061 05AE 0300 2CF1 0315 0062;
|
||||
0061 2CF1 0315 0300 05AE 0062;0061 05AE 2CF1 0300 0315 0062;0061 05AE 2CF1 0300 0315 0062;0061 05AE 2CF1 0300 0315 0062;0061 05AE 2CF1 0300 0315 0062;
|
||||
0061 05B0 094D 3099 2D7F 0062;0061 3099 094D 2D7F 05B0 0062;0061 3099 094D 2D7F 05B0 0062;0061 3099 094D 2D7F 05B0 0062;0061 3099 094D 2D7F 05B0 0062;
|
||||
0061 2D7F 05B0 094D 3099 0062;0061 3099 2D7F 094D 05B0 0062;0061 3099 2D7F 094D 05B0 0062;0061 3099 2D7F 094D 05B0 0062;0061 3099 2D7F 094D 05B0 0062;
|
||||
0061 0315 0300 05AE 2DE0 0062;00E0 05AE 2DE0 0315 0062;0061 05AE 0300 2DE0 0315 0062;00E0 05AE 2DE0 0315 0062;0061 05AE 0300 2DE0 0315 0062;
|
||||
0061 2DE0 0315 0300 05AE 0062;0061 05AE 2DE0 0300 0315 0062;0061 05AE 2DE0 0300 0315 0062;0061 05AE 2DE0 0300 0315 0062;0061 05AE 2DE0 0300 0315 0062;
|
||||
0061 0315 0300 05AE 2DE1 0062;00E0 05AE 2DE1 0315 0062;0061 05AE 0300 2DE1 0315 0062;00E0 05AE 2DE1 0315 0062;0061 05AE 0300 2DE1 0315 0062;
|
||||
|
@ -17873,6 +17938,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
|
|||
0061 10A3A 059A 0316 302A 0062;0061 302A 10A3A 0316 059A 0062;0061 302A 10A3A 0316 059A 0062;0061 302A 10A3A 0316 059A 0062;0061 302A 10A3A 0316 059A 0062;
|
||||
0061 05B0 094D 3099 10A3F 0062;0061 3099 094D 10A3F 05B0 0062;0061 3099 094D 10A3F 05B0 0062;0061 3099 094D 10A3F 05B0 0062;0061 3099 094D 10A3F 05B0 0062;
|
||||
0061 10A3F 05B0 094D 3099 0062;0061 3099 10A3F 094D 05B0 0062;0061 3099 10A3F 094D 05B0 0062;0061 3099 10A3F 094D 05B0 0062;0061 3099 10A3F 094D 05B0 0062;
|
||||
0061 05B0 094D 3099 11046 0062;0061 3099 094D 11046 05B0 0062;0061 3099 094D 11046 05B0 0062;0061 3099 094D 11046 05B0 0062;0061 3099 094D 11046 05B0 0062;
|
||||
0061 11046 05B0 094D 3099 0062;0061 3099 11046 094D 05B0 0062;0061 3099 11046 094D 05B0 0062;0061 3099 11046 094D 05B0 0062;0061 3099 11046 094D 05B0 0062;
|
||||
0061 05B0 094D 3099 110B9 0062;0061 3099 094D 110B9 05B0 0062;0061 3099 094D 110B9 05B0 0062;0061 3099 094D 110B9 05B0 0062;0061 3099 094D 110B9 05B0 0062;
|
||||
0061 110B9 05B0 094D 3099 0062;0061 3099 110B9 094D 05B0 0062;0061 3099 110B9 094D 05B0 0062;0061 3099 110B9 094D 05B0 0062;0061 3099 110B9 094D 05B0 0062;
|
||||
0061 3099 093C 0334 110BA 0062;0061 0334 093C 110BA 3099 0062;0061 0334 093C 110BA 3099 0062;0061 0334 093C 110BA 3099 0062;0061 0334 093C 110BA 3099 0062;
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# PropList-5.2.0.txt
|
||||
# Date: 2009-08-22, 04:58:40 GMT [MD]
|
||||
# PropList-6.0.0.txt
|
||||
# Date: 2010-08-19, 00:48:28 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -126,6 +126,7 @@ FF63 ; Quotation_Mark
|
|||
070C ; Terminal_Punctuation
|
||||
07F8..07F9 ; Terminal_Punctuation
|
||||
0830..083E ; Terminal_Punctuation
|
||||
085E ; Terminal_Punctuation
|
||||
0964..0965 ; Terminal_Punctuation
|
||||
0E5A..0E5B ; Terminal_Punctuation
|
||||
0F08 ; Terminal_Punctuation
|
||||
|
@ -172,10 +173,11 @@ FF64 ; Terminal_Punctuation
|
|||
10857 ; Terminal_Punctuation
|
||||
1091F ; Terminal_Punctuation
|
||||
10B3A..10B3F ; Terminal_Punctuation
|
||||
11047..1104D ; Terminal_Punctuation
|
||||
110BE..110C1 ; Terminal_Punctuation
|
||||
12470..12473 ; Terminal_Punctuation
|
||||
|
||||
# Total code points: 161
|
||||
# Total code points: 169
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -197,6 +199,7 @@ FF64 ; Terminal_Punctuation
|
|||
20E5..20E6 ; Other_Math
|
||||
20EB..20EF ; Other_Math
|
||||
2102 ; Other_Math
|
||||
2107 ; Other_Math
|
||||
210A..2113 ; Other_Math
|
||||
2115 ; Other_Math
|
||||
2119..211D ; Other_Math
|
||||
|
@ -318,7 +321,7 @@ FF3E ; Other_Math
|
|||
1D7C4..1D7CB ; Other_Math
|
||||
1D7CE..1D7FF ; Other_Math
|
||||
|
||||
# Total code points: 1216
|
||||
# Total code points: 1217
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -349,7 +352,7 @@ FF41..FF46 ; Hex_Digit
|
|||
05C7 ; Other_Alphabetic
|
||||
0610..061A ; Other_Alphabetic
|
||||
064B..0657 ; Other_Alphabetic
|
||||
0659..065E ; Other_Alphabetic
|
||||
0659..065F ; Other_Alphabetic
|
||||
0670 ; Other_Alphabetic
|
||||
06D6..06DC ; Other_Alphabetic
|
||||
06E1..06E4 ; Other_Alphabetic
|
||||
|
@ -364,11 +367,13 @@ FF41..FF46 ; Hex_Digit
|
|||
0829..082C ; Other_Alphabetic
|
||||
0900..0902 ; Other_Alphabetic
|
||||
0903 ; Other_Alphabetic
|
||||
093A ; Other_Alphabetic
|
||||
093B ; Other_Alphabetic
|
||||
093E..0940 ; Other_Alphabetic
|
||||
0941..0948 ; Other_Alphabetic
|
||||
0949..094C ; Other_Alphabetic
|
||||
094E ; Other_Alphabetic
|
||||
0955 ; Other_Alphabetic
|
||||
094E..094F ; Other_Alphabetic
|
||||
0955..0957 ; Other_Alphabetic
|
||||
0962..0963 ; Other_Alphabetic
|
||||
0981 ; Other_Alphabetic
|
||||
0982..0983 ; Other_Alphabetic
|
||||
|
@ -453,7 +458,7 @@ FF41..FF46 ; Hex_Digit
|
|||
0F71..0F7E ; Other_Alphabetic
|
||||
0F7F ; Other_Alphabetic
|
||||
0F80..0F81 ; Other_Alphabetic
|
||||
0F90..0F97 ; Other_Alphabetic
|
||||
0F8D..0F97 ; Other_Alphabetic
|
||||
0F99..0FBC ; Other_Alphabetic
|
||||
102B..102C ; Other_Alphabetic
|
||||
102D..1030 ; Other_Alphabetic
|
||||
|
@ -520,6 +525,12 @@ FF41..FF46 ; Hex_Digit
|
|||
1BA2..1BA5 ; Other_Alphabetic
|
||||
1BA6..1BA7 ; Other_Alphabetic
|
||||
1BA8..1BA9 ; Other_Alphabetic
|
||||
1BE7 ; Other_Alphabetic
|
||||
1BE8..1BE9 ; Other_Alphabetic
|
||||
1BEA..1BEC ; Other_Alphabetic
|
||||
1BED ; Other_Alphabetic
|
||||
1BEE ; Other_Alphabetic
|
||||
1BEF..1BF1 ; Other_Alphabetic
|
||||
1C24..1C2B ; Other_Alphabetic
|
||||
1C2C..1C33 ; Other_Alphabetic
|
||||
1C34..1C35 ; Other_Alphabetic
|
||||
|
@ -536,7 +547,6 @@ A947..A951 ; Other_Alphabetic
|
|||
A952 ; Other_Alphabetic
|
||||
A980..A982 ; Other_Alphabetic
|
||||
A983 ; Other_Alphabetic
|
||||
A9B3 ; Other_Alphabetic
|
||||
A9B4..A9B5 ; Other_Alphabetic
|
||||
A9B6..A9B9 ; Other_Alphabetic
|
||||
A9BA..A9BB ; Other_Alphabetic
|
||||
|
@ -563,12 +573,16 @@ FB1E ; Other_Alphabetic
|
|||
10A01..10A03 ; Other_Alphabetic
|
||||
10A05..10A06 ; Other_Alphabetic
|
||||
10A0C..10A0F ; Other_Alphabetic
|
||||
11000 ; Other_Alphabetic
|
||||
11001 ; Other_Alphabetic
|
||||
11002 ; Other_Alphabetic
|
||||
11038..11045 ; Other_Alphabetic
|
||||
11082 ; Other_Alphabetic
|
||||
110B0..110B2 ; Other_Alphabetic
|
||||
110B3..110B6 ; Other_Alphabetic
|
||||
110B7..110B8 ; Other_Alphabetic
|
||||
|
||||
# Total code points: 759
|
||||
# Total code points: 795
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -583,9 +597,10 @@ FA30..FA6D ; Ideographic
|
|||
FA70..FAD9 ; Ideographic
|
||||
20000..2A6D6 ; Ideographic
|
||||
2A700..2B734 ; Ideographic
|
||||
2B740..2B81D ; Ideographic
|
||||
2F800..2FA1D ; Ideographic
|
||||
|
||||
# Total code points: 75408
|
||||
# Total code points: 75630
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -863,8 +878,9 @@ FA23..FA24 ; Unified_Ideograph
|
|||
FA27..FA29 ; Unified_Ideograph
|
||||
20000..2A6D6 ; Unified_Ideograph
|
||||
2A700..2B734 ; Unified_Ideograph
|
||||
2B740..2B81D ; Unified_Ideograph
|
||||
|
||||
# Total code points: 74394
|
||||
# Total code points: 74616
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -884,6 +900,7 @@ E01F0..E0FFF ; Other_Default_Ignorable_Code_Point
|
|||
# ================================================
|
||||
|
||||
0149 ; Deprecated
|
||||
0673 ; Deprecated
|
||||
0F77 ; Deprecated
|
||||
0F79 ; Deprecated
|
||||
17A3..17A4 ; Deprecated
|
||||
|
@ -893,7 +910,7 @@ E01F0..E0FFF ; Other_Default_Ignorable_Code_Point
|
|||
E0001 ; Deprecated
|
||||
E0020..E007F ; Deprecated
|
||||
|
||||
# Total code points: 110
|
||||
# Total code points: 111
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -954,8 +971,9 @@ AABB..AABC ; Logical_Order_Exception
|
|||
00B7 ; Other_ID_Continue
|
||||
0387 ; Other_ID_Continue
|
||||
1369..1371 ; Other_ID_Continue
|
||||
19DA ; Other_ID_Continue
|
||||
|
||||
# Total code points: 11
|
||||
# Total code points: 12
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -974,9 +992,11 @@ AABB..AABC ; Logical_Order_Exception
|
|||
1362 ; STerm
|
||||
1367..1368 ; STerm
|
||||
166E ; STerm
|
||||
1735..1736 ; STerm
|
||||
1803 ; STerm
|
||||
1809 ; STerm
|
||||
1944..1945 ; STerm
|
||||
1AA8..1AAB ; STerm
|
||||
1B5A..1B5B ; STerm
|
||||
1B5E..1B5F ; STerm
|
||||
1C3B..1C3C ; STerm
|
||||
|
@ -1001,9 +1021,11 @@ FF01 ; STerm
|
|||
FF0E ; STerm
|
||||
FF1F ; STerm
|
||||
FF61 ; STerm
|
||||
10A56..10A57 ; STerm
|
||||
11047..11048 ; STerm
|
||||
110BE..110C1 ; STerm
|
||||
|
||||
# Total code points: 66
|
||||
# Total code points: 76
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1116,8 +1138,8 @@ E0100..E01EF ; Variation_Selector
|
|||
239B..23B3 ; Pattern_Syntax
|
||||
23B4..23DB ; Pattern_Syntax
|
||||
23DC..23E1 ; Pattern_Syntax
|
||||
23E2..23E8 ; Pattern_Syntax
|
||||
23E9..23FF ; Pattern_Syntax
|
||||
23E2..23F3 ; Pattern_Syntax
|
||||
23F4..23FF ; Pattern_Syntax
|
||||
2400..2426 ; Pattern_Syntax
|
||||
2427..243F ; Pattern_Syntax
|
||||
2440..244A ; Pattern_Syntax
|
||||
|
@ -1130,29 +1152,9 @@ E0100..E01EF ; Variation_Selector
|
|||
25F8..25FF ; Pattern_Syntax
|
||||
2600..266E ; Pattern_Syntax
|
||||
266F ; Pattern_Syntax
|
||||
2670..26CD ; Pattern_Syntax
|
||||
26CE ; Pattern_Syntax
|
||||
26CF..26E1 ; Pattern_Syntax
|
||||
26E2 ; Pattern_Syntax
|
||||
26E3 ; Pattern_Syntax
|
||||
26E4..26E7 ; Pattern_Syntax
|
||||
26E8..26FF ; Pattern_Syntax
|
||||
2670..26FF ; Pattern_Syntax
|
||||
2700 ; Pattern_Syntax
|
||||
2701..2704 ; Pattern_Syntax
|
||||
2705 ; Pattern_Syntax
|
||||
2706..2709 ; Pattern_Syntax
|
||||
270A..270B ; Pattern_Syntax
|
||||
270C..2727 ; Pattern_Syntax
|
||||
2728 ; Pattern_Syntax
|
||||
2729..274B ; Pattern_Syntax
|
||||
274C ; Pattern_Syntax
|
||||
274D ; Pattern_Syntax
|
||||
274E ; Pattern_Syntax
|
||||
274F..2752 ; Pattern_Syntax
|
||||
2753..2755 ; Pattern_Syntax
|
||||
2756..275E ; Pattern_Syntax
|
||||
275F..2760 ; Pattern_Syntax
|
||||
2761..2767 ; Pattern_Syntax
|
||||
2701..2767 ; Pattern_Syntax
|
||||
2768 ; Pattern_Syntax
|
||||
2769 ; Pattern_Syntax
|
||||
276A ; Pattern_Syntax
|
||||
|
@ -1167,20 +1169,15 @@ E0100..E01EF ; Variation_Selector
|
|||
2773 ; Pattern_Syntax
|
||||
2774 ; Pattern_Syntax
|
||||
2775 ; Pattern_Syntax
|
||||
2794 ; Pattern_Syntax
|
||||
2795..2797 ; Pattern_Syntax
|
||||
2798..27AF ; Pattern_Syntax
|
||||
27B0 ; Pattern_Syntax
|
||||
27B1..27BE ; Pattern_Syntax
|
||||
27BF ; Pattern_Syntax
|
||||
2794..27BF ; Pattern_Syntax
|
||||
27C0..27C4 ; Pattern_Syntax
|
||||
27C5 ; Pattern_Syntax
|
||||
27C6 ; Pattern_Syntax
|
||||
27C7..27CA ; Pattern_Syntax
|
||||
27CB ; Pattern_Syntax
|
||||
27CC ; Pattern_Syntax
|
||||
27CD..27CF ; Pattern_Syntax
|
||||
27D0..27E5 ; Pattern_Syntax
|
||||
27CD ; Pattern_Syntax
|
||||
27CE..27E5 ; Pattern_Syntax
|
||||
27E6 ; Pattern_Syntax
|
||||
27E7 ; Pattern_Syntax
|
||||
27E8 ; Pattern_Syntax
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# PropertyAliases-5.2.0.txt
|
||||
# Date: 2009-08-24, 03:26:46 GMT [MD]
|
||||
# PropertyAliases-6.0.0.txt
|
||||
# Date: 2010-05-18, 00:49:38 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# PropertyValueAliases-5.2.0.txt
|
||||
# Date: 2009-08-24, 03:27:01 GMT [MD]
|
||||
# PropertyValueAliases-6.0.0.txt
|
||||
# Date: 2010-07-17, 22:44:06 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -73,6 +73,7 @@ age; n/a ; 4.1
|
|||
age; n/a ; 5.0
|
||||
age; n/a ; 5.1
|
||||
age; n/a ; 5.2
|
||||
age; n/a ; 6.0
|
||||
age; n/a ; unassigned
|
||||
|
||||
# Alphabetic (Alpha)
|
||||
|
@ -119,6 +120,7 @@ Bidi_M; Y ; Yes ; T
|
|||
# Block (blk)
|
||||
|
||||
blk; n/a ; Aegean_Numbers
|
||||
blk; n/a ; Alchemical_Symbols
|
||||
blk; n/a ; Alphabetic_Presentation_Forms
|
||||
blk; n/a ; Ancient_Greek_Musical_Notation
|
||||
blk; n/a ; Ancient_Greek_Numbers
|
||||
|
@ -132,12 +134,15 @@ blk; n/a ; Arrows
|
|||
blk; n/a ; Avestan
|
||||
blk; n/a ; Balinese
|
||||
blk; n/a ; Bamum
|
||||
blk; n/a ; Bamum_Supplement
|
||||
blk; n/a ; Basic_Latin ; ASCII
|
||||
blk; n/a ; Batak
|
||||
blk; n/a ; Bengali
|
||||
blk; n/a ; Block_Elements
|
||||
blk; n/a ; Bopomofo
|
||||
blk; n/a ; Bopomofo_Extended
|
||||
blk; n/a ; Box_Drawing
|
||||
blk; n/a ; Brahmi
|
||||
blk; n/a ; Braille_Patterns
|
||||
blk; n/a ; Buginese
|
||||
blk; n/a ; Buhid
|
||||
|
@ -156,6 +161,7 @@ blk; n/a ; CJK_Unified_Ideographs
|
|||
blk; n/a ; CJK_Unified_Ideographs_Extension_A
|
||||
blk; n/a ; CJK_Unified_Ideographs_Extension_B
|
||||
blk; n/a ; CJK_Unified_Ideographs_Extension_C
|
||||
blk; n/a ; CJK_Unified_Ideographs_Extension_D
|
||||
blk; n/a ; Combining_Diacritical_Marks
|
||||
blk; n/a ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols
|
||||
blk; n/a ; Combining_Diacritical_Marks_Supplement
|
||||
|
@ -178,12 +184,14 @@ blk; n/a ; Devanagari_Extended
|
|||
blk; n/a ; Dingbats
|
||||
blk; n/a ; Domino_Tiles
|
||||
blk; n/a ; Egyptian_Hieroglyphs
|
||||
blk; n/a ; Emoticons
|
||||
blk; n/a ; Enclosed_Alphanumeric_Supplement
|
||||
blk; n/a ; Enclosed_Alphanumerics
|
||||
blk; n/a ; Enclosed_CJK_Letters_And_Months
|
||||
blk; n/a ; Enclosed_Ideographic_Supplement
|
||||
blk; n/a ; Ethiopic
|
||||
blk; n/a ; Ethiopic_Extended
|
||||
blk; n/a ; Ethiopic_Extended_A
|
||||
blk; n/a ; Ethiopic_Supplement
|
||||
blk; n/a ; General_Punctuation
|
||||
blk; n/a ; Geometric_Shapes
|
||||
|
@ -213,6 +221,7 @@ blk; n/a ; Inscriptional_Parthian
|
|||
blk; n/a ; IPA_Extensions
|
||||
blk; n/a ; Javanese
|
||||
blk; n/a ; Kaithi
|
||||
blk; n/a ; Kana_Supplement
|
||||
blk; n/a ; Kanbun
|
||||
blk; n/a ; Kangxi_Radicals
|
||||
blk; n/a ; Kannada
|
||||
|
@ -240,6 +249,7 @@ blk; n/a ; Lycian
|
|||
blk; n/a ; Lydian
|
||||
blk; n/a ; Mahjong_Tiles
|
||||
blk; n/a ; Malayalam
|
||||
blk; n/a ; Mandaic
|
||||
blk; n/a ; Mathematical_Alphanumeric_Symbols
|
||||
blk; n/a ; Mathematical_Operators
|
||||
blk; n/a ; Meetei_Mayek
|
||||
|
@ -247,6 +257,7 @@ blk; n/a ; Miscellaneous_Mathematical_Symbols_A
|
|||
blk; n/a ; Miscellaneous_Mathematical_Symbols_B
|
||||
blk; n/a ; Miscellaneous_Symbols
|
||||
blk; n/a ; Miscellaneous_Symbols_And_Arrows
|
||||
blk; n/a ; Miscellaneous_Symbols_And_Pictographs
|
||||
blk; n/a ; Miscellaneous_Technical
|
||||
blk; n/a ; Modifier_Tone_Letters
|
||||
blk; n/a ; Mongolian
|
||||
|
@ -271,6 +282,7 @@ blk; n/a ; Phaistos_Disc
|
|||
blk; n/a ; Phoenician
|
||||
blk; n/a ; Phonetic_Extensions
|
||||
blk; n/a ; Phonetic_Extensions_Supplement
|
||||
blk; n/a ; Playing_Cards
|
||||
blk; n/a ; Private_Use_Area ; Private_Use
|
||||
blk; n/a ; Rejang
|
||||
blk; n/a ; Rumi_Numeral_Symbols
|
||||
|
@ -305,6 +317,7 @@ blk; n/a ; Thaana
|
|||
blk; n/a ; Thai
|
||||
blk; n/a ; Tibetan
|
||||
blk; n/a ; Tifinagh
|
||||
blk; n/a ; Transport_And_Map_Symbols
|
||||
blk; n/a ; Ugaritic
|
||||
blk; n/a ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
|
||||
blk; n/a ; Unified_Canadian_Aboriginal_Syllabics_Extended
|
||||
|
@ -675,7 +688,6 @@ jg ; n/a ; Final_Semkath
|
|||
jg ; n/a ; Gaf
|
||||
jg ; n/a ; Gamal
|
||||
jg ; n/a ; Hah
|
||||
jg ; n/a ; Hamza_On_Heh_Goal
|
||||
jg ; n/a ; He
|
||||
jg ; n/a ; Heh
|
||||
jg ; n/a ; Heh_Goal
|
||||
|
@ -707,6 +719,7 @@ jg ; n/a ; Syriac_Waw
|
|||
jg ; n/a ; Tah
|
||||
jg ; n/a ; Taw
|
||||
jg ; n/a ; Teh_Marbuta
|
||||
jg ; n/a ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal
|
||||
jg ; n/a ; Teth
|
||||
jg ; n/a ; Waw
|
||||
jg ; n/a ; Yeh
|
||||
|
@ -908,8 +921,10 @@ sc ; Armn ; Armenian
|
|||
sc ; Avst ; Avestan
|
||||
sc ; Bali ; Balinese
|
||||
sc ; Bamu ; Bamum
|
||||
sc ; Batk ; Batak
|
||||
sc ; Beng ; Bengali
|
||||
sc ; Bopo ; Bopomofo
|
||||
sc ; Brah ; Brahmi
|
||||
sc ; Brai ; Braille
|
||||
sc ; Bugi ; Buginese
|
||||
sc ; Buhd ; Buhid
|
||||
|
@ -953,6 +968,7 @@ sc ; Linb ; Linear_B
|
|||
sc ; Lisu ; Lisu
|
||||
sc ; Lyci ; Lycian
|
||||
sc ; Lydi ; Lydian
|
||||
sc ; Mand ; Mandaic
|
||||
sc ; Mlym ; Malayalam
|
||||
sc ; Mong ; Mongolian
|
||||
sc ; Mtei ; Meetei_Mayek
|
||||
|
|
172
icu4c/source/data/unidata/ScriptExtensions.txt
Normal file
172
icu4c/source/data/unidata/ScriptExtensions.txt
Normal file
|
@ -0,0 +1,172 @@
|
|||
# ScriptExtensions-6.0.0.txt
|
||||
# Date: 2010-08-30, 01:48:36 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# The Script Extensions contain data about characters that belong to multiple scripts.
|
||||
# This data is provisional, and expected to change over time, as more information becomes available.
|
||||
# The script values are space-delimited short values, such as Hang for Hangul.
|
||||
# For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
|
||||
# ================================================
|
||||
|
||||
# Property: Script_Extensions
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Syrc
|
||||
|
||||
0640 ; Arab Syrc # Lm ARABIC TATWEEL
|
||||
064B..0655 ; Arab Syrc # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
|
||||
0670 ; Arab Syrc # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
|
||||
# Total code points: 13
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Thaa
|
||||
|
||||
0660..0669 ; Arab Thaa # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
|
||||
FDF2 ; Arab Thaa # Lo ARABIC LIGATURE ALLAH ISOLATED FORM
|
||||
FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
|
||||
|
||||
# Total code points: 12
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Armn Geor
|
||||
|
||||
0589 ; Armn Geor # Po ARMENIAN FULL STOP
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bopo Hani
|
||||
|
||||
302A..302D ; Bopo Hani # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hira Kana
|
||||
|
||||
3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
|
||||
3099..309A ; Hira Kana # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
309B..309C ; Hira Kana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
30A0 ; Hira Kana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
|
||||
30FC ; Hira Kana # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF70 ; Hira Kana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
|
||||
# Total code points: 14
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Mong Phag
|
||||
|
||||
1802..1803 ; Mong Phag # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
|
||||
1805 ; Mong Phag # Po MONGOLIAN FOUR DOTS
|
||||
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Syrc Thaa
|
||||
|
||||
060C ; Arab Syrc Thaa # Po ARABIC COMMA
|
||||
061B ; Arab Syrc Thaa # Po ARABIC SEMICOLON
|
||||
061F ; Arab Syrc Thaa # Po ARABIC QUESTION MARK
|
||||
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hani Hira Kana
|
||||
|
||||
3006 ; Hani Hira Kana # Lo IDEOGRAPHIC CLOSING MARK
|
||||
303C ; Hani Hira Kana # Lo MASU MARK
|
||||
303D ; Hani Hira Kana # Po PART ALTERNATION MARK
|
||||
3190..3191 ; Hani Hira Kana # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
|
||||
3192..3195 ; Hani Hira Kana # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
|
||||
3196..319F ; Hani Hira Kana # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
|
||||
|
||||
# Total code points: 19
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Guru Orya
|
||||
|
||||
0964..0965 ; Beng Deva Guru Orya # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Buhd Hano Tagb Tglg
|
||||
|
||||
1735..1736 ; Buhd Hano Tagb Tglg # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bopo Hang Hani Hira Kana
|
||||
|
||||
3003 ; Bopo Hang Hani Hira Kana # Po DITTO MARK
|
||||
3013 ; Bopo Hang Hani Hira Kana # So GETA MARK
|
||||
301C ; Bopo Hang Hani Hira Kana # Pd WAVE DASH
|
||||
301D ; Bopo Hang Hani Hira Kana # Ps REVERSED DOUBLE PRIME QUOTATION MARK
|
||||
301E..301F ; Bopo Hang Hani Hira Kana # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
|
||||
3030 ; Bopo Hang Hani Hira Kana # Pd WAVY DASH
|
||||
3037 ; Bopo Hang Hani Hira Kana # So IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
|
||||
303E..303F ; Bopo Hang Hani Hira Kana # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
|
||||
31C0..31E3 ; Bopo Hang Hani Hira Kana # So [36] CJK STROKE T..CJK STROKE Q
|
||||
3220..3229 ; Bopo Hang Hani Hira Kana # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
|
||||
322A..3243 ; Bopo Hang Hani Hira Kana # So [26] PARENTHESIZED IDEOGRAPH MOON..PARENTHESIZED IDEOGRAPH REACH
|
||||
3280..3289 ; Bopo Hang Hani Hira Kana # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
|
||||
328A..32B0 ; Bopo Hang Hani Hira Kana # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
|
||||
32C0..32CB ; Bopo Hang Hani Hira Kana # So [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER
|
||||
3358..3370 ; Bopo Hang Hani Hira Kana # So [25] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR
|
||||
337B..337F ; Bopo Hang Hani Hira Kana # So [5] SQUARE ERA NAME HEISEI..SQUARE CORPORATION
|
||||
33E0..33FE ; Bopo Hang Hani Hira Kana # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE
|
||||
FE45..FE46 ; Bopo Hang Hani Hira Kana # Po [2] SESAME DOT..WHITE SESAME DOT
|
||||
|
||||
# Total code points: 206
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bopo Hang Hani Hira Kana Yiii
|
||||
|
||||
3001..3002 ; Bopo Hang Hani Hira Kana Yiii # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
|
||||
3008 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT ANGLE BRACKET
|
||||
3009 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT ANGLE BRACKET
|
||||
300A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT DOUBLE ANGLE BRACKET
|
||||
300B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT DOUBLE ANGLE BRACKET
|
||||
300C ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT CORNER BRACKET
|
||||
300D ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT CORNER BRACKET
|
||||
300E ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE CORNER BRACKET
|
||||
300F ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE CORNER BRACKET
|
||||
3010 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT BLACK LENTICULAR BRACKET
|
||||
3011 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT BLACK LENTICULAR BRACKET
|
||||
3014 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT TORTOISE SHELL BRACKET
|
||||
3015 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT TORTOISE SHELL BRACKET
|
||||
3016 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE LENTICULAR BRACKET
|
||||
3017 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE LENTICULAR BRACKET
|
||||
3018 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE TORTOISE SHELL BRACKET
|
||||
3019 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE TORTOISE SHELL BRACKET
|
||||
301A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE SQUARE BRACKET
|
||||
301B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE SQUARE BRACKET
|
||||
30FB ; Bopo Hang Hani Hira Kana Yiii # Po KATAKANA MIDDLE DOT
|
||||
FF61 ; Bopo Hang Hani Hira Kana Yiii # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
FF62 ; Bopo Hang Hani Hira Kana Yiii # Ps HALFWIDTH LEFT CORNER BRACKET
|
||||
FF63 ; Bopo Hang Hani Hira Kana Yiii # Pe HALFWIDTH RIGHT CORNER BRACKET
|
||||
FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
|
||||
|
||||
# Total code points: 26
|
||||
|
||||
# EOF
|
|
@ -1,8 +1,8 @@
|
|||
# Scripts-5.2.0.txt
|
||||
# Date: 2009-08-22, 04:58:43 GMT [MD]
|
||||
# Scripts-6.0.0.txt
|
||||
# Date: 2010-08-19, 00:48:47 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -73,7 +73,7 @@
|
|||
02C2..02C5 ; Common
|
||||
02C6..02D1 ; Common
|
||||
02D2..02DF ; Common
|
||||
02E5..02EB ; Common
|
||||
02E5..02E9 ; Common
|
||||
02EC ; Common
|
||||
02ED ; Common
|
||||
02EE ; Common
|
||||
|
@ -83,7 +83,6 @@
|
|||
0385 ; Common
|
||||
0387 ; Common
|
||||
0589 ; Common
|
||||
0600..0603 ; Common
|
||||
060C ; Common
|
||||
061B ; Common
|
||||
061F ; Common
|
||||
|
@ -92,7 +91,6 @@
|
|||
06DD ; Common
|
||||
0964..0965 ; Common
|
||||
0970 ; Common
|
||||
0CF1..0CF2 ; Common
|
||||
0E3F ; Common
|
||||
0FD5..0FD8 ; Common
|
||||
10FB ; Common
|
||||
|
@ -148,7 +146,7 @@
|
|||
208A..208C ; Common
|
||||
208D ; Common
|
||||
208E ; Common
|
||||
20A0..20B8 ; Common
|
||||
20A0..20B9 ; Common
|
||||
2100..2101 ; Common
|
||||
2102 ; Common
|
||||
2103..2106 ; Common
|
||||
|
@ -157,7 +155,8 @@
|
|||
210A..2113 ; Common
|
||||
2114 ; Common
|
||||
2115 ; Common
|
||||
2116..2118 ; Common
|
||||
2116..2117 ; Common
|
||||
2118 ; Common
|
||||
2119..211D ; Common
|
||||
211E..2123 ; Common
|
||||
2124 ; Common
|
||||
|
@ -213,7 +212,7 @@
|
|||
239B..23B3 ; Common
|
||||
23B4..23DB ; Common
|
||||
23DC..23E1 ; Common
|
||||
23E2..23E8 ; Common
|
||||
23E2..23F3 ; Common
|
||||
2400..2426 ; Common
|
||||
2440..244A ; Common
|
||||
2460..249B ; Common
|
||||
|
@ -227,18 +226,8 @@
|
|||
25F8..25FF ; Common
|
||||
2600..266E ; Common
|
||||
266F ; Common
|
||||
2670..26CD ; Common
|
||||
26CF..26E1 ; Common
|
||||
26E3 ; Common
|
||||
26E8..26FF ; Common
|
||||
2701..2704 ; Common
|
||||
2706..2709 ; Common
|
||||
270C..2727 ; Common
|
||||
2729..274B ; Common
|
||||
274D ; Common
|
||||
274F..2752 ; Common
|
||||
2756..275E ; Common
|
||||
2761..2767 ; Common
|
||||
2670..26FF ; Common
|
||||
2701..2767 ; Common
|
||||
2768 ; Common
|
||||
2769 ; Common
|
||||
276A ; Common
|
||||
|
@ -254,15 +243,13 @@
|
|||
2774 ; Common
|
||||
2775 ; Common
|
||||
2776..2793 ; Common
|
||||
2794 ; Common
|
||||
2798..27AF ; Common
|
||||
27B1..27BE ; Common
|
||||
2794..27BF ; Common
|
||||
27C0..27C4 ; Common
|
||||
27C5 ; Common
|
||||
27C6 ; Common
|
||||
27C7..27CA ; Common
|
||||
27CC ; Common
|
||||
27D0..27E5 ; Common
|
||||
27CE..27E5 ; Common
|
||||
27E6 ; Common
|
||||
27E7 ; Common
|
||||
27E8 ; Common
|
||||
|
@ -555,27 +542,51 @@ FFFC..FFFD ; Common
|
|||
1D7CE..1D7FF ; Common
|
||||
1F000..1F02B ; Common
|
||||
1F030..1F093 ; Common
|
||||
1F0A0..1F0AE ; Common
|
||||
1F0B1..1F0BE ; Common
|
||||
1F0C1..1F0CF ; Common
|
||||
1F0D1..1F0DF ; Common
|
||||
1F100..1F10A ; Common
|
||||
1F110..1F12E ; Common
|
||||
1F131 ; Common
|
||||
1F13D ; Common
|
||||
1F13F ; Common
|
||||
1F142 ; Common
|
||||
1F146 ; Common
|
||||
1F14A..1F14E ; Common
|
||||
1F157 ; Common
|
||||
1F15F ; Common
|
||||
1F179 ; Common
|
||||
1F17B..1F17C ; Common
|
||||
1F17F ; Common
|
||||
1F18A..1F18D ; Common
|
||||
1F190 ; Common
|
||||
1F210..1F231 ; Common
|
||||
1F130..1F169 ; Common
|
||||
1F170..1F19A ; Common
|
||||
1F1E6..1F1FF ; Common
|
||||
1F201..1F202 ; Common
|
||||
1F210..1F23A ; Common
|
||||
1F240..1F248 ; Common
|
||||
1F250..1F251 ; Common
|
||||
1F300..1F320 ; Common
|
||||
1F330..1F335 ; Common
|
||||
1F337..1F37C ; Common
|
||||
1F380..1F393 ; Common
|
||||
1F3A0..1F3C4 ; Common
|
||||
1F3C6..1F3CA ; Common
|
||||
1F3E0..1F3F0 ; Common
|
||||
1F400..1F43E ; Common
|
||||
1F440 ; Common
|
||||
1F442..1F4F7 ; Common
|
||||
1F4F9..1F4FC ; Common
|
||||
1F500..1F53D ; Common
|
||||
1F550..1F567 ; Common
|
||||
1F5FB..1F5FF ; Common
|
||||
1F601..1F610 ; Common
|
||||
1F612..1F614 ; Common
|
||||
1F616 ; Common
|
||||
1F618 ; Common
|
||||
1F61A ; Common
|
||||
1F61C..1F61E ; Common
|
||||
1F620..1F625 ; Common
|
||||
1F628..1F62B ; Common
|
||||
1F62D ; Common
|
||||
1F630..1F633 ; Common
|
||||
1F635..1F640 ; Common
|
||||
1F645..1F64F ; Common
|
||||
1F680..1F6C5 ; Common
|
||||
1F700..1F773 ; Common
|
||||
E0001 ; Common
|
||||
E0020..E007F ; Common
|
||||
|
||||
# Total code points: 5395
|
||||
# Total code points: 6379
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -603,7 +614,7 @@ E0020..E007F ; Common
|
|||
1E00..1EFF ; Latin
|
||||
2071 ; Latin
|
||||
207F ; Latin
|
||||
2090..2094 ; Latin
|
||||
2090..209C ; Latin
|
||||
212A..212B ; Latin
|
||||
2132 ; Latin
|
||||
214E ; Latin
|
||||
|
@ -616,13 +627,16 @@ E0020..E007F ; Common
|
|||
A722..A76F ; Latin
|
||||
A770 ; Latin
|
||||
A771..A787 ; Latin
|
||||
A78B..A78C ; Latin
|
||||
A78B..A78E ; Latin
|
||||
A790..A791 ; Latin
|
||||
A7A0..A7A9 ; Latin
|
||||
A7FA ; Latin
|
||||
A7FB..A7FF ; Latin
|
||||
FB00..FB06 ; Latin
|
||||
FF21..FF3A ; Latin
|
||||
FF41..FF5A ; Latin
|
||||
|
||||
# Total code points: 1244
|
||||
# Total code points: 1267
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -687,12 +701,11 @@ FF41..FF5A ; Latin
|
|||
0483..0484 ; Cyrillic
|
||||
0487 ; Cyrillic
|
||||
0488..0489 ; Cyrillic
|
||||
048A..0525 ; Cyrillic
|
||||
048A..0527 ; Cyrillic
|
||||
1D2B ; Cyrillic
|
||||
1D78 ; Cyrillic
|
||||
2DE0..2DFF ; Cyrillic
|
||||
A640..A65F ; Cyrillic
|
||||
A662..A66D ; Cyrillic
|
||||
A640..A66D ; Cyrillic
|
||||
A66E ; Cyrillic
|
||||
A66F ; Cyrillic
|
||||
A670..A672 ; Cyrillic
|
||||
|
@ -702,7 +715,7 @@ A67E ; Cyrillic
|
|||
A67F ; Cyrillic
|
||||
A680..A697 ; Cyrillic
|
||||
|
||||
# Total code points: 404
|
||||
# Total code points: 408
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -744,6 +757,7 @@ FB46..FB4F ; Hebrew
|
|||
|
||||
# ================================================
|
||||
|
||||
0600..0603 ; Arabic
|
||||
0606..0608 ; Arabic
|
||||
0609..060A ; Arabic
|
||||
060B ; Arabic
|
||||
|
@ -751,7 +765,7 @@ FB46..FB4F ; Hebrew
|
|||
060E..060F ; Arabic
|
||||
0610..061A ; Arabic
|
||||
061E ; Arabic
|
||||
0621..063F ; Arabic
|
||||
0620..063F ; Arabic
|
||||
0641..064A ; Arabic
|
||||
0656..065E ; Arabic
|
||||
066A..066D ; Arabic
|
||||
|
@ -773,6 +787,7 @@ FB46..FB4F ; Hebrew
|
|||
06FF ; Arabic
|
||||
0750..077F ; Arabic
|
||||
FB50..FBB1 ; Arabic
|
||||
FBB2..FBC1 ; Arabic
|
||||
FBD3..FD3D ; Arabic
|
||||
FD50..FD8F ; Arabic
|
||||
FD92..FDC7 ; Arabic
|
||||
|
@ -782,7 +797,7 @@ FE70..FE74 ; Arabic
|
|||
FE76..FEFC ; Arabic
|
||||
10E60..10E7E ; Arabic
|
||||
|
||||
# Total code points: 1030
|
||||
# Total code points: 1051
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -809,27 +824,29 @@ FE76..FEFC ; Arabic
|
|||
0900..0902 ; Devanagari
|
||||
0903 ; Devanagari
|
||||
0904..0939 ; Devanagari
|
||||
093A ; Devanagari
|
||||
093B ; Devanagari
|
||||
093C ; Devanagari
|
||||
093D ; Devanagari
|
||||
093E..0940 ; Devanagari
|
||||
0941..0948 ; Devanagari
|
||||
0949..094C ; Devanagari
|
||||
094D ; Devanagari
|
||||
094E ; Devanagari
|
||||
094E..094F ; Devanagari
|
||||
0950 ; Devanagari
|
||||
0953..0955 ; Devanagari
|
||||
0953..0957 ; Devanagari
|
||||
0958..0961 ; Devanagari
|
||||
0962..0963 ; Devanagari
|
||||
0966..096F ; Devanagari
|
||||
0971 ; Devanagari
|
||||
0972 ; Devanagari
|
||||
0972..0977 ; Devanagari
|
||||
0979..097F ; Devanagari
|
||||
A8E0..A8F1 ; Devanagari
|
||||
A8F2..A8F7 ; Devanagari
|
||||
A8F8..A8FA ; Devanagari
|
||||
A8FB ; Devanagari
|
||||
|
||||
# Total code points: 140
|
||||
# Total code points: 150
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -941,8 +958,9 @@ A8FB ; Devanagari
|
|||
0B66..0B6F ; Oriya
|
||||
0B70 ; Oriya
|
||||
0B71 ; Oriya
|
||||
0B72..0B77 ; Oriya
|
||||
|
||||
# Total code points: 84
|
||||
# Total code points: 90
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1018,22 +1036,23 @@ A8FB ; Devanagari
|
|||
0CE0..0CE1 ; Kannada
|
||||
0CE2..0CE3 ; Kannada
|
||||
0CE6..0CEF ; Kannada
|
||||
0CF1..0CF2 ; Kannada
|
||||
|
||||
# Total code points: 84
|
||||
# Total code points: 86
|
||||
|
||||
# ================================================
|
||||
|
||||
0D02..0D03 ; Malayalam
|
||||
0D05..0D0C ; Malayalam
|
||||
0D0E..0D10 ; Malayalam
|
||||
0D12..0D28 ; Malayalam
|
||||
0D2A..0D39 ; Malayalam
|
||||
0D12..0D3A ; Malayalam
|
||||
0D3D ; Malayalam
|
||||
0D3E..0D40 ; Malayalam
|
||||
0D41..0D44 ; Malayalam
|
||||
0D46..0D48 ; Malayalam
|
||||
0D4A..0D4C ; Malayalam
|
||||
0D4D ; Malayalam
|
||||
0D4E ; Malayalam
|
||||
0D57 ; Malayalam
|
||||
0D60..0D61 ; Malayalam
|
||||
0D62..0D63 ; Malayalam
|
||||
|
@ -1042,7 +1061,7 @@ A8FB ; Devanagari
|
|||
0D79 ; Malayalam
|
||||
0D7A..0D7F ; Malayalam
|
||||
|
||||
# Total code points: 95
|
||||
# Total code points: 98
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1132,16 +1151,17 @@ A8FB ; Devanagari
|
|||
0F80..0F84 ; Tibetan
|
||||
0F85 ; Tibetan
|
||||
0F86..0F87 ; Tibetan
|
||||
0F88..0F8B ; Tibetan
|
||||
0F90..0F97 ; Tibetan
|
||||
0F88..0F8C ; Tibetan
|
||||
0F8D..0F97 ; Tibetan
|
||||
0F99..0FBC ; Tibetan
|
||||
0FBE..0FC5 ; Tibetan
|
||||
0FC6 ; Tibetan
|
||||
0FC7..0FCC ; Tibetan
|
||||
0FCE..0FCF ; Tibetan
|
||||
0FD0..0FD4 ; Tibetan
|
||||
0FD9..0FDA ; Tibetan
|
||||
|
||||
# Total code points: 201
|
||||
# Total code points: 207
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1201,6 +1221,7 @@ AA7B ; Myanmar
|
|||
# ================================================
|
||||
|
||||
1100..11FF ; Hangul
|
||||
302E..302F ; Hangul
|
||||
3131..318E ; Hangul
|
||||
3200..321E ; Hangul
|
||||
3260..327E ; Hangul
|
||||
|
@ -1214,7 +1235,7 @@ FFCA..FFCF ; Hangul
|
|||
FFD2..FFD7 ; Hangul
|
||||
FFDA..FFDC ; Hangul
|
||||
|
||||
# Total code points: 11737
|
||||
# Total code points: 11739
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1234,7 +1255,7 @@ FFDA..FFDC ; Hangul
|
|||
12D8..1310 ; Ethiopic
|
||||
1312..1315 ; Ethiopic
|
||||
1318..135A ; Ethiopic
|
||||
135F ; Ethiopic
|
||||
135D..135F ; Ethiopic
|
||||
1360 ; Ethiopic
|
||||
1361..1368 ; Ethiopic
|
||||
1369..137C ; Ethiopic
|
||||
|
@ -1249,8 +1270,13 @@ FFDA..FFDC ; Hangul
|
|||
2DC8..2DCE ; Ethiopic
|
||||
2DD0..2DD6 ; Ethiopic
|
||||
2DD8..2DDE ; Ethiopic
|
||||
AB01..AB06 ; Ethiopic
|
||||
AB09..AB0E ; Ethiopic
|
||||
AB11..AB16 ; Ethiopic
|
||||
AB20..AB26 ; Ethiopic
|
||||
AB28..AB2E ; Ethiopic
|
||||
|
||||
# Total code points: 461
|
||||
# Total code points: 495
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1329,9 +1355,10 @@ FFDA..FFDC ; Hangul
|
|||
3041..3096 ; Hiragana
|
||||
309D..309E ; Hiragana
|
||||
309F ; Hiragana
|
||||
1B001 ; Hiragana
|
||||
1F200 ; Hiragana
|
||||
|
||||
# Total code points: 90
|
||||
# Total code points: 91
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1343,15 +1370,17 @@ FFDA..FFDC ; Hangul
|
|||
3300..3357 ; Katakana
|
||||
FF66..FF6F ; Katakana
|
||||
FF71..FF9D ; Katakana
|
||||
1B000 ; Katakana
|
||||
|
||||
# Total code points: 299
|
||||
# Total code points: 300
|
||||
|
||||
# ================================================
|
||||
|
||||
02EA..02EB ; Bopomofo
|
||||
3105..312D ; Bopomofo
|
||||
31A0..31B7 ; Bopomofo
|
||||
31A0..31BA ; Bopomofo
|
||||
|
||||
# Total code points: 65
|
||||
# Total code points: 70
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1370,9 +1399,10 @@ FA30..FA6D ; Han
|
|||
FA70..FAD9 ; Han
|
||||
20000..2A6D6 ; Han
|
||||
2A700..2B734 ; Han
|
||||
2B740..2B81D ; Han
|
||||
2F800..2FA1D ; Han
|
||||
|
||||
# Total code points: 75738
|
||||
# Total code points: 75960
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1410,6 +1440,7 @@ A490..A4C6 ; Yi
|
|||
0300..036F ; Inherited
|
||||
0485..0486 ; Inherited
|
||||
064B..0655 ; Inherited
|
||||
065F ; Inherited
|
||||
0670 ; Inherited
|
||||
0951..0952 ; Inherited
|
||||
1CD0..1CD2 ; Inherited
|
||||
|
@ -1417,14 +1448,14 @@ A490..A4C6 ; Yi
|
|||
1CE2..1CE8 ; Inherited
|
||||
1CED ; Inherited
|
||||
1DC0..1DE6 ; Inherited
|
||||
1DFD..1DFF ; Inherited
|
||||
1DFC..1DFF ; Inherited
|
||||
200C..200D ; Inherited
|
||||
20D0..20DC ; Inherited
|
||||
20DD..20E0 ; Inherited
|
||||
20E1 ; Inherited
|
||||
20E2..20E4 ; Inherited
|
||||
20E5..20F0 ; Inherited
|
||||
302A..302F ; Inherited
|
||||
302A..302D ; Inherited
|
||||
3099..309A ; Inherited
|
||||
FE00..FE0F ; Inherited
|
||||
FE20..FE26 ; Inherited
|
||||
|
@ -1568,7 +1599,8 @@ E0100..E01EF ; Inherited
|
|||
19B0..19C0 ; New_Tai_Lue
|
||||
19C1..19C7 ; New_Tai_Lue
|
||||
19C8..19C9 ; New_Tai_Lue
|
||||
19D0..19DA ; New_Tai_Lue
|
||||
19D0..19D9 ; New_Tai_Lue
|
||||
19DA ; New_Tai_Lue
|
||||
19DE..19DF ; New_Tai_Lue
|
||||
|
||||
# Total code points: 83
|
||||
|
@ -1584,8 +1616,10 @@ E0100..E01EF ; Inherited
|
|||
|
||||
2D30..2D65 ; Tifinagh
|
||||
2D6F ; Tifinagh
|
||||
2D70 ; Tifinagh
|
||||
2D7F ; Tifinagh
|
||||
|
||||
# Total code points: 55
|
||||
# Total code points: 57
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1882,8 +1916,9 @@ A6A0..A6E5 ; Bamum
|
|||
A6E6..A6EF ; Bamum
|
||||
A6F0..A6F1 ; Bamum
|
||||
A6F2..A6F7 ; Bamum
|
||||
16800..16A38 ; Bamum
|
||||
|
||||
# Total code points: 88
|
||||
# Total code points: 657
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1969,4 +2004,40 @@ ABF0..ABF9 ; Meetei_Mayek
|
|||
|
||||
# Total code points: 66
|
||||
|
||||
# ================================================
|
||||
|
||||
1BC0..1BE5 ; Batak
|
||||
1BE6 ; Batak
|
||||
1BE7 ; Batak
|
||||
1BE8..1BE9 ; Batak
|
||||
1BEA..1BEC ; Batak
|
||||
1BED ; Batak
|
||||
1BEE ; Batak
|
||||
1BEF..1BF1 ; Batak
|
||||
1BF2..1BF3 ; Batak
|
||||
1BFC..1BFF ; Batak
|
||||
|
||||
# Total code points: 56
|
||||
|
||||
# ================================================
|
||||
|
||||
11000 ; Brahmi
|
||||
11001 ; Brahmi
|
||||
11002 ; Brahmi
|
||||
11003..11037 ; Brahmi
|
||||
11038..11046 ; Brahmi
|
||||
11047..1104D ; Brahmi
|
||||
11052..11065 ; Brahmi
|
||||
11066..1106F ; Brahmi
|
||||
|
||||
# Total code points: 108
|
||||
|
||||
# ================================================
|
||||
|
||||
0840..0858 ; Mandaic
|
||||
0859..085B ; Mandaic
|
||||
085E ; Mandaic
|
||||
|
||||
# Total code points: 29
|
||||
|
||||
# EOF
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# SentenceBreakProperty-5.2.0.txt
|
||||
# Date: 2009-08-22, 04:58:44 GMT [MD]
|
||||
# SentenceBreakProperty-6.0.0.txt
|
||||
# Date: 2010-08-19, 00:48:47 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -38,10 +38,9 @@
|
|||
05C4..05C5 ; Extend
|
||||
05C7 ; Extend
|
||||
0610..061A ; Extend
|
||||
064B..065E ; Extend
|
||||
064B..065F ; Extend
|
||||
0670 ; Extend
|
||||
06D6..06DC ; Extend
|
||||
06DE ; Extend
|
||||
06DF..06E4 ; Extend
|
||||
06E7..06E8 ; Extend
|
||||
06EA..06ED ; Extend
|
||||
|
@ -53,15 +52,18 @@
|
|||
081B..0823 ; Extend
|
||||
0825..0827 ; Extend
|
||||
0829..082D ; Extend
|
||||
0859..085B ; Extend
|
||||
0900..0902 ; Extend
|
||||
0903 ; Extend
|
||||
093A ; Extend
|
||||
093B ; Extend
|
||||
093C ; Extend
|
||||
093E..0940 ; Extend
|
||||
0941..0948 ; Extend
|
||||
0949..094C ; Extend
|
||||
094D ; Extend
|
||||
094E ; Extend
|
||||
0951..0955 ; Extend
|
||||
094E..094F ; Extend
|
||||
0951..0957 ; Extend
|
||||
0962..0963 ; Extend
|
||||
0981 ; Extend
|
||||
0982..0983 ; Extend
|
||||
|
@ -163,7 +165,7 @@
|
|||
0F7F ; Extend
|
||||
0F80..0F84 ; Extend
|
||||
0F86..0F87 ; Extend
|
||||
0F90..0F97 ; Extend
|
||||
0F8D..0F97 ; Extend
|
||||
0F99..0FBC ; Extend
|
||||
0FC6 ; Extend
|
||||
102B..102C ; Extend
|
||||
|
@ -188,7 +190,7 @@
|
|||
108F ; Extend
|
||||
109A..109C ; Extend
|
||||
109D ; Extend
|
||||
135F ; Extend
|
||||
135D..135F ; Extend
|
||||
1712..1714 ; Extend
|
||||
1732..1734 ; Extend
|
||||
1752..1753 ; Extend
|
||||
|
@ -244,6 +246,14 @@
|
|||
1BA6..1BA7 ; Extend
|
||||
1BA8..1BA9 ; Extend
|
||||
1BAA ; Extend
|
||||
1BE6 ; Extend
|
||||
1BE7 ; Extend
|
||||
1BE8..1BE9 ; Extend
|
||||
1BEA..1BEC ; Extend
|
||||
1BED ; Extend
|
||||
1BEE ; Extend
|
||||
1BEF..1BF1 ; Extend
|
||||
1BF2..1BF3 ; Extend
|
||||
1C24..1C2B ; Extend
|
||||
1C2C..1C33 ; Extend
|
||||
1C34..1C35 ; Extend
|
||||
|
@ -255,7 +265,7 @@
|
|||
1CED ; Extend
|
||||
1CF2 ; Extend
|
||||
1DC0..1DE6 ; Extend
|
||||
1DFD..1DFF ; Extend
|
||||
1DFC..1DFF ; Extend
|
||||
200C..200D ; Extend
|
||||
20D0..20DC ; Extend
|
||||
20DD..20E0 ; Extend
|
||||
|
@ -263,6 +273,7 @@
|
|||
20E2..20E4 ; Extend
|
||||
20E5..20F0 ; Extend
|
||||
2CEF..2CF1 ; Extend
|
||||
2D7F ; Extend
|
||||
2DE0..2DFF ; Extend
|
||||
302A..302F ; Extend
|
||||
3099..309A ; Extend
|
||||
|
@ -322,6 +333,10 @@ FF9E..FF9F ; Extend
|
|||
10A0C..10A0F ; Extend
|
||||
10A38..10A3A ; Extend
|
||||
10A3F ; Extend
|
||||
11000 ; Extend
|
||||
11001 ; Extend
|
||||
11002 ; Extend
|
||||
11038..11046 ; Extend
|
||||
11080..11081 ; Extend
|
||||
11082 ; Extend
|
||||
110B0..110B2 ; Extend
|
||||
|
@ -337,7 +352,7 @@ FF9E..FF9F ; Extend
|
|||
1D242..1D244 ; Extend
|
||||
E0100..E01EF ; Extend
|
||||
|
||||
# Total code points: 1455
|
||||
# Total code points: 1502
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -655,6 +670,7 @@ E0020..E007F ; Format
|
|||
0521 ; Lower
|
||||
0523 ; Lower
|
||||
0525 ; Lower
|
||||
0527 ; Lower
|
||||
0561..0587 ; Lower
|
||||
1D00..1D2B ; Lower
|
||||
1D2C..1D61 ; Lower
|
||||
|
@ -898,6 +914,7 @@ A659 ; Lower
|
|||
A65B ; Lower
|
||||
A65D ; Lower
|
||||
A65F ; Lower
|
||||
A661 ; Lower
|
||||
A663 ; Lower
|
||||
A665 ; Lower
|
||||
A667 ; Lower
|
||||
|
@ -964,6 +981,14 @@ A783 ; Lower
|
|||
A785 ; Lower
|
||||
A787 ; Lower
|
||||
A78C ; Lower
|
||||
A78E ; Lower
|
||||
A791 ; Lower
|
||||
A7A1 ; Lower
|
||||
A7A3 ; Lower
|
||||
A7A5 ; Lower
|
||||
A7A7 ; Lower
|
||||
A7A9 ; Lower
|
||||
A7FA ; Lower
|
||||
FB00..FB06 ; Lower
|
||||
FB13..FB17 ; Lower
|
||||
FF41..FF5A ; Lower
|
||||
|
@ -997,7 +1022,7 @@ FF41..FF5A ; Lower
|
|||
1D7C4..1D7C9 ; Lower
|
||||
1D7CB ; Lower
|
||||
|
||||
# Total code points: 1907
|
||||
# Total code points: 1917
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1266,6 +1291,7 @@ FF41..FF5A ; Lower
|
|||
0520 ; Upper
|
||||
0522 ; Upper
|
||||
0524 ; Upper
|
||||
0526 ; Upper
|
||||
0531..0556 ; Upper
|
||||
10A0..10C5 ; Upper
|
||||
1E00 ; Upper
|
||||
|
@ -1503,6 +1529,7 @@ A658 ; Upper
|
|||
A65A ; Upper
|
||||
A65C ; Upper
|
||||
A65E ; Upper
|
||||
A660 ; Upper
|
||||
A662 ; Upper
|
||||
A664 ; Upper
|
||||
A666 ; Upper
|
||||
|
@ -1567,6 +1594,13 @@ A782 ; Upper
|
|||
A784 ; Upper
|
||||
A786 ; Upper
|
||||
A78B ; Upper
|
||||
A78D ; Upper
|
||||
A790 ; Upper
|
||||
A7A0 ; Upper
|
||||
A7A2 ; Upper
|
||||
A7A4 ; Upper
|
||||
A7A6 ; Upper
|
||||
A7A8 ; Upper
|
||||
FF21..FF3A ; Upper
|
||||
10400..10427 ; Upper
|
||||
1D400..1D419 ; Upper
|
||||
|
@ -1601,7 +1635,7 @@ FF21..FF3A ; Upper
|
|||
1D790..1D7A8 ; Upper
|
||||
1D7CA ; Upper
|
||||
|
||||
# Total code points: 1500
|
||||
# Total code points: 1509
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1617,7 +1651,7 @@ FF21..FF3A ; Upper
|
|||
05D0..05EA ; OLetter
|
||||
05F0..05F2 ; OLetter
|
||||
05F3 ; OLetter
|
||||
0621..063F ; OLetter
|
||||
0620..063F ; OLetter
|
||||
0640 ; OLetter
|
||||
0641..064A ; OLetter
|
||||
066E..066F ; OLetter
|
||||
|
@ -1638,12 +1672,13 @@ FF21..FF3A ; Upper
|
|||
081A ; OLetter
|
||||
0824 ; OLetter
|
||||
0828 ; OLetter
|
||||
0840..0858 ; OLetter
|
||||
0904..0939 ; OLetter
|
||||
093D ; OLetter
|
||||
0950 ; OLetter
|
||||
0958..0961 ; OLetter
|
||||
0971 ; OLetter
|
||||
0972 ; OLetter
|
||||
0972..0977 ; OLetter
|
||||
0979..097F ; OLetter
|
||||
0985..098C ; OLetter
|
||||
098F..0990 ; OLetter
|
||||
|
@ -1712,11 +1747,12 @@ FF21..FF3A ; Upper
|
|||
0CBD ; OLetter
|
||||
0CDE ; OLetter
|
||||
0CE0..0CE1 ; OLetter
|
||||
0CF1..0CF2 ; OLetter
|
||||
0D05..0D0C ; OLetter
|
||||
0D0E..0D10 ; OLetter
|
||||
0D12..0D28 ; OLetter
|
||||
0D2A..0D39 ; OLetter
|
||||
0D12..0D3A ; OLetter
|
||||
0D3D ; OLetter
|
||||
0D4E ; OLetter
|
||||
0D60..0D61 ; OLetter
|
||||
0D7A..0D7F ; OLetter
|
||||
0D85..0D96 ; OLetter
|
||||
|
@ -1748,7 +1784,7 @@ FF21..FF3A ; Upper
|
|||
0F00 ; OLetter
|
||||
0F40..0F47 ; OLetter
|
||||
0F49..0F6C ; OLetter
|
||||
0F88..0F8B ; OLetter
|
||||
0F88..0F8C ; OLetter
|
||||
1000..102A ; OLetter
|
||||
103F ; OLetter
|
||||
1050..1055 ; OLetter
|
||||
|
@ -1810,6 +1846,7 @@ FF21..FF3A ; Upper
|
|||
1B45..1B4B ; OLetter
|
||||
1B83..1BA0 ; OLetter
|
||||
1BAE..1BAF ; OLetter
|
||||
1BC0..1BE5 ; OLetter
|
||||
1C00..1C23 ; OLetter
|
||||
1C4D..1C4F ; OLetter
|
||||
1C5A..1C77 ; OLetter
|
||||
|
@ -1818,6 +1855,7 @@ FF21..FF3A ; Upper
|
|||
1CEE..1CF1 ; OLetter
|
||||
2071 ; OLetter
|
||||
207F ; OLetter
|
||||
2095..209C ; OLetter
|
||||
2135..2138 ; OLetter
|
||||
2180..2182 ; OLetter
|
||||
2185..2188 ; OLetter
|
||||
|
@ -1849,7 +1887,7 @@ FF21..FF3A ; Upper
|
|||
30FF ; OLetter
|
||||
3105..312D ; OLetter
|
||||
3131..318E ; OLetter
|
||||
31A0..31B7 ; OLetter
|
||||
31A0..31BA ; OLetter
|
||||
31F0..31FF ; OLetter
|
||||
3400..4DB5 ; OLetter
|
||||
4E00..9FCB ; OLetter
|
||||
|
@ -1896,6 +1934,11 @@ AAC0 ; OLetter
|
|||
AAC2 ; OLetter
|
||||
AADB..AADC ; OLetter
|
||||
AADD ; OLetter
|
||||
AB01..AB06 ; OLetter
|
||||
AB09..AB0E ; OLetter
|
||||
AB11..AB16 ; OLetter
|
||||
AB20..AB26 ; OLetter
|
||||
AB28..AB2E ; OLetter
|
||||
ABC0..ABE2 ; OLetter
|
||||
AC00..D7A3 ; OLetter
|
||||
D7B0..D7C6 ; OLetter
|
||||
|
@ -1962,15 +2005,19 @@ FFDA..FFDC ; OLetter
|
|||
10B40..10B55 ; OLetter
|
||||
10B60..10B72 ; OLetter
|
||||
10C00..10C48 ; OLetter
|
||||
11003..11037 ; OLetter
|
||||
11083..110AF ; OLetter
|
||||
12000..1236E ; OLetter
|
||||
12400..12462 ; OLetter
|
||||
13000..1342E ; OLetter
|
||||
16800..16A38 ; OLetter
|
||||
1B000..1B001 ; OLetter
|
||||
20000..2A6D6 ; OLetter
|
||||
2A700..2B734 ; OLetter
|
||||
2B740..2B81D ; OLetter
|
||||
2F800..2FA1D ; OLetter
|
||||
|
||||
# Total code points: 96405
|
||||
# Total code points: 97369
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1996,7 +2043,7 @@ FFDA..FFDC ; OLetter
|
|||
17E0..17E9 ; Numeric
|
||||
1810..1819 ; Numeric
|
||||
1946..194F ; Numeric
|
||||
19D0..19DA ; Numeric
|
||||
19D0..19D9 ; Numeric
|
||||
1A80..1A89 ; Numeric
|
||||
1A90..1A99 ; Numeric
|
||||
1B50..1B59 ; Numeric
|
||||
|
@ -2010,9 +2057,10 @@ A9D0..A9D9 ; Numeric
|
|||
AA50..AA59 ; Numeric
|
||||
ABF0..ABF9 ; Numeric
|
||||
104A0..104A9 ; Numeric
|
||||
11066..1106F ; Numeric
|
||||
1D7CE..1D7FF ; Numeric
|
||||
|
||||
# Total code points: 403
|
||||
# Total code points: 412
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2039,9 +2087,11 @@ FF0E ; ATerm
|
|||
1362 ; STerm
|
||||
1367..1368 ; STerm
|
||||
166E ; STerm
|
||||
1735..1736 ; STerm
|
||||
1803 ; STerm
|
||||
1809 ; STerm
|
||||
1944..1945 ; STerm
|
||||
1AA8..1AAB ; STerm
|
||||
1B5A..1B5B ; STerm
|
||||
1B5E..1B5F ; STerm
|
||||
1C3B..1C3C ; STerm
|
||||
|
@ -2064,9 +2114,11 @@ FE56..FE57 ; STerm
|
|||
FF01 ; STerm
|
||||
FF1F ; STerm
|
||||
FF61 ; STerm
|
||||
10A56..10A57 ; STerm
|
||||
11047..11048 ; STerm
|
||||
110BE..110C1 ; STerm
|
||||
|
||||
# Total code points: 63
|
||||
# Total code points: 73
|
||||
|
||||
# ================================================
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# SpecialCasing-5.2.0.txt
|
||||
# Date: 2009-09-22, 23:25:59 GMT [MD]
|
||||
# SpecialCasing-6.0.0.txt
|
||||
# Date: 2010-05-18, 00:49:39 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,8 +1,8 @@
|
|||
# WordBreakProperty-5.2.0.txt
|
||||
# Date: 2009-07-12, 04:17:35 GMT [MD]
|
||||
# WordBreakProperty-6.0.0.txt
|
||||
# Date: 2010-08-19, 00:48:48 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -47,10 +47,9 @@
|
|||
05C4..05C5 ; Extend
|
||||
05C7 ; Extend
|
||||
0610..061A ; Extend
|
||||
064B..065E ; Extend
|
||||
064B..065F ; Extend
|
||||
0670 ; Extend
|
||||
06D6..06DC ; Extend
|
||||
06DE ; Extend
|
||||
06DF..06E4 ; Extend
|
||||
06E7..06E8 ; Extend
|
||||
06EA..06ED ; Extend
|
||||
|
@ -62,15 +61,18 @@
|
|||
081B..0823 ; Extend
|
||||
0825..0827 ; Extend
|
||||
0829..082D ; Extend
|
||||
0859..085B ; Extend
|
||||
0900..0902 ; Extend
|
||||
0903 ; Extend
|
||||
093A ; Extend
|
||||
093B ; Extend
|
||||
093C ; Extend
|
||||
093E..0940 ; Extend
|
||||
0941..0948 ; Extend
|
||||
0949..094C ; Extend
|
||||
094D ; Extend
|
||||
094E ; Extend
|
||||
0951..0955 ; Extend
|
||||
094E..094F ; Extend
|
||||
0951..0957 ; Extend
|
||||
0962..0963 ; Extend
|
||||
0981 ; Extend
|
||||
0982..0983 ; Extend
|
||||
|
@ -172,7 +174,7 @@
|
|||
0F7F ; Extend
|
||||
0F80..0F84 ; Extend
|
||||
0F86..0F87 ; Extend
|
||||
0F90..0F97 ; Extend
|
||||
0F8D..0F97 ; Extend
|
||||
0F99..0FBC ; Extend
|
||||
0FC6 ; Extend
|
||||
102B..102C ; Extend
|
||||
|
@ -197,7 +199,7 @@
|
|||
108F ; Extend
|
||||
109A..109C ; Extend
|
||||
109D ; Extend
|
||||
135F ; Extend
|
||||
135D..135F ; Extend
|
||||
1712..1714 ; Extend
|
||||
1732..1734 ; Extend
|
||||
1752..1753 ; Extend
|
||||
|
@ -253,6 +255,14 @@
|
|||
1BA6..1BA7 ; Extend
|
||||
1BA8..1BA9 ; Extend
|
||||
1BAA ; Extend
|
||||
1BE6 ; Extend
|
||||
1BE7 ; Extend
|
||||
1BE8..1BE9 ; Extend
|
||||
1BEA..1BEC ; Extend
|
||||
1BED ; Extend
|
||||
1BEE ; Extend
|
||||
1BEF..1BF1 ; Extend
|
||||
1BF2..1BF3 ; Extend
|
||||
1C24..1C2B ; Extend
|
||||
1C2C..1C33 ; Extend
|
||||
1C34..1C35 ; Extend
|
||||
|
@ -264,7 +274,7 @@
|
|||
1CED ; Extend
|
||||
1CF2 ; Extend
|
||||
1DC0..1DE6 ; Extend
|
||||
1DFD..1DFF ; Extend
|
||||
1DFC..1DFF ; Extend
|
||||
200C..200D ; Extend
|
||||
20D0..20DC ; Extend
|
||||
20DD..20E0 ; Extend
|
||||
|
@ -272,6 +282,7 @@
|
|||
20E2..20E4 ; Extend
|
||||
20E5..20F0 ; Extend
|
||||
2CEF..2CF1 ; Extend
|
||||
2D7F ; Extend
|
||||
2DE0..2DFF ; Extend
|
||||
302A..302F ; Extend
|
||||
3099..309A ; Extend
|
||||
|
@ -331,6 +342,10 @@ FF9E..FF9F ; Extend
|
|||
10A0C..10A0F ; Extend
|
||||
10A38..10A3A ; Extend
|
||||
10A3F ; Extend
|
||||
11000 ; Extend
|
||||
11001 ; Extend
|
||||
11002 ; Extend
|
||||
11038..11046 ; Extend
|
||||
11080..11081 ; Extend
|
||||
11082 ; Extend
|
||||
110B0..110B2 ; Extend
|
||||
|
@ -346,7 +361,7 @@ FF9E..FF9F ; Extend
|
|||
1D242..1D244 ; Extend
|
||||
E0100..E01EF ; Extend
|
||||
|
||||
# Total code points: 1455
|
||||
# Total code points: 1502
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -382,8 +397,9 @@ E0020..E007F ; Format
|
|||
FF66..FF6F ; Katakana
|
||||
FF70 ; Katakana
|
||||
FF71..FF9D ; Katakana
|
||||
1B000 ; Katakana
|
||||
|
||||
# Total code points: 309
|
||||
# Total code points: 310
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -417,14 +433,14 @@ FF71..FF9D ; Katakana
|
|||
038E..03A1 ; ALetter
|
||||
03A3..03F5 ; ALetter
|
||||
03F7..0481 ; ALetter
|
||||
048A..0525 ; ALetter
|
||||
048A..0527 ; ALetter
|
||||
0531..0556 ; ALetter
|
||||
0559 ; ALetter
|
||||
0561..0587 ; ALetter
|
||||
05D0..05EA ; ALetter
|
||||
05F0..05F2 ; ALetter
|
||||
05F3 ; ALetter
|
||||
0621..063F ; ALetter
|
||||
0620..063F ; ALetter
|
||||
0640 ; ALetter
|
||||
0641..064A ; ALetter
|
||||
066E..066F ; ALetter
|
||||
|
@ -445,12 +461,13 @@ FF71..FF9D ; Katakana
|
|||
081A ; ALetter
|
||||
0824 ; ALetter
|
||||
0828 ; ALetter
|
||||
0840..0858 ; ALetter
|
||||
0904..0939 ; ALetter
|
||||
093D ; ALetter
|
||||
0950 ; ALetter
|
||||
0958..0961 ; ALetter
|
||||
0971 ; ALetter
|
||||
0972 ; ALetter
|
||||
0972..0977 ; ALetter
|
||||
0979..097F ; ALetter
|
||||
0985..098C ; ALetter
|
||||
098F..0990 ; ALetter
|
||||
|
@ -519,11 +536,12 @@ FF71..FF9D ; Katakana
|
|||
0CBD ; ALetter
|
||||
0CDE ; ALetter
|
||||
0CE0..0CE1 ; ALetter
|
||||
0CF1..0CF2 ; ALetter
|
||||
0D05..0D0C ; ALetter
|
||||
0D0E..0D10 ; ALetter
|
||||
0D12..0D28 ; ALetter
|
||||
0D2A..0D39 ; ALetter
|
||||
0D12..0D3A ; ALetter
|
||||
0D3D ; ALetter
|
||||
0D4E ; ALetter
|
||||
0D60..0D61 ; ALetter
|
||||
0D7A..0D7F ; ALetter
|
||||
0D85..0D96 ; ALetter
|
||||
|
@ -534,7 +552,7 @@ FF71..FF9D ; Katakana
|
|||
0F00 ; ALetter
|
||||
0F40..0F47 ; ALetter
|
||||
0F49..0F6C ; ALetter
|
||||
0F88..0F8B ; ALetter
|
||||
0F88..0F8C ; ALetter
|
||||
10A0..10C5 ; ALetter
|
||||
10D0..10FA ; ALetter
|
||||
10FC ; ALetter
|
||||
|
@ -579,6 +597,7 @@ FF71..FF9D ; Katakana
|
|||
1B45..1B4B ; ALetter
|
||||
1B83..1BA0 ; ALetter
|
||||
1BAE..1BAF ; ALetter
|
||||
1BC0..1BE5 ; ALetter
|
||||
1C00..1C23 ; ALetter
|
||||
1C4D..1C4F ; ALetter
|
||||
1C5A..1C77 ; ALetter
|
||||
|
@ -612,7 +631,7 @@ FF71..FF9D ; Katakana
|
|||
1FF6..1FFC ; ALetter
|
||||
2071 ; ALetter
|
||||
207F ; ALetter
|
||||
2090..2094 ; ALetter
|
||||
2090..209C ; ALetter
|
||||
2102 ; ALetter
|
||||
2107 ; ALetter
|
||||
210A..2113 ; ALetter
|
||||
|
@ -656,7 +675,7 @@ FF71..FF9D ; Katakana
|
|||
303C ; ALetter
|
||||
3105..312D ; ALetter
|
||||
3131..318E ; ALetter
|
||||
31A0..31B7 ; ALetter
|
||||
31A0..31BA ; ALetter
|
||||
A000..A014 ; ALetter
|
||||
A015 ; ALetter
|
||||
A016..A48C ; ALetter
|
||||
|
@ -666,8 +685,7 @@ A500..A60B ; ALetter
|
|||
A60C ; ALetter
|
||||
A610..A61F ; ALetter
|
||||
A62A..A62B ; ALetter
|
||||
A640..A65F ; ALetter
|
||||
A662..A66D ; ALetter
|
||||
A640..A66D ; ALetter
|
||||
A66E ; ALetter
|
||||
A67F ; ALetter
|
||||
A680..A697 ; ALetter
|
||||
|
@ -678,7 +696,10 @@ A722..A76F ; ALetter
|
|||
A770 ; ALetter
|
||||
A771..A787 ; ALetter
|
||||
A788 ; ALetter
|
||||
A78B..A78C ; ALetter
|
||||
A78B..A78E ; ALetter
|
||||
A790..A791 ; ALetter
|
||||
A7A0..A7A9 ; ALetter
|
||||
A7FA ; ALetter
|
||||
A7FB..A801 ; ALetter
|
||||
A803..A805 ; ALetter
|
||||
A807..A80A ; ALetter
|
||||
|
@ -695,6 +716,11 @@ A9CF ; ALetter
|
|||
AA00..AA28 ; ALetter
|
||||
AA40..AA42 ; ALetter
|
||||
AA44..AA4B ; ALetter
|
||||
AB01..AB06 ; ALetter
|
||||
AB09..AB0E ; ALetter
|
||||
AB11..AB16 ; ALetter
|
||||
AB20..AB26 ; ALetter
|
||||
AB28..AB2E ; ALetter
|
||||
ABC0..ABE2 ; ALetter
|
||||
AC00..D7A3 ; ALetter
|
||||
D7B0..D7C6 ; ALetter
|
||||
|
@ -760,10 +786,12 @@ FFDA..FFDC ; ALetter
|
|||
10B40..10B55 ; ALetter
|
||||
10B60..10B72 ; ALetter
|
||||
10C00..10C48 ; ALetter
|
||||
11003..11037 ; ALetter
|
||||
11083..110AF ; ALetter
|
||||
12000..1236E ; ALetter
|
||||
12400..12462 ; ALetter
|
||||
13000..1342E ; ALetter
|
||||
16800..16A38 ; ALetter
|
||||
1D400..1D454 ; ALetter
|
||||
1D456..1D49C ; ALetter
|
||||
1D49E..1D49F ; ALetter
|
||||
|
@ -795,7 +823,7 @@ FFDA..FFDC ; ALetter
|
|||
1D7AA..1D7C2 ; ALetter
|
||||
1D7C4..1D7CB ; ALetter
|
||||
|
||||
# Total code points: 23694
|
||||
# Total code points: 24453
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -866,7 +894,7 @@ FF0E ; MidNumLet
|
|||
17E0..17E9 ; Numeric
|
||||
1810..1819 ; Numeric
|
||||
1946..194F ; Numeric
|
||||
19D0..19DA ; Numeric
|
||||
19D0..19D9 ; Numeric
|
||||
1A80..1A89 ; Numeric
|
||||
1A90..1A99 ; Numeric
|
||||
1B50..1B59 ; Numeric
|
||||
|
@ -880,9 +908,10 @@ A9D0..A9D9 ; Numeric
|
|||
AA50..AA59 ; Numeric
|
||||
ABF0..ABF9 ; Numeric
|
||||
104A0..104A9 ; Numeric
|
||||
11066..1106F ; Numeric
|
||||
1D7CE..1D7FF ; Numeric
|
||||
|
||||
# Total code points: 402
|
||||
# Total code points: 411
|
||||
|
||||
# ================================================
|
||||
|
||||
|
|
|
@ -13,6 +13,220 @@
|
|||
|
||||
---------------------------------------------------------------------------- ***
|
||||
|
||||
Unicode 6.0 update
|
||||
|
||||
*** related ICU Trac tickets
|
||||
|
||||
7264 Unicode 6.0 Update
|
||||
|
||||
*** Unicode version numbers
|
||||
- makedata.mak
|
||||
- uchar.h
|
||||
(configure.in & configure: have been modified to extract the version from uchar.h)
|
||||
- com.ibm.icu.util.VersionInfo
|
||||
|
||||
*** data files & enums & parser code
|
||||
|
||||
* file preparation
|
||||
|
||||
~/svn.icu/tools/trunk/src/unicode/c/genprops/misc$ ./ucdcopy.py ~/uni60/20100720/ucd ~/uni60/processed
|
||||
- This now prepares both unidata and testdata files in respective output subfolders.
|
||||
|
||||
* PropertyAliases.txt changes
|
||||
- new Script_Extensions property defined in the new ScriptExtensions.txt file
|
||||
but not listed in PropertyAliases.txt; reported to unicode.org;
|
||||
-> added to tools/trunk/src/unicode/c/genpname/SyntheticPropertyAliases.txt
|
||||
scx; Script_Extensions
|
||||
-> uchar.h with new UProperty section
|
||||
-> com.ibm.icu.lang.UProperty, parallel with uchar.h
|
||||
|
||||
* PropertyValueAliases.txt changes
|
||||
- 12 new block names:
|
||||
Alchemical_Symbols
|
||||
Bamum_Supplement
|
||||
Batak
|
||||
Brahmi
|
||||
CJK_Unified_Ideographs_Extension_D
|
||||
Emoticons
|
||||
Ethiopic_Extended_A
|
||||
Kana_Supplement
|
||||
Mandaic
|
||||
Miscellaneous_Symbols_And_Pictographs
|
||||
Playing_Cards
|
||||
Transport_And_Map_Symbols
|
||||
-> add to uchar.h
|
||||
-> add to UCharacter.UnicodeBlock
|
||||
Eclipse find UBLOCK_([^ ]+) = [0-9]+, (/.+)
|
||||
replace public static final UnicodeBlock \1 = new UnicodeBlock("\1", \1_ID); \2
|
||||
- Joining_Group (jg) values:
|
||||
Teh_Marbuta_Goal becomes the new canonical value for the old Hamza_On_Heh_Goal which becomes an alias
|
||||
-> uchar.h & UCharacter.JoiningGroup
|
||||
- 3 new scripts:
|
||||
sc ; Batk ; Batak
|
||||
sc ; Brah ; Brahmi
|
||||
sc ; Mand ; Mandaic
|
||||
-> remove these from SyntheticPropertyValueAliases.txt
|
||||
-> add alias USCRIPT_MANDAIC to USCRIPT_MANDAEAN
|
||||
-> fix expectedLong names in cucdapi.c/TestUScriptCodeAPI()
|
||||
and in com.ibm.icu.dev.test.lang.TestUScript.java
|
||||
- 13 new script codes from ISO 15924 http://www.unicode.org/iso15924/codechanges.html
|
||||
(added 2009-11-11..2010-07-18)
|
||||
Bass 259 Bassa Vah
|
||||
Dupl 755 Duployan shorthand
|
||||
Elba 226 Elbasan
|
||||
Gran 343 Grantha
|
||||
Kpel 436 Kpelle
|
||||
Loma 437 Loma
|
||||
Mend 438 Mende
|
||||
Merc 101 Meroitic Cursive
|
||||
Narb 106 Old North Arabian
|
||||
Nbat 159 Nabataean
|
||||
Palm 126 Palmyrene
|
||||
Sind 318 Sindhi
|
||||
Wara 262 Warang Citi
|
||||
-> uscript.h
|
||||
-> com.ibm.icu.lang.UScript
|
||||
find USCRIPT_([^ ]+) *= ([0-9]+),(.+)
|
||||
replace public static final int \1 = \2;\3
|
||||
-> SyntheticPropertyValueAliases.txt
|
||||
-> add to expectedLong and expectedShort names in cintltst/cucdapi.c/TestUScriptCodeAPI()
|
||||
and in com.ibm.icu.dev.test.lang.TestUScript.java
|
||||
- ISO 15924 name change
|
||||
Mero 100 Meroitic Hieroglyphs (was Meroitic)
|
||||
-> add new alias USCRIPT_MEROITIC_HIEROGLYPHS to USCRIPT_MEROITIC
|
||||
- property value alias added for Cham, was already moved out of SyntheticPropertyValueAliases.txt
|
||||
|
||||
* UnicodeData.txt changes
|
||||
- new CJK block:
|
||||
2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;;
|
||||
2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;;
|
||||
-> add to tools/trunk/src/unicode/c/gennames/gennames.c, with new ucdVersion
|
||||
|
||||
* build Unicode tools using CMake+make
|
||||
|
||||
* run genpname/preparse.pl (on Linux)
|
||||
+ cd ~/svn.icu/tools/trunk/src/unicode/c/genpname
|
||||
+ make sure that data.h is writable
|
||||
+ perl preparse.pl ~/svn.icu/trunk/src > out.txt
|
||||
+ preparse.pl shows no errors, out.txt Info and Warning lines look ok
|
||||
|
||||
* rebuild Unicode tools (at least genpname) using make
|
||||
- You might first need to "make install" ICU so that the tools build can pick
|
||||
up the new definitions from the installed header files.
|
||||
|
||||
* run genpname
|
||||
- ~/svn.icu/tools/trunk/bld/unicode$ c/genpname/genpname -v -d ~/svn.icu/trunk/src/source/data/in
|
||||
- rebuild ICU & tools
|
||||
|
||||
* update source/data/unidata/norm2/nfkc_cf.txt
|
||||
- follow the instructions in nfkc_cf.txt for updating it from DerivedNormalizationProps.txt
|
||||
|
||||
* update source/data/unidata/norm2/uts46.txt
|
||||
- download http://www.unicode.org/Public/idna/6.0.0/IdnaMappingTable.txt
|
||||
to ~/svn.icu/tools/trunk/src/unicode/py
|
||||
- adjust idna2nrm.py to handle new disallowed_STD3_valid and disallowed_STD3_mapped values
|
||||
- ~/svn.icu/tools/trunk/src/unicode/py$ ./idna2nrm.py
|
||||
- ~/svn.icu/tools/trunk/src/unicode/py$ cp uts46.txt ~/svn.icu/trunk/src/source/data/unidata/norm2
|
||||
|
||||
* update uts46test.cpp and UTS46Test.java if there are new characters that are equivalent to
|
||||
sequences with non-LDH ASCII (that is, their decompositions contain '=' or similar)
|
||||
- grep IdnaMappingTable.txt or uts46.txt for "disallowed_STD3_valid" on non-ASCII characters
|
||||
- Unicode 6.0: U+2260, U+226E, U+226F
|
||||
|
||||
* generate core properties data files
|
||||
- ~/svn.icu/tools/trunk/src/unicode$ ./makeprops.sh ~/svn.icu/trunk/src ~/svn.icu/trunk/bld
|
||||
- rebuild ICU & tools
|
||||
- run makeuca.sh so that genuca picks up the new nfc.nrm:
|
||||
~/svn.icu/tools/trunk/src/unicode$ ./makeuca.sh ~/svn.icu/trunk/src ~/svn.icu/trunk/bld
|
||||
- rebuild ICU & tools
|
||||
|
||||
* implement new Script_Extensions property (provisional)
|
||||
- parser & generator: genprops & uprops.icu
|
||||
- uscript.h, uprops.h, uchar.c, uniset_props.cpp and others, plus cintltst/cucdapi.c & intltest/usettest.cpp
|
||||
- UScript.java, UCharacterProperty.java, UnicodeSet.java, TestUScript.java, UnicodeSetTest.java
|
||||
|
||||
* switch ubidi.icu, ucase.icu and uprops.icu from UTrie to UTrie2
|
||||
- (one-time change)
|
||||
- genbidi/gencase/genprops tools changes
|
||||
- re-run makeprops.sh (see above)
|
||||
- UCharacterProperty.java, UCharacterTypeIterator.java,
|
||||
UBiDiProps.java, UCaseProps.java, and several others with minor changes;
|
||||
UCharacterPropertyReader.java deleted and its code folded into UCharacterProperty.java
|
||||
|
||||
* update Java data files
|
||||
- refresh just the UCD-related files, just to be safe
|
||||
- see (ICU4C)/source/data/icu4j-readme.txt
|
||||
- mkdir /tmp/icu4j
|
||||
- ~/svn.icu/trunk/bld$ make ICU4J_ROOT=/tmp/icu4j icu4j-data-install
|
||||
output:
|
||||
...
|
||||
Unicode .icu files built to ./out/build/icudt45l
|
||||
mkdir -p ./out/icu4j/com/ibm/icu/impl/data/icudt45b
|
||||
echo ubidi.icu ucase.icu uprops.icu > ./out/icu4j/add.txt
|
||||
LD_LIBRARY_PATH=../lib:../stubdata:../tools/ctestfw:$LD_LIBRARY_PATH ../bin/icupkg ./out/tmp/icudt45l.dat ./out/icu4j/icudt45b.dat -a ./out/icu4j/add.txt -s ./out/build/icudt45l -x '*' -tb -d ./out/icu4j/com/ibm/icu/impl/data/icudt45b
|
||||
jar cf ./out/icu4j/icudata.jar -C ./out/icu4j com/ibm/icu/impl/data/icudt45b
|
||||
mkdir -p /tmp/icu4j/main/shared/data
|
||||
cp ./out/icu4j/icudata.jar /tmp/icu4j/main/shared/data
|
||||
- copy the big-endian Unicode data files to another location,
|
||||
separate from the other data files
|
||||
mkdir -p /tmp/icu4j/com/ibm/icu/impl/data/icudt45b/coll
|
||||
mkdir -p /tmp/icu4j/com/ibm/icu/impl/data/icudt45b/brkitr
|
||||
~/svn.icu/trunk/bld/data/out/icu4j$ cp com/ibm/icu/impl/data/icudt45b/*.icu /tmp/icu4j/com/ibm/icu/impl/data/icudt45b
|
||||
~/svn.icu/trunk/bld/data/out/icu4j$ rm /tmp/icu4j/com/ibm/icu/impl/data/icudt45b/cnvalias.icu
|
||||
~/svn.icu/trunk/bld/data/out/icu4j$ cp com/ibm/icu/impl/data/icudt45b/*.nrm /tmp/icu4j/com/ibm/icu/impl/data/icudt45b
|
||||
~/svn.icu/trunk/bld/data/out/icu4j$ cp com/ibm/icu/impl/data/icudt45b/coll/*.icu /tmp/icu4j/com/ibm/icu/impl/data/icudt45b/coll
|
||||
~/svn.icu/trunk/bld/data/out/icu4j$ cp com/ibm/icu/impl/data/icudt45b/brkitr/* /tmp/icu4j/com/ibm/icu/impl/data/icudt45b/brkitr
|
||||
- refresh ICU4J
|
||||
~/svn.icu/trunk/bld/data/out/icu4j$ jar uf ~/svn.icu4j/trunk/src/main/shared/data/icudata.jar -C /tmp/icu4j com/ibm/icu/impl/data/icudt45b
|
||||
|
||||
* refresh Java test .txt files
|
||||
- copy new .txt files into ICU4J's main/tests/core/src/com/ibm/icu/dev/data/unicode
|
||||
|
||||
* un-hardcode normalization skippable (NF*_Inert) test data
|
||||
- removes one manual step from the Unicode upgrade, and removes dependency on one of Mark's tools
|
||||
|
||||
* copy updated break iterator test files
|
||||
- now handled by early ucdcopy.py and
|
||||
copying the uni60/processed/testdata files to ~/svn.icu/trunk/src/source/test/testdata
|
||||
(old instructions:
|
||||
copy from (Unicode 6.0)/ucd/auxiliary/*BreakTest-6....txt
|
||||
to ~/svn.icu/trunk/src/source/test/testdata)
|
||||
- they are not used in ICU4J
|
||||
|
||||
* UCA
|
||||
|
||||
- get output from Mark's tools; look in
|
||||
http://www.unicode.org/~book/incoming/mark/uca6.0.0/
|
||||
http://www.macchiato.com/unicode/utc/additional-uca-files
|
||||
http://www.unicode.org/Public/UCA/6.0.0/
|
||||
http://www.unicode.org/~mdavis/uca/
|
||||
- update source/data/unidata/FractionalUCA.txt with FractionalUCA_SHORT.txt
|
||||
- update source/data/unidata/UCARules.txt with UCA_Rules_SHORT.txt
|
||||
- run makeuca.sh:
|
||||
~/svn.icu/tools/trunk/src/unicode$ ./makeuca.sh ~/svn.icu/trunk/src ~/svn.icu/trunk/bld
|
||||
- rebuild ICU4C
|
||||
- refresh ICU4J collation data:
|
||||
(subset of instructions above for properties data refresh, except copies all coll/*)
|
||||
~/svn.icu/trunk/bld$ make ICU4J_ROOT=/tmp/icu4j icu4j-data-install
|
||||
mkdir -p /tmp/icu4j/com/ibm/icu/impl/data/icudt45b/coll
|
||||
~/svn.icu/trunk/bld/data/out/icu4j$ cp com/ibm/icu/impl/data/icudt45b/coll/* /tmp/icu4j/com/ibm/icu/impl/data/icudt45b/coll
|
||||
~/svn.icu/trunk/bld/data/out/icu4j$ jar uf ~/svn.icu4j/trunk/src/main/shared/data/icudata.jar -C /tmp/icu4j com/ibm/icu/impl/data/icudt45b
|
||||
- update (ICU)/source/test/testdata/CollationTest_*.txt
|
||||
and (ICU4J)/main/tests/collate/src/com/ibm/icu/dev/data/CollationTest_*.txt
|
||||
with output from Mark's Unicode tools
|
||||
- run all tests with the *_SHORT.txt or the full files (the full ones have comments)
|
||||
- note on intltest: if collate/UCAConformanceTest fails, then
|
||||
utility/MultithreadTest/TestCollators will fail as well;
|
||||
fix the conformance test before looking into the multi-thread test
|
||||
|
||||
* When refreshing all of ICU4J data from ICU4C
|
||||
- ~/svn.icu/trunk/bld$ make ICU4J_ROOT=/tmp/icu4j icu4j-data-install
|
||||
- cp /tmp/icu4j/main/shared/data/icudata.jar ~/svn.icu4j/trunk/src/main/shared/data
|
||||
or
|
||||
- ~/svn.icu/trunk/bld$ make ICU4J_ROOT=~/svn.icu4j/trunk/src icu4j-data-install
|
||||
|
||||
---------------------------------------------------------------------------- ***
|
||||
|
||||
Unicode 5.2 update
|
||||
|
||||
*** related ICU Trac tickets
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
# file name: nfc.txt
|
||||
#
|
||||
# machine-generated on: 2009-11-30
|
||||
# machine-generated on: 2010-07-23
|
||||
#
|
||||
|
||||
# Canonical_Combining_Class (ccc) values
|
||||
|
@ -88,6 +88,7 @@
|
|||
0657..065B:230
|
||||
065C:220
|
||||
065D..065E:230
|
||||
065F:220
|
||||
0670:35
|
||||
06D6..06DC:230
|
||||
06DF..06E2:230
|
||||
|
@ -124,6 +125,7 @@
|
|||
081B..0823:230
|
||||
0825..0827:230
|
||||
0829..082D:230
|
||||
0859..085B:220
|
||||
093C:7
|
||||
094D:9
|
||||
0951:230
|
||||
|
@ -166,7 +168,7 @@
|
|||
1037:7
|
||||
1039..103A:9
|
||||
108D:220
|
||||
135F:230
|
||||
135D..135F:230
|
||||
1714:9
|
||||
1734:9
|
||||
17D2:9
|
||||
|
@ -186,6 +188,8 @@
|
|||
1B6C:220
|
||||
1B6D..1B73:230
|
||||
1BAA:9
|
||||
1BE6:7
|
||||
1BF2..1BF3:9
|
||||
1C37:7
|
||||
1CD0..1CD2:230
|
||||
1CD4:1
|
||||
|
@ -205,6 +209,7 @@
|
|||
1DCF:220
|
||||
1DD0:202
|
||||
1DD1..1DE6:230
|
||||
1DFC:233
|
||||
1DFD:220
|
||||
1DFE:230
|
||||
1DFF:220
|
||||
|
@ -222,6 +227,7 @@
|
|||
20EC..20EF:220
|
||||
20F0:230
|
||||
2CEF..2CF1:230
|
||||
2D7F:9
|
||||
2DE0..2DFF:230
|
||||
302A:218
|
||||
302B:228
|
||||
|
@ -255,6 +261,7 @@ FE20..FE26:230
|
|||
10A39:1
|
||||
10A3A:220
|
||||
10A3F:9
|
||||
11046:9
|
||||
110B9:9
|
||||
110BA:7
|
||||
1D165..1D166:216
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
# file name: nfkc.txt
|
||||
#
|
||||
# machine-generated on: 2009-11-30
|
||||
# machine-generated on: 2010-07-23
|
||||
#
|
||||
|
||||
# Canonical_Combining_Class (ccc) values
|
||||
|
@ -88,6 +88,7 @@
|
|||
0657..065B:230
|
||||
065C:220
|
||||
065D..065E:230
|
||||
065F:220
|
||||
0670:35
|
||||
06D6..06DC:230
|
||||
06DF..06E2:230
|
||||
|
@ -124,6 +125,7 @@
|
|||
081B..0823:230
|
||||
0825..0827:230
|
||||
0829..082D:230
|
||||
0859..085B:220
|
||||
093C:7
|
||||
094D:9
|
||||
0951:230
|
||||
|
@ -166,7 +168,7 @@
|
|||
1037:7
|
||||
1039..103A:9
|
||||
108D:220
|
||||
135F:230
|
||||
135D..135F:230
|
||||
1714:9
|
||||
1734:9
|
||||
17D2:9
|
||||
|
@ -186,6 +188,8 @@
|
|||
1B6C:220
|
||||
1B6D..1B73:230
|
||||
1BAA:9
|
||||
1BE6:7
|
||||
1BF2..1BF3:9
|
||||
1C37:7
|
||||
1CD0..1CD2:230
|
||||
1CD4:1
|
||||
|
@ -205,6 +209,7 @@
|
|||
1DCF:220
|
||||
1DD0:202
|
||||
1DD1..1DE6:230
|
||||
1DFC:233
|
||||
1DFD:220
|
||||
1DFE:230
|
||||
1DFF:220
|
||||
|
@ -222,6 +227,7 @@
|
|||
20EC..20EF:220
|
||||
20F0:230
|
||||
2CEF..2CF1:230
|
||||
2D7F:9
|
||||
2DE0..2DFF:230
|
||||
302A:218
|
||||
302B:228
|
||||
|
@ -255,6 +261,7 @@ FE20..FE26:230
|
|||
10A39:1
|
||||
10A3A:220
|
||||
10A3F:9
|
||||
11046:9
|
||||
110B9:9
|
||||
110BA:7
|
||||
1D165..1D166:216
|
||||
|
@ -1400,6 +1407,14 @@ FE20..FE26:230
|
|||
2092>006F
|
||||
2093>0078
|
||||
2094>0259
|
||||
2095>0068
|
||||
2096>006B
|
||||
2097>006C
|
||||
2098>006D
|
||||
2099>006E
|
||||
209A>0070
|
||||
209B>0073
|
||||
209C>0074
|
||||
20A8>0052 0073
|
||||
2100>0061 002F 0063
|
||||
2101>0061 002F 0073
|
||||
|
@ -5187,18 +5202,42 @@ FFEE>25CB
|
|||
1F12C>0052
|
||||
1F12D>0043 0044
|
||||
1F12E>0057 005A
|
||||
1F130>0041
|
||||
1F131>0042
|
||||
1F132>0043
|
||||
1F133>0044
|
||||
1F134>0045
|
||||
1F135>0046
|
||||
1F136>0047
|
||||
1F137>0048
|
||||
1F138>0049
|
||||
1F139>004A
|
||||
1F13A>004B
|
||||
1F13B>004C
|
||||
1F13C>004D
|
||||
1F13D>004E
|
||||
1F13E>004F
|
||||
1F13F>0050
|
||||
1F140>0051
|
||||
1F141>0052
|
||||
1F142>0053
|
||||
1F143>0054
|
||||
1F144>0055
|
||||
1F145>0056
|
||||
1F146>0057
|
||||
1F147>0058
|
||||
1F148>0059
|
||||
1F149>005A
|
||||
1F14A>0048 0056
|
||||
1F14B>004D 0056
|
||||
1F14C>0053 0044
|
||||
1F14D>0053 0053
|
||||
1F14E>0050 0050 0056
|
||||
1F14F>0057 0043
|
||||
1F190>0044 004A
|
||||
1F200>307B 304B
|
||||
1F201>30B3 30B3
|
||||
1F202>30B5
|
||||
1F210>624B
|
||||
1F211>5B57
|
||||
1F212>53CC
|
||||
|
@ -5233,6 +5272,15 @@ FFEE>25CB
|
|||
1F22F>6307
|
||||
1F230>8D70
|
||||
1F231>6253
|
||||
1F232>7981
|
||||
1F233>7A7A
|
||||
1F234>5408
|
||||
1F235>6E80
|
||||
1F236>6709
|
||||
1F237>6708
|
||||
1F238>7533
|
||||
1F239>5272
|
||||
1F23A>55B6
|
||||
1F240>3014 672C 3015
|
||||
1F241>3014 4E09 3015
|
||||
1F242>3014 4E8C 3015
|
||||
|
@ -5242,6 +5290,8 @@ FFEE>25CB
|
|||
1F246>3014 76D7 3015
|
||||
1F247>3014 52DD 3015
|
||||
1F248>3014 6557 3015
|
||||
1F250>5F97
|
||||
1F251>53EF
|
||||
2F800>4E3D
|
||||
2F801>4E38
|
||||
2F802>4E41
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
# Extracted from:
|
||||
# DerivedNormalizationProps-5.2.0.txt
|
||||
# Date: 2009-08-26, 18:18:50 GMT [MD]
|
||||
# DerivedNormalizationProps-6.0.0.txt
|
||||
# Date: 2010-05-20, 15:14:12 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
|||
# WARNING: Application to STRINGS must apply NFC after mapping each character, because characters may interact.
|
||||
# For more information, see [http://www.unicode.org/reports/tr44/]
|
||||
# Omitted code points are unchanged by this mapping.
|
||||
# @missing: 0000..10FFFF><code point>
|
||||
# @missing: 0000..10FFFF; NFKC_CF; <code point>
|
||||
|
||||
# All code points not explicitly listed for NFKC_Casefold
|
||||
# have the value <codepoint>.
|
||||
|
@ -511,6 +511,7 @@
|
|||
0520>0521
|
||||
0522>0523
|
||||
0524>0525
|
||||
0526>0527
|
||||
0531>0561
|
||||
0532>0562
|
||||
0533>0563
|
||||
|
@ -1077,6 +1078,14 @@
|
|||
2092>006F
|
||||
2093>0078
|
||||
2094>0259
|
||||
2095>0068
|
||||
2096>006B
|
||||
2097>006C
|
||||
2098>006D
|
||||
2099>006E
|
||||
209A>0070
|
||||
209B>0073
|
||||
209C>0074
|
||||
20A8>0072 0073
|
||||
2100>0061 002F 0063
|
||||
2101>0061 002F 0073
|
||||
|
@ -2292,6 +2301,7 @@ A658>A659
|
|||
A65A>A65B
|
||||
A65C>A65D
|
||||
A65E>A65F
|
||||
A660>A661
|
||||
A662>A663
|
||||
A664>A665
|
||||
A666>A667
|
||||
|
@ -2358,6 +2368,13 @@ A782>A783
|
|||
A784>A785
|
||||
A786>A787
|
||||
A78B>A78C
|
||||
A78D>0265
|
||||
A790>A791
|
||||
A7A0>A7A1
|
||||
A7A2>A7A3
|
||||
A7A4>A7A5
|
||||
A7A6>A7A7
|
||||
A7A8>A7A9
|
||||
F900>8C48
|
||||
F901>66F4
|
||||
F902>8ECA
|
||||
|
@ -4778,18 +4795,42 @@ FFF0..FFF8>
|
|||
1F12C>0072
|
||||
1F12D>0063 0064
|
||||
1F12E>0077 007A
|
||||
1F130>0061
|
||||
1F131>0062
|
||||
1F132>0063
|
||||
1F133>0064
|
||||
1F134>0065
|
||||
1F135>0066
|
||||
1F136>0067
|
||||
1F137>0068
|
||||
1F138>0069
|
||||
1F139>006A
|
||||
1F13A>006B
|
||||
1F13B>006C
|
||||
1F13C>006D
|
||||
1F13D>006E
|
||||
1F13E>006F
|
||||
1F13F>0070
|
||||
1F140>0071
|
||||
1F141>0072
|
||||
1F142>0073
|
||||
1F143>0074
|
||||
1F144>0075
|
||||
1F145>0076
|
||||
1F146>0077
|
||||
1F147>0078
|
||||
1F148>0079
|
||||
1F149>007A
|
||||
1F14A>0068 0076
|
||||
1F14B>006D 0076
|
||||
1F14C>0073 0064
|
||||
1F14D>0073 0073
|
||||
1F14E>0070 0070 0076
|
||||
1F14F>0077 0063
|
||||
1F190>0064 006A
|
||||
1F200>307B 304B
|
||||
1F201>30B3 30B3
|
||||
1F202>30B5
|
||||
1F210>624B
|
||||
1F211>5B57
|
||||
1F212>53CC
|
||||
|
@ -4824,6 +4865,15 @@ FFF0..FFF8>
|
|||
1F22F>6307
|
||||
1F230>8D70
|
||||
1F231>6253
|
||||
1F232>7981
|
||||
1F233>7A7A
|
||||
1F234>5408
|
||||
1F235>6E80
|
||||
1F236>6709
|
||||
1F237>6708
|
||||
1F238>7533
|
||||
1F239>5272
|
||||
1F23A>55B6
|
||||
1F240>3014 672C 3015
|
||||
1F241>3014 4E09 3015
|
||||
1F242>3014 4E8C 3015
|
||||
|
@ -4833,6 +4883,8 @@ FFF0..FFF8>
|
|||
1F246>3014 76D7 3015
|
||||
1F247>3014 52DD 3015
|
||||
1F248>3014 6557 3015
|
||||
1F250>5F97
|
||||
1F251>53EF
|
||||
2F800>4E3D
|
||||
2F801>4E38
|
||||
2F802>4E41
|
||||
|
@ -5373,4 +5425,4 @@ E0080..E00FF>
|
|||
E0100..E01EF>
|
||||
E01F0..E0FFF>
|
||||
|
||||
# Total code points: 9740
|
||||
# Total code points: 9792
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2008, International Business Machines
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -88,12 +88,9 @@ UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator)
|
|||
*/
|
||||
CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) :
|
||||
Transliterator(id, 0),
|
||||
fCsp(NULL),
|
||||
fCsp(ucase_getSingleton()),
|
||||
fMap(map)
|
||||
{
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
fCsp = ucase_getSingleton(&errorCode); // expect to get NULL if failure
|
||||
|
||||
// TODO test incremental mode with context-sensitive text (e.g. greek sigma)
|
||||
// TODO need to call setMaximumContextLength()?!
|
||||
}
|
||||
|
|
|
@ -3903,8 +3903,8 @@ GC_Done:
|
|||
// Test input against a literal string.
|
||||
// Strings require two slots in the compiled pattern, one for the
|
||||
// offset to the string text, and one for the length.
|
||||
const UCaseProps *csp = ucase_getSingleton(&status);
|
||||
if (U_SUCCESS(status)) {
|
||||
const UCaseProps *csp = ucase_getSingleton();
|
||||
{
|
||||
int32_t stringStartIdx, stringLen;
|
||||
stringStartIdx = opValue;
|
||||
|
||||
|
@ -5580,8 +5580,8 @@ GC_Done:
|
|||
// Test input against a literal string.
|
||||
// Strings require two slots in the compiled pattern, one for the
|
||||
// offset to the string text, and one for the length.
|
||||
const UCaseProps *csp = ucase_getSingleton(&status);
|
||||
if (U_SUCCESS(status)) {
|
||||
const UCaseProps *csp = ucase_getSingleton();
|
||||
{
|
||||
int32_t stringStartIdx, stringLen;
|
||||
stringStartIdx = opValue;
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "capitst.h"
|
||||
#include "ccolltst.h"
|
||||
#include "putilimp.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
||||
static void TestAttribute(void);
|
||||
|
@ -279,7 +280,7 @@ void TestProperty()
|
|||
{
|
||||
UCollator *col, *ruled;
|
||||
UChar *disName;
|
||||
int32_t len = 0, i = 0;
|
||||
int32_t len = 0;
|
||||
UChar *source, *target;
|
||||
int32_t tempLength;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
@ -293,10 +294,10 @@ void TestProperty()
|
|||
* needs to be adjusted.
|
||||
* Same in intltest/apicoll.cpp.
|
||||
*/
|
||||
UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A};
|
||||
UVersionInfo currUCAVersionArray = {5, 2, 0, 0};
|
||||
UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A}; /* from ICU 4.4/UCA 5.2 */
|
||||
UVersionInfo versionArray = {0, 0, 0, 0};
|
||||
UVersionInfo versionUCAArray = {0, 0, 0, 0};
|
||||
UVersionInfo versionUCDArray = {0, 0, 0, 0};
|
||||
|
||||
log_verbose("The property tests begin : \n");
|
||||
log_verbose("Test ucol_strcoll : \n");
|
||||
|
@ -307,21 +308,23 @@ void TestProperty()
|
|||
}
|
||||
|
||||
ucol_getVersion(col, versionArray);
|
||||
for (i=0; i<4; ++i) {
|
||||
if (versionArray[i] != currVersionArray[i]) {
|
||||
log_err("Testing ucol_getVersion() - unexpected result: %hu.%hu.%hu.%hu\n",
|
||||
versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
|
||||
break;
|
||||
}
|
||||
/* Check for a version greater than some value rather than equality
|
||||
* so that we need not update the expected version each time. */
|
||||
if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
|
||||
log_err("Testing ucol_getVersion() - unexpected result: %02x.%02x.%02x.%02x\n",
|
||||
versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
|
||||
} else {
|
||||
log_verbose("ucol_getVersion() result: %02x.%02x.%02x.%02x\n",
|
||||
versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
|
||||
}
|
||||
|
||||
/* Assume that the UCD and UCA versions are the same,
|
||||
* rather than hardcoding (and updating each time) a particular UCA version. */
|
||||
u_getUnicodeVersion(versionUCDArray);
|
||||
ucol_getUCAVersion(col, versionUCAArray);
|
||||
for (i=0; i<4; ++i) {
|
||||
if (versionUCAArray[i] != currUCAVersionArray[i]) {
|
||||
log_err("Testing ucol_getUCAVersion() - unexpected result: %hu.%hu.%hu.%hu\n",
|
||||
versionUCAArray[0], versionUCAArray[1], versionUCAArray[2], versionUCAArray[3]);
|
||||
break;
|
||||
}
|
||||
if (0!=uprv_memcmp(versionUCAArray, versionUCDArray, 4)) {
|
||||
log_err("Testing ucol_getUCAVersion() - unexpected result: %hu.%hu.%hu.%hu\n",
|
||||
versionUCAArray[0], versionUCAArray[1], versionUCAArray[2], versionUCAArray[3]);
|
||||
}
|
||||
|
||||
source=(UChar*)malloc(sizeof(UChar) * 12);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/********************************************************************
|
||||
* Copyright (c) 1997-2009, International Business Machines
|
||||
* Copyright (c) 1997-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -293,7 +293,7 @@ void TestUScriptCodeAPI(){
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
/*
|
||||
* These script codes were originally added to ICU pre-3.6, so that ICU would
|
||||
|
@ -304,9 +304,9 @@ void TestUScriptCodeAPI(){
|
|||
* Whenever this happens, the long script names here need to be updated.
|
||||
*/
|
||||
static const char* expectedLong[] = {
|
||||
"Balinese", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
|
||||
"Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
|
||||
"Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg",
|
||||
"Lepcha", "Lina", "Mand", "Maya", "Mero", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
|
||||
"Lepcha", "Lina", "Mandaic", "Maya", "Mero", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
|
||||
"Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
|
||||
"Zxxx", "Unknown",
|
||||
"Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
|
||||
|
@ -317,6 +317,9 @@ void TestUScriptCodeAPI(){
|
|||
"Zmth", "Zsym",
|
||||
/* new in ICU 4.4 */
|
||||
"Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
|
||||
/* new in ICU 4.6 */
|
||||
"Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
|
||||
"Narb", "Nbat", "Palm", "Sind", "Wara",
|
||||
};
|
||||
static const char* expectedShort[] = {
|
||||
"Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
|
||||
|
@ -332,6 +335,9 @@ void TestUScriptCodeAPI(){
|
|||
"Zmth", "Zsym",
|
||||
/* new in ICU 4.4 */
|
||||
"Bamu", "Lisu", "Nkgb", "Sarb",
|
||||
/* new in ICU 4.6 */
|
||||
"Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
|
||||
"Narb", "Nbat", "Palm", "Sind", "Wara",
|
||||
};
|
||||
int32_t j = 0;
|
||||
if(LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
|
||||
|
@ -364,6 +370,123 @@ void TestUScriptCodeAPI(){
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
/* test characters which have Script_Extensions */
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
if(!(
|
||||
USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
|
||||
USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
|
||||
USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
|
||||
U_FAILURE(errorCode)
|
||||
) {
|
||||
log_err("uscript_getScript(character with Script_Extensions) failed\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestHasScript() {
|
||||
if(!(
|
||||
!uscript_hasScript(0x063f, USCRIPT_COMMON) &&
|
||||
uscript_hasScript(0x063f, USCRIPT_ARABIC) && /* main Script value */
|
||||
!uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
|
||||
!uscript_hasScript(0x063f, USCRIPT_THAANA))
|
||||
) {
|
||||
log_err("uscript_hasScript(U+063F, ...) is wrong\n");
|
||||
}
|
||||
if(!(
|
||||
uscript_hasScript(0x0640, USCRIPT_COMMON) && /* main Script value */
|
||||
uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
|
||||
uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
|
||||
!uscript_hasScript(0x0640, USCRIPT_THAANA))
|
||||
) {
|
||||
log_err("uscript_hasScript(U+0640, ...) is wrong\n");
|
||||
}
|
||||
if(!(
|
||||
uscript_hasScript(0x0650, USCRIPT_INHERITED) && /* main Script value */
|
||||
uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
|
||||
uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
|
||||
!uscript_hasScript(0x0650, USCRIPT_THAANA))
|
||||
) {
|
||||
log_err("uscript_hasScript(U+0650, ...) is wrong\n");
|
||||
}
|
||||
if(!(
|
||||
uscript_hasScript(0x0660, USCRIPT_COMMON) && /* main Script value */
|
||||
uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
|
||||
!uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
|
||||
uscript_hasScript(0x0660, USCRIPT_THAANA))
|
||||
) {
|
||||
log_err("uscript_hasScript(U+0660, ...) is wrong\n");
|
||||
}
|
||||
if(!(
|
||||
!uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
|
||||
uscript_hasScript(0xfdf2, USCRIPT_ARABIC) && /* main Script value */
|
||||
!uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
|
||||
uscript_hasScript(0xfdf2, USCRIPT_THAANA))
|
||||
) {
|
||||
log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
|
||||
}
|
||||
}
|
||||
|
||||
void TestGetScriptExtensions() {
|
||||
UScriptCode scripts[20];
|
||||
int32_t length;
|
||||
UErrorCode errorCode;
|
||||
|
||||
/* errors and overflows */
|
||||
errorCode=U_PARSE_ERROR;
|
||||
length=uscript_getScriptExtensions(0x0640, scripts, LENGTHOF(scripts), &errorCode);
|
||||
if(errorCode!=U_PARSE_ERROR) {
|
||||
log_err("uscript_getScriptExtensions(U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
|
||||
u_errorName(errorCode));
|
||||
}
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=uscript_getScriptExtensions(0x0640, NULL, LENGTHOF(scripts), &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
log_err("uscript_getScriptExtensions(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
|
||||
u_errorName(errorCode));
|
||||
}
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
log_err("uscript_getScriptExtensions(capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
|
||||
u_errorName(errorCode));
|
||||
}
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
|
||||
if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
|
||||
log_err("uscript_getScriptExtensions(capacity=0: pure preflighting)=%d != 2 - %s\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
|
||||
if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
|
||||
log_err("uscript_getScriptExtensions(capacity=1: preflighting)=%d != 2 - %s\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
/* normal usage */
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=0) {
|
||||
log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 0 - %s\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
length=uscript_getScriptExtensions(0x0640, scripts, LENGTHOF(scripts), &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_SYRIAC) {
|
||||
log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
length=uscript_getScriptExtensions(0xfdf2, scripts, LENGTHOF(scripts), &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
|
||||
log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
length=uscript_getScriptExtensions(0xff65, scripts, LENGTHOF(scripts), &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
|
||||
log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
|
||||
(int)length, u_errorName(errorCode));
|
||||
}
|
||||
}
|
||||
|
||||
void TestBinaryValues() {
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2003-2008, International Business Machines Corporation and
|
||||
* Copyright (c) 2003-2010, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
void TestUScriptCodeAPI(void);
|
||||
void TestHasScript(void);
|
||||
void TestGetScriptExtensions(void);
|
||||
void TestBinaryValues(void);
|
||||
|
|
|
@ -182,6 +182,8 @@ void addUnicodeTest(TestNode** root)
|
|||
addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
|
||||
addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
|
||||
addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
|
||||
addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
|
||||
addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
|
||||
addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
|
||||
addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
|
||||
addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
|
||||
|
@ -2344,7 +2346,6 @@ TestAdditionalProperties() {
|
|||
{ 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
|
||||
{ 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
|
||||
{ 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
|
||||
{ 0x06C3, UCHAR_JOINING_GROUP, U_JG_HAMZA_ON_HEH_GOAL },
|
||||
|
||||
{ 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
|
||||
{ 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
|
||||
|
@ -2477,6 +2478,11 @@ TestAdditionalProperties() {
|
|||
{ 0xa4d0, UCHAR_SCRIPT, USCRIPT_LISU },
|
||||
{ 0x10a7f, UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
|
||||
|
||||
{ -1, 0x600, 0 }, /* version break for Unicode 6.0 */
|
||||
|
||||
/* value changed in Unicode 6.0 */
|
||||
{ 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
|
||||
|
||||
/* undefined UProperty values */
|
||||
{ 0x61, 0x4a7, 0 },
|
||||
{ 0x234bc, 0x15ed, 0 }
|
||||
|
@ -2919,7 +2925,7 @@ TestConsistency() {
|
|||
*
|
||||
* Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
|
||||
* but not from Hyphen.
|
||||
* UTC 94 (2003mar) decided to leave it that way and to changed UCD.html.
|
||||
* UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
|
||||
* Therefore, do not show errors when testing the Hyphen property.
|
||||
*/
|
||||
log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
|
||||
|
|
|
@ -104,9 +104,8 @@ CollationAPITest::TestProperty(/* char* par */)
|
|||
* needs to be adjusted.
|
||||
* Same in cintltst/capitst.c.
|
||||
*/
|
||||
UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A};
|
||||
UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A}; // from ICU 4.4/UCA 5.2
|
||||
UVersionInfo versionArray;
|
||||
int i = 0;
|
||||
|
||||
logln("The property tests begin : ");
|
||||
logln("Test ctors : ");
|
||||
|
@ -124,12 +123,14 @@ CollationAPITest::TestProperty(/* char* par */)
|
|||
delete kwEnum;
|
||||
|
||||
col->getVersion(versionArray);
|
||||
for (i=0; i<4; ++i) {
|
||||
if (versionArray[i] != currVersionArray[i]) {
|
||||
errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
|
||||
// Check for a version greater than some value rather than equality
|
||||
// so that we need not update the expected version each time.
|
||||
if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
|
||||
errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
|
||||
versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
|
||||
} else {
|
||||
logln("Collator::getVersion() result: %02x.%02x.%02x.%02x",
|
||||
versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed");
|
||||
|
|
|
@ -1965,6 +1965,10 @@ void RBBITest::TestTailoredBreaks() {
|
|||
UErrorCode status = U_ZERO_ERROR;
|
||||
switch (tbItemPtr->type) {
|
||||
case UBRK_CHARACTER:
|
||||
// TODO(andy): Match Thai grapheme break behavior to Unicode 6.0 and remove this time bomb.
|
||||
{ UVersionInfo icu453 = { 4, 5, 3, 0 };
|
||||
if (!isICUVersionAtLeast(icu453)) continue;
|
||||
}
|
||||
tailoredBrkiter = BreakIterator::createCharacterInstance(testLocale, status);
|
||||
rootBrkiter = BreakIterator::createCharacterInstance(rootLocale, status);
|
||||
break;
|
||||
|
@ -2201,6 +2205,10 @@ void RBBITest::TestUnicodeFiles() {
|
|||
//-------------------------------------------------------------------------------------------
|
||||
void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi) {
|
||||
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
// TODO(andy): Match line break behavior to Unicode 6.0 and remove this time bomb.
|
||||
UVersionInfo icu453 = { 4, 5, 3, 0 };
|
||||
UBool isICUVersionAtLeast453 = isICUVersionAtLeast(icu453);
|
||||
UBool isLineBreak = 0 == strcmp(fileName, "LineBreakTest.txt");
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
//
|
||||
|
@ -2294,7 +2302,10 @@ void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *
|
|||
// If the line from the file contained test data, run the test now.
|
||||
//
|
||||
if (testString.length() > 0) {
|
||||
// TODO(andy): Remove this time bomb code.
|
||||
if (!isLineBreak || isICUVersionAtLeast453 || !(4658 <= lineNumber && lineNumber <= 4758)) {
|
||||
checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi);
|
||||
}
|
||||
}
|
||||
|
||||
// Clear out this test case.
|
||||
|
@ -4589,8 +4600,8 @@ void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name
|
|||
breakPos = bi->preceding(i);
|
||||
if (breakPos >= i ||
|
||||
breakPos > lastBreakPos ||
|
||||
breakPos < 0 && testText.getChar32Start(i)>0 ||
|
||||
breakPos < lastBreakPos && lastBreakPos < testText.getChar32Start(i) ) {
|
||||
(breakPos < 0 && testText.getChar32Start(i)>0) ||
|
||||
(breakPos < lastBreakPos && lastBreakPos < testText.getChar32Start(i)) ) {
|
||||
errln("%s break monkey test: "
|
||||
"Out of range value returned by BreakIterator::preceding().\n"
|
||||
"index=%d; prev returned %d; lastBreak=%d" ,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000-2009, International Business Machines
|
||||
* Copyright (C) 2000-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
|
@ -962,8 +962,8 @@ void RTTest::logRoundTripFailure(const UnicodeString& from,
|
|||
A bug has been filed to remind us to do this: #1979.
|
||||
*/
|
||||
|
||||
static const char KATAKANA[] = "[[[:katakana:][\\u30A1-\\u30FA\\u30FC]]-[\\u30FF\\u31F0-\\u31FF]]";
|
||||
static const char HIRAGANA[] = "[[[:hiragana:][\\u3040-\\u3094]]-[\\u3095-\\u3096\\u309F-\\u30A0\\U0001F200-\\U0001F2FF]]";
|
||||
static const char KATAKANA[] = "[[[:katakana:][\\u30A1-\\u30FA\\u30FC]]-[\\u30FF\\u31F0-\\u31FF]-[:^age=5.2:]]";
|
||||
static const char HIRAGANA[] = "[[[:hiragana:][\\u3040-\\u3094]]-[\\u3095-\\u3096\\u309F-\\u30A0\\U0001F200-\\U0001F2FF]-[:^age=5.2:]]";
|
||||
static const char LENGTH[] = "[\\u30FC]";
|
||||
static const char HALFWIDTH_KATAKANA[] = "[\\uFF65-\\uFF9D]";
|
||||
static const char KATAKANA_ITERATION[] = "[\\u30FD\\u30FE]";
|
||||
|
|
|
@ -1280,441 +1280,64 @@ BasicNormalizerTest::FindFoldFCDExceptions() {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Hardcoded "NF* Skippable" sets, generated from
|
||||
* Mark Davis' com.ibm.text.UCD.NFSkippable (see ICU4J CVS, module unicodetools).
|
||||
* Run com.ibm.text.UCD.Main with the option NFSkippable.
|
||||
*
|
||||
* Must be updated for each Unicode version.
|
||||
*/
|
||||
static void
|
||||
initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT], UErrorCode &errorCode) {
|
||||
skipSets[UNORM_NFD].applyPattern(
|
||||
UNICODE_STRING_SIMPLE("[[:NFD_QC=Yes:]&[:ccc=0:]]"), errorCode);
|
||||
skipSets[UNORM_NFC].applyPattern(
|
||||
UNICODE_STRING_SIMPLE("[[:NFC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]"), errorCode);
|
||||
skipSets[UNORM_NFKD].applyPattern(
|
||||
UNICODE_STRING_SIMPLE("[[:NFKD_QC=Yes:]&[:ccc=0:]]"), errorCode);
|
||||
skipSets[UNORM_NFKC].applyPattern(
|
||||
UNICODE_STRING_SIMPLE("[[:NFKC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]"), errorCode);
|
||||
|
||||
skipSets[UNORM_NFD].applyPattern(UnicodeString(
|
||||
"[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
|
||||
"\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD"
|
||||
"\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137"
|
||||
"\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165"
|
||||
"\\u0168-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC"
|
||||
"\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B"
|
||||
"\\u021E\\u021F\\u0226-\\u0233\\u0300-\\u034E\\u0350-\\u036F"
|
||||
"\\u0374\\u037E\\u0385-\\u038A\\u038C\\u038E-\\u0390\\u03AA-"
|
||||
"\\u03B0\\u03CA-\\u03CE\\u03D3\\u03D4\\u0400\\u0401\\u0403\\u0407"
|
||||
"\\u040C-\\u040E\\u0419\\u0439\\u0450\\u0451\\u0453\\u0457\\u045C"
|
||||
"-\\u045E\\u0476\\u0477\\u0483-\\u0487\\u04C1\\u04C2\\u04D0-"
|
||||
"\\u04D3\\u04D6\\u04D7\\u04DA-\\u04DF\\u04E2-\\u04E7\\u04EA-"
|
||||
"\\u04F5\\u04F8\\u04F9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4"
|
||||
"\\u05C5\\u05C7\\u0610-\\u061A\\u0622-\\u0626\\u064B-\\u065E"
|
||||
"\\u0670\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4"
|
||||
"\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
|
||||
"\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-"
|
||||
"\\u082D\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958"
|
||||
"-\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33"
|
||||
"\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C"
|
||||
"\\u0B48\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD"
|
||||
"\\u0C48\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA"
|
||||
"\\u0CCB\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE"
|
||||
"\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB"
|
||||
"\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
|
||||
"\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
|
||||
"\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
"\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u135F\\u1714\\u1734"
|
||||
"\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75"
|
||||
"-\\u1A7C\\u1A7F\\u1B06\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12\\u1B34"
|
||||
"\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73\\u1BAA"
|
||||
"\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8\\u1CED"
|
||||
"\\u1DC0-\\u1DE6\\u1DFD-\\u1E99\\u1E9B\\u1EA0-\\u1EF9\\u1F00-"
|
||||
"\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-"
|
||||
"\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4"
|
||||
"\\u1FB6-\\u1FBC\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-"
|
||||
"\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFD\\u2000"
|
||||
"\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A"
|
||||
"\\u212B\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204\\u2209\\u220C"
|
||||
"\\u2224\\u2226\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-"
|
||||
"\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285"
|
||||
"\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED"
|
||||
"\\u2329\\u232A\\u2ADC\\u2CEF-\\u2CF1\\u2DE0-\\u2DFF\\u302A-"
|
||||
"\\u302F\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A"
|
||||
"\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071"
|
||||
"\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094"
|
||||
"\\u3099\\u309A\\u309E\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4\\u30B6"
|
||||
"\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7\\u30C9"
|
||||
"\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA\\u30DC"
|
||||
"\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\uA66F\\uA67C\\uA67D\\uA6F0"
|
||||
"\\uA6F1\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-\\uA92D\\uA953"
|
||||
"\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF"
|
||||
"\\uAAC1\\uABED\\uAC00-\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12"
|
||||
"\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D"
|
||||
"\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-\\uFB36"
|
||||
"\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-"
|
||||
"\\uFB4E\\uFE20-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010A"
|
||||
"38-\\U00010A3A\\U00010A3F\\U0001109A\\U0001109C\\U000110AB"
|
||||
"\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
|
||||
"D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
|
||||
"\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
|
||||
"F800-\\U0002FA1D]"
|
||||
, ""), errorCode);
|
||||
// Remove from the NFC and NFKC sets all those characters that change
|
||||
// when a back-combining character is added.
|
||||
// First, get all of the back-combining characters and their combining classes.
|
||||
UnicodeSet combineBack("[:NFC_QC=Maybe:]", errorCode);
|
||||
int32_t numCombineBack=combineBack.size();
|
||||
int32_t *combineBackCharsAndCc=new int32_t[numCombineBack*2];
|
||||
UnicodeSetIterator iter(combineBack);
|
||||
for(int32_t i=0; i<numCombineBack; ++i) {
|
||||
iter.next();
|
||||
UChar32 c=iter.getCodepoint();
|
||||
combineBackCharsAndCc[2*i]=c;
|
||||
combineBackCharsAndCc[2*i+1]=u_getCombiningClass(c);
|
||||
}
|
||||
|
||||
skipSets[UNORM_NFC].applyPattern(UnicodeString(
|
||||
"[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-"
|
||||
"\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD\\u00FF-"
|
||||
"\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121\\u0124"
|
||||
"\\u0125\\u0128-\\u012D\\u0130\\u0139\\u013A\\u013D\\u013E\\u0143"
|
||||
"\\u0144\\u0147\\u0148\\u014C-\\u0151\\u0154\\u0155\\u0158-"
|
||||
"\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168-\\u0171\\u0174-"
|
||||
"\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7\\u01CD-\\u01DC\\u01DE"
|
||||
"-\\u01E1\\u01E6-\\u01EB\\u01F4\\u01F5\\u01F8-\\u01FB\\u0200-"
|
||||
"\\u021B\\u021E\\u021F\\u0226-\\u0233\\u0292\\u0300-\\u034E"
|
||||
"\\u0350-\\u036F\\u0374\\u037E\\u0387\\u0391\\u0395\\u0397\\u0399"
|
||||
"\\u039F\\u03A1\\u03A5\\u03A9\\u03AC\\u03AE\\u03B1\\u03B5\\u03B7"
|
||||
"\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-\\u03CB\\u03CE\\u03D2\\u0406"
|
||||
"\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423\\u0427\\u042B"
|
||||
"\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E\\u0443\\u0447"
|
||||
"\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0487\\u04D8\\u04D9"
|
||||
"\\u04E8\\u04E9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5"
|
||||
"\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627\\u0648\\u064A-"
|
||||
"\\u065E\\u0670\\u06C1\\u06D2\\u06D5-\\u06DC\\u06DF-\\u06E4"
|
||||
"\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
|
||||
"\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-"
|
||||
"\\u082D\\u0928\\u0930\\u0933\\u093C\\u094D\\u0951-\\u0954\\u0958"
|
||||
"-\\u095F\\u09BC\\u09BE\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF"
|
||||
"\\u0A33\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD"
|
||||
"\\u0B3C\\u0B3E\\u0B47\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92"
|
||||
"\\u0BBE\\u0BC6\\u0BC7\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56"
|
||||
"\\u0CBC\\u0CBF\\u0CC2\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E"
|
||||
"\\u0D46\\u0D47\\u0D4D\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF"
|
||||
"\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB"
|
||||
"\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
|
||||
"\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
|
||||
"\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
"\\u0FC6\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u1100-\\u1112"
|
||||
"\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2"
|
||||
"\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75-"
|
||||
"\\u1A7C\\u1A7F\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D\\u1B11\\u1B34"
|
||||
"\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44\\u1B6B-\\u1B73"
|
||||
"\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8"
|
||||
"\\u1CED\\u1DC0-\\u1DE6\\u1DFD-\\u1E03\\u1E0A-\\u1E0F\\u1E12-"
|
||||
"\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53\\u1E58-"
|
||||
"\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E99\\u1EA0-"
|
||||
"\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19\\u1F20-"
|
||||
"\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50\\u1F51"
|
||||
"\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79\\u1F7B-"
|
||||
"\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98\\u1F99"
|
||||
"\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB\\u1FBC\\u1FBE"
|
||||
"\\u1FBF\\u1FC3\\u1FC6\\u1FC9\\u1FCB\\u1FCC\\u1FD3\\u1FDB\\u1FE3"
|
||||
"\\u1FEB\\u1FEE\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE\\u2000"
|
||||
"\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A"
|
||||
"\\u212B\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208"
|
||||
"\\u220B\\u2223\\u2225\\u223C\\u2243\\u2245\\u2248\\u224D\\u2261"
|
||||
"\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277\\u227A-\\u227D\\u2282"
|
||||
"\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB"
|
||||
"\\u22B2-\\u22B5\\u2329\\u232A\\u2ADC\\u2CEF-\\u2CF1\\u2DE0-"
|
||||
"\\u2DFF\\u302A-\\u302F\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053"
|
||||
"\\u3055\\u3057\\u3059\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066"
|
||||
"\\u3068\\u306F\\u3072\\u3075\\u3078\\u307B\\u3099\\u309A\\u309D"
|
||||
"\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9"
|
||||
"\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2"
|
||||
"\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2\\u30FD\\uA66F\\uA67C\\uA67D"
|
||||
"\\uA6F0\\uA6F1\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-\\uA92D"
|
||||
"\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE"
|
||||
"\\uAABF\\uAAC1\\uABED\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C"
|
||||
"\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88"
|
||||
"\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84"
|
||||
"\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80"
|
||||
"\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C"
|
||||
"\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178"
|
||||
"\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274"
|
||||
"\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370"
|
||||
"\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C"
|
||||
"\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568"
|
||||
"\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664"
|
||||
"\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760"
|
||||
"\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C"
|
||||
"\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958"
|
||||
"\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54"
|
||||
"\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50"
|
||||
"\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C"
|
||||
"\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48"
|
||||
"\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44"
|
||||
"\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40"
|
||||
"\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C"
|
||||
"\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138"
|
||||
"\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234"
|
||||
"\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330"
|
||||
"\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C"
|
||||
"\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528"
|
||||
"\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624"
|
||||
"\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720"
|
||||
"\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C"
|
||||
"\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918"
|
||||
"\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14"
|
||||
"\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10"
|
||||
"\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C"
|
||||
"\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08"
|
||||
"\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04"
|
||||
"\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00"
|
||||
"\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC"
|
||||
"\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8"
|
||||
"\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4"
|
||||
"\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0"
|
||||
"\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC"
|
||||
"\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8"
|
||||
"\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4"
|
||||
"\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0"
|
||||
"\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10"
|
||||
"\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-"
|
||||
"\\uFA2D\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-"
|
||||
"\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
|
||||
"-\\uFB4E\\uFE20-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010"
|
||||
"A38-\\U00010A3A\\U00010A3F\\U00011099\\U0001109B\\U000110A5"
|
||||
"\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
|
||||
"D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
|
||||
"\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
|
||||
"F800-\\U0002FA1D]"
|
||||
, ""), errorCode);
|
||||
// We need not look at control codes, Han characters nor Hangul LVT syllables because they
|
||||
// do not combine forward. LV syllables are already removed.
|
||||
UnicodeSet notInteresting("[[:C:][:Unified_Ideograph:][:HST=LVT:]]", errorCode);
|
||||
LocalPointer<UnicodeSet> unsure(&((UnicodeSet *)(skipSets[UNORM_NFC].clone()))->removeAll(notInteresting));
|
||||
// System.out.format("unsure.size()=%d\n", unsure.size());
|
||||
|
||||
skipSets[UNORM_NFKD].applyPattern(UnicodeString(
|
||||
"[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA"
|
||||
"\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6"
|
||||
"\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6"
|
||||
"\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130"
|
||||
"\\u0132-\\u0137\\u0139-\\u0140\\u0143-\\u0149\\u014C-\\u0151"
|
||||
"\\u0154-\\u0165\\u0168-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0"
|
||||
"\\u01C4-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B"
|
||||
"\\u021E\\u021F\\u0226-\\u0233\\u02B0-\\u02B8\\u02D8-\\u02DD"
|
||||
"\\u02E0-\\u02E4\\u0300-\\u034E\\u0350-\\u036F\\u0374\\u037A"
|
||||
"\\u037E\\u0384-\\u038A\\u038C\\u038E-\\u0390\\u03AA-\\u03B0"
|
||||
"\\u03CA-\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
|
||||
"\\u03F9\\u0400\\u0401\\u0403\\u0407\\u040C-\\u040E\\u0419\\u0439"
|
||||
"\\u0450\\u0451\\u0453\\u0457\\u045C-\\u045E\\u0476\\u0477\\u0483"
|
||||
"-\\u0487\\u04C1\\u04C2\\u04D0-\\u04D3\\u04D6\\u04D7\\u04DA-"
|
||||
"\\u04DF\\u04E2-\\u04E7\\u04EA-\\u04F5\\u04F8\\u04F9\\u0587"
|
||||
"\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610"
|
||||
"-\\u061A\\u0622-\\u0626\\u064B-\\u065E\\u0670\\u0675-\\u0678"
|
||||
"\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7"
|
||||
"\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-\\u07F3"
|
||||
"\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D"
|
||||
"\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958-"
|
||||
"\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36"
|
||||
"\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B48"
|
||||
"\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD\\u0C48"
|
||||
"\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA\\u0CCB"
|
||||
"\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE\\u0E33"
|
||||
"\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-"
|
||||
"\\u0ECB\\u0EDC\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39"
|
||||
"\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80"
|
||||
"-\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
|
||||
"\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u10FC\\u135F\\u1714"
|
||||
"\\u1734\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60"
|
||||
"\\u1A75-\\u1A7C\\u1A7F\\u1B06\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12"
|
||||
"\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73"
|
||||
"\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8"
|
||||
"\\u1CED\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-"
|
||||
"\\u1D6A\\u1D78\\u1D9B-\\u1DE6\\u1DFD-\\u1E9B\\u1EA0-\\u1EF9"
|
||||
"\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D"
|
||||
"\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-"
|
||||
"\\u1FB4\\u1FB6-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-"
|
||||
"\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFE\\u2000-\\u200A\\u2011"
|
||||
"\\u2017\\u2024-\\u2026\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C"
|
||||
"\\u203E\\u2047-\\u2049\\u2057\\u205F\\u2070\\u2071\\u2074-"
|
||||
"\\u208E\\u2090-\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1\\u20E5-"
|
||||
"\\u20F0\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113\\u2115"
|
||||
"\\u2116\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2126\\u2128"
|
||||
"\\u212A-\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-\\u2140"
|
||||
"\\u2145-\\u2149\\u2150-\\u217F\\u2189\\u219A\\u219B\\u21AE"
|
||||
"\\u21CD-\\u21CF\\u2204\\u2209\\u220C\\u2224\\u2226\\u222C\\u222D"
|
||||
"\\u222F\\u2230\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-"
|
||||
"\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285"
|
||||
"\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED"
|
||||
"\\u2329\\u232A\\u2460-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC"
|
||||
"\\u2C7C\\u2C7D\\u2CEF-\\u2CF1\\u2D6F\\u2DE0-\\u2DFF\\u2E9F"
|
||||
"\\u2EF3\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F\\u3036\\u3038-"
|
||||
"\\u303A\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A"
|
||||
"\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071"
|
||||
"\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094"
|
||||
"\\u3099-\\u309C\\u309E\\u309F\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4"
|
||||
"\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7"
|
||||
"\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA"
|
||||
"\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\u30FF\\u3131-"
|
||||
"\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3247\\u3250-"
|
||||
"\\u327E\\u3280-\\u32FE\\u3300-\\u33FF\\uA66F\\uA67C\\uA67D"
|
||||
"\\uA6F0\\uA6F1\\uA770\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-"
|
||||
"\\uA92D\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8"
|
||||
"\\uAABE\\uAABF\\uAAC1\\uABED\\uAC00-\\uD7A3\\uF900-\\uFA0D"
|
||||
"\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A"
|
||||
"-\\uFA2D\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-"
|
||||
"\\uFB17\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41"
|
||||
"\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F"
|
||||
"\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE20-\\uFE26"
|
||||
"\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B"
|
||||
"\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-"
|
||||
"\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-"
|
||||
"\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010A0D\\U00010A0F\\U00010A"
|
||||
"38-\\U00010A3A\\U00010A3F\\U0001109A\\U0001109C\\U000110AB"
|
||||
"\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
|
||||
"D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
|
||||
"\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0001"
|
||||
"D400-\\U0001D454\\U0001D456-\\U0001D49C\\U0001D49E\\U0001D49F"
|
||||
"\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4A9-\\U0001D4AC\\U0001D"
|
||||
"4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C3\\U0001D4C5-"
|
||||
"\\U0001D505\\U0001D507-\\U0001D50A\\U0001D50D-\\U0001D514\\U0001"
|
||||
"D516-\\U0001D51C\\U0001D51E-\\U0001D539\\U0001D53B-\\U0001D53E"
|
||||
"\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
|
||||
"D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF"
|
||||
"\\U0001F100-\\U0001F10A\\U0001F110-\\U0001F12E\\U0001F131\\U0001"
|
||||
"F13D\\U0001F13F\\U0001F142\\U0001F146\\U0001F14A-\\U0001F14E"
|
||||
"\\U0001F190\\U0001F200\\U0001F210-\\U0001F231\\U0001F240-\\U0001"
|
||||
"F248\\U0002F800-\\U0002FA1D]"
|
||||
, ""), errorCode);
|
||||
|
||||
skipSets[UNORM_NFKC].applyPattern(UnicodeString(
|
||||
"[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5"
|
||||
"\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6"
|
||||
"\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD"
|
||||
"\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121"
|
||||
"\\u0124\\u0125\\u0128-\\u012D\\u0130\\u0132\\u0133\\u0139\\u013A"
|
||||
"\\u013D-\\u0140\\u0143\\u0144\\u0147-\\u0149\\u014C-\\u0151"
|
||||
"\\u0154\\u0155\\u0158-\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168"
|
||||
"-\\u0171\\u0174-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7"
|
||||
"\\u01C4-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01EB\\u01F1-\\u01F5"
|
||||
"\\u01F8-\\u01FB\\u0200-\\u021B\\u021E\\u021F\\u0226-\\u0233"
|
||||
"\\u0292\\u02B0-\\u02B8\\u02D8-\\u02DD\\u02E0-\\u02E4\\u0300-"
|
||||
"\\u034E\\u0350-\\u036F\\u0374\\u037A\\u037E\\u0384\\u0385\\u0387"
|
||||
"\\u0391\\u0395\\u0397\\u0399\\u039F\\u03A1\\u03A5\\u03A9\\u03AC"
|
||||
"\\u03AE\\u03B1\\u03B5\\u03B7\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-"
|
||||
"\\u03CB\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
|
||||
"\\u03F9\\u0406\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423"
|
||||
"\\u0427\\u042B\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E"
|
||||
"\\u0443\\u0447\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0487"
|
||||
"\\u04D8\\u04D9\\u04E8\\u04E9\\u0587\\u0591-\\u05BD\\u05BF\\u05C1"
|
||||
"\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627"
|
||||
"\\u0648\\u064A-\\u065E\\u0670\\u0675-\\u0678\\u06C1\\u06D2"
|
||||
"\\u06D5-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED"
|
||||
"\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-"
|
||||
"\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0928\\u0930\\u0933"
|
||||
"\\u093C\\u094D\\u0951-\\u0954\\u0958-\\u095F\\u09BC\\u09BE"
|
||||
"\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36\\u0A3C"
|
||||
"\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B3E\\u0B47"
|
||||
"\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92\\u0BBE\\u0BC6\\u0BC7"
|
||||
"\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CBF\\u0CC2"
|
||||
"\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E\\u0D46\\u0D47\\u0D4D"
|
||||
"\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF\\u0E33\\u0E38-\\u0E3A"
|
||||
"\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-\\u0ECB\\u0EDC"
|
||||
"\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D"
|
||||
"\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80-\\u0F84"
|
||||
"\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9\\u0FC6"
|
||||
"\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u10FC\\u1100-\\u1112"
|
||||
"\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2"
|
||||
"\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75-"
|
||||
"\\u1A7C\\u1A7F\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D\\u1B11\\u1B34"
|
||||
"\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44\\u1B6B-\\u1B73"
|
||||
"\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8"
|
||||
"\\u1CED\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-"
|
||||
"\\u1D6A\\u1D78\\u1D9B-\\u1DE6\\u1DFD-\\u1E03\\u1E0A-\\u1E0F"
|
||||
"\\u1E12-\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53"
|
||||
"\\u1E58-\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E9B"
|
||||
"\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19"
|
||||
"\\u1F20-\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50"
|
||||
"\\u1F51\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79"
|
||||
"\\u1F7B-\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98"
|
||||
"\\u1F99\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB-\\u1FC1"
|
||||
"\\u1FC3\\u1FC6\\u1FC9\\u1FCB-\\u1FCF\\u1FD3\\u1FDB\\u1FDD-"
|
||||
"\\u1FDF\\u1FE3\\u1FEB\\u1FED-\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB"
|
||||
"-\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-\\u2026\\u202F"
|
||||
"\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057"
|
||||
"\\u205F\\u2070\\u2071\\u2074-\\u208E\\u2090-\\u2094\\u20A8"
|
||||
"\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2100-\\u2103\\u2105-"
|
||||
"\\u2107\\u2109-\\u2113\\u2115\\u2116\\u2119-\\u211D\\u2120-"
|
||||
"\\u2122\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2131"
|
||||
"\\u2133-\\u2139\\u213B-\\u2140\\u2145-\\u2149\\u2150-\\u217F"
|
||||
"\\u2189\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208"
|
||||
"\\u220B\\u2223\\u2225\\u222C\\u222D\\u222F\\u2230\\u223C\\u2243"
|
||||
"\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276"
|
||||
"\\u2277\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292"
|
||||
"\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5\\u2329\\u232A\\u2460"
|
||||
"-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC\\u2C7C\\u2C7D\\u2CEF-"
|
||||
"\\u2CF1\\u2D6F\\u2DE0-\\u2DFF\\u2E9F\\u2EF3\\u2F00-\\u2FD5"
|
||||
"\\u3000\\u302A-\\u302F\\u3036\\u3038-\\u303A\\u3046\\u304B"
|
||||
"\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059\\u305B\\u305D"
|
||||
"\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072\\u3075\\u3078"
|
||||
"\\u307B\\u3099-\\u309D\\u309F\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1"
|
||||
"\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4"
|
||||
"\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2"
|
||||
"\\u30FD\\u30FF\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E"
|
||||
"\\u3220-\\u3247\\u3250-\\u327E\\u3280-\\u32FE\\u3300-\\u33FF"
|
||||
"\\uA66F\\uA67C\\uA67D\\uA6F0\\uA6F1\\uA770\\uA806\\uA8C4\\uA8E0-"
|
||||
"\\uA8F1\\uA92B-\\uA92D\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-"
|
||||
"\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uABED\\uAC00\\uAC1C"
|
||||
"\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18"
|
||||
"\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14"
|
||||
"\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10"
|
||||
"\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C"
|
||||
"\\uB028\\uB044\\uB060\\uB07C\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108"
|
||||
"\\uB124\\uB140\\uB15C\\uB178\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204"
|
||||
"\\uB220\\uB23C\\uB258\\uB274\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300"
|
||||
"\\uB31C\\uB338\\uB354\\uB370\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC"
|
||||
"\\uB418\\uB434\\uB450\\uB46C\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8"
|
||||
"\\uB514\\uB530\\uB54C\\uB568\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4"
|
||||
"\\uB610\\uB62C\\uB648\\uB664\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0"
|
||||
"\\uB70C\\uB728\\uB744\\uB760\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC"
|
||||
"\\uB808\\uB824\\uB840\\uB85C\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8"
|
||||
"\\uB904\\uB920\\uB93C\\uB958\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4"
|
||||
"\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0"
|
||||
"\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC"
|
||||
"\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8"
|
||||
"\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4"
|
||||
"\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0"
|
||||
"\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC"
|
||||
"\\uBFE8\\uC004\\uC020\\uC03C\\uC058\\uC074\\uC090\\uC0AC\\uC0C8"
|
||||
"\\uC0E4\\uC100\\uC11C\\uC138\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4"
|
||||
"\\uC1E0\\uC1FC\\uC218\\uC234\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0"
|
||||
"\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC"
|
||||
"\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448\\uC464\\uC480\\uC49C\\uC4B8"
|
||||
"\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544\\uC560\\uC57C\\uC598\\uC5B4"
|
||||
"\\uC5D0\\uC5EC\\uC608\\uC624\\uC640\\uC65C\\uC678\\uC694\\uC6B0"
|
||||
"\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C\\uC758\\uC774\\uC790\\uC7AC"
|
||||
"\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838\\uC854\\uC870\\uC88C\\uC8A8"
|
||||
"\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934\\uC950\\uC96C\\uC988\\uC9A4"
|
||||
"\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0"
|
||||
"\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C"
|
||||
"\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98"
|
||||
"\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94"
|
||||
"\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90"
|
||||
"\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C"
|
||||
"\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018\\uD034\\uD050\\uD06C\\uD088"
|
||||
"\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114\\uD130\\uD14C\\uD168\\uD184"
|
||||
"\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210\\uD22C\\uD248\\uD264\\uD280"
|
||||
"\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C\\uD328\\uD344\\uD360\\uD37C"
|
||||
"\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408\\uD424\\uD440\\uD45C\\uD478"
|
||||
"\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504\\uD520\\uD53C\\uD558\\uD574"
|
||||
"\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600\\uD61C\\uD638\\uD654\\uD670"
|
||||
"\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC\\uD718\\uD734\\uD750\\uD76C"
|
||||
"\\uD788\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
|
||||
"\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6D\\uFA70-"
|
||||
"\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D-\\uFB36\\uFB38-"
|
||||
"\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3"
|
||||
"-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-"
|
||||
"\\uFE19\\uFE20-\\uFE26\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-"
|
||||
"\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC"
|
||||
"\\uFF01-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7"
|
||||
"\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010"
|
||||
"A0D\\U00010A0F\\U00010A38-\\U00010A3A\\U00010A3F\\U00011099"
|
||||
"\\U0001109B\\U000110A5\\U000110B9\\U000110BA\\U0001D15E-\\U0001D"
|
||||
"169\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-"
|
||||
"\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001"
|
||||
"D242-\\U0001D244\\U0001D400-\\U0001D454\\U0001D456-\\U0001D49C"
|
||||
"\\U0001D49E\\U0001D49F\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4"
|
||||
"A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-"
|
||||
"\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-\\U0001D50A\\U0001"
|
||||
"D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-\\U0001D539"
|
||||
"\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001"
|
||||
"D54A-\\U0001D550\\U0001D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB"
|
||||
"\\U0001D7CE-\\U0001D7FF\\U0001F100-\\U0001F10A\\U0001F110-"
|
||||
"\\U0001F12E\\U0001F131\\U0001F13D\\U0001F13F\\U0001F142\\U0001F1"
|
||||
"46\\U0001F14A-\\U0001F14E\\U0001F190\\U0001F200\\U0001F210-"
|
||||
"\\U0001F231\\U0001F240-\\U0001F248\\U0002F800-\\U0002FA1D]"
|
||||
, ""), errorCode);
|
||||
// For each character about which we are unsure, see if it changes when we add
|
||||
// one of the back-combining characters.
|
||||
const Normalizer2 *norm2=Normalizer2::getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode);
|
||||
UnicodeString s;
|
||||
iter.reset(*unsure);
|
||||
while(iter.next()) {
|
||||
UChar32 c=iter.getCodepoint();
|
||||
s.setTo(c);
|
||||
int32_t cLength=s.length();
|
||||
int32_t tccc=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
|
||||
for(int32_t i=0; i<numCombineBack; ++i) {
|
||||
// If c's decomposition ends with a character with non-zero combining class, then
|
||||
// c can only change if it combines with a character with a non-zero combining class.
|
||||
int32_t cc2=combineBackCharsAndCc[2*i+1];
|
||||
if(tccc==0 || cc2!=0) {
|
||||
UChar32 c2=combineBackCharsAndCc[2*i];
|
||||
s.append(c2);
|
||||
if(!norm2->isNormalized(s, errorCode)) {
|
||||
// System.out.format("remove U+%04x (tccc=%d) + U+%04x (cc=%d)\n", c, tccc, c2, cc2);
|
||||
skipSets[UNORM_NFC].remove(c);
|
||||
skipSets[UNORM_NFKC].remove(c);
|
||||
break;
|
||||
}
|
||||
s.truncate(cLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1733,15 +1356,17 @@ BasicNormalizerTest::TestSkippable() {
|
|||
}
|
||||
|
||||
/* get expected sets from hardcoded patterns */
|
||||
initExpectedSkippables(expectSets);
|
||||
initExpectedSkippables(expectSets, errorCode);
|
||||
errorCode.assertSuccess();
|
||||
|
||||
for(int32_t i=UNORM_NONE; i<UNORM_MODE_COUNT; ++i) {
|
||||
if(skipSets[i]!=expectSets[i]) {
|
||||
errln("error: TestSkippable skipSets[%d]!=expectedSets[%d]\n"
|
||||
"may need to update hardcoded UnicodeSet patterns in\n"
|
||||
"tstnorm.cpp/initExpectedSkippables(),\n"
|
||||
"see ICU4J - unicodetools.com.ibm.text.UCD.NFSkippable\n",
|
||||
i, i);
|
||||
errln("error: TestSkippable skipSets[%d]!=expectedSets[%d]\n", i, i);
|
||||
// Note: This used to depend on hardcoded UnicodeSet patterns generated by
|
||||
// Mark's unicodetools.com.ibm.text.UCD.NFSkippable, by
|
||||
// running com.ibm.text.UCD.Main with the option NFSkippable.
|
||||
// Since ICU 4.6/Unicode 6, we are generating the
|
||||
// expectSets ourselves in initExpectedSkippables().
|
||||
|
||||
s=UNICODE_STRING_SIMPLE("skip-expect=");
|
||||
(diff=skipSets[i]).removeAll(expectSets[i]).toPattern(pattern, TRUE);
|
||||
|
|
|
@ -1044,7 +1044,18 @@ void UnicodeSetTest::TestPropertySet() {
|
|||
|
||||
"[:Assigned:]",
|
||||
"A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
|
||||
"\\u0888\\uFDD3\\uFFFE\\U00050005"
|
||||
"\\u0888\\uFDD3\\uFFFE\\U00050005",
|
||||
|
||||
// Script_Extensions, new in Unicode 6.0
|
||||
"[:scx=Arab:]",
|
||||
"\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3",
|
||||
"\\u061D\\u065F\\uFDEF\\uFDFE",
|
||||
|
||||
// U+FDF2 has Script=Arabic and also Arab in its Script_Extensions,
|
||||
// so scx-sc is missing U+FDF2.
|
||||
"[[:Script_Extensions=Arabic:]-[:Arab:]]",
|
||||
"\\u0640\\u064B\\u0650\\u0655\\uFDFD",
|
||||
"\\uFDF2"
|
||||
};
|
||||
|
||||
static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
|
||||
|
|
|
@ -223,6 +223,14 @@ void UTS46Test::TestNotSTD3() {
|
|||
if(result!=input || info.getErrors()!=UIDNA_ERROR_BIDI) {
|
||||
errln("notSTD3.nameToASCII(ASCII-with-space.alef.edu) failed");
|
||||
}
|
||||
// Characters that are canonically equivalent to sequences with non-LDH ASCII.
|
||||
input=UNICODE_STRING_SIMPLE("a\\u2260b\\u226Ec\\u226Fd").unescape();
|
||||
not3->nameToUnicode(input, result, info, errorCode);
|
||||
if(result!=input || info.hasErrors()) {
|
||||
prettify(result).extract(0, 0x7fffffff, buffer, LENGTHOF(buffer));
|
||||
errln("notSTD3.nameToUnicode(equiv to non-LDH ASCII) unexpected errors %04lx string %s",
|
||||
(long)info.getErrors(), buffer);
|
||||
}
|
||||
}
|
||||
|
||||
struct TestCase {
|
||||
|
@ -283,6 +291,10 @@ static const TestCase testCases[]={
|
|||
{ "\\u65E5\\u672C\\u8A9E\\u3002\\uFF2A\\uFF30", "B", // Japanese with fullwidth ".jp"
|
||||
"\\u65E5\\u672C\\u8A9E.jp", 0 },
|
||||
{ "\\u2615", "B", "\\u2615", 0 }, // Unicode 4.0 HOT BEVERAGE
|
||||
// some characters are disallowed because they are canonically equivalent
|
||||
// to sequences with non-LDH ASCII
|
||||
{ "a\\u2260b\\u226Ec\\u226Fd", "B",
|
||||
"a\\uFFFDb\\uFFFDc\\uFFFDd", UIDNA_ERROR_DISALLOWED },
|
||||
// many deviation characters, test the special mapping code
|
||||
{ "1.a\\u00DF\\u200C\\u200Db\\u200C\\u200Dc\\u00DF\\u00DF\\u00DF\\u00DFd"
|
||||
"\\u03C2\\u03C3\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DFe"
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
# Note: Please make sure that this utf-8 file contains a BOM.
|
||||
# GraphemeBreakTest-5.2.0.txt
|
||||
# Date: 2009-05-28, 20:37:56 GMT [MD]
|
||||
# GraphemeBreakTest-6.0.0.txt
|
||||
# Date: 2010-05-18, 00:49:27 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -16,7 +15,7 @@
|
|||
# × wherever there is not.
|
||||
# <comment> the format can change, but currently it shows:
|
||||
# - the sample character name
|
||||
# - (x) the line_break property* for the sample character
|
||||
# - (x) the Grapheme_Break property* for the sample character
|
||||
# - [x] the rule that determines whether there is a break or not
|
||||
#
|
||||
# These samples may be extended or changed in the future.
|
||||
|
|
|
@ -1,116 +0,0 @@
|
|||
# Default GraphemeCluster Break Test
|
||||
# Generated: 2003-04-19, 01:55:08 GMT, MED
|
||||
#
|
||||
# Format:
|
||||
# <string> (# <comment>)?
|
||||
# <string> contains hex Unicode code points, with
|
||||
# ÷ wherever there is a break opportunity, and
|
||||
# × wherever there is not.
|
||||
# <comment> the format can change, but currently it shows:
|
||||
# - the sample character name
|
||||
# - (x) the line_break property* for the sample character
|
||||
# - [x] the rule that determines whether there is a break or not
|
||||
#
|
||||
# These samples may be extended or changed in the future.
|
||||
#
|
||||
÷ 000D ÷ 000D ÷ # ÷ [1: sot ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [4: ( Control | CR | LF ) ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [2: ÷ eot]
|
||||
÷ 000D × 000A ÷ # ÷ [1: sot ÷] <CARRIAGE RETURN (CR)> (CR) × [3: CR × LF] <LINE FEED (LF)> (LF) ÷ [2: ÷ eot]
|
||||
÷ 000D ÷ 0001 ÷ # ÷ [1: sot ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [4: ( Control | CR | LF ) ÷] <START OF HEADING> (Control) ÷ [2: ÷ eot]
|
||||
÷ 000D ÷ 0300 ÷ # ÷ [1: sot ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [4: ( Control | CR | LF ) ÷] COMBINING GRAVE ACCENT (Extend) ÷ [2: ÷ eot]
|
||||
÷ 000D ÷ 1100 ÷ # ÷ [1: sot ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [4: ( Control | CR | LF ) ÷] HANGUL CHOSEONG KIYEOK (L) ÷ [2: ÷ eot]
|
||||
÷ 000D ÷ 1160 ÷ # ÷ [1: sot ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [4: ( Control | CR | LF ) ÷] HANGUL JUNGSEONG FILLER (V) ÷ [2: ÷ eot]
|
||||
÷ 000D ÷ 11A8 ÷ # ÷ [1: sot ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [4: ( Control | CR | LF ) ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [2: ÷ eot]
|
||||
÷ 000D ÷ AC00 ÷ # ÷ [1: sot ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [4: ( Control | CR | LF ) ÷] HANGUL SYLLABLE GA (LV) ÷ [2: ÷ eot]
|
||||
÷ 000D ÷ AC01 ÷ # ÷ [1: sot ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [4: ( Control | CR | LF ) ÷] HANGUL SYLLABLE GAG (LVT) ÷ [2: ÷ eot]
|
||||
÷ 000D ÷ 0020 ÷ # ÷ [1: sot ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [4: ( Control | CR | LF ) ÷] SPACE (Other) ÷ [2: ÷ eot]
|
||||
÷ 000A ÷ 000D ÷ # ÷ [1: sot ÷] <LINE FEED (LF)> (LF) ÷ [4: ( Control | CR | LF ) ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [2: ÷ eot]
|
||||
÷ 000A ÷ 000A ÷ # ÷ [1: sot ÷] <LINE FEED (LF)> (LF) ÷ [4: ( Control | CR | LF ) ÷] <LINE FEED (LF)> (LF) ÷ [2: ÷ eot]
|
||||
÷ 000A ÷ 0001 ÷ # ÷ [1: sot ÷] <LINE FEED (LF)> (LF) ÷ [4: ( Control | CR | LF ) ÷] <START OF HEADING> (Control) ÷ [2: ÷ eot]
|
||||
÷ 000A ÷ 0300 ÷ # ÷ [1: sot ÷] <LINE FEED (LF)> (LF) ÷ [4: ( Control | CR | LF ) ÷] COMBINING GRAVE ACCENT (Extend) ÷ [2: ÷ eot]
|
||||
÷ 000A ÷ 1100 ÷ # ÷ [1: sot ÷] <LINE FEED (LF)> (LF) ÷ [4: ( Control | CR | LF ) ÷] HANGUL CHOSEONG KIYEOK (L) ÷ [2: ÷ eot]
|
||||
÷ 000A ÷ 1160 ÷ # ÷ [1: sot ÷] <LINE FEED (LF)> (LF) ÷ [4: ( Control | CR | LF ) ÷] HANGUL JUNGSEONG FILLER (V) ÷ [2: ÷ eot]
|
||||
÷ 000A ÷ 11A8 ÷ # ÷ [1: sot ÷] <LINE FEED (LF)> (LF) ÷ [4: ( Control | CR | LF ) ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [2: ÷ eot]
|
||||
÷ 000A ÷ AC00 ÷ # ÷ [1: sot ÷] <LINE FEED (LF)> (LF) ÷ [4: ( Control | CR | LF ) ÷] HANGUL SYLLABLE GA (LV) ÷ [2: ÷ eot]
|
||||
÷ 000A ÷ AC01 ÷ # ÷ [1: sot ÷] <LINE FEED (LF)> (LF) ÷ [4: ( Control | CR | LF ) ÷] HANGUL SYLLABLE GAG (LVT) ÷ [2: ÷ eot]
|
||||
÷ 000A ÷ 0020 ÷ # ÷ [1: sot ÷] <LINE FEED (LF)> (LF) ÷ [4: ( Control | CR | LF ) ÷] SPACE (Other) ÷ [2: ÷ eot]
|
||||
÷ 0001 ÷ 000D ÷ # ÷ [1: sot ÷] <START OF HEADING> (Control) ÷ [4: ( Control | CR | LF ) ÷] <CARRIAGE RETURN (CR)> (CR) ÷ [2: ÷ eot]
|
||||
÷ 0001 ÷ 000A ÷ # ÷ [1: sot ÷] <START OF HEADING> (Control) ÷ [4: ( Control | CR | LF ) ÷] <LINE FEED (LF)> (LF) ÷ [2: ÷ eot]
|
||||
÷ 0001 ÷ 0001 ÷ # ÷ [1: sot ÷] <START OF HEADING> (Control) ÷ [4: ( Control | CR | LF ) ÷] <START OF HEADING> (Control) ÷ [2: ÷ eot]
|
||||
÷ 0001 ÷ 0300 ÷ # ÷ [1: sot ÷] <START OF HEADING> (Control) ÷ [4: ( Control | CR | LF ) ÷] COMBINING GRAVE ACCENT (Extend) ÷ [2: ÷ eot]
|
||||
÷ 0001 ÷ 1100 ÷ # ÷ [1: sot ÷] <START OF HEADING> (Control) ÷ [4: ( Control | CR | LF ) ÷] HANGUL CHOSEONG KIYEOK (L) ÷ [2: ÷ eot]
|
||||
÷ 0001 ÷ 1160 ÷ # ÷ [1: sot ÷] <START OF HEADING> (Control) ÷ [4: ( Control | CR | LF ) ÷] HANGUL JUNGSEONG FILLER (V) ÷ [2: ÷ eot]
|
||||
÷ 0001 ÷ 11A8 ÷ # ÷ [1: sot ÷] <START OF HEADING> (Control) ÷ [4: ( Control | CR | LF ) ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [2: ÷ eot]
|
||||
÷ 0001 ÷ AC00 ÷ # ÷ [1: sot ÷] <START OF HEADING> (Control) ÷ [4: ( Control | CR | LF ) ÷] HANGUL SYLLABLE GA (LV) ÷ [2: ÷ eot]
|
||||
÷ 0001 ÷ AC01 ÷ # ÷ [1: sot ÷] <START OF HEADING> (Control) ÷ [4: ( Control | CR | LF ) ÷] HANGUL SYLLABLE GAG (LVT) ÷ [2: ÷ eot]
|
||||
÷ 0001 ÷ 0020 ÷ # ÷ [1: sot ÷] <START OF HEADING> (Control) ÷ [4: ( Control | CR | LF ) ÷] SPACE (Other) ÷ [2: ÷ eot]
|
||||
÷ 0300 ÷ 000D ÷ # ÷ [1: sot ÷] COMBINING GRAVE ACCENT (Extend) ÷ [5: ÷ ( Control | CR | LF )] <CARRIAGE RETURN (CR)> (CR) ÷ [2: ÷ eot]
|
||||
÷ 0300 ÷ 000A ÷ # ÷ [1: sot ÷] COMBINING GRAVE ACCENT (Extend) ÷ [5: ÷ ( Control | CR | LF )] <LINE FEED (LF)> (LF) ÷ [2: ÷ eot]
|
||||
÷ 0300 ÷ 0001 ÷ # ÷ [1: sot ÷] COMBINING GRAVE ACCENT (Extend) ÷ [5: ÷ ( Control | CR | LF )] <START OF HEADING> (Control) ÷ [2: ÷ eot]
|
||||
÷ 0300 × 0300 ÷ # ÷ [1: sot ÷] COMBINING GRAVE ACCENT (Extend) × [9: × Extend] COMBINING GRAVE ACCENT (Extend) ÷ [2: ÷ eot]
|
||||
÷ 0300 ÷ 1100 ÷ # ÷ [1: sot ÷] COMBINING GRAVE ACCENT (Extend) ÷ [10: Any ÷ Any] HANGUL CHOSEONG KIYEOK (L) ÷ [2: ÷ eot]
|
||||
÷ 0300 ÷ 1160 ÷ # ÷ [1: sot ÷] COMBINING GRAVE ACCENT (Extend) ÷ [10: Any ÷ Any] HANGUL JUNGSEONG FILLER (V) ÷ [2: ÷ eot]
|
||||
÷ 0300 ÷ 11A8 ÷ # ÷ [1: sot ÷] COMBINING GRAVE ACCENT (Extend) ÷ [10: Any ÷ Any] HANGUL JONGSEONG KIYEOK (T) ÷ [2: ÷ eot]
|
||||
÷ 0300 ÷ AC00 ÷ # ÷ [1: sot ÷] COMBINING GRAVE ACCENT (Extend) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GA (LV) ÷ [2: ÷ eot]
|
||||
÷ 0300 ÷ AC01 ÷ # ÷ [1: sot ÷] COMBINING GRAVE ACCENT (Extend) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GAG (LVT) ÷ [2: ÷ eot]
|
||||
÷ 0300 ÷ 0020 ÷ # ÷ [1: sot ÷] COMBINING GRAVE ACCENT (Extend) ÷ [10: Any ÷ Any] SPACE (Other) ÷ [2: ÷ eot]
|
||||
÷ 1100 ÷ 000D ÷ # ÷ [1: sot ÷] HANGUL CHOSEONG KIYEOK (L) ÷ [5: ÷ ( Control | CR | LF )] <CARRIAGE RETURN (CR)> (CR) ÷ [2: ÷ eot]
|
||||
÷ 1100 ÷ 000A ÷ # ÷ [1: sot ÷] HANGUL CHOSEONG KIYEOK (L) ÷ [5: ÷ ( Control | CR | LF )] <LINE FEED (LF)> (LF) ÷ [2: ÷ eot]
|
||||
÷ 1100 ÷ 0001 ÷ # ÷ [1: sot ÷] HANGUL CHOSEONG KIYEOK (L) ÷ [5: ÷ ( Control | CR | LF )] <START OF HEADING> (Control) ÷ [2: ÷ eot]
|
||||
÷ 1100 × 0300 ÷ # ÷ [1: sot ÷] HANGUL CHOSEONG KIYEOK (L) × [9: × Extend] COMBINING GRAVE ACCENT (Extend) ÷ [2: ÷ eot]
|
||||
÷ 1100 × 1100 ÷ # ÷ [1: sot ÷] HANGUL CHOSEONG KIYEOK (L) × [6: L × ( L | V | LV | LVT )] HANGUL CHOSEONG KIYEOK (L) ÷ [2: ÷ eot]
|
||||
÷ 1100 × 1160 ÷ # ÷ [1: sot ÷] HANGUL CHOSEONG KIYEOK (L) × [6: L × ( L | V | LV | LVT )] HANGUL JUNGSEONG FILLER (V) ÷ [2: ÷ eot]
|
||||
÷ 1100 ÷ 11A8 ÷ # ÷ [1: sot ÷] HANGUL CHOSEONG KIYEOK (L) ÷ [10: Any ÷ Any] HANGUL JONGSEONG KIYEOK (T) ÷ [2: ÷ eot]
|
||||
÷ 1100 × AC00 ÷ # ÷ [1: sot ÷] HANGUL CHOSEONG KIYEOK (L) × [6: L × ( L | V | LV | LVT )] HANGUL SYLLABLE GA (LV) ÷ [2: ÷ eot]
|
||||
÷ 1100 × AC01 ÷ # ÷ [1: sot ÷] HANGUL CHOSEONG KIYEOK (L) × [6: L × ( L | V | LV | LVT )] HANGUL SYLLABLE GAG (LVT) ÷ [2: ÷ eot]
|
||||
÷ 1100 ÷ 0020 ÷ # ÷ [1: sot ÷] HANGUL CHOSEONG KIYEOK (L) ÷ [10: Any ÷ Any] SPACE (Other) ÷ [2: ÷ eot]
|
||||
÷ 1160 ÷ 000D ÷ # ÷ [1: sot ÷] HANGUL JUNGSEONG FILLER (V) ÷ [5: ÷ ( Control | CR | LF )] <CARRIAGE RETURN (CR)> (CR) ÷ [2: ÷ eot]
|
||||
÷ 1160 ÷ 000A ÷ # ÷ [1: sot ÷] HANGUL JUNGSEONG FILLER (V) ÷ [5: ÷ ( Control | CR | LF )] <LINE FEED (LF)> (LF) ÷ [2: ÷ eot]
|
||||
÷ 1160 ÷ 0001 ÷ # ÷ [1: sot ÷] HANGUL JUNGSEONG FILLER (V) ÷ [5: ÷ ( Control | CR | LF )] <START OF HEADING> (Control) ÷ [2: ÷ eot]
|
||||
÷ 1160 × 0300 ÷ # ÷ [1: sot ÷] HANGUL JUNGSEONG FILLER (V) × [9: × Extend] COMBINING GRAVE ACCENT (Extend) ÷ [2: ÷ eot]
|
||||
÷ 1160 ÷ 1100 ÷ # ÷ [1: sot ÷] HANGUL JUNGSEONG FILLER (V) ÷ [10: Any ÷ Any] HANGUL CHOSEONG KIYEOK (L) ÷ [2: ÷ eot]
|
||||
÷ 1160 × 1160 ÷ # ÷ [1: sot ÷] HANGUL JUNGSEONG FILLER (V) × [7: ( LV | V ) × ( V | T )] HANGUL JUNGSEONG FILLER (V) ÷ [2: ÷ eot]
|
||||
÷ 1160 × 11A8 ÷ # ÷ [1: sot ÷] HANGUL JUNGSEONG FILLER (V) × [7: ( LV | V ) × ( V | T )] HANGUL JONGSEONG KIYEOK (T) ÷ [2: ÷ eot]
|
||||
÷ 1160 ÷ AC00 ÷ # ÷ [1: sot ÷] HANGUL JUNGSEONG FILLER (V) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GA (LV) ÷ [2: ÷ eot]
|
||||
÷ 1160 ÷ AC01 ÷ # ÷ [1: sot ÷] HANGUL JUNGSEONG FILLER (V) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GAG (LVT) ÷ [2: ÷ eot]
|
||||
÷ 1160 ÷ 0020 ÷ # ÷ [1: sot ÷] HANGUL JUNGSEONG FILLER (V) ÷ [10: Any ÷ Any] SPACE (Other) ÷ [2: ÷ eot]
|
||||
÷ 11A8 ÷ 000D ÷ # ÷ [1: sot ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [5: ÷ ( Control | CR | LF )] <CARRIAGE RETURN (CR)> (CR) ÷ [2: ÷ eot]
|
||||
÷ 11A8 ÷ 000A ÷ # ÷ [1: sot ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [5: ÷ ( Control | CR | LF )] <LINE FEED (LF)> (LF) ÷ [2: ÷ eot]
|
||||
÷ 11A8 ÷ 0001 ÷ # ÷ [1: sot ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [5: ÷ ( Control | CR | LF )] <START OF HEADING> (Control) ÷ [2: ÷ eot]
|
||||
÷ 11A8 × 0300 ÷ # ÷ [1: sot ÷] HANGUL JONGSEONG KIYEOK (T) × [9: × Extend] COMBINING GRAVE ACCENT (Extend) ÷ [2: ÷ eot]
|
||||
÷ 11A8 ÷ 1100 ÷ # ÷ [1: sot ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [10: Any ÷ Any] HANGUL CHOSEONG KIYEOK (L) ÷ [2: ÷ eot]
|
||||
÷ 11A8 ÷ 1160 ÷ # ÷ [1: sot ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [10: Any ÷ Any] HANGUL JUNGSEONG FILLER (V) ÷ [2: ÷ eot]
|
||||
÷ 11A8 × 11A8 ÷ # ÷ [1: sot ÷] HANGUL JONGSEONG KIYEOK (T) × [8: ( LVT | T ) × T] HANGUL JONGSEONG KIYEOK (T) ÷ [2: ÷ eot]
|
||||
÷ 11A8 ÷ AC00 ÷ # ÷ [1: sot ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GA (LV) ÷ [2: ÷ eot]
|
||||
÷ 11A8 ÷ AC01 ÷ # ÷ [1: sot ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GAG (LVT) ÷ [2: ÷ eot]
|
||||
÷ 11A8 ÷ 0020 ÷ # ÷ [1: sot ÷] HANGUL JONGSEONG KIYEOK (T) ÷ [10: Any ÷ Any] SPACE (Other) ÷ [2: ÷ eot]
|
||||
÷ AC00 ÷ 000D ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GA (LV) ÷ [5: ÷ ( Control | CR | LF )] <CARRIAGE RETURN (CR)> (CR) ÷ [2: ÷ eot]
|
||||
÷ AC00 ÷ 000A ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GA (LV) ÷ [5: ÷ ( Control | CR | LF )] <LINE FEED (LF)> (LF) ÷ [2: ÷ eot]
|
||||
÷ AC00 ÷ 0001 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GA (LV) ÷ [5: ÷ ( Control | CR | LF )] <START OF HEADING> (Control) ÷ [2: ÷ eot]
|
||||
÷ AC00 × 0300 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GA (LV) × [9: × Extend] COMBINING GRAVE ACCENT (Extend) ÷ [2: ÷ eot]
|
||||
÷ AC00 ÷ 1100 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GA (LV) ÷ [10: Any ÷ Any] HANGUL CHOSEONG KIYEOK (L) ÷ [2: ÷ eot]
|
||||
÷ AC00 × 1160 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GA (LV) × [7: ( LV | V ) × ( V | T )] HANGUL JUNGSEONG FILLER (V) ÷ [2: ÷ eot]
|
||||
÷ AC00 × 11A8 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GA (LV) × [7: ( LV | V ) × ( V | T )] HANGUL JONGSEONG KIYEOK (T) ÷ [2: ÷ eot]
|
||||
÷ AC00 ÷ AC00 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GA (LV) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GA (LV) ÷ [2: ÷ eot]
|
||||
÷ AC00 ÷ AC01 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GA (LV) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GAG (LVT) ÷ [2: ÷ eot]
|
||||
÷ AC00 ÷ 0020 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GA (LV) ÷ [10: Any ÷ Any] SPACE (Other) ÷ [2: ÷ eot]
|
||||
÷ AC01 ÷ 000D ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GAG (LVT) ÷ [5: ÷ ( Control | CR | LF )] <CARRIAGE RETURN (CR)> (CR) ÷ [2: ÷ eot]
|
||||
÷ AC01 ÷ 000A ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GAG (LVT) ÷ [5: ÷ ( Control | CR | LF )] <LINE FEED (LF)> (LF) ÷ [2: ÷ eot]
|
||||
÷ AC01 ÷ 0001 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GAG (LVT) ÷ [5: ÷ ( Control | CR | LF )] <START OF HEADING> (Control) ÷ [2: ÷ eot]
|
||||
÷ AC01 × 0300 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GAG (LVT) × [9: × Extend] COMBINING GRAVE ACCENT (Extend) ÷ [2: ÷ eot]
|
||||
÷ AC01 ÷ 1100 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GAG (LVT) ÷ [10: Any ÷ Any] HANGUL CHOSEONG KIYEOK (L) ÷ [2: ÷ eot]
|
||||
÷ AC01 ÷ 1160 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GAG (LVT) ÷ [10: Any ÷ Any] HANGUL JUNGSEONG FILLER (V) ÷ [2: ÷ eot]
|
||||
÷ AC01 × 11A8 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GAG (LVT) × [8: ( LVT | T ) × T] HANGUL JONGSEONG KIYEOK (T) ÷ [2: ÷ eot]
|
||||
÷ AC01 ÷ AC00 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GAG (LVT) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GA (LV) ÷ [2: ÷ eot]
|
||||
÷ AC01 ÷ AC01 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GAG (LVT) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GAG (LVT) ÷ [2: ÷ eot]
|
||||
÷ AC01 ÷ 0020 ÷ # ÷ [1: sot ÷] HANGUL SYLLABLE GAG (LVT) ÷ [10: Any ÷ Any] SPACE (Other) ÷ [2: ÷ eot]
|
||||
÷ 0020 ÷ 000D ÷ # ÷ [1: sot ÷] SPACE (Other) ÷ [5: ÷ ( Control | CR | LF )] <CARRIAGE RETURN (CR)> (CR) ÷ [2: ÷ eot]
|
||||
÷ 0020 ÷ 000A ÷ # ÷ [1: sot ÷] SPACE (Other) ÷ [5: ÷ ( Control | CR | LF )] <LINE FEED (LF)> (LF) ÷ [2: ÷ eot]
|
||||
÷ 0020 ÷ 0001 ÷ # ÷ [1: sot ÷] SPACE (Other) ÷ [5: ÷ ( Control | CR | LF )] <START OF HEADING> (Control) ÷ [2: ÷ eot]
|
||||
÷ 0020 × 0300 ÷ # ÷ [1: sot ÷] SPACE (Other) × [9: × Extend] COMBINING GRAVE ACCENT (Extend) ÷ [2: ÷ eot]
|
||||
÷ 0020 ÷ 1100 ÷ # ÷ [1: sot ÷] SPACE (Other) ÷ [10: Any ÷ Any] HANGUL CHOSEONG KIYEOK (L) ÷ [2: ÷ eot]
|
||||
÷ 0020 ÷ 1160 ÷ # ÷ [1: sot ÷] SPACE (Other) ÷ [10: Any ÷ Any] HANGUL JUNGSEONG FILLER (V) ÷ [2: ÷ eot]
|
||||
÷ 0020 ÷ 11A8 ÷ # ÷ [1: sot ÷] SPACE (Other) ÷ [10: Any ÷ Any] HANGUL JONGSEONG KIYEOK (T) ÷ [2: ÷ eot]
|
||||
÷ 0020 ÷ AC00 ÷ # ÷ [1: sot ÷] SPACE (Other) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GA (LV) ÷ [2: ÷ eot]
|
||||
÷ 0020 ÷ AC01 ÷ # ÷ [1: sot ÷] SPACE (Other) ÷ [10: Any ÷ Any] HANGUL SYLLABLE GAG (LVT) ÷ [2: ÷ eot]
|
||||
÷ 0020 ÷ 0020 ÷ # ÷ [1: sot ÷] SPACE (Other) ÷ [10: Any ÷ Any] SPACE (Other) ÷ [2: ÷ eot]
|
||||
# Lines: 100
|
10612
icu4c/source/test/testdata/LineBreakTest.txt
vendored
10612
icu4c/source/test/testdata/LineBreakTest.txt
vendored
File diff suppressed because it is too large
Load diff
15
icu4c/source/test/testdata/SentenceBreakTest.txt
vendored
15
icu4c/source/test/testdata/SentenceBreakTest.txt
vendored
|
@ -1,9 +1,8 @@
|
|||
# Note: Please make sure that this utf-8 file contains a BOM.
|
||||
# SentenceBreakTest-5.2.0.txt
|
||||
# Date: 2009-05-28, 20:38:05 GMT [MD]
|
||||
# SentenceBreakTest-6.0.0.txt
|
||||
# Date: 2010-08-19, 01:19:53 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -16,7 +15,7 @@
|
|||
# × wherever there is not.
|
||||
# <comment> the format can change, but currently it shows:
|
||||
# - the sample character name
|
||||
# - (x) the line_break property* for the sample character
|
||||
# - (x) the Sentence_Break property* for the sample character
|
||||
# - [x] the rule that determines whether there is a break or not
|
||||
#
|
||||
# These samples may be extended or changed in the future.
|
||||
|
@ -484,6 +483,8 @@
|
|||
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 2018 × 0028 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [12.0] LATIN SMALL LETTER T (Lower) × [12.0] LATIN SMALL LETTER C (Lower) × [12.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [12.0] LEFT PARENTHESIS (Close) × [12.0] LATIN CAPITAL LETTER T (Upper) × [12.0] LATIN SMALL LETTER H (Lower) × [12.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
|
||||
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [12.0] LATIN SMALL LETTER T (Lower) × [12.0] LATIN SMALL LETTER C (Lower) × [12.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [12.0] LATIN SMALL LETTER H (Lower) × [12.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
|
||||
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [12.0] LATIN SMALL LETTER T (Lower) × [12.0] LATIN SMALL LETTER C (Lower) × [12.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [12.0] LATIN SMALL LETTER H (Lower) × [12.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
|
||||
÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [12.0] LATIN SMALL LETTER T (Lower) × [12.0] LATIN SMALL LETTER C (Lower) × [12.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [12.0] LATIN SMALL LETTER H (Lower) × [12.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
|
||||
÷ 0065 × 0074 × 0063 × 002E × 0029 × 000A ÷ 0308 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [12.0] LATIN SMALL LETTER T (Lower) × [12.0] LATIN SMALL LETTER C (Lower) × [12.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [12.0] LATIN CAPITAL LETTER T (Upper) × [12.0] LATIN SMALL LETTER H (Lower) × [12.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
|
||||
÷ 0074 × 0068 × 0065 × 0020 × 0072 × 0065 × 0073 × 0070 × 002E × 0020 × 006C × 0065 × 0061 × 0064 × 0065 × 0072 × 0073 × 0020 × 0061 × 0072 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER T (Lower) × [12.0] LATIN SMALL LETTER H (Lower) × [12.0] LATIN SMALL LETTER E (Lower) × [12.0] SPACE (Sp) × [12.0] LATIN SMALL LETTER R (Lower) × [12.0] LATIN SMALL LETTER E (Lower) × [12.0] LATIN SMALL LETTER S (Lower) × [12.0] LATIN SMALL LETTER P (Lower) × [12.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER L (Lower) × [12.0] LATIN SMALL LETTER E (Lower) × [12.0] LATIN SMALL LETTER A (Lower) × [12.0] LATIN SMALL LETTER D (Lower) × [12.0] LATIN SMALL LETTER E (Lower) × [12.0] LATIN SMALL LETTER R (Lower) × [12.0] LATIN SMALL LETTER S (Lower) × [12.0] SPACE (Sp) × [12.0] LATIN SMALL LETTER A (Lower) × [12.0] LATIN SMALL LETTER R (Lower) × [12.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
|
||||
÷ 5B57 × 002E ÷ 5B57 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [12.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) ÷ [0.3]
|
||||
÷ 0065 × 0074 × 0063 × 002E ÷ 5B83 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [12.0] LATIN SMALL LETTER T (Lower) × [12.0] LATIN SMALL LETTER C (Lower) × [12.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3]
|
||||
|
@ -502,9 +503,11 @@
|
|||
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 2018 × 2060 × 0028 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [12.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 000A ÷ 2060 × 0308 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 0020 × 2060 × 0072 × 2060 × 0065 × 2060 × 0073 × 2060 × 0070 × 2060 × 002E × 2060 × 0020 × 2060 × 006C × 2060 × 0065 × 2060 × 0061 × 2060 × 0064 × 2060 × 0065 × 2060 × 0072 × 2060 × 0073 × 2060 × 0020 × 2060 × 0061 × 2060 × 0072 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER P (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER L (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 × 5B57 × 2060 × 002E × 2060 ÷ 5B57 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 3002 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.1] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 × 5B57 × 2060 × 3002 × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [12.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [12.0] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
# Lines: 450
|
||||
# Lines: 490
|
||||
|
|
11
icu4c/source/test/testdata/WordBreakTest.txt
vendored
11
icu4c/source/test/testdata/WordBreakTest.txt
vendored
|
@ -1,9 +1,8 @@
|
|||
# Note: Please make sure that this utf-8 file contains a BOM.
|
||||
# WordBreakTest-5.2.0.txt
|
||||
# Date: 2009-05-28, 20:38:06 GMT [MD]
|
||||
# WordBreakTest-6.0.0.txt
|
||||
# Date: 2010-08-19, 01:19:54 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -16,7 +15,7 @@
|
|||
# × wherever there is not.
|
||||
# <comment> the format can change, but currently it shows:
|
||||
# - the sample character name
|
||||
# - (x) the line_break property* for the sample character
|
||||
# - (x) the Word_Break property* for the sample character
|
||||
# - [x] the rule that determines whether there is a break or not
|
||||
#
|
||||
# These samples may be extended or changed in the future.
|
||||
|
@ -999,4 +998,4 @@
|
|||
÷ 2060 ÷ 0061 × 2060 × 0062 × 2060 × 00AD × 2060 × 0062 × 2060 × 0079 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER B (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER B (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER Y (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 ÷ 0061 × 2060 ÷ 0024 × 2060 ÷ 002D × 2060 ÷ 0033 × 2060 × 0034 × 2060 × 002C × 2060 × 0035 × 2060 × 0036 × 2060 × 0037 × 2060 × 002E × 2060 × 0031 × 2060 × 0034 × 2060 ÷ 0025 × 2060 ÷ 0062 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DOLLAR SIGN (Other) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] HYPHEN-MINUS (Other) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT THREE (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT FOUR (Numeric) × [4.0] WORD JOINER (Format_FE) × [12.0] COMMA (MidNum) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT FIVE (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT SIX (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT SEVEN (Numeric) × [4.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT FOUR (Numeric) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] PERCENT SIGN (Other) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER B (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
÷ 2060 ÷ 0033 × 2060 × 0061 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] DIGIT THREE (Numeric) × [4.0] WORD JOINER (Format_FE) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
|
||||
# Lines: 968
|
||||
# Lines: 978
|
||||
|
|
|
@ -92,9 +92,10 @@ uprops_swap(const UDataSwapper *ds,
|
|||
pInfo->dataFormat[1]==0x50 &&
|
||||
pInfo->dataFormat[2]==0x72 &&
|
||||
pInfo->dataFormat[3]==0x6f &&
|
||||
(3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=6) &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
|
||||
(3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
|
||||
(pInfo->formatVersion[0]>=7 ||
|
||||
(pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
|
||||
)) {
|
||||
udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
|
||||
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
||||
|
@ -122,10 +123,18 @@ uprops_swap(const UDataSwapper *ds,
|
|||
* comments are copied from the data format description in genprops/store.c
|
||||
* indexes[] constants are in uprops.h
|
||||
*/
|
||||
int32_t dataTop;
|
||||
if(length>=0) {
|
||||
int32_t *outData32;
|
||||
|
||||
if((length-headerSize)<(4*dataIndexes[UPROPS_RESERVED_INDEX])) {
|
||||
/*
|
||||
* In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
|
||||
* In earlier formatVersions, it is 0 and a lower dataIndexes entry
|
||||
* has the top of the last item.
|
||||
*/
|
||||
for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
|
||||
|
||||
if((length-headerSize)<(4*dataTop)) {
|
||||
udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
|
||||
length-headerSize);
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
|
@ -136,7 +145,7 @@ uprops_swap(const UDataSwapper *ds,
|
|||
|
||||
/* copy everything for inaccessible data (padding) */
|
||||
if(inData32!=outData32) {
|
||||
uprv_memcpy(outData32, inData32, 4*dataIndexes[UPROPS_RESERVED_INDEX]);
|
||||
uprv_memcpy(outData32, inData32, 4*dataTop);
|
||||
}
|
||||
|
||||
/* swap the indexes[16] */
|
||||
|
@ -146,7 +155,7 @@ uprops_swap(const UDataSwapper *ds,
|
|||
* swap the main properties UTrie
|
||||
* PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
|
||||
*/
|
||||
utrie_swap(ds,
|
||||
utrie2_swapAnyVersion(ds,
|
||||
inData32+UPROPS_INDEX_COUNT,
|
||||
4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
|
||||
outData32+UPROPS_INDEX_COUNT,
|
||||
|
@ -177,7 +186,7 @@ uprops_swap(const UDataSwapper *ds,
|
|||
* swap the additional UTrie
|
||||
* i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
|
||||
*/
|
||||
utrie_swap(ds,
|
||||
utrie2_swapAnyVersion(ds,
|
||||
inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
|
||||
4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
|
||||
outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
|
||||
|
@ -189,13 +198,21 @@ uprops_swap(const UDataSwapper *ds,
|
|||
*/
|
||||
ds->swapArray32(ds,
|
||||
inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
|
||||
4*(dataIndexes[UPROPS_RESERVED_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
|
||||
4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
|
||||
outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
|
||||
pErrorCode);
|
||||
|
||||
// swap the Script_Extensions data
|
||||
// SCX const uint16_t scriptExtensions[2*(i7-i6)];
|
||||
ds->swapArray16(ds,
|
||||
inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
|
||||
4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
|
||||
outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
/* i6 reservedItemIndex; -- 32-bit unit index to the top of the properties vectors table */
|
||||
return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX];
|
||||
/* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
|
||||
return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
|
||||
}
|
||||
|
||||
/* Unicode case mapping data swapping --------------------------------------- */
|
||||
|
@ -228,9 +245,10 @@ ucase_swap(const UDataSwapper *ds,
|
|||
pInfo->dataFormat[1]==UCASE_FMT_1 &&
|
||||
pInfo->dataFormat[2]==UCASE_FMT_2 &&
|
||||
pInfo->dataFormat[3]==UCASE_FMT_3 &&
|
||||
pInfo->formatVersion[0]==1 &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
|
||||
((pInfo->formatVersion[0]==1 &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
|
||||
pInfo->formatVersion[0]==2)
|
||||
)) {
|
||||
udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
|
||||
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
||||
|
@ -285,7 +303,7 @@ ucase_swap(const UDataSwapper *ds,
|
|||
|
||||
/* swap the UTrie */
|
||||
count=indexes[UCASE_IX_TRIE_SIZE];
|
||||
utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
|
||||
utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
|
||||
offset+=count;
|
||||
|
||||
/* swap the uint16_t exceptions[] and unfold[] */
|
||||
|
@ -329,9 +347,10 @@ ubidi_swap(const UDataSwapper *ds,
|
|||
pInfo->dataFormat[1]==UBIDI_FMT_1 &&
|
||||
pInfo->dataFormat[2]==UBIDI_FMT_2 &&
|
||||
pInfo->dataFormat[3]==UBIDI_FMT_3 &&
|
||||
pInfo->formatVersion[0]==1 &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
|
||||
((pInfo->formatVersion[0]==1 &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
|
||||
pInfo->formatVersion[0]==2)
|
||||
)) {
|
||||
udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
|
||||
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
||||
|
@ -386,7 +405,7 @@ ubidi_swap(const UDataSwapper *ds,
|
|||
|
||||
/* swap the UTrie */
|
||||
count=indexes[UBIDI_IX_TRIE_SIZE];
|
||||
utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
|
||||
utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
|
||||
offset+=count;
|
||||
|
||||
/* swap the uint32_t mirrors[] */
|
||||
|
|
Loading…
Add table
Reference in a new issue