diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index 391ef28a282..48401ae8eb1 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -81,7 +81,7 @@ LIBS = $(LIBICUDT) $(DEFAULT_LIBS) OBJECTS = errorcode.o putil.o umath.o utypes.o uinvchar.o umutex.o ucln_cmn.o \ uinit.o uobject.o cmemory.o charstr.o cstr.o \ -udata.o ucmndata.o udatamem.o umapfile.o udataswp.o ucol_swp.o utrace.o \ +udata.o ucmndata.o udatamem.o umapfile.o udataswp.o utrie_swap.o ucol_swp.o utrace.o \ uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o uvectr64.o \ ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \ @@ -102,7 +102,8 @@ normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp chariter.o schriter.o uchriter.o uiter.o \ patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \ uscript.o uscript_props.o usc_impl.o unames.o \ -utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \ +utrie.o utrie2.o utrie2_builder.o ucptrie.o umutablecptrie.o \ +bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \ uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o filteredbrk.o \ rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o rbbi_cache.o \ serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \ diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj index 99c4533a573..134e2f2ba10 100644 --- a/icu4c/source/common/common.vcxproj +++ b/icu4c/source/common/common.vcxproj @@ -181,6 +181,7 @@ + @@ -314,8 +315,10 @@ + + diff --git a/icu4c/source/common/common.vcxproj.filters 
b/icu4c/source/common/common.vcxproj.filters index 20863f138ce..a7efe49adc9 100644 --- a/icu4c/source/common/common.vcxproj.filters +++ b/icu4c/source/common/common.vcxproj.filters @@ -139,6 +139,9 @@ collections + + collections + collections @@ -589,6 +592,12 @@ collections + + collections + + + collections + properties & sets @@ -1204,6 +1213,12 @@ collections + + collections + + + collections + data & memory @@ -1217,4 +1232,4 @@ strings - \ No newline at end of file + diff --git a/icu4c/source/common/common_uwp.vcxproj b/icu4c/source/common/common_uwp.vcxproj index ee11e5d9f61..a651d505b2f 100644 --- a/icu4c/source/common/common_uwp.vcxproj +++ b/icu4c/source/common/common_uwp.vcxproj @@ -304,6 +304,7 @@ + @@ -439,9 +440,11 @@ + + diff --git a/icu4c/source/common/loadednormalizer2impl.cpp b/icu4c/source/common/loadednormalizer2impl.cpp index 041d11853c4..82cb325b723 100644 --- a/icu4c/source/common/loadednormalizer2impl.cpp +++ b/icu4c/source/common/loadednormalizer2impl.cpp @@ -18,6 +18,7 @@ #include "unicode/udata.h" #include "unicode/localpointer.h" #include "unicode/normalizer2.h" +#include "unicode/ucptrie.h" #include "unicode/unistr.h" #include "unicode/unorm.h" #include "cstring.h" @@ -42,12 +43,12 @@ private: isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo); UDataMemory *memory; - UTrie2 *ownedTrie; + UCPTrie *ownedTrie; }; LoadedNormalizer2Impl::~LoadedNormalizer2Impl() { udata_close(memory); - utrie2_close(ownedTrie); + ucptrie_close(ownedTrie); } UBool U_CALLCONV @@ -62,7 +63,7 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/, pInfo->dataFormat[1]==0x72 && pInfo->dataFormat[2]==0x6d && pInfo->dataFormat[3]==0x32 && - pInfo->formatVersion[0]==3 + pInfo->formatVersion[0]==4 ) { // Normalizer2Impl *me=(Normalizer2Impl *)context; // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); @@ -91,9 +92,9 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod int32_t 
offset=inIndexes[IX_NORM_TRIE_OFFSET]; int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; - ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, - inBytes+offset, nextOffset-offset, NULL, - &errorCode); + ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, + inBytes+offset, nextOffset-offset, NULL, + &errorCode); if(U_FAILURE(errorCode)) { return; } @@ -131,15 +132,26 @@ U_CDECL_BEGIN static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup(); U_CDECL_END -static Norm2AllModes *nfkcSingleton; -static Norm2AllModes *nfkc_cfSingleton; -static UHashtable *cache=NULL; +#if !NORM2_HARDCODE_NFC_DATA +static Norm2AllModes *nfcSingleton; +static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; +#endif +static Norm2AllModes *nfkcSingleton; static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; + +static Norm2AllModes *nfkc_cfSingleton; static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; +static UHashtable *cache=NULL; + // UInitOnce singleton initialization function static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { +#if !NORM2_HARDCODE_NFC_DATA + if (uprv_strcmp(what, "nfc") == 0) { + nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode); + } else +#endif if (uprv_strcmp(what, "nfkc") == 0) { nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); } else if (uprv_strcmp(what, "nfkc_cf") == 0) { @@ -157,19 +169,36 @@ static void U_CALLCONV deleteNorm2AllModes(void *allModes) { } static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { +#if !NORM2_HARDCODE_NFC_DATA + delete nfcSingleton; + nfcSingleton = NULL; + nfcInitOnce.reset(); +#endif + delete nfkcSingleton; nfkcSingleton = NULL; + nfkcInitOnce.reset(); + delete nfkc_cfSingleton; nfkc_cfSingleton = NULL; + nfkc_cfInitOnce.reset(); + uhash_close(cache); cache=NULL; - nfkcInitOnce.reset(); - nfkc_cfInitOnce.reset(); return TRUE; } U_CDECL_END +#if !NORM2_HARDCODE_NFC_DATA +const Norm2AllModes * 
+Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); + return nfcSingleton; +} +#endif + const Norm2AllModes * Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return NULL; } @@ -184,6 +213,36 @@ Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { return nfkc_cfSingleton; } +#if !NORM2_HARDCODE_NFC_DATA +const Normalizer2 * +Normalizer2::getNFCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->comp : NULL; +} + +const Normalizer2 * +Normalizer2::getNFDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->decomp : NULL; +} + +const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcd : NULL; +} + +const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcc : NULL; +} + +const Normalizer2Impl * +Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? 
allModes->impl : NULL; +} +#endif + const Normalizer2 * Normalizer2::getNFKCInstance(UErrorCode &errorCode) { const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); diff --git a/icu4c/source/common/norm2_nfc_data.h b/icu4c/source/common/norm2_nfc_data.h index c8bf440ae10..82a68097385 100644 --- a/icu4c/source/common/norm2_nfc_data.h +++ b/icu4c/source/common/norm2_nfc_data.h @@ -11,258 +11,197 @@ #ifdef INCLUDED_FROM_NORMALIZER2_CPP -static const UVersionInfo norm2_nfc_data_formatVersion={3,0,0,0}; +static const UVersionInfo norm2_nfc_data_formatVersion={4,0,0,0}; static const UVersionInfo norm2_nfc_data_dataVersion={0xb,0,0,0}; static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={ -0x50,0x4e50,0x8aa8,0x8ba8,0x8ba8,0x8ba8,0x8ba8,0x8ba8,0xc0,0x300,0xadc,0x29d0,0x3c56,0xfc00,0x1282,0x3b8c, +0x50,0x4ab0,0x8708,0x8808,0x8808,0x8808,0x8808,0x8808,0xc0,0x300,0xadc,0x29d0,0x3c56,0xfc00,0x1282,0x3b8c, 0x3c24,0x3c56,0x300,0 }; -static const uint16_t norm2_nfc_data_trieIndex[9976]={ -0x2aa,0x2b2,0x2ba,0x2c2,0x2d0,0x2d8,0x2e0,0x2e8,0x2f0,0x2f8,0x300,0x308,0x310,0x318,0x31e,0x326, -0x32e,0x336,0x2c9,0x2d1,0x33b,0x343,0x2c9,0x2d1,0x34b,0x353,0x35b,0x363,0x36b,0x373,0x37b,0x383, -0x38b,0x393,0x39b,0x3a3,0x3ab,0x3b3,0x3bb,0x3c3,0x2c9,0x2d1,0x2c9,0x2d1,0x3ca,0x3d2,0x3da,0x3e2, -0x3e6,0x3ee,0x3f4,0x3fc,0x2c9,0x2d1,0x404,0x40c,0x410,0x418,0x420,0x428,0x2c9,0x2d1,0x426,0x42e, -0x436,0x43d,0x441,0x2c9,0x2c9,0x2c9,0x448,0x450,0x2c9,0x458,0x460,0x2c9,0x2c9,0x468,0x470,0x478, -0x2c9,0x480,0x488,0x2c9,0x2c9,0x490,0x498,0x2c9,0x2c9,0x468,0x49f,0x2c9,0x4a7,0x4ad,0x4b5,0x2c9, -0x2c9,0x2c9,0x4bc,0x2c9,0x2c9,0x4c2,0x4ca,0x2c9,0x2c9,0x4d0,0x4d8,0x2c9,0x2c9,0x2c9,0x4de,0x2c9, -0x2c9,0x4e6,0x4ed,0x2c9,0x2c9,0x4f0,0x4f7,0x2c9,0x4fa,0x501,0x509,0x511,0x519,0x521,0x528,0x2c9, -0x2c9,0x52f,0x2c9,0x2c9,0x536,0x2c9,0x2c9,0x2c9,0x96d,0x2c9,0x2c9,0x975,0x2c9,0x97b,0x983,0x2c9, 
-0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x53a,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x542,0x542,0x2c9,0x2c9,0x2c9,0x2c9,0x548,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x550,0x2c9,0x2c9,0x2c9,0x553,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x55a,0x2c9,0x2c9,0x562,0x2c9,0x56a,0x2c9,0x2c9,0x572,0x577,0x57f,0x585,0x2c9,0x58b,0x2c9,0x592, -0x2c9,0x597,0x2c9,0x2c9,0x2c9,0x2c9,0x59d,0x5a5,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x5ad,0x5b2, -0x5ba,0x5c2,0x5ca,0x5d2,0x5da,0x5e2,0x5ea,0x5f2,0x5fa,0x602,0x60a,0x612,0x61a,0x622,0x62a,0x632, -0x63a,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x63e,0x646,0x2c9,0x64d,0x2c9,0x2c9,0x651,0x658,0x65d,0x2c9, -0x665,0x66d,0x675,0x67d,0x685,0x68d,0x2c9,0x695,0x2c9,0x69b,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x69e,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x6a6,0x2c9,0x2c9,0x2c9,0x6ab,0x2c9,0x2c9,0x2c9,0x6b3, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x6bb,0x6c2,0x6ca,0x6d2,0x6da,0x6e2,0x6ea,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, 
-0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, 
-0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, 
-0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x6f2,0x6fa,0x2c9,0x2c9,0x702,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x709,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x710,0x718,0x2c9,0x71e,0x722,0x2c9,0x2c9,0x598,0x72a,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x72e,0x736,0x739,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x498, -0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c, -0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e, -0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990, 
-0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b, -0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d, -0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f, -0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f, -0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c, -0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e, -0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990, -0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b, -0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d, -0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f, -0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f, -0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c, -0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e, -0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990, -0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b, -0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d, -0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f, -0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f, -0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x997,0x2c9,0x2c9, -0x9a7,0x9ae,0x2aa,0x9b5,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa, 
-0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x741,0x749,0x751,0x759,0x761,0x769,0x771,0x779, -0x781,0x789,0x791,0x799,0x7a1,0x7a9,0x7b1,0x2c9,0x7b8,0x7c0,0x7c8,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x7d0,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, 
-0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0xb28,0xb28,0xb40,0xb80,0xbc0,0xc00,0xc40,0xc78,0xcb8,0xb24,0xcec,0xb24,0xd2c,0xd6c,0xdac,0xdec, -0xe2c,0xe6c,0xeac,0xeec,0xb24,0xb24,0xf28,0xf68,0xf98,0xfd0,0xb24,0x1010,0x1040,0x1080,0xb24,0x1098, -0x880,0x8b0,0x8ee,0x92d,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x95a,0x188,0x188, -0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x976,0x188,0x188,0x9ac,0x188,0x9ec,0xa26,0x188,0x188, -0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, -0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xa66, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x7d4, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x7dc,0x2c9,0x2c9,0x2c9,0x7df,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x7e6,0x7ea,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x7f2,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x7f9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x800,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x709,0x6ab,0x805,0x80d,0x2c9,0x2c9,0x815,0x81c,0x2c9,0x598,0x2c9,0x2c9,0x824,0x2c9,0x2c9,0x827, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x82d,0x2c9,0x830,0x838,0x83f,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x847,0x2c9,0x2c9,0x84f,0x857,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x85c,0x864,0x2c9,0x2c9,0x6ab, -0x2c9,0x2c9,0x2c9,0x867,0x2c9,0x2c9,0x2c9,0x86d,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x870,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x542,0x86e, -0x2c9,0x877,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x6ab,0x2c9, 
-0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x87f,0x2c9,0x882,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x888,0x2c9,0x88e,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x894,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x89c,0x8a4,0x8ac,0x8b2,0x8ba,0x2c9,0x2c9,0x2c9,0x8c2,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x8ca,0x8d2,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x8d6,0x2c9,0x2c9,0x2c9, -0x8dd,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x8e5,0x8ed,0x8f5,0x8fd,0x905,0x90d,0x915,0x91d,0x925,0x92d, -0x935,0x93d,0x945,0x94d,0x955,0x95d,0x965,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, 
-0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2a9,0x2a9,1,1,1,1,1,1,1,1, +static const uint16_t norm2_nfc_data_trieIndex[1690]={ +0,0x40,0x7b,0xbb,0xfb,0x13a,0x17a,0x1b2,0x1f2,0x226,0x254,0x226,0x294,0x2d4,0x313,0x353, +0x393,0x3d2,0x40f,0x44e,0x226,0x226,0x488,0x4c8,0x4f8,0x530,0x226,0x570,0x59f,0x5de,0x226,0x5f3, +0x631,0x65f,0x226,0x68c,0x6cc,0x709,0x729,0x768,0x7a7,0x7e4,0x803,0x840,0x729,0x879,0x8a7,0x8e6, +0x226,0x920,0x937,0x977,0x98e,0x9cd,0x226,0xa03,0xa23,0xa5e,0xa6a,0xaa4,0xacc,0xb09,0xb49,0xb83, +0xb9e,0x226,0xbd9,0x226,0xc19,0xc38,0xc6e,0xcab,0x226,0x226,0x226,0x226,0x226,0xcce,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xcfa,0x226,0x226,0xd2f, +0x226,0x226,0xd4d,0x226,0xd77,0x226,0x226,0x226,0xdb3,0xdd3,0xe13,0x226,0xe51,0xe91,0xec5,0xef1, +0x808,0x226,0x226,0xf25,0x226,0x226,0x226,0xf65,0xfa5,0xfe5,0x1025,0x1065,0x10a5,0x10e5,0x1125,0x1165, +0x11a5,0x226,0x226,0x11d5,0x1206,0x226,0x1236,0x1269,0x12a6,0x12e5,0x1325,0x135b,0x1389,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x13b4,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0xcbc,0x226,0x13d1,0x226,0x1411,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x1451,0x148b,0x14c9,0x1509,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1548,0x1586,0x15a6,0x226,0x226,0x226,0x226, +0x15e0,0x226,0x226,0x161c,0x164e,0x167c,0x80c,0x168f,0x226,0x226,0x169f,0x16df,0x226,0x226,0x226,0x13e3, +0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727, +0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737, +0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b, +0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f, +0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f, +0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723, +0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733, +0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727, +0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737, +0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b, +0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x176b,0x226, +0x17ab,0x17e6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x1826,0x1866,0x18a6,0x18e6,0x1926,0x1966,0x19a6,0x19e6,0x1a09,0x1a49,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1a69,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x61f,0x62e,0x644,0x663,0x678,0x678,0x678,0x67c,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbd9,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x40c, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1a9c,0x226,0x226,0x1aac,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0xdc5,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1abc,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac6,0x54f, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7eb,0x226,0x226,0x9ba,0x226,0x1ad6, +0x1ae3,0x1aef,0x226,0x226,0x226,0x226,0x414,0x226,0x1afa,0x1b0a,0x226,0x226,0x226,0x7e0,0x226,0x226, +0x226,0x226,0x1b1a,0x226,0x226,0x226,0x1b25,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x1b2c,0x226,0x226,0x226,0x226,0x1b37,0x1b46,0x8f6,0x1b54,0x412,0x226,0x226,0x226,0x226,0x226, 
+0x226,0x226,0x226,0x1b62,0x798,0x226,0x226,0x226,0x226,0x226,0x1b72,0x1b81,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x8d6,0x1b89,0x1b99,0x226,0x226,0x226,0x9ba, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ba3,0x226,0x226,0x226,0x226,0x226,0x226,0x7e6,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ba0,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7ed,0x7ea,0x226,0x226,0x226,0x226,0x7e8, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x9ba,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbd3,0x226,0x226,0x226,0x226,0x7ea,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bb3,0x226,0x226,0x226, +0xebe,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bb8,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x1bc7,0x1bd7,0x1be5,0x1bf2,0x226,0x1bfe,0x1c0c,0x1c1c,0x226,0x226,0x226,0x226, +0xce9,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c2c,0x1c34,0x1c42,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c52,0x226,0x226,0x226, +0x226,0x226,0x226,0x1c5e,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c6e, +0x1c7e,0x1c8e,0x1c9e,0x1cae,0x1cbe,0x1cce,0x1cde,0x1cee,0x1cfe,0x1d0e,0x1d1e,0x1d2e,0x1d3e,0x1d4e,0x1d5e,0x1d6e, 
+0x1d7e,0x1d8e,0x1d9e,0x1dae,0x1dbe,0x1dce,0x1dde,0x1dee,0x1dfe,0x1e0e,0x1e1e,0x1e2e,0x1e3e,0x1e4e,0x1e5e,0x1e6e, +0x1e7e,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x408, +0x428,0xc4,0xc4,0xc4,0x448,0x457,0x46a,0x486,0x4a3,0x4bf,0x4dc,0x4f9,0x516,0x533,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0x54d,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0x564,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x56f,0x58c,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0x5ac,0xc4,0xc4,0xc4,0x5bf,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5df,0x5ff +}; + +static const uint16_t norm2_nfc_data_trieData[7822]={ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,4,8,0xc,1,1,0x10,0x50,0x5c,0x70,0x88,0xcc,0xd0, -0xec,0x108,0x144,0x148,0x15c,0x174,0x180,0x1a4,0x1e4,1,0x1ec,0x20c,0x228,0x244,0x290,0x298, -0x2b0,0x2b8,0x2dc,1,1,1,1,1,1,0x2f4,0x334,0x340,0x354,0x36c,0x3b0,0x3b4, -0x3d0,0x3f0,0x428,0x430,0x444,0x45c,0x468,0x48c,0x4cc,1,0x4d4,0x4f4,0x510,0x530,0x57c,0x584, -0x5a0,0x5a8,0x5d0,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,4,8,0xc,1, +1,0x10,0x50,0x5c,0x70,0x88,0xcc,0xd0,0xec,0x108,0x144,0x148,0x15c,0x174,0x180,0x1a4, +0x1e4,1,0x1ec,0x20c,0x228,0x244,0x290,0x298,0x2b0,0x2b8,0x2dc,1,1,1,1,1, +1,0x2f4,0x334,0x340,0x354,0x36c,0x3b0,0x3b4,0x3d0,0x3f0,0x428,0x430,0x444,0x45c,0x468,0x48c, +0x4cc,1,0x4d4,0x4f4,0x510,0x530,0x57c,0x584,0x5a0,0x5a8,0x5d0,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x5e8,1,1,1,1,1,1,1,1,1,1,1,1, 
+1,1,1,1,1,1,1,1,1,1,1,0x1284,0x128a,0xade,0x1290,0xaf4, +0xafe,0x5f4,0xb08,0x1296,0x129c,0xb12,0x12a2,0x12a8,0x12ae,0x12b4,0xb28,1,0x12ba,0x12c0,0x12c6,0xb32, +0xb48,0xb5a,1,0x5fc,0x12cc,0x12d2,0x12d8,0xb64,0x12de,1,1,0x12e4,0x12ea,0xb7a,0x12f0,0xb90, +0xb9a,0x600,0xba4,0x12f6,0x12fc,0xbae,0x1302,0x1308,0x130e,0x1314,0xbc4,1,0x131a,0x1320,0x1326,0xbce, +0xbe4,0xbf6,1,0x608,0x132c,0x1332,0x1338,0xc00,0x133e,1,0x1344,0x134a,0x1350,0xc16,0xc2c,0x1357, +0x135d,0x1362,0x1368,0x136e,0x1374,0x137a,0x1380,0x1386,0x138c,0x1392,0x1398,1,1,0xc42,0xc50,0x139e, +0x13a4,0x13aa,0x13b0,0x13b7,0x13bd,0x13c2,0x13c8,0x13ce,0x13d4,0x13da,0x13e0,0x13e6,0x13ec,0x13f3,0x13f9,0x13fe, +0x1404,1,1,0x140a,0x1410,0x1416,0x141c,0x1422,0x1428,0x142f,0x1435,0x143a,1,1,1,0x1441, +0x1447,0x144d,0x1453,1,0x1458,0x145e,0x1465,0x146b,0x1470,0x1476,1,1,1,0x147c,0x1482,0x1489, +0x148f,0x1494,0x149a,1,1,1,0xc5e,0xc6c,0x14a0,0x14a6,0x14ac,0x14b2,1,1,0x14b8,0x14be, +0x14c5,0x14cb,0x14d0,0x14d6,0xc7a,0xc84,0x14dc,0x14e2,0x14e9,0x14ef,0xc8e,0xc98,0x14f5,0x14fb,0x1500,0x1506, +1,1,0xca2,0xcac,0xcb6,0xcc0,0x150c,0x1512,0x1518,0x151e,0x1524,0x152a,0x1531,0x1537,0x153c,0x1542, +0x1548,0x154e,0x1554,0x155a,0x1560,0x1566,0x156c,0x1572,0x1578,0x60c,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0xcca,0xce4,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xcfe,0xd18,1,1,1,1,1, +1,0x610,1,1,1,1,1,1,1,1,1,1,1,1,1,0x157e, +0x1584,0x158a,0x1590,0x1596,0x159c,0x15a2,0x15a8,0x15b0,0x15ba,0x15c4,0x15ce,0x15d8,0x15e2,0x15ec,0x15f6,1, +0x1600,0x160a,0x1614,0x161e,0x1627,0x162d,1,1,0x1632,0x1638,0x163e,0x1644,0xd32,0xd3c,0x164d,0x1657, +0x165f,0x1665,0x166b,1,1,1,0x1670,0x1676,1,1,0x167c,0x1682,0x168a,0x1694,0x169d,0x16a3, +0x16a9,0x16af,0x16b4,0x16ba,0x16c0,0x16c6,0x16cc,0x16d2,0x16d8,0x16de,0x16e4,0x16ea,0x16f0,0x16f6,0x16fc,0x1702, +0x1708,0x170e,0x1714,0x171a,0x1720,0x1726,0x172c,0x1732,0x1738,0x173e,0x1744,0x174a,0x1750,0x1756,1,1, +0x175c,0x1762,1,1,1,1,1,1,0xd46,0xd50,0xd5a,0xd64,0x176a,0x1774,0x177e,0x1788, 
+0xd6e,0xd78,0x1792,0x179c,0x17a4,0x17aa,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0x5e8,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0x1284,0x128a,0xade,0x1290,0xaf4,0xafe,0x5f4,0xb08,0x1296,0x129c,0xb12,0x12a2,0x12a8,0x12ae,0x12b4,0xb28, -1,0x12ba,0x12c0,0x12c6,0xb32,0xb48,0xb5a,1,0x5fc,0x12cc,0x12d2,0x12d8,0xb64,0x12de,1,1, -0x12e4,0x12ea,0xb7a,0x12f0,0xb90,0xb9a,0x600,0xba4,0x12f6,0x12fc,0xbae,0x1302,0x1308,0x130e,0x1314,0xbc4, -1,0x131a,0x1320,0x1326,0xbce,0xbe4,0xbf6,1,0x608,0x132c,0x1332,0x1338,0xc00,0x133e,1,0x1344, -0x134a,0x1350,0xc16,0xc2c,0x1357,0x135d,0x1362,0x1368,0x136e,0x1374,0x137a,0x1380,0x1386,0x138c,0x1392,0x1398, -1,1,0xc42,0xc50,0x139e,0x13a4,0x13aa,0x13b0,0x13b7,0x13bd,0x13c2,0x13c8,0x13ce,0x13d4,0x13da,0x13e0, -0x13e6,0x13ec,0x13f3,0x13f9,0x13fe,0x1404,1,1,0x140a,0x1410,0x1416,0x141c,0x1422,0x1428,0x142f,0x1435, -0x143a,1,1,1,0x1441,0x1447,0x144d,0x1453,1,0x1458,0x145e,0x1465,0x146b,0x1470,0x1476,1, -1,1,1,0x147c,0x1482,0x1489,0x148f,0x1494,0x149a,1,1,1,0xc5e,0xc6c,0x14a0,0x14a6, -0x14ac,0x14b2,1,1,0x14b8,0x14be,0x14c5,0x14cb,0x14d0,0x14d6,0xc7a,0xc84,0x14dc,0x14e2,0x14e9,0x14ef, -0xc8e,0xc98,0x14f5,0x14fb,0x1500,0x1506,1,1,0xca2,0xcac,0xcb6,0xcc0,0x150c,0x1512,0x1518,0x151e, -0x1524,0x152a,0x1531,0x1537,0x153c,0x1542,0x1548,0x154e,0x1554,0x155a,0x1560,0x1566,0x156c,0x1572,0x1578,0x60c, +1,1,1,1,1,1,0x614,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xcca,0xce4,1,1,1,1,1,1,1,1,1,1,1,1,1,0xcfe, -0xd18,1,1,1,1,1,1,0x610,1,1,1,1,1,1,1,1, -1,1,1,1,1,0x157e,0x1584,0x158a,0x1590,0x1596,0x159c,0x15a2,0x15a8,0x15b0,0x15ba,0x15c4, -0x15ce,0x15d8,0x15e2,0x15ec,0x15f6,1,0x1600,0x160a,0x1614,0x161e,0x1627,0x162d,1,1,0x1632,0x1638, -0x163e,0x1644,0xd32,0xd3c,0x164d,0x1657,0x165f,0x1665,0x166b,1,1,1,0x1670,0x1676,1,1, 
-0x167c,0x1682,0x168a,0x1694,0x169d,0x16a3,0x16a9,0x16af,0x16b4,0x16ba,0x16c0,0x16c6,0x16cc,0x16d2,0x16d8,0x16de, -0x16e4,0x16ea,0x16f0,0x16f6,0x16fc,0x1702,0x1708,0x170e,0x1714,0x171a,0x1720,0x1726,0x172c,0x1732,0x1738,0x173e, -0x1744,0x174a,0x1750,0x1756,1,1,0x175c,0x1762,1,1,1,1,1,1,0xd46,0xd50, -0xd5a,0xd64,0x176a,0x1774,0x177e,0x1788,0xd6e,0xd78,0x1792,0x179c,0x17a4,0x17aa,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x614,1, +1,1,1,1,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc, +0xfdcc,0xffcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xffd0,0xffb8,0xffb8,0xffb8,0xffb8,0xffd0,0xfdb0, +0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xff94,0xff94,0xfdb8,0xfdb8,0xfdb8,0xfdb8,0xfd94,0xfd94,0xffb8,0xffb8,0xffb8, +0xffb8,0xfdb8,0xfdb8,0xffb8,0xfdb8,0xfdb8,0xffb8,0xffb8,0xfe02,0xfe02,0xfe02,0xfe02,0xfc02,0xffb8,0xffb8,0xffb8, +0xffb8,0xffcc,0xffcc,0xffcc,0x3c26,0x3c2c,0xfdcc,0x3c32,0x3c38,0xfde0,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, +0xffcc,0xffb8,0xffb8,1,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffd0,0xffb8,0xffb8,0xffcc, +0xffd2,0xffd4,0xffd4,0xffd2,0xffd4,0xffd4,0xffd2,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,0x29d1,1,1,1,1,1,1,1, +1,1,0x29d5,1,1,1,1,1,0x17b1,0x17b7,0x29d9,0x17bd,0x17c3,0x17c9,1,0x17cf, +1,0x17d5,0x17db,0x17e3,0x618,1,1,1,0x634,1,0x644,1,0x658,1,1,1, +1,1,0x674,1,0x684,1,1,1,0x688,1,1,1,0x6a0,0x17eb,0x17f1,0xd82, +0x17f7,0xd8c,0x17fd,0x1805,0x6b4,1,1,1,0x6d4,1,0x6e4,1,0x6fc,1,1,1, +1,1,0x71c,1,0x72c,1,1,1,0x734,1,1,1,0x754,0xd96,0xda8,0x180d, +0x1813,0xdba,1,1,1,0x76c,0x1819,0x181f,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xfdcc,0xfdcc,0xfdcc,0xfdcc, -0xfdcc,0xffcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xffcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xffcc,0xfdcc, -0xfdcc,0xffd0,0xffb8,0xffb8,0xffb8,0xffb8,0xffd0,0xfdb0,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xff94,0xff94,0xfdb8, 
-0xfdb8,0xfdb8,0xfdb8,0xfd94,0xfd94,0xffb8,0xffb8,0xffb8,0xffb8,0xfdb8,0xfdb8,0xffb8,0xfdb8,0xfdb8,0xffb8,0xffb8, -0xfe02,0xfe02,0xfe02,0xfe02,0xfc02,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0x3c26,0x3c2c,0xfdcc,0x3c32, -0x3c38,0xfde0,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,1,0xffcc,0xffcc,0xffcc,0xffb8, -0xffb8,0xffb8,0xffb8,0xffcc,0xffd0,0xffb8,0xffb8,0xffcc,0xffd2,0xffd4,0xffd4,0xffd2,0xffd4,0xffd4,0xffd2,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, -0x29d1,1,1,1,1,1,1,1,1,1,0x29d5,1,1,1,1,1, -1,0x17b1,0x17b7,0x29d9,0x17bd,0x17c3,0x17c9,1,0x17cf,1,0x17d5,0x17db,0x17e3,0x618,1,1, -1,0x634,1,0x644,1,0x658,1,1,1,1,1,0x674,1,0x684,1,1, -1,0x688,1,1,1,0x6a0,0x17eb,0x17f1,0xd82,0x17f7,0xd8c,0x17fd,0x1805,0x6b4,1,1, -1,0x6d4,1,0x6e4,1,0x6fc,1,1,1,1,1,0x71c,1,0x72c,1,1, -1,0x734,1,1,1,0x754,0xd96,0xda8,0x180d,0x1813,0xdba,1,1,1,0x76c,0x1819, -0x181f,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0x1825,0x182b,1,0x1831, -1,1,0x774,0x1837,1,1,1,1,0x183d,0x1843,0x1849,1,0x778,1,1,0x780, -1,0x784,0x790,0x798,0x79c,0x184f,0x7ac,1,1,1,0x7b0,1,1,1,1,0x7b4, -1,1,1,0x7c4,1,1,1,0x7c8,1,0x7cc,1,1,0x7d0,1,1,0x7d8, -1,0x7dc,0x7e8,0x7f0,0x7f4,0x1855,0x804,1,1,1,0x808,1,1,1,1,0x80c, -1,1,1,0x81c,1,1,1,0x820,1,0x824,1,1,0x185b,0x1861,1,0x1867, -1,1,0x828,0x186d,1,1,1,1,0x1873,0x1879,0x187f,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0x82c,0x830,0x1885,0x188b,1,1,1,1,1,1,1,1,1,1,1,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x1825,0x182b,1,0x1831,1,1,0x774,0x1837,1,1,1,1,0x183d, +0x1843,0x1849,1,0x778,1,1,0x780,1,0x784,0x790,0x798,0x79c,0x184f,0x7ac,1,1, +1,0x7b0,1,1,1,1,0x7b4,1,1,1,0x7c4,1,1,1,0x7c8,1, +0x7cc,1,1,0x7d0,1,1,0x7d8,1,0x7dc,0x7e8,0x7f0,0x7f4,0x1855,0x804,1,1, +1,0x808,1,1,1,0x80c,1,1,1,0x81c,1,1,1,0x820,1,0x824, +1,1,0x185b,0x1861,1,0x1867,1,1,0x828,0x186d,1,1,1,1,0x1873,0x1879, +0x187f,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
+1,1,1,1,1,1,0x82c,0x830,0x1885,0x188b,1,1,1,1,1,1, +1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0x1891,0x1897,1, -1,1,1,1,1,1,1,1,1,1,1,1,0x189d,0x18a3,0x18a9,0x18af, -1,1,0x18b5,0x18bb,0x834,0x838,0x18c1,0x18c7,0x18cd,0x18d3,0x18d9,0x18df,1,1,0x18e5,0x18eb, -0x18f1,0x18f7,0x18fd,0x1903,0x83c,0x840,0x1909,0x190f,0x1915,0x191b,0x1921,0x1927,0x192d,0x1933,0x1939,0x193f, -0x1945,0x194b,1,1,0x1951,0x1957,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x1891, +0x1897,1,1,1,1,1,1,1,1,1,1,1,1,1,0x189d,0x18a3, +0x18a9,0x18af,1,1,0x18b5,0x18bb,0x834,0x838,0x18c1,0x18c7,0x18cd,0x18d3,0x18d9,0x18df,1,1, +0x18e5,0x18eb,0x18f1,0x18f7,0x18fd,0x1903,0x83c,0x840,0x1909,0x190f,0x1915,0x191b,0x1921,0x1927,0x192d,0x1933, +0x1939,0x193f,0x1945,0x194b,1,1,0x1951,0x1957,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc, 0xffcc,0xffcc,0xffbc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8, 0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffbc,0xffc8,0xffcc,0xfe14,0xfe16,0xfe18,0xfe1a,0xfe1c,0xfe1e,0xfe20,0xfe22, @@ -281,369 +220,415 @@ static const uint16_t norm2_nfc_data_trieIndex[9976]={ 1,1,0x85c,0x1987,1,0x860,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc, 0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,0xffcc,0xffcc,1,0xffb8,0xffcc,0xffcc,0xffb8,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0xfe48,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffb8,0xffb8,0xffcc,0xffb8,0xffcc, -0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc,1,1,1,1,1, +0xfe48,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, +0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffb8,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc, 
+0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,1,1, -1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc, -0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1, +0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,1,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8, -0xfe36,0xfe38,0xfe3a,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -1,1,1,1,1,1,1,1,0x864,0x198d,1,1,1,1,1,1, -0x868,0x1993,1,0x86c,0x1999,1,1,1,1,1,1,1,0xfc0e,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1, -1,0xffcc,0xffb8,0xffcc,0xffcc,1,1,1,0x29dc,0x29e2,0x29e8,0x29ee,0x29f4,0x29fa,0x2a00,0x2a06, +1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e,1,0xfc00,1, -1,1,1,1,1,1,1,0x870,1,1,1,0x199f,0x19a5,0xfe12,1,1, -1,1,1,1,1,1,1,0xfc00,1,1,1,1,0x2a0c,0x2a12,1,0x2a18, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffb8, +0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xfe36,0xfe38,0xfe3a,0xffcc, +0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0x2a1e,1,1,0x2a24,1,1,1,1,1,0xfe0e,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1, -1,1,1,1,1,1,1,1,1,0x2a2a,0x2a30,0x2a36,1,1,0x2a3c,1, +1,1,1,1,0x864,0x198d,1,1,1,1,1,1,0x868,0x1993,1,0x86c, +0x1999,1,1,1,1,1,1,1,0xfc0e,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xfe12,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,1,1, +1,0x29dc,0x29e2,0x29e8,0x29ee,0x29f4,0x29fa,0x2a00,0x2a06,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,0x870, +1,1,1,0x199f,0x19a5,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00, +1,1,1,1,0x2a0c,0x2a12,1,0x2a18,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xffcc,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0x2a1e,1,1,0x2a24,1,1, +1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1, +1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,0x2a2a,0x2a30,0x2a36, +1,1,0x2a3c,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e, 1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0x878,0x19ab,1,1,0x19b1,0x19b7,0xfe12,1,1,1,1,1,1, -1,1,0xfc00,0xfc00,1,1,1,1,0x2a42,0x2a48,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x884,1, -0x19bd,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xfc00,1,1,1,1,1,1,1,0x888,0x890,1,1,0x19c3,0x19c9, -0x19cf,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0x878,0x19ab,1,1,0x19b1,0x19b7,0xfe12,1,1,1,1,1,1,1,1,0xfc00, +0xfc00,1,1,1,1,0x2a42,0x2a48,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0x884,1,0x19bd,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xfc00,1,1,1,1,1,1,0x888,0x890,1,1, +0x19c3,0x19c9,0x19cf,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,1,1, 
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0x894,1,0x19d5,1,1,1,1,0xfe12,1,1, 1,1,1,1,1,0xfea8,0xfcb6,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xfe0e,1,1,0x898,0x19db,1,0xfc00,1,1,1,0x89c,0x19e1, -0x19e7,1,0xdc4,0x19ef,1,0xfe12,1,1,1,1,1,1,1,0xfc00,0xfc00,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,0xfc00,1, -1,1,1,1,1,1,0x8a8,0x8b0,1,1,0x19f7,0x19fd,0x1a03,0xfe12,1,1, -1,1,1,1,1,1,1,0xfc00,1,1,1,1,1,1,1,1, -1,1,0xfc12,1,1,1,1,0xfc00,1,1,1,1,1,1,1,1, -1,0x8b4,0x1a09,1,0xdce,0x1a11,0x1a19,0xfc00,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xfece,0xfece,0xfe12,1,1,1,1,1,1,1,1,1,0xfed6,0xfed6,0xfed6,0xfed6, +1,1,1,0xfe0e,1,1,0x898,0x19db,1,0xfc00,1,1,1,0x89c,0x19e1,0x19e7, +1,0xdc4,0x19ef,1,0xfe12,1,1,1,1,1,1,1,0xfc00,0xfc00,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xfeec,0xfeec,1,1,1,1,1,1, -1,1,1,1,0xfef4,0xfef4,0xfef4,0xfef4,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xffb8,1,0xffb8,1,0xffb0,1,1, -1,1,1,1,1,1,1,0x2a4f,1,1,1,1,1,1,1,1, -1,0x2a55,1,1,1,1,0x2a5b,1,1,1,1,0x2a61,1,1,1,1, -0x2a67,1,1,1,1,1,1,1,1,1,1,1,1,0x2a6d,1,1, -1,1,1,1,1,0xff02,0xff04,0x3c40,0xff08,0x3c48,0x2a72,1,0x2a78,1,0xff04,0xff04, -0xff04,0xff04,1,1,0xff04,0x3c50,0xffcc,0xffcc,0xfe12,1,0xffcc,0xffcc,1,1,1,1, -1,1,1,1,1,1,1,0x2a7f,1,1,1,1,1,1,1,1, -1,0x2a85,1,1,1,1,0x2a8b,1,1,1,1,0x2a91,1,1,1,1, -0x2a97,1,1,1,1,1,1,1,1,1,1,1,1,0x2a9d,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,0xfc00,1,1,1, +1,1,1,0x8a8,0x8b0,1,1,0x19f7,0x19fd,0x1a03,0xfe12,1,1,1,1,1, +1,1,1,1,0xfc00,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc12,1,1, +1,1,0xfc00,1,1,1,1,1,1,1,1,1,0x8b4,0x1a09,1,0xdce, +0x1a11,0x1a19,0xfc00,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0xfece,0xfece,0xfe12,1,1, 
+1,1,1,1,1,1,0xfed6,0xfed6,0xfed6,0xfed6,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xfeec,0xfeec,1,1,1,1,1,1,1,1,0xfef4,0xfef4,0xfef4,0xfef4, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0xffb8,1,0xffb8,1,0xffb0,1,1,1,1,1,1,0x2a4f,1,1,1, +1,1,1,1,1,1,0x2a55,1,1,1,1,0x2a5b,1,1,1,1, +0x2a61,1,1,1,1,0x2a67,1,1,1,1,1,1,1,1,1,1, +1,1,0x2a6d,1,1,1,1,1,1,1,0xff02,0xff04,0x3c40,0xff08,0x3c48,0x2a72, +1,0x2a78,1,0xff04,0xff04,0xff04,0xff04,1,1,0xff04,0x3c50,0xffcc,0xffcc,0xfe12,1,0xffcc, +0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x2a7f,1,1,1, +1,1,1,1,1,1,0x2a85,1,1,1,1,0x2a8b,1,1,1,1, +0x2a91,1,1,1,1,0x2a97,1,1,1,1,1,1,1,1,1,1, +1,1,0x2a9d,1,1,1,1,1,1,0xffb8,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x8c0,0x1a1f,1,1,1,1,1,1,1,0xfc00,1,1,1, +1,1,1,1,1,0xfe0e,1,0xfe12,0xfe12,1,1,1,1,1,1,1, 1,1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0x8c0,0x1a1f,1,1,1,1,1,1,1,0xfc00,1,1,1,1,1, -1,1,1,0xfe0e,1,0xfe12,0xfe12,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1, +1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,0xffcc,1,1, -1,1,1,1,1,1,1,1,1,0xffc8,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xffbc,0xffcc,0xffb8,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, -0xffb8,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffb8,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8, 
-0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffb8,1,1,1,1,1,1,1,0x8c4,0x1a25,0x8c8, -0x1a2b,0x8cc,0x1a31,0x8d0,0x1a37,0x8d4,0x1a3d,1,1,0x8d8,0x1a43,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, +0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xfe0e,0xfc00,1,1,1,1,0x8dc,0x1a49,0x8e0,0x1a4f,0x8e4,0x8e8,0x1a55,0x1a5b,0x8ec,0x1a61, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, +0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0xfe12,1,1,1,1,1,1,1,1,1,1,0xffcc,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xffc8,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0xffbc,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +1,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, +0xffb8,1,1,1,1,1,0x8c4,0x1a25,0x8c8,0x1a2b,0x8cc,0x1a31,0x8d0,0x1a37,0x8d4,0x1a3d, +1,1,0x8d8,0x1a43,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xfe0e,0xfc00,1,1,1,1,0x8dc,0x1a49,0x8e0,0x1a4f,0x8e4, 
+0x8e8,0x1a55,0x1a5b,0x8ec,0x1a61,0xfe12,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12, 0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, -0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xfe12,0xfe12,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e,1, -1,1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xffcc,0xffcc,0xffcc,1,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, -0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1, -1,0xffb8,1,1,1,1,1,1,0xffcc,1,1,1,0xffcc,0xffcc,1,1, -1,1,1,1,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc, -0xffcc,0xffd4,0xffac,0xffb8,0xff94,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffd0,0xffc8, -0xffc8,0xffb8,1,0xffcc,0xffd2,0xffb8,0xffcc,0xffb8,0x1a66,0x1a6c,0x1a72,0x1a78,0x1a7f,0x1a85,0x1a8b,0x1a91, -0x1a99,0x1aa3,0x1aaa,0x1ab0,0x1ab6,0x1abc,0x1ac2,0x1ac8,0x1acf,0x1ad5,0x1ada,0x1ae0,0x1ae8,0x1af2,0x1afc,0x1b06, -0x1b0e,0x1b14,0x1b1a,0x1b20,0x1b29,0x1b33,0x1b3b,0x1b41,0x1b46,0x1b4c,0x1b52,0x1b58,0x1b5e,0x1b64,0x1b6a,0x1b70, -0x1b77,0x1b7d,0x1b82,0x1b88,0x1b8e,0x1b94,0x1b9c,0x1ba6,0x1bae,0x1bb4,0x1bba,0x1bc0,0x1bc6,0x1bcc,0xdd8,0xde2, -0x1bd4,0x1bde,0x1be6,0x1bec,0x1bf2,0x1bf8,0x1bfe,0x1c04,0x1c0a,0x1c10,0x1c17,0x1c1d,0x1c22,0x1c28,0x1c2e,0x1c34, -0x1c3a,0x1c40,0x1c46,0x1c4c,0x1c54,0x1c5e,0x1c68,0x1c72,0x1c7c,0x1c86,0x1c90,0x1c9a,0x1ca3,0x1ca9,0x1caf,0x1cb5, -0x1cba,0x1cc0,0xdec,0xdf6,0x1cc8,0x1cd2,0x1cda,0x1ce0,0x1ce6,0x1cec,0xe00,0xe0a,0x1cf4,0x1cfe,0x1d08,0x1d12, 
-0x1d1c,0x1d26,0x1d2e,0x1d34,0x1d3a,0x1d40,0x1d46,0x1d4c,0x1d52,0x1d58,0x1d5e,0x1d64,0x1d6a,0x1d70,0x1d76,0x1d7c, -0x1d84,0x1d8e,0x1d98,0x1da2,0x1daa,0x1db0,0x1db7,0x1dbd,0x1dc2,0x1dc8,0x1dce,0x1dd4,0x1dda,0x1de0,0x1de6,0x1dec, -0x1df3,0x1df9,0x1dff,0x1e05,0x1e0b,0x1e11,0x1e16,0x1e1c,0x1e22,0x1e28,0x1e2f,0x1e35,0x1e3b,0x1e41,0x1e46,0x1e4c, -0x1e52,0x1e58,1,0x1e5f,1,1,1,1,0xe14,0xe22,0x1e64,0x1e6a,0x1e72,0x1e7c,0x1e86,0x1e90, -0x1e9a,0x1ea4,0x1eae,0x1eb8,0x1ec2,0x1ecc,0x1ed6,0x1ee0,0x1eea,0x1ef4,0x1efe,0x1f08,0x1f12,0x1f1c,0x1f26,0x1f30, -0xe30,0xe3a,0x1f38,0x1f3e,0x1f44,0x1f4a,0x1f52,0x1f5c,0x1f66,0x1f70,0x1f7a,0x1f84,0x1f8e,0x1f98,0x1fa2,0x1fac, -0x1fb4,0x1fba,0x1fc0,0x1fc6,0xe44,0xe4e,0x1fcc,0x1fd2,0x1fda,0x1fe4,0x1fee,0x1ff8,0x2002,0x200c,0x2016,0x2020, -0x202a,0x2034,0x203e,0x2048,0x2052,0x205c,0x2066,0x2070,0x207a,0x2084,0x208e,0x2098,0x20a0,0x20a6,0x20ac,0x20b2, -0x20ba,0x20c4,0x20ce,0x20d8,0x20e2,0x20ec,0x20f6,0x2100,0x210a,0x2114,0x211c,0x2122,0x2129,0x212f,0x2134,0x213a, -0x2140,0x2146,1,1,1,1,1,1,0xe58,0xe6e,0xe86,0xe94,0xea2,0xeb0,0xebe,0xecc, -0xed8,0xeee,0xf06,0xf14,0xf22,0xf30,0xf3e,0xf4c,0xf58,0xf66,0x214f,0x2159,0x2163,0x216d,1,1, -0xf74,0xf82,0x2177,0x2181,0x218b,0x2195,1,1,0xf90,0xfa6,0xfbe,0xfcc,0xfda,0xfe8,0xff6,0x1004, -0x1010,0x1026,0x103e,0x104c,0x105a,0x1068,0x1076,0x1084,0x1090,0x10a2,0x219f,0x21a9,0x21b3,0x21bd,0x21c7,0x21d1, -0x10b4,0x10c6,0x21db,0x21e5,0x21ef,0x21f9,0x2203,0x220d,0x10d8,0x10e6,0x2217,0x2221,0x222b,0x2235,1,1, -0x10f4,0x1102,0x223f,0x2249,0x2253,0x225d,1,1,0x1110,0x1122,0x2267,0x2271,0x227b,0x2285,0x228f,0x2299, -1,0x1134,1,0x22a3,1,0x22ad,1,0x22b7,0x1146,0x115c,0x1174,0x1182,0x1190,0x119e,0x11ac,0x11ba, -0x11c6,0x11dc,0x11f4,0x1202,0x1210,0x121e,0x122c,0x123a,0x1246,0x3b8e,0x22bf,0x3b96,0x1250,0x3b9e,0x22c5,0x3ba6, -0x22cb,0x3bae,0x22d1,0x3bb6,0x125a,0x3bbe,1,1,0x22d8,0x22e2,0x22f1,0x2301,0x2311,0x2321,0x2331,0x2341, 
-0x234c,0x2356,0x2365,0x2375,0x2385,0x2395,0x23a5,0x23b5,0x23c0,0x23ca,0x23d9,0x23e9,0x23f9,0x2409,0x2419,0x2429, -0x2434,0x243e,0x244d,0x245d,0x246d,0x247d,0x248d,0x249d,0x24a8,0x24b2,0x24c1,0x24d1,0x24e1,0x24f1,0x2501,0x2511, -0x251c,0x2526,0x2535,0x2545,0x2555,0x2565,0x2575,0x2585,0x258f,0x2595,0x259d,0x25a4,0x25ad,1,0x1264,0x25b7, -0x25bf,0x25c5,0x25cb,0x3bc6,0x25d0,1,0x2aa2,0x8f0,1,0x25d7,0x25df,0x25e6,0x25ef,1,0x126e,0x25f9, -0x2601,0x3bce,0x2607,0x3bd6,0x260c,0x2613,0x2619,0x261f,0x2625,0x262b,0x2633,0x3be0,1,1,0x263b,0x2643, -0x264b,0x2651,0x2657,0x3bea,1,0x265d,0x2663,0x2669,0x266f,0x2675,0x267d,0x3bf4,0x2685,0x268b,0x2691,0x2699, -0x26a1,0x26a7,0x26ad,0x3bfe,0x26b3,0x26b9,0x3c06,0x2aa7,1,1,0x26c1,0x26c8,0x26d1,1,0x1278,0x26db, -0x26e3,0x3c0e,0x26e9,0x3c16,0x26ee,0x2aab,0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc, -0xfe02,0xfe02,0xfe02,0xffcc,0xffcc,1,1,1,1,0xffcc,1,1,1,0xfe02,0xfe02,0xffcc, -0xffb8,0xffcc,0xfe02,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0x2aae,1,1,1,0x2ab2,0x3c1e, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x908,1,0x90c,1,0x910,1,1,1,1,1,0x26f5,0x26fb, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2701,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0x2707,0x270d,0x2713,0x914,1,0x918,1,0x91c,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0x920,0x2719,1,1,1,0x924,0x271f,1,0x928, -0x2725,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0x92c,0x272b,0x930,0x2731,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0x934,1,1,1,1,0x2737,1,0x938,0x273d,0x93c,1,0x2743,0x940,0x2749,1,1, -1,0x944,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x274f,0x948,0x2755,1,0x94c,0x950,1,1,1,1,1,1, -1,0x275b,0x2761,0x2767,0x276d,0x2773,0x954,0x958,0x2779,0x277f,0x95c,0x960,0x2785,0x278b,0x964,0x968, -0x96c,0x970,1,1,0x2791,0x2797,0x974,0x978,0x279d,0x27a3,0x97c,0x980,0x27a9,0x27af,1,1, -1,1,1,1,1,0x984,0x988,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0x98c,1,1,1,1,1,0x990,0x994,1,0x998, 
-0x27b5,0x27bb,0x27c1,0x27c7,1,1,0x99c,0x9a0,0x9a4,0x9a8,1,1,1,1,1,1, -1,1,1,1,0x27cd,0x27d3,0x27d9,0x27df,1,1,1,1,1,1,0x27e5,0x27eb, -0x27f1,0x27f7,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0x2ab7,0x2abb,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x2abf,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xffcc,0xffcc,0xffcc,0xffcc, +1,1,1,1,1,1,1,0xfe0e,1,1,1,1,1,1,1,1, +1,1,1,0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc, +0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1, +1,1,0xffb8,1,1,1,1,1,1,0xffcc,1,1,1,0xffcc,0xffcc,1, +1,1,1,1,1,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8, +0xffcc,0xffcc,0xffd4,0xffac,0xffb8,0xff94,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, 0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, -1,1,1,1,1,1,0xffb4,0xffc8,0xffd0,0xffbc,0xffc0,0xffc0,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x9ac,1, -1,1,1,0x9b0,0x27fd,0x9b4,0x2803,0x9b8,0x2809,0x9bc,0x280f,0x9c0,0x2815,0x9c4,0x281b,0x9c8, -0x2821,0x9cc,0x2827,0x9d0,0x282d,0x9d4,0x2833,0x9d8,0x2839,0x9dc,0x283f,1,0x9e0,0x2845,0x9e4,0x284b, -0x9e8,0x2851,1,1,1,1,1,0x9ec,0x2857,0x285d,0x9f4,0x2863,0x2869,0x9fc,0x286f,0x2875, -0xa04,0x287b,0x2881,0xa0c,0x2887,0x288d,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0x2893,1,1,1, -1,0xfc10,0xfc10,1,1,0xa14,0x2899,1,1,1,1,1,1,1,0xa18,1, -1,1,1,0xa1c,0x289f,0xa20,0x28a5,0xa24,0x28ab,0xa28,0x28b1,0xa2c,0x28b7,0xa30,0x28bd,0xa34, -0x28c3,0xa38,0x28c9,0xa3c,0x28cf,0xa40,0x28d5,0xa44,0x28db,0xa48,0x28e1,1,0xa4c,0x28e7,0xa50,0x28ed, -0xa54,0x28f3,1,1,1,1,1,0xa58,0x28f9,0x28ff,0xa60,0x2905,0x290b,0xa68,0x2911,0x2917, 
-0xa70,0x291d,0x2923,0xa78,0x2929,0x292f,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0xa80,0xa84,0xa88,0xa8c,1,0x2935,1,1,0x293b, -0x2941,0x2947,0x294d,1,1,0xa90,0x2953,1,1,1,1,1,1,1,1,1, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffd0,0xffc8,0xffc8,0xffb8,1, +0xffcc,0xffd2,0xffb8,0xffcc,0xffb8,0x1a66,0x1a6c,0x1a72,0x1a78,0x1a7f,0x1a85,0x1a8b,0x1a91,0x1a99,0x1aa3,0x1aaa, +0x1ab0,0x1ab6,0x1abc,0x1ac2,0x1ac8,0x1acf,0x1ad5,0x1ada,0x1ae0,0x1ae8,0x1af2,0x1afc,0x1b06,0x1b0e,0x1b14,0x1b1a, +0x1b20,0x1b29,0x1b33,0x1b3b,0x1b41,0x1b46,0x1b4c,0x1b52,0x1b58,0x1b5e,0x1b64,0x1b6a,0x1b70,0x1b77,0x1b7d,0x1b82, +0x1b88,0x1b8e,0x1b94,0x1b9c,0x1ba6,0x1bae,0x1bb4,0x1bba,0x1bc0,0x1bc6,0x1bcc,0xdd8,0xde2,0x1bd4,0x1bde,0x1be6, +0x1bec,0x1bf2,0x1bf8,0x1bfe,0x1c04,0x1c0a,0x1c10,0x1c17,0x1c1d,0x1c22,0x1c28,0x1c2e,0x1c34,0x1c3a,0x1c40,0x1c46, +0x1c4c,0x1c54,0x1c5e,0x1c68,0x1c72,0x1c7c,0x1c86,0x1c90,0x1c9a,0x1ca3,0x1ca9,0x1caf,0x1cb5,0x1cba,0x1cc0,0xdec, +0xdf6,0x1cc8,0x1cd2,0x1cda,0x1ce0,0x1ce6,0x1cec,0xe00,0xe0a,0x1cf4,0x1cfe,0x1d08,0x1d12,0x1d1c,0x1d26,0x1d2e, +0x1d34,0x1d3a,0x1d40,0x1d46,0x1d4c,0x1d52,0x1d58,0x1d5e,0x1d64,0x1d6a,0x1d70,0x1d76,0x1d7c,0x1d84,0x1d8e,0x1d98, +0x1da2,0x1daa,0x1db0,0x1db7,0x1dbd,0x1dc2,0x1dc8,0x1dce,0x1dd4,0x1dda,0x1de0,0x1de6,0x1dec,0x1df3,0x1df9,0x1dff, +0x1e05,0x1e0b,0x1e11,0x1e16,0x1e1c,0x1e22,0x1e28,0x1e2f,0x1e35,0x1e3b,0x1e41,0x1e46,0x1e4c,0x1e52,0x1e58,1, +0x1e5f,1,1,1,1,0xe14,0xe22,0x1e64,0x1e6a,0x1e72,0x1e7c,0x1e86,0x1e90,0x1e9a,0x1ea4,0x1eae, +0x1eb8,0x1ec2,0x1ecc,0x1ed6,0x1ee0,0x1eea,0x1ef4,0x1efe,0x1f08,0x1f12,0x1f1c,0x1f26,0x1f30,0xe30,0xe3a,0x1f38, +0x1f3e,0x1f44,0x1f4a,0x1f52,0x1f5c,0x1f66,0x1f70,0x1f7a,0x1f84,0x1f8e,0x1f98,0x1fa2,0x1fac,0x1fb4,0x1fba,0x1fc0, +0x1fc6,0xe44,0xe4e,0x1fcc,0x1fd2,0x1fda,0x1fe4,0x1fee,0x1ff8,0x2002,0x200c,0x2016,0x2020,0x202a,0x2034,0x203e, +0x2048,0x2052,0x205c,0x2066,0x2070,0x207a,0x2084,0x208e,0x2098,0x20a0,0x20a6,0x20ac,0x20b2,0x20ba,0x20c4,0x20ce, 
+0x20d8,0x20e2,0x20ec,0x20f6,0x2100,0x210a,0x2114,0x211c,0x2122,0x2129,0x212f,0x2134,0x213a,0x2140,0x2146,1, +1,1,1,1,1,0xe58,0xe6e,0xe86,0xe94,0xea2,0xeb0,0xebe,0xecc,0xed8,0xeee,0xf06, +0xf14,0xf22,0xf30,0xf3e,0xf4c,0xf58,0xf66,0x214f,0x2159,0x2163,0x216d,1,1,0xf74,0xf82,0x2177, +0x2181,0x218b,0x2195,1,1,0xf90,0xfa6,0xfbe,0xfcc,0xfda,0xfe8,0xff6,0x1004,0x1010,0x1026,0x103e, +0x104c,0x105a,0x1068,0x1076,0x1084,0x1090,0x10a2,0x219f,0x21a9,0x21b3,0x21bd,0x21c7,0x21d1,0x10b4,0x10c6,0x21db, +0x21e5,0x21ef,0x21f9,0x2203,0x220d,0x10d8,0x10e6,0x2217,0x2221,0x222b,0x2235,1,1,0x10f4,0x1102,0x223f, +0x2249,0x2253,0x225d,1,1,0x1110,0x1122,0x2267,0x2271,0x227b,0x2285,0x228f,0x2299,1,0x1134,1, +0x22a3,1,0x22ad,1,0x22b7,0x1146,0x115c,0x1174,0x1182,0x1190,0x119e,0x11ac,0x11ba,0x11c6,0x11dc,0x11f4, +0x1202,0x1210,0x121e,0x122c,0x123a,0x1246,0x3b8e,0x22bf,0x3b96,0x1250,0x3b9e,0x22c5,0x3ba6,0x22cb,0x3bae,0x22d1, +0x3bb6,0x125a,0x3bbe,1,1,0x22d8,0x22e2,0x22f1,0x2301,0x2311,0x2321,0x2331,0x2341,0x234c,0x2356,0x2365, +0x2375,0x2385,0x2395,0x23a5,0x23b5,0x23c0,0x23ca,0x23d9,0x23e9,0x23f9,0x2409,0x2419,0x2429,0x2434,0x243e,0x244d, +0x245d,0x246d,0x247d,0x248d,0x249d,0x24a8,0x24b2,0x24c1,0x24d1,0x24e1,0x24f1,0x2501,0x2511,0x251c,0x2526,0x2535, +0x2545,0x2555,0x2565,0x2575,0x2585,0x258f,0x2595,0x259d,0x25a4,0x25ad,1,0x1264,0x25b7,0x25bf,0x25c5,0x25cb, +0x3bc6,0x25d0,1,0x2aa2,0x8f0,1,0x25d7,0x25df,0x25e6,0x25ef,1,0x126e,0x25f9,0x2601,0x3bce,0x2607, +0x3bd6,0x260c,0x2613,0x2619,0x261f,0x2625,0x262b,0x2633,0x3be0,1,1,0x263b,0x2643,0x264b,0x2651,0x2657, +0x3bea,1,0x265d,0x2663,0x2669,0x266f,0x2675,0x267d,0x3bf4,0x2685,0x268b,0x2691,0x2699,0x26a1,0x26a7,0x26ad, +0x3bfe,0x26b3,0x26b9,0x3c06,0x2aa7,1,1,0x26c1,0x26c8,0x26d1,1,0x1278,0x26db,0x26e3,0x3c0e,0x26e9, +0x3c16,0x26ee,0x2aab,0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
+1,1,1,1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc,0xfe02,0xfe02,0xfe02, +0xffcc,0xffcc,1,1,1,1,0xffcc,1,1,1,0xfe02,0xfe02,0xffcc,0xffb8,0xffcc,0xfe02, +0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0x2aae,1,1,1, +0x2ab2,0x3c1e,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x908,1,0x90c,1,0x910,1,1,1,1,1, +0x26f5,0x26fb,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0x2701,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x2707,0x270d,0x2713,0x914,1,0x918,1,0x91c,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0x920,0x2719,1,1,1,0x924,0x271f, +1,0x928,0x2725,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0x92c,0x272b,0x930,0x2731,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0x934,1,1,1,0x2737,1,0x938,0x273d,0x93c,1,0x2743,0x940,0x2749,1, +1,1,0x944,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0x274f,0x948,0x2755,1,0x94c,0x950,1,1,1,1,1, +1,1,0x275b,0x2761,0x2767,0x276d,0x2773,0x954,0x958,0x2779,0x277f,0x95c,0x960,0x2785,0x278b,0x964, +0x968,0x96c,0x970,1,1,0x2791,0x2797,0x974,0x978,0x279d,0x27a3,0x97c,0x980,0x27a9,0x27af,1, +1,1,1,1,1,1,0x984,0x988,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0x98c,1,1,1,1,1,0x990,0x994,1, +0x998,0x27b5,0x27bb,0x27c1,0x27c7,1,1,0x99c,0x9a0,0x9a4,0x9a8,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x27cd,0x27d3,0x27d9,0x27df,1, +1,1,1,1,1,0x27e5,0x27eb,0x27f1,0x27f7,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0x2ab7,0x2abb,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0x2abf,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, 
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0xffb4,0xffc8,0xffd0,0xffbc,0xffc0, +0xffc0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0x9ac,1,1,1,1,0x9b0,0x27fd,0x9b4,0x2803,0x9b8,0x2809,0x9bc,0x280f,0x9c0,0x2815, +0x9c4,0x281b,0x9c8,0x2821,0x9cc,0x2827,0x9d0,0x282d,0x9d4,0x2833,0x9d8,0x2839,0x9dc,0x283f,1,0x9e0, +0x2845,0x9e4,0x284b,0x9e8,0x2851,1,1,1,1,1,0x9ec,0x2857,0x285d,0x9f4,0x2863,0x2869, +0x9fc,0x286f,0x2875,0xa04,0x287b,0x2881,0xa0c,0x2887,0x288d,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,0x2893,1,1, +1,1,0xfc10,0xfc10,1,1,0xa14,0x2899,1,1,1,1,1,1,1,0xa18, +1,1,1,1,0xa1c,0x289f,0xa20,0x28a5,0xa24,0x28ab,0xa28,0x28b1,0xa2c,0x28b7,0xa30,0x28bd, +0xa34,0x28c3,0xa38,0x28c9,0xa3c,0x28cf,0xa40,0x28d5,0xa44,0x28db,0xa48,0x28e1,1,0xa4c,0x28e7,0xa50, +0x28ed,0xa54,0x28f3,1,1,1,1,1,0xa58,0x28f9,0x28ff,0xa60,0x2905,0x290b,0xa68,0x2911, +0x2917,0xa70,0x291d,0x2923,0xa78,0x2929,0x292f,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xa80,0xa84,0xa88,0xa8c,1,0x2935,1,1, +0x293b,0x2941,0x2947,0x294d,1,1,0xa90,0x2953,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,0xffcc,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, 0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1, +1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1, 
-1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1, -1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,1,0xffcc,0xffcc,0xffb8,1,1,0xffcc, -0xffcc,1,1,1,1,1,0xffcc,0xffcc,1,0xffcc,1,1,1,1,1,1, +0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1, -1,1,1,1,0x2ac5,0x2ac9,0x2acd,0x2ad1,0x2ad5,0x2ad9,0x2add,0x2ae1,0x2ae1,0x2ae5,0x2ae9,0x2aed, -0x2af1,0x2af5,0x2af9,0x2afd,0x2b01,0x2b05,0x2b09,0x2b0d,0x2b11,0x2b15,0x2b19,0x2b1d,0x2b21,0x2b25,0x2b29,0x2b2d, -0x2b31,0x2b35,0x2b39,0x2b3d,0x2b41,0x2b45,0x2b49,0x2b4d,0x2b51,0x2b55,0x2b59,0x2b5d,0x2b61,0x2b65,0x2b69,0x2b6d, -0x2b71,0x2b75,0x2b79,0x2b7d,0x2b81,0x2b85,0x2b89,0x2b8d,0x2b91,0x2b95,0x2b99,0x2b9d,0x2ba1,0x2ba5,0x2ba9,0x2bad, -0x2bb1,0x2bb5,0x2bb9,0x2bbd,0x2bc1,0x2bc5,0x2bc9,0x2bcd,0x2bd1,0x2bd5,0x2bd9,0x2bdd,0x2be1,0x2be5,0x2be9,0x2bed, -0x2bf1,0x2bf5,0x2bf9,0x2bfd,0x2c01,0x2c05,0x2c09,0x2c0d,0x2c11,0x2c15,0x2c19,0x2c1d,0x2c21,0x2c25,0x2c29,0x2c2d, -0x2b11,0x2c31,0x2c35,0x2c39,0x2c3d,0x2c41,0x2c45,0x2c49,0x2c4d,0x2c51,0x2c55,0x2c59,0x2c5d,0x2c61,0x2c65,0x2c69, -0x2c6d,0x2c71,0x2c75,0x2c79,0x2c7d,0x2c81,0x2c85,0x2c89,0x2c8d,0x2c91,0x2c95,0x2c99,0x2c9d,0x2ca1,0x2ca5,0x2ca9, -0x2cad,0x2cb1,0x2cb5,0x2cb9,0x2cbd,0x2cc1,0x2cc5,0x2cc9,0x2ccd,0x2cd1,0x2cd5,0x2cd9,0x2cdd,0x2ce1,0x2ce5,0x2ce9, -0x2ced,0x2cf1,0x2cf5,0x2cf9,0x2cfd,0x2d01,0x2d05,0x2d09,0x2d0d,0x2d11,0x2d15,0x2d19,0x2d1d,0x2d21,0x2d25,0x2d29, -0x2d2d,0x2d31,0x2d35,0x2d39,0x2d3d,0x2c79,0x2d41,0x2d45,0x2d49,0x2d4d,0x2d51,0x2d55,0x2d59,0x2d5d,0x2c39,0x2d61, -0x2d65,0x2d69,0x2d6d,0x2d71,0x2d75,0x2d79,0x2d7d,0x2d81,0x2d85,0x2d89,0x2d8d,0x2d91,0x2d95,0x2d99,0x2d9d,0x2da1, -0x2da5,0x2da9,0x2dad,0x2b11,0x2db1,0x2db5,0x2db9,0x2dbd,0x2dc1,0x2dc5,0x2dc9,0x2dcd,0x2dd1,0x2dd5,0x2dd9,0x2ddd, 
-0x2de1,0x2de5,0x2de9,0x2ded,0x2df1,0x2df5,0x2df9,0x2dfd,0x2e01,0x2e05,0x2e09,0x2e0d,0x2e11,0x2e15,0x2e19,0x2c41, -0x2e1d,0x2e21,0x2e25,0x2e29,0x2e2d,0x2e31,0x2e35,0x2e39,0x2e3d,0x2e41,0x2e45,0x2e49,0x2e4d,0x2e51,0x2e55,0x2e59, -0x2e5d,0x2e61,0x2e65,0x2e69,0x2e6d,0x2e71,0x2e75,0x2e79,0x2e7d,0x2e81,0x2e85,0x2e89,0x2e8d,0x2e91,0x2e95,0x2e99, -0x2e9d,0x2ea1,0x2ea5,0x2ea9,0x2ead,0x2eb1,0x2eb5,0x2eb9,0x2ebd,0x2ec1,0x2ec5,0x2ec9,0x2ecd,0x2ed1,0x2ed5,0x2ed9, -0x2edd,0x2ee1,1,1,0x2ee5,1,0x2ee9,1,1,0x2eed,0x2ef1,0x2ef5,0x2ef9,0x2efd,0x2f01,0x2f05, -0x2f09,0x2f0d,0x2f11,1,0x2f15,1,0x2f19,1,1,0x2f1d,0x2f21,1,1,1,0x2f25,0x2f29, -0x2f2d,0x2f31,0x2f35,0x2f39,0x2f3d,0x2f41,0x2f45,0x2f49,0x2f4d,0x2f51,0x2f55,0x2f59,0x2f5d,0x2f61,0x2f65,0x2f69, -0x2f6d,0x2f71,0x2f75,0x2f79,0x2f7d,0x2f81,0x2f85,0x2f89,0x2f8d,0x2f91,0x2f95,0x2f99,0x2f9d,0x2fa1,0x2fa5,0x2fa9, -0x2fad,0x2fb1,0x2fb5,0x2fb9,0x2fbd,0x2fc1,0x2fc5,0x2fc9,0x2fcd,0x2fd1,0x2fd5,0x2d15,0x2fd9,0x2fdd,0x2fe1,0x2fe5, -0x2fe9,0x2fed,0x2fed,0x2ff1,0x2ff5,0x2ff9,0x2ffd,0x3001,0x3005,0x3009,0x300d,0x2f1d,0x3011,0x3015,0x3019,0x301d, -0x3021,0x3027,1,1,0x302b,0x302f,0x3033,0x3037,0x303b,0x303f,0x3043,0x3047,0x2f55,0x304b,0x304f,0x3053, -0x2ee5,0x3057,0x305b,0x305f,0x3063,0x3067,0x306b,0x306f,0x3073,0x3077,0x307b,0x307f,0x3083,0x2f79,0x3087,0x2f7d, -0x308b,0x308f,0x3093,0x3097,0x309b,0x2ee9,0x2b65,0x309f,0x30a3,0x30a7,0x2c7d,0x2dd9,0x30ab,0x30af,0x2f99,0x30b3, -0x2f9d,0x30b7,0x30bb,0x30bf,0x2ef1,0x30c3,0x30c7,0x30cb,0x30cf,0x30d3,0x2ef5,0x30d7,0x30db,0x30df,0x30e3,0x30e7, -0x30eb,0x2fd5,0x30ef,0x30f3,0x2d15,0x30f7,0x2fe5,0x30fb,0x30ff,0x3103,0x3107,0x310b,0x2ff9,0x310f,0x2f19,0x3113, -0x2ffd,0x2c31,0x3117,0x3001,0x311b,0x3009,0x311f,0x3123,0x3127,0x312b,0x312f,0x3011,0x2f09,0x3133,0x3015,0x3137, -0x3019,0x313b,0x2ae1,0x313f,0x3145,0x314b,0x3151,0x3155,0x3159,0x315d,0x3163,0x3169,0x316f,0x3173,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0x3176,0xfe34,0x317c, 
-1,1,1,1,1,1,1,1,1,1,0x3182,0x3188,0x3190,0x319a,0x31a2,0x31a8, -0x31ae,0x31b4,0x31ba,0x31c0,0x31c6,0x31cc,0x31d2,1,0x31d8,0x31de,0x31e4,0x31ea,0x31f0,1,0x31f6,1, -0x31fc,0x3202,1,0x3208,0x320e,1,0x3214,0x321a,0x3220,0x3226,0x322c,0x3232,0x3238,0x323e,0x3244,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8,1,1, -0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xffb8,1,0xffcc,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffcc,0xfe02,0xffb8,1,1,1,1,0xfe12,1,1,1,1,1,0xffcc,0xffb8,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8, -0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xa94,0x2959,0xa9a, -0x2963,1,1,1,1,1,1,1,1,0xaa0,1,1,1,1,1,0x296d, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xfc0e,1, -1,1,1,1,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0xfc00,1,1,1,1,1,1,0x2977,0x2981, -1,0xaa6,0xaac,0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1, -0xfe12,1,1,1,1,1,1,1,1,1,0xfe0e,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe12,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe0e,1,0xfc00,1, -1,1,1,1,1,1,1,0xab2,1,1,1,0x298b,0x2995,0xfe12,1,1, -1,1,1,1,1,1,1,0xfc00,1,1,1,1,1,1,1,1, -1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1, -1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0xffcc,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xfc00,1,1,1, -1,1,1,1,1,0xabe,0xfc00,0x299f,0x29a9,0xfc00,0x29b3,1,1,1,0xfe12,0xfe0e, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc00, -1,1,1,1,1,1,1,1,0xad0,0xad6,0x29bd,0x29c7,1,1,1,0xfe12, -0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,0xfe12, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xfe12,0xfe0e,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,0xfe0e,1, -0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe02,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x324a,0x3254, -0x3268,0x3280,0x3298,0x32b0,0x32c8,0xffb0,0xffb0,0xfe02,0xfe02,0xfe02,1,1,1,0xffc4,0xffb0,0xffb0, -0xffb0,0xffb0,0xffb0,1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8, -0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, +1,0xffcc,0xffcc,0xffb8,1,1,0xffcc,0xffcc,1,1,1,1,1,0xffcc,0xffcc,1, +0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1, -1,1,1,0x32d6,0x32e0,0x32f4,0x330c,0x3324,0x333c,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
-1,1,1,1,0x334b,0x334f,0x3353,0x3357,0x335d,0x2f3d,0x3361,0x3365,0x3369,0x336d,0x2f41,0x3371, -0x3375,0x3379,0x2f45,0x337f,0x3383,0x3387,0x338b,0x3391,0x3395,0x3399,0x339d,0x33a3,0x33a7,0x33ab,0x33af,0x302f, -0x33b3,0x33b9,0x33bd,0x33c1,0x33c5,0x33c9,0x33cd,0x33d1,0x33d5,0x3043,0x2f49,0x2f4d,0x3047,0x33d9,0x33dd,0x2c49, -0x33e1,0x2f51,0x33e5,0x33e9,0x33ed,0x33f1,0x33f1,0x33f1,0x33f5,0x33fb,0x33ff,0x3403,0x3407,0x340d,0x3411,0x3415, -0x3419,0x341d,0x3421,0x3425,0x3429,0x342d,0x3431,0x3435,0x3439,0x343d,0x343d,0x304f,0x3441,0x3445,0x3449,0x344d, -0x2f59,0x3451,0x3455,0x3459,0x2ead,0x345d,0x3461,0x3465,0x3469,0x346d,0x3471,0x3475,0x3479,0x347d,0x3483,0x3487, -0x348b,0x348f,0x3493,0x3497,0x349b,0x34a1,0x34a7,0x34ab,0x34af,0x34b3,0x34b7,0x34bb,0x34bf,0x34c3,0x34c7,0x34c7, -0x34cb,0x34d1,0x34d5,0x2c39,0x34d9,0x34dd,0x34e3,0x34e7,0x34eb,0x34ef,0x34f3,0x34f7,0x2f6d,0x34fb,0x34ff,0x3503, -0x3509,0x350d,0x3513,0x3517,0x351b,0x351f,0x3523,0x3527,0x352b,0x352f,0x3533,0x3537,0x353b,0x353f,0x3545,0x3549, -0x354d,0x3551,0x2b61,0x3555,0x355b,0x355f,0x355f,0x3565,0x3569,0x3569,0x356d,0x3571,0x3577,0x357d,0x3581,0x3585, -0x3589,0x358d,0x3591,0x3595,0x3599,0x359d,0x35a1,0x2f71,0x35a5,0x35ab,0x35af,0x35b3,0x307f,0x35b3,0x35b7,0x2f79, -0x35bb,0x35bf,0x35c3,0x35c7,0x2f7d,0x2af5,0x35cb,0x35cf,0x35d3,0x35d7,0x35db,0x35df,0x35e3,0x35e9,0x35ed,0x35f1, -0x35f5,0x35f9,0x35fd,0x3603,0x3607,0x360b,0x360f,0x3613,0x3617,0x361b,0x361f,0x3623,0x2f81,0x3627,0x362b,0x3631, -0x3635,0x3639,0x363d,0x2f89,0x3641,0x3645,0x3649,0x364d,0x3651,0x3655,0x3659,0x365d,0x2b65,0x309f,0x3661,0x3665, -0x3669,0x366d,0x3673,0x3677,0x367b,0x367f,0x2f8d,0x3683,0x3689,0x368d,0x3691,0x3151,0x3695,0x3699,0x369d,0x36a1, -0x36a5,0x36ab,0x36af,0x36b3,0x36b7,0x36bd,0x36c1,0x36c5,0x36c9,0x2c7d,0x36cd,0x36d1,0x36d7,0x36dd,0x36e3,0x36e7, -0x36ed,0x36f1,0x36f5,0x36f9,0x36fd,0x2f91,0x2dd9,0x3701,0x3705,0x3709,0x370d,0x3713,0x3717,0x371b,0x371f,0x30af, 
-0x3723,0x3727,0x372d,0x3731,0x3735,0x373b,0x3741,0x3745,0x30b3,0x3749,0x374d,0x3751,0x3755,0x3759,0x375d,0x3761, -0x3767,0x376b,0x3771,0x3775,0x377b,0x30bb,0x377f,0x3783,0x3789,0x378d,0x3791,0x3797,0x379d,0x37a1,0x37a5,0x37a9, -0x37ad,0x37ad,0x37b1,0x37b5,0x30c3,0x37b9,0x37bd,0x37c1,0x37c5,0x37c9,0x37cf,0x37d3,0x2c45,0x37d9,0x37df,0x37e3, -0x37e9,0x37ef,0x37f5,0x37f9,0x30db,0x37fd,0x3803,0x3809,0x380f,0x3815,0x3819,0x3819,0x30df,0x3159,0x381d,0x3821, -0x3825,0x3829,0x382f,0x2bad,0x30e7,0x3833,0x3837,0x2fbd,0x383d,0x3843,0x2f05,0x3849,0x384d,0x2fcd,0x3851,0x3855, -0x3859,0x385f,0x385f,0x3865,0x3869,0x386d,0x3873,0x3877,0x387b,0x387f,0x3885,0x3889,0x388d,0x3891,0x3895,0x3899, -0x389f,0x38a3,0x38a7,0x38ab,0x38af,0x38b3,0x38b7,0x38bd,0x38c3,0x38c7,0x38cd,0x38d1,0x38d7,0x38db,0x2fe5,0x38df, -0x38e5,0x38eb,0x38ef,0x38f5,0x38f9,0x38ff,0x3903,0x3907,0x390b,0x390f,0x3913,0x3917,0x391d,0x3923,0x3929,0x3565, -0x392f,0x3933,0x3937,0x393b,0x393f,0x3943,0x3947,0x394b,0x394f,0x3953,0x3957,0x395b,0x2c8d,0x3961,0x3965,0x3969, -0x396d,0x3971,0x3975,0x2ff1,0x3979,0x397d,0x3981,0x3985,0x3989,0x398f,0x3995,0x399b,0x399f,0x39a3,0x39a7,0x39ab, -0x39b1,0x39b5,0x39bb,0x39bf,0x39c3,0x39c9,0x39cf,0x39d3,0x2b99,0x39d7,0x39db,0x39df,0x39e3,0x39e7,0x39eb,0x3103, -0x39ef,0x39f3,0x39f7,0x39fb,0x39ff,0x3a03,0x3a07,0x3a0b,0x3a0f,0x3a13,0x3a19,0x3a1d,0x3a21,0x3a25,0x3a29,0x3a2d, -0x3a33,0x3a39,0x3a3d,0x3a41,0x3117,0x311b,0x3a45,0x3a49,0x3a4f,0x3a53,0x3a57,0x3a5b,0x3a5f,0x3a65,0x3a6b,0x3a6f, -0x3a73,0x3a77,0x3a7d,0x311f,0x3a81,0x3a87,0x3a8d,0x3a91,0x3a95,0x3a99,0x3a9f,0x3aa3,0x3aa7,0x3aab,0x3aaf,0x3ab3, -0x3ab7,0x3abb,0x3ac1,0x3ac5,0x3ac9,0x3acd,0x3ad3,0x3ad7,0x3adb,0x3adf,0x3ae3,0x3ae9,0x3aef,0x3af3,0x3af7,0x3afb, -0x3b01,0x3b05,0x3137,0x3137,0x3b0b,0x3b0f,0x3b15,0x3b19,0x3b1d,0x3b21,0x3b25,0x3b29,0x3b2d,0x3b31,0x313b,0x3b37, -0x3b3b,0x3b3f,0x3b43,0x3b47,0x3b4b,0x3b51,0x3b55,0x3b5b,0x3b61,0x3b67,0x3b6b,0x3b6f,0x3b73,0x3b77,0x3b7b,0x3b7f, -0x3b83,0x3b87,1,1,2,2,2,2,2,2,2,2,2,2,2,2, 
-2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, -0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,1,1,1,1,1, -1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, -0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xadc,0x1283,0x1283,0x1283, +1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,0xadc, 0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283, 0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0x1283,0x1283,0x1283,0x1283, -0xadc,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x3c54,1,0x3c54,0x3c54, -0x3c54,0x3c54,0x3c54,0x3c54,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0x3c54,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0x3c54,1,1,1,1,0x3c54,1,1,1, -0x3c54,1,0x3c54,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x3c56,1,0x3c56,0x3c56,0x3c56, 
+0x3c56,0x3c56,0x3c56,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0x3c56,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0x3c56,1,1,1,1,0x3c56, +1,1,1,0x3c56,1,0x3c56,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0x3b87,1,1,1,1,1 +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0x3b87,1,0x2ac5,0x2ac9,0x2acd,0x2ad1,0x2ad5,0x2ad9,0x2add,0x2ae1,0x2ae1,0x2ae5, +0x2ae9,0x2aed,0x2af1,0x2af5,0x2af9,0x2afd,0x2b01,0x2b05,0x2b09,0x2b0d,0x2b11,0x2b15,0x2b19,0x2b1d,0x2b21,0x2b25, +0x2b29,0x2b2d,0x2b31,0x2b35,0x2b39,0x2b3d,0x2b41,0x2b45,0x2b49,0x2b4d,0x2b51,0x2b55,0x2b59,0x2b5d,0x2b61,0x2b65, +0x2b69,0x2b6d,0x2b71,0x2b75,0x2b79,0x2b7d,0x2b81,0x2b85,0x2b89,0x2b8d,0x2b91,0x2b95,0x2b99,0x2b9d,0x2ba1,0x2ba5, +0x2ba9,0x2bad,0x2bb1,0x2bb5,0x2bb9,0x2bbd,0x2bc1,0x2bc5,0x2bc9,0x2bcd,0x2bd1,0x2bd5,0x2bd9,0x2bdd,0x2be1,0x2be5, +0x2be9,0x2bed,0x2bf1,0x2bf5,0x2bf9,0x2bfd,0x2c01,0x2c05,0x2c09,0x2c0d,0x2c11,0x2c15,0x2c19,0x2c1d,0x2c21,0x2c25, +0x2c29,0x2c2d,0x2b11,0x2c31,0x2c35,0x2c39,0x2c3d,0x2c41,0x2c45,0x2c49,0x2c4d,0x2c51,0x2c55,0x2c59,0x2c5d,0x2c61, +0x2c65,0x2c69,0x2c6d,0x2c71,0x2c75,0x2c79,0x2c7d,0x2c81,0x2c85,0x2c89,0x2c8d,0x2c91,0x2c95,0x2c99,0x2c9d,0x2ca1, +0x2ca5,0x2ca9,0x2cad,0x2cb1,0x2cb5,0x2cb9,0x2cbd,0x2cc1,0x2cc5,0x2cc9,0x2ccd,0x2cd1,0x2cd5,0x2cd9,0x2cdd,0x2ce1, +0x2ce5,0x2ce9,0x2ced,0x2cf1,0x2cf5,0x2cf9,0x2cfd,0x2d01,0x2d05,0x2d09,0x2d0d,0x2d11,0x2d15,0x2d19,0x2d1d,0x2d21, +0x2d25,0x2d29,0x2d2d,0x2d31,0x2d35,0x2d39,0x2d3d,0x2c79,0x2d41,0x2d45,0x2d49,0x2d4d,0x2d51,0x2d55,0x2d59,0x2d5d, +0x2c39,0x2d61,0x2d65,0x2d69,0x2d6d,0x2d71,0x2d75,0x2d79,0x2d7d,0x2d81,0x2d85,0x2d89,0x2d8d,0x2d91,0x2d95,0x2d99, +0x2d9d,0x2da1,0x2da5,0x2da9,0x2dad,0x2b11,0x2db1,0x2db5,0x2db9,0x2dbd,0x2dc1,0x2dc5,0x2dc9,0x2dcd,0x2dd1,0x2dd5, +0x2dd9,0x2ddd,0x2de1,0x2de5,0x2de9,0x2ded,0x2df1,0x2df5,0x2df9,0x2dfd,0x2e01,0x2e05,0x2e09,0x2e0d,0x2e11,0x2e15, +0x2e19,0x2c41,0x2e1d,0x2e21,0x2e25,0x2e29,0x2e2d,0x2e31,0x2e35,0x2e39,0x2e3d,0x2e41,0x2e45,0x2e49,0x2e4d,0x2e51, 
+0x2e55,0x2e59,0x2e5d,0x2e61,0x2e65,0x2e69,0x2e6d,0x2e71,0x2e75,0x2e79,0x2e7d,0x2e81,0x2e85,0x2e89,0x2e8d,0x2e91, +0x2e95,0x2e99,0x2e9d,0x2ea1,0x2ea5,0x2ea9,0x2ead,0x2eb1,0x2eb5,0x2eb9,0x2ebd,0x2ec1,0x2ec5,0x2ec9,0x2ecd,0x2ed1, +0x2ed5,0x2ed9,0x2edd,0x2ee1,1,1,0x2ee5,1,0x2ee9,1,1,0x2eed,0x2ef1,0x2ef5,0x2ef9,0x2efd, +0x2f01,0x2f05,0x2f09,0x2f0d,0x2f11,1,0x2f15,1,0x2f19,1,1,0x2f1d,0x2f21,1,1,1, +0x2f25,0x2f29,0x2f2d,0x2f31,0x2f35,0x2f39,0x2f3d,0x2f41,0x2f45,0x2f49,0x2f4d,0x2f51,0x2f55,0x2f59,0x2f5d,0x2f61, +0x2f65,0x2f69,0x2f6d,0x2f71,0x2f75,0x2f79,0x2f7d,0x2f81,0x2f85,0x2f89,0x2f8d,0x2f91,0x2f95,0x2f99,0x2f9d,0x2fa1, +0x2fa5,0x2fa9,0x2fad,0x2fb1,0x2fb5,0x2fb9,0x2fbd,0x2fc1,0x2fc5,0x2fc9,0x2fcd,0x2fd1,0x2fd5,0x2d15,0x2fd9,0x2fdd, +0x2fe1,0x2fe5,0x2fe9,0x2fed,0x2fed,0x2ff1,0x2ff5,0x2ff9,0x2ffd,0x3001,0x3005,0x3009,0x300d,0x2f1d,0x3011,0x3015, +0x3019,0x301d,0x3021,0x3027,1,1,0x302b,0x302f,0x3033,0x3037,0x303b,0x303f,0x3043,0x3047,0x2f55,0x304b, +0x304f,0x3053,0x2ee5,0x3057,0x305b,0x305f,0x3063,0x3067,0x306b,0x306f,0x3073,0x3077,0x307b,0x307f,0x3083,0x2f79, +0x3087,0x2f7d,0x308b,0x308f,0x3093,0x3097,0x309b,0x2ee9,0x2b65,0x309f,0x30a3,0x30a7,0x2c7d,0x2dd9,0x30ab,0x30af, +0x2f99,0x30b3,0x2f9d,0x30b7,0x30bb,0x30bf,0x2ef1,0x30c3,0x30c7,0x30cb,0x30cf,0x30d3,0x2ef5,0x30d7,0x30db,0x30df, +0x30e3,0x30e7,0x30eb,0x2fd5,0x30ef,0x30f3,0x2d15,0x30f7,0x2fe5,0x30fb,0x30ff,0x3103,0x3107,0x310b,0x2ff9,0x310f, +0x2f19,0x3113,0x2ffd,0x2c31,0x3117,0x3001,0x311b,0x3009,0x311f,0x3123,0x3127,0x312b,0x312f,0x3011,0x2f09,0x3133, +0x3015,0x3137,0x3019,0x313b,0x2ae1,0x313f,0x3145,0x314b,0x3151,0x3155,0x3159,0x315d,0x3163,0x3169,0x316f,0x3173, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x3176,0xfe34,0x317c,1,1,1,1,1,1,1, +1,1,1,0x3182,0x3188,0x3190,0x319a,0x31a2,0x31a8,0x31ae,0x31b4,0x31ba,0x31c0,0x31c6,0x31cc,0x31d2, +1,0x31d8,0x31de,0x31e4,0x31ea,0x31f0,1,0x31f6,1,0x31fc,0x3202,1,0x3208,0x320e,1,0x3214, 
+0x321a,0x3220,0x3226,0x322c,0x3232,0x3238,0x323e,0x3244,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xffb8,1,0xffcc,1,1,1,1, +1,1,1,1,0xffcc,0xfe02,0xffb8,1,1,1,1,0xfe12,1,1,1,1, +0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc, +0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,0xa94, +0x2959,0xa9a,0x2963,1,1,1,1,1,0xaa0,1,1,1,1,1,0x296d,1, +1,1,1,1,1,1,1,1,0xfe12,0xfc0e,1,1,1,1,1,1, +1,0xfc00,1,1,1,1,1,1,0x2977,0x2981,1,0xaa6,0xaac,0xfe12,0xfe12,1, +1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1, +1,1,1,1,0xfe0e,1,1,1,1,1,0xfe12,0xfe0e,1,1,1,1, +1,1,1,1,1,0xfe0e,0xfe12,1,1,1,1,1,1,1,1,1, +1,1,0xfe0e,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,0xab2,1,1, +1,0x298b,0x2995,0xfe12,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,1,1,1,0xfe12,1,1,1,0xfe0e,1,1,1,1,1,1,1, +1,1,0xfc00,1,1,1,1,1,1,1,1,0xabe,0xfc00,0x299f,0x29a9,0xfc00, +0x29b3,1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,1, +1,0xad0,0xad6,0x29bd,0x29c7,1,1,1,0xfe12,0xfe0e,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xfe12,0xfe0e,1,1,1,1,1, +1,1,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xfe02,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0x324a,0x3254,0x3268,0x3280,0x3298,0x32b0,0x32c8,0xffb0,0xffb0,0xfe02,0xfe02, +0xfe02,1,1,1,0xffc4,0xffb0,0xffb0,0xffb0,1,1,1,1,1,1,1,1, +0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,1,1, +1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, +1,1,1,1,1,1,1,0x32d6,0x32e0,0x32f4,0x330c,0x3324,0x333c,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1, +1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1, 
+1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1,1,1,1,1,0x334b,0x334f, +0x3353,0x3357,0x335d,0x2f3d,0x3361,0x3365,0x3369,0x336d,0x2f41,0x3371,0x3375,0x3379,0x2f45,0x337f,0x3383,0x3387, +0x338b,0x3391,0x3395,0x3399,0x339d,0x33a3,0x33a7,0x33ab,0x33af,0x302f,0x33b3,0x33b9,0x33bd,0x33c1,0x33c5,0x33c9, +0x33cd,0x33d1,0x33d5,0x3043,0x2f49,0x2f4d,0x3047,0x33d9,0x33dd,0x2c49,0x33e1,0x2f51,0x33e5,0x33e9,0x33ed,0x33f1, +0x33f1,0x33f1,0x33f5,0x33fb,0x33ff,0x3403,0x3407,0x340d,0x3411,0x3415,0x3419,0x341d,0x3421,0x3425,0x3429,0x342d, +0x3431,0x3435,0x3439,0x343d,0x343d,0x304f,0x3441,0x3445,0x3449,0x344d,0x2f59,0x3451,0x3455,0x3459,0x2ead,0x345d, +0x3461,0x3465,0x3469,0x346d,0x3471,0x3475,0x3479,0x347d,0x3483,0x3487,0x348b,0x348f,0x3493,0x3497,0x349b,0x34a1, +0x34a7,0x34ab,0x34af,0x34b3,0x34b7,0x34bb,0x34bf,0x34c3,0x34c7,0x34c7,0x34cb,0x34d1,0x34d5,0x2c39,0x34d9,0x34dd, +0x34e3,0x34e7,0x34eb,0x34ef,0x34f3,0x34f7,0x2f6d,0x34fb,0x34ff,0x3503,0x3509,0x350d,0x3513,0x3517,0x351b,0x351f, +0x3523,0x3527,0x352b,0x352f,0x3533,0x3537,0x353b,0x353f,0x3545,0x3549,0x354d,0x3551,0x2b61,0x3555,0x355b,0x355f, +0x355f,0x3565,0x3569,0x3569,0x356d,0x3571,0x3577,0x357d,0x3581,0x3585,0x3589,0x358d,0x3591,0x3595,0x3599,0x359d, +0x35a1,0x2f71,0x35a5,0x35ab,0x35af,0x35b3,0x307f,0x35b3,0x35b7,0x2f79,0x35bb,0x35bf,0x35c3,0x35c7,0x2f7d,0x2af5, +0x35cb,0x35cf,0x35d3,0x35d7,0x35db,0x35df,0x35e3,0x35e9,0x35ed,0x35f1,0x35f5,0x35f9,0x35fd,0x3603,0x3607,0x360b, +0x360f,0x3613,0x3617,0x361b,0x361f,0x3623,0x2f81,0x3627,0x362b,0x3631,0x3635,0x3639,0x363d,0x2f89,0x3641,0x3645, +0x3649,0x364d,0x3651,0x3655,0x3659,0x365d,0x2b65,0x309f,0x3661,0x3665,0x3669,0x366d,0x3673,0x3677,0x367b,0x367f, +0x2f8d,0x3683,0x3689,0x368d,0x3691,0x3151,0x3695,0x3699,0x369d,0x36a1,0x36a5,0x36ab,0x36af,0x36b3,0x36b7,0x36bd, +0x36c1,0x36c5,0x36c9,0x2c7d,0x36cd,0x36d1,0x36d7,0x36dd,0x36e3,0x36e7,0x36ed,0x36f1,0x36f5,0x36f9,0x36fd,0x2f91, 
+0x2dd9,0x3701,0x3705,0x3709,0x370d,0x3713,0x3717,0x371b,0x371f,0x30af,0x3723,0x3727,0x372d,0x3731,0x3735,0x373b, +0x3741,0x3745,0x30b3,0x3749,0x374d,0x3751,0x3755,0x3759,0x375d,0x3761,0x3767,0x376b,0x3771,0x3775,0x377b,0x30bb, +0x377f,0x3783,0x3789,0x378d,0x3791,0x3797,0x379d,0x37a1,0x37a5,0x37a9,0x37ad,0x37ad,0x37b1,0x37b5,0x30c3,0x37b9, +0x37bd,0x37c1,0x37c5,0x37c9,0x37cf,0x37d3,0x2c45,0x37d9,0x37df,0x37e3,0x37e9,0x37ef,0x37f5,0x37f9,0x30db,0x37fd, +0x3803,0x3809,0x380f,0x3815,0x3819,0x3819,0x30df,0x3159,0x381d,0x3821,0x3825,0x3829,0x382f,0x2bad,0x30e7,0x3833, +0x3837,0x2fbd,0x383d,0x3843,0x2f05,0x3849,0x384d,0x2fcd,0x3851,0x3855,0x3859,0x385f,0x385f,0x3865,0x3869,0x386d, +0x3873,0x3877,0x387b,0x387f,0x3885,0x3889,0x388d,0x3891,0x3895,0x3899,0x389f,0x38a3,0x38a7,0x38ab,0x38af,0x38b3, +0x38b7,0x38bd,0x38c3,0x38c7,0x38cd,0x38d1,0x38d7,0x38db,0x2fe5,0x38df,0x38e5,0x38eb,0x38ef,0x38f5,0x38f9,0x38ff, +0x3903,0x3907,0x390b,0x390f,0x3913,0x3917,0x391d,0x3923,0x3929,0x3565,0x392f,0x3933,0x3937,0x393b,0x393f,0x3943, +0x3947,0x394b,0x394f,0x3953,0x3957,0x395b,0x2c8d,0x3961,0x3965,0x3969,0x396d,0x3971,0x3975,0x2ff1,0x3979,0x397d, +0x3981,0x3985,0x3989,0x398f,0x3995,0x399b,0x399f,0x39a3,0x39a7,0x39ab,0x39b1,0x39b5,0x39bb,0x39bf,0x39c3,0x39c9, +0x39cf,0x39d3,0x2b99,0x39d7,0x39db,0x39df,0x39e3,0x39e7,0x39eb,0x3103,0x39ef,0x39f3,0x39f7,0x39fb,0x39ff,0x3a03, +0x3a07,0x3a0b,0x3a0f,0x3a13,0x3a19,0x3a1d,0x3a21,0x3a25,0x3a29,0x3a2d,0x3a33,0x3a39,0x3a3d,0x3a41,0x3117,0x311b, +0x3a45,0x3a49,0x3a4f,0x3a53,0x3a57,0x3a5b,0x3a5f,0x3a65,0x3a6b,0x3a6f,0x3a73,0x3a77,0x3a7d,0x311f,0x3a81,0x3a87, +0x3a8d,0x3a91,0x3a95,0x3a99,0x3a9f,0x3aa3,0x3aa7,0x3aab,0x3aaf,0x3ab3,0x3ab7,0x3abb,0x3ac1,0x3ac5,0x3ac9,0x3acd, +0x3ad3,0x3ad7,0x3adb,0x3adf,0x3ae3,0x3ae9,0x3aef,0x3af3,0x3af7,0x3afb,0x3b01,0x3b05,0x3137,0x3137,0x3b0b,0x3b0f, +0x3b15,0x3b19,0x3b1d,0x3b21,0x3b25,0x3b29,0x3b2d,0x3b31,0x313b,0x3b37,0x3b3b,0x3b3f,0x3b43,0x3b47,0x3b4b,0x3b51, 
+0x3b55,0x3b5b,0x3b61,0x3b67,0x3b6b,0x3b6f,0x3b73,0x3b77,0x3b7b,0x3b7f,0x3b83,0x3b87,1,1 +}; + +static const UCPTrie norm2_nfc_data_trie={ + norm2_nfc_data_trieIndex, + { norm2_nfc_data_trieData }, + 1690, 7822, + 0x2fc00, 0x30, + 0, 0, + 0, 0, + 0xc4, 0x226, + 0x1, }; static const uint16_t norm2_nfc_data_extraData[7724]={ @@ -1151,19 +1136,4 @@ static const uint8_t norm2_nfc_data_smallFCD[256]={ 0,0,0,0,0,0,0,0,0,0,0,7,0,0,2,0 }; -static const UTrie2 norm2_nfc_data_trie={ - norm2_nfc_data_trieIndex, - norm2_nfc_data_trieIndex+2728, - NULL, - 2728, - 7248, - 0x188, - 0xb24, - 0x1, - 0x1, - 0x30000, - 0x26f4, - NULL, 0, FALSE, FALSE, 0, NULL -}; - #endif // INCLUDED_FROM_NORMALIZER2_CPP diff --git a/icu4c/source/common/normalizer2.cpp b/icu4c/source/common/normalizer2.cpp index 0f12dfcff42..6be7e0b21a2 100644 --- a/icu4c/source/common/normalizer2.cpp +++ b/icu4c/source/common/normalizer2.cpp @@ -34,9 +34,11 @@ using icu::Normalizer2Impl; +#if NORM2_HARDCODE_NFC_DATA // NFC/NFD data machine-generated by gennorm2 --csource #define INCLUDED_FROM_NORMALIZER2_CPP #include "norm2_nfc_data.h" +#endif U_NAMESPACE_BEGIN @@ -176,6 +178,36 @@ FCDNormalizer2::~FCDNormalizer2() {} // instance cache ---------------------------------------------------------- *** +U_CDECL_BEGIN +static UBool U_CALLCONV uprv_normalizer2_cleanup(); +U_CDECL_END + +static Normalizer2 *noopSingleton; +static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; + +static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + noopSingleton=new NoopNormalizer2; + if(noopSingleton==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); +} + +const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode); + return noopSingleton; +} + +const 
Normalizer2Impl * +Normalizer2Factory::getImpl(const Normalizer2 *norm2) { + return &((Normalizer2WithImpl *)norm2)->impl; +} + Norm2AllModes::~Norm2AllModes() { delete impl; } @@ -195,6 +227,7 @@ Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) { return allModes; } +#if NORM2_HARDCODE_NFC_DATA Norm2AllModes * Norm2AllModes::createNFCInstance(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { @@ -210,48 +243,15 @@ Norm2AllModes::createNFCInstance(UErrorCode &errorCode) { return createInstance(impl, errorCode); } -U_CDECL_BEGIN -static UBool U_CALLCONV uprv_normalizer2_cleanup(); -U_CDECL_END - static Norm2AllModes *nfcSingleton; -static Normalizer2 *noopSingleton; static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; -static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; -// UInitOnce singleton initialization functions static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) { nfcSingleton=Norm2AllModes::createNFCInstance(errorCode); ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); } -static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - noopSingleton=new NoopNormalizer2; - if(noopSingleton==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); -} - -U_CDECL_BEGIN - -static UBool U_CALLCONV uprv_normalizer2_cleanup() { - delete nfcSingleton; - nfcSingleton = NULL; - delete noopSingleton; - noopSingleton = NULL; - nfcInitOnce.reset(); - noopInitOnce.reset(); - return TRUE; -} - -U_CDECL_END - const Norm2AllModes * Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return NULL; } @@ -281,23 +281,29 @@ const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { return allModes!=NULL ? 
&allModes->fcc : NULL; } -const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return NULL; } - umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode); - return noopSingleton; -} - const Normalizer2Impl * Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); return allModes!=NULL ? allModes->impl : NULL; } +#endif // NORM2_HARDCODE_NFC_DATA -const Normalizer2Impl * -Normalizer2Factory::getImpl(const Normalizer2 *norm2) { - return &((Normalizer2WithImpl *)norm2)->impl; +U_CDECL_BEGIN + +static UBool U_CALLCONV uprv_normalizer2_cleanup() { + delete noopSingleton; + noopSingleton = NULL; + noopInitOnce.reset(); +#if NORM2_HARDCODE_NFC_DATA + delete nfcSingleton; + nfcSingleton = NULL; + nfcInitOnce.reset(); +#endif + return TRUE; } +U_CDECL_END + U_NAMESPACE_END // C API ------------------------------------------------------------------- *** diff --git a/icu4c/source/common/normalizer2impl.cpp b/icu4c/source/common/normalizer2impl.cpp index eba0f03544a..6816ddc853a 100644 --- a/icu4c/source/common/normalizer2impl.cpp +++ b/icu4c/source/common/normalizer2impl.cpp @@ -16,6 +16,8 @@ * created by: Markus W. 
Scherer */ +// #define UCPTRIE_DEBUG + #include "unicode/utypes.h" #if !UCONFIG_NO_NORMALIZATION @@ -24,7 +26,9 @@ #include "unicode/edits.h" #include "unicode/normalizer2.h" #include "unicode/stringoptions.h" +#include "unicode/ucptrie.h" #include "unicode/udata.h" +#include "unicode/umutablecptrie.h" #include "unicode/ustring.h" #include "unicode/utf16.h" #include "unicode/utf8.h" @@ -34,8 +38,8 @@ #include "normalizer2impl.h" #include "putilimp.h" #include "uassert.h" +#include "ucptrie_impl.h" #include "uset_imp.h" -#include "utrie2.h" #include "uvector.h" U_NAMESPACE_BEGIN @@ -62,7 +66,7 @@ inline uint8_t leadByteForCP(UChar32 c) { * Returns the code point from one single well-formed UTF-8 byte sequence * between cpStart and cpLimit. * - * UTrie2 UTF-8 macros do not assemble whole code points (for efficiency). + * Trie UTF-8 macros do not assemble whole code points (for efficiency). * When we do need the code point, we call this function. * We should not need it for normalization-inert data (norm16==0). * Illegal sequences yield the error value norm16==0 just like real normalization-inert code points. 
@@ -253,7 +257,7 @@ UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &e return TRUE; } -UBool ReorderingBuffer::append(const UChar *s, int32_t length, +UBool ReorderingBuffer::append(const UChar *s, int32_t length, UBool isNFD, uint8_t leadCC, uint8_t trailCC, UErrorCode &errorCode) { if(length==0) { @@ -280,8 +284,11 @@ UBool ReorderingBuffer::append(const UChar *s, int32_t length, while(i(inIndexes[IX_MIN_DECOMP_NO_CP]); minCompNoMaybeCP = static_cast(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]); @@ -445,75 +453,8 @@ Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie, smallFCD=inSmallFCD; } -class LcccContext { -public: - LcccContext(const Normalizer2Impl &ni, UnicodeSet &s) : impl(ni), set(s) {} - - void handleRange(UChar32 start, UChar32 end, uint16_t norm16) { - if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES && - norm16 != Normalizer2Impl::JAMO_VT) { - set.add(start, end); - } else if (impl.minNoNoCompNoMaybeCC <= norm16 && norm16 < impl.limitNoNo) { - uint16_t fcd16=impl.getFCD16(start); - if(fcd16>0xff) { set.add(start, end); } - } - } - -private: - const Normalizer2Impl &impl; - UnicodeSet &set; -}; - -namespace { - -struct PropertyStartsContext { - PropertyStartsContext(const Normalizer2Impl &ni, const USetAdder *adder) - : impl(ni), sa(adder) {} - - const Normalizer2Impl &impl; - const USetAdder *sa; -}; - -} // namespace - U_CDECL_BEGIN -static UBool U_CALLCONV -enumLcccRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - ((LcccContext *)context)->handleRange(start, end, (uint16_t)value); - return TRUE; -} - -static UBool U_CALLCONV -enumNorm16PropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - /* add the start code point to the USet */ - const PropertyStartsContext *ctx=(const PropertyStartsContext *)context; - const USetAdder *sa=ctx->sa; - sa->add(sa->set, start); - if (start != end && ctx->impl.isAlgorithmicNoNo((uint16_t)value) && - (value & 
Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) { - // Range of code points with same-norm16-value algorithmic decompositions. - // They might have different non-zero FCD16 values. - uint16_t prevFCD16=ctx->impl.getFCD16(start); - while(++start<=end) { - uint16_t fcd16=ctx->impl.getFCD16(start); - if(fcd16!=prevFCD16) { - sa->add(sa->set, start); - prevFCD16=fcd16; - } - } - } - return TRUE; -} - -static UBool U_CALLCONV -enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) { - /* add the start code point to the USet */ - const USetAdder *sa=(const USetAdder *)context; - sa->add(sa->set, start); - return TRUE; -} - static uint32_t U_CALLCONV segmentStarterMapper(const void * /*context*/, uint32_t value) { return value&CANON_NOT_SEGMENT_STARTER; @@ -523,15 +464,44 @@ U_CDECL_END void Normalizer2Impl::addLcccChars(UnicodeSet &set) const { - LcccContext context(*this, set); - utrie2_enum(normTrie, NULL, enumLcccRange, &context); + UChar32 start = 0, end; + uint32_t norm16; + while ((end = ucptrie_getRange(normTrie, start, UCPTRIE_RANGE_FIXED_LEAD_SURROGATES, INERT, + nullptr, nullptr, &norm16)) >= 0) { + if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES && + norm16 != Normalizer2Impl::JAMO_VT) { + set.add(start, end); + } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) { + uint16_t fcd16 = getFCD16(start); + if (fcd16 > 0xff) { set.add(start, end); } + } + start = end + 1; + } } void Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const { - /* add the start code point of each same-value range of each trie */ - PropertyStartsContext context(*this, sa); - utrie2_enum(normTrie, NULL, enumNorm16PropertyStartsRange, &context); + // Add the start code point of each same-value range of the trie. 
+ UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(normTrie, start, UCPTRIE_RANGE_FIXED_LEAD_SURROGATES, INERT, + nullptr, nullptr, &value)) >= 0) { + sa->add(sa->set, start); + if (start != end && isAlgorithmicNoNo((uint16_t)value) && + (value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) { + // Range of code points with same-norm16-value algorithmic decompositions. + // They might have different non-zero FCD16 values. + uint16_t prevFCD16 = getFCD16(start); + while (++start <= end) { + uint16_t fcd16 = getFCD16(start); + if (fcd16 != prevFCD16) { + sa->add(sa->set, start); + prevFCD16 = fcd16; + } + } + } + start = end + 1; + } /* add Hangul LV syllables and LV+1 because of skippables */ for(UChar c=Hangul::HANGUL_BASE; ctrie, segmentStarterMapper, enumPropertyStartsRange, sa); + // Add the start code point of each same-value range of the canonical iterator data trie. + if (!ensureCanonIterData(errorCode)) { return; } + // Currently only used for the SEGMENT_STARTER property. 
+ UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPTRIE_RANGE_NORMAL, 0, + segmentStarterMapper, nullptr, &value)) >= 0) { + sa->add(sa->set, start); + start = end + 1; } } @@ -633,27 +608,23 @@ Normalizer2Impl::decompose(const UChar *src, const UChar *limit, // count code units below the minimum or with irrelevant data for the quick check for(prevSrc=src; src!=limit;) { if( (c=*src)= limitNoNo) { @@ -789,7 +760,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, } c = codePointFromValidUTF8(prevSrc, src); c = mapAlgorithmic(c, norm16); - norm16 = getNorm16(c); + norm16 = getRawNorm16(c); } else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) { return prevSrc; } @@ -828,7 +799,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, } else { leadCC = 0; } - if (!buffer.append((const char16_t *)mapping+1, length, leadCC, trailCC, errorCode)) { + if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) { return nullptr; } } @@ -854,7 +825,7 @@ Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) c length=0; U16_APPEND_UNSAFE(buffer, length, c); // The mapping might decompose further. - norm16 = getNorm16(c); + norm16 = getRawNorm16(c); } if (norm16 < minYesNo) { return decomp; @@ -926,19 +897,30 @@ void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit, return; } // Just merge the strings at the boundary. 
- ForwardUTrie2StringIterator iter(normTrie, src, limit); - uint8_t firstCC, prevCC, cc; - firstCC=prevCC=cc=getCC(iter.next16()); - while(cc!=0) { - prevCC=cc; - cc=getCC(iter.next16()); - }; + bool isFirst = true; + uint8_t firstCC = 0, prevCC = 0, cc; + const UChar *p = src; + while (p != limit) { + const UChar *codePointStart = p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); + if ((cc = getCC(norm16)) == 0) { + p = codePointStart; + break; + } + if (isFirst) { + firstCC = cc; + isFirst = false; + } + prevCC = cc; + } if(limit==NULL) { // appendZeroCC() needs limit!=NULL - limit=u_strchr(iter.codePointStart, 0); + limit=u_strchr(p, 0); } - if (buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode)) { - buffer.appendZeroCC(iter.codePointStart, limit, errorCode); + if (buffer.append(src, (int32_t)(p - src), FALSE, firstCC, prevCC, errorCode)) { + buffer.appendZeroCC(p, limit, errorCode); } } @@ -1085,7 +1067,7 @@ void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const } UChar32 composite=compositeAndFwd>>1; if((compositeAndFwd&1)!=0) { - addComposites(getCompositionsListForComposite(getNorm16(composite)), set); + addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set); } set.add(composite); } while((firstUnit&COMP_1_LAST_TUPLE)==0); @@ -1124,7 +1106,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart prevCC=0; for(;;) { - UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16); + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); cc=getCCFromYesOrMaybe(norm16); if( // this character combines backward and isMaybe(norm16) && @@ -1229,7 +1211,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart // Is the composite a starter that combines forward? 
if(compositeAndFwd&1) { compositionsList= - getCompositionsListForComposite(getNorm16(composite)); + getCompositionsListForComposite(getRawNorm16(composite)); } else { compositionsList=NULL; } @@ -1268,7 +1250,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart UChar32 Normalizer2Impl::composePair(UChar32 a, UChar32 b) const { - uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16=0 + uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16 const uint16_t *list; if(isInert(norm16)) { return U_SENTINEL; @@ -1359,28 +1341,22 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit, return TRUE; } if( (c=*src)= MIN_YES_YES_WITH_CC) { cc = getCCFromNormalYesOrMaybe(n16); if (prevCC > cc) { @@ -1559,7 +1535,7 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit, // decompose and recompose. if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) { const UChar *p = prevSrc; - UTRIE2_U16_PREV16(normTrie, prevBoundary, p, c, norm16); + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16); if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { prevSrc = p; } @@ -1626,28 +1602,22 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit, return src; } if( (c=*src)= MIN_YES_YES_WITH_CC) { cc = getCCFromNormalYesOrMaybe(n16); if (prevCC > cc) { @@ -1975,7 +1945,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous, // decompose and recompose. 
if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) { const uint8_t *p = prevSrc; - UTRIE2_U8_PREV16(normTrie, prevBoundary, p, norm16); + UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16); if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { prevSrc = p; } @@ -2023,7 +1993,7 @@ UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limi } UChar32 c; uint16_t norm16; - UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16); + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16); return norm16HasCompBoundaryBefore(norm16); } @@ -2032,7 +2002,7 @@ UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t * return TRUE; } uint16_t norm16; - UTRIE2_U8_NEXT16(normTrie, src, limit, norm16); + UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); return norm16HasCompBoundaryBefore(norm16); } @@ -2043,7 +2013,7 @@ UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p, } UChar32 c; uint16_t norm16; - UTRIE2_U16_PREV16(normTrie, start, p, c, norm16); + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); return norm16HasCompBoundaryAfter(norm16, onlyContiguous); } @@ -2053,36 +2023,42 @@ UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t return TRUE; } uint16_t norm16; - UTRIE2_U8_PREV16(normTrie, start, p, norm16); + UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16); return norm16HasCompBoundaryAfter(norm16, onlyContiguous); } const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p, UBool onlyContiguous) const { - BackwardUTrie2StringIterator iter(normTrie, start, p); - for(;;) { - uint16_t norm16=iter.previous16(); + while (p != start) { + const UChar *codePointLimit = p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { - return iter.codePointLimit; + 
return codePointLimit; } - if (hasCompBoundaryBefore(iter.codePoint, norm16)) { - return iter.codePointStart; + if (hasCompBoundaryBefore(c, norm16)) { + return p; } } + return p; } const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit, UBool onlyContiguous) const { - ForwardUTrie2StringIterator iter(normTrie, p, limit); - for(;;) { - uint16_t norm16=iter.next16(); - if (hasCompBoundaryBefore(iter.codePoint, norm16)) { - return iter.codePointStart; + while (p != limit) { + const UChar *codePointStart = p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); + if (hasCompBoundaryBefore(c, norm16)) { + return codePointStart; } if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { - return iter.codePointLimit; + return p; } } + return p; } uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const { @@ -2130,7 +2106,7 @@ uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const { } // Maps to an isCompYesAndZeroCC. 
c=mapAlgorithmic(c, norm16); - norm16=getNorm16(c); + norm16=getRawNorm16(c); } } if(norm16<=minYesNo || isHangulLVT(norm16)) { @@ -2195,17 +2171,10 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit, prevFCD16=0; ++src; } else { - if(U16_IS_SURROGATE(c)) { + if(U16_IS_LEAD(c)) { UChar c2; - if(U16_IS_SURROGATE_LEAD(c)) { - if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { - c=U16_GET_SUPPLEMENTARY(c, c2); - } - } else /* trail surrogate */ { - if(prevSrcadd(firstOrigin); @@ -2406,7 +2376,6 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode class InitCanonIterData { public: static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode); - static void handleRange(Normalizer2Impl *impl, UChar32 start, UChar32 end, uint16_t value, UErrorCode &errorCode); }; U_CDECL_BEGIN @@ -2417,18 +2386,6 @@ initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) { InitCanonIterData::doInit(impl, errorCode); } -// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters. 
-// context: the Normalizer2Impl -static UBool U_CALLCONV -enumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) { - UErrorCode errorCode = U_ZERO_ERROR; - if (value != Normalizer2Impl::INERT) { - Normalizer2Impl *impl = (Normalizer2Impl *)context; - InitCanonIterData::handleRange(impl, start, end, (uint16_t)value, errorCode); - } - return U_SUCCESS(errorCode); -} - U_CDECL_END void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) { @@ -2438,8 +2395,24 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) { errorCode=U_MEMORY_ALLOCATION_ERROR; } if (U_SUCCESS(errorCode)) { - utrie2_enum(impl->normTrie, NULL, enumCIDRangeHandler, impl); - utrie2_freeze(impl->fCanonIterData->trie, UTRIE2_32_VALUE_BITS, &errorCode); + UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(impl->normTrie, start, + UCPTRIE_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT, + nullptr, nullptr, &value)) >= 0) { + // Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters. 
+ if (value != Normalizer2Impl::INERT) { + impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode); + } + start = end + 1; + } +#ifdef UCPTRIE_DEBUG + umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData"); +#endif + impl->fCanonIterData->trie = umutablecptrie_buildImmutable( + impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode); + umutablecptrie_close(impl->fCanonIterData->mutableTrie); + impl->fCanonIterData->mutableTrie = nullptr; } if (U_FAILURE(errorCode)) { delete impl->fCanonIterData; @@ -2447,11 +2420,6 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) { } } -void InitCanonIterData::handleRange( - Normalizer2Impl *impl, UChar32 start, UChar32 end, uint16_t value, UErrorCode &errorCode) { - impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode); -} - void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16, CanonIterData &newData, UErrorCode &errorCode) const { @@ -2465,7 +2433,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co return; } for(UChar32 c=start; c<=end; ++c) { - uint32_t oldValue=utrie2_get32(newData.trie, c); + uint32_t oldValue = umutablecptrie_get(newData.mutableTrie, c); uint32_t newValue=oldValue; if(isMaybeOrNonZeroCC(norm16)) { // not a segment starter if it occurs in a decomposition or has cc!=0 @@ -2483,7 +2451,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co if (isDecompNoAlgorithmic(norm16_2)) { // Maps to an isCompYesAndZeroCC. c2 = mapAlgorithmic(c2, norm16_2); - norm16_2 = getNorm16(c2); + norm16_2 = getRawNorm16(c2); // No compatibility mappings for the CanonicalIterator. 
U_ASSERT(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2))); } @@ -2510,10 +2478,10 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co if(norm16_2>=minNoNo) { while(itrie, c); + return (int32_t)ucptrie_get(fCanonIterData->trie, c); } const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const { @@ -2561,7 +2529,7 @@ UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const { set.add(value); } if((canonValue&CANON_HAS_COMPOSITIONS)!=0) { - uint16_t norm16=getNorm16(c); + uint16_t norm16=getRawNorm16(c); if(norm16==JAMO_L) { UChar32 syllable= (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT); @@ -2608,7 +2576,7 @@ unorm2_swap(const UDataSwapper *ds, pInfo->dataFormat[1]==0x72 && pInfo->dataFormat[2]==0x6d && pInfo->dataFormat[3]==0x32 && - (1<=formatVersion0 && formatVersion0<=3) + (1<=formatVersion0 && formatVersion0<=4) )) { udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n", pInfo->dataFormat[0], pInfo->dataFormat[1], @@ -2669,9 +2637,9 @@ unorm2_swap(const UDataSwapper *ds, ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode); offset=nextOffset; - /* swap the UTrie2 */ + /* swap the trie */ nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]; - utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); + utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); offset=nextOffset; /* swap the uint16_t extraData[] */ diff --git a/icu4c/source/common/normalizer2impl.h b/icu4c/source/common/normalizer2impl.h index 8f187522ae1..2231110bbc5 100644 --- a/icu4c/source/common/normalizer2impl.h +++ b/icu4c/source/common/normalizer2impl.h @@ -24,12 +24,19 @@ #if !UCONFIG_NO_NORMALIZATION #include "unicode/normalizer2.h" +#include "unicode/ucptrie.h" #include "unicode/unistr.h" #include "unicode/unorm.h" +#include "unicode/utf.h" 
#include "unicode/utf16.h" #include "mutex.h" #include "uset_imp.h" -#include "utrie2.h" + +// When the nfc.nrm data is *not* hardcoded into the common library +// (with this constant set to 0), +// then it needs to be built into the data package: +// Add nfc.nrm to icu4c/source/data/Makefile.in DAT_FILES_SHORT +#define NORM2_HARDCODE_NFC_DATA 1 U_NAMESPACE_BEGIN @@ -158,8 +165,7 @@ public: appendBMP((UChar)c, cc, errorCode) : appendSupplementary(c, cc, errorCode); } - // s must be in NFD, otherwise change the implementation. - UBool append(const UChar *s, int32_t length, + UBool append(const UChar *s, int32_t length, UBool isNFD, uint8_t leadCC, uint8_t trailCC, UErrorCode &errorCode); UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) { @@ -243,7 +249,7 @@ public: } virtual ~Normalizer2Impl(); - void init(const int32_t *inIndexes, const UTrie2 *inTrie, + void init(const int32_t *inIndexes, const UCPTrie *inTrie, const uint16_t *inExtraData, const uint8_t *inSmallFCD); void addLcccChars(UnicodeSet &set) const; @@ -254,7 +260,12 @@ public: UBool ensureCanonIterData(UErrorCode &errorCode) const; - uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); } + // The trie stores values for lead surrogate code *units*. + // Surrogate code *points* are inert. + uint16_t getNorm16(UChar32 c) const { + return U_IS_LEAD(c) ? 
INERT : UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); + } + uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); } UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const { if(norm16=0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - if(length>=0 && (uint32_t)lengthreadUInt32(inTrie->signature); - trie.options=ds->readUInt32(inTrie->options); - trie.indexLength=udata_readInt32(ds, inTrie->indexLength); - trie.dataLength=udata_readInt32(ds, inTrie->dataLength); - - if( trie.signature!=0x54726965 || - (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || - ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || - trie.indexLength=0) { - UTrieHeader *outTrie; - - if(lengthswapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); - - /* swap the index and the data */ - if(dataIs32) { - ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); - ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, - (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); - } else { - ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); - } - } - - return size; -} - #if !UCONFIG_NO_COLLATION U_CAPI UBool U_EXPORT2 diff --git a/icu4c/source/common/ucptrie.cpp b/icu4c/source/common/ucptrie.cpp new file mode 100644 index 00000000000..09ac38a705b --- /dev/null +++ b/icu4c/source/common/ucptrie.cpp @@ -0,0 +1,573 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucptrie.cpp (modified from utrie2.cpp) +// created: 2017dec29 Markus W. 
Scherer + +// #define UCPTRIE_DEBUG +#ifdef UCPTRIE_DEBUG +# include +#endif + +#include "unicode/utypes.h" +#include "unicode/ucptrie.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "uassert.h" +#include "ucptrie_impl.h" + +U_CAPI UCPTrie * U_EXPORT2 +ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, + const void *data, int32_t length, int32_t *pActualLength, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + + if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) || + type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type || + valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + + // Enough data for a trie header? + if (length < (int32_t)sizeof(UCPTrieHeader)) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + // Check the signature. + const UCPTrieHeader *header = (const UCPTrieHeader *)data; + if (header->signature != UCPTRIE_SIG) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + int32_t options = header->options; + int32_t typeInt = (options >> 6) & 3; + int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK; + if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 || + (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + UCPTrieType actualType = (UCPTrieType)typeInt; + UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt; + if (type < 0) { + type = actualType; + } + if (valueWidth < 0) { + valueWidth = actualValueWidth; + } + if (type != actualType || valueWidth != actualValueWidth) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + // Get the length values and offsets. 
+ UCPTrie tempTrie; + uprv_memset(&tempTrie, 0, sizeof(tempTrie)); + tempTrie.indexLength = header->indexLength; + tempTrie.dataLength = + ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength; + tempTrie.index3NullOffset = header->index3NullOffset; + tempTrie.dataNullOffset = + ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset; + + tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2; + tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12; + tempTrie.type = type; + tempTrie.valueWidth = valueWidth; + + // Calculate the actual length. + int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2; + if (valueWidth == UCPTRIE_VALUE_BITS_16) { + actualLength += tempTrie.dataLength * 2; + } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { + actualLength += tempTrie.dataLength * 4; + } else { + actualLength += tempTrie.dataLength; + } + if (length < actualLength) { + *pErrorCode = U_INVALID_FORMAT_ERROR; // Not enough bytes. + return nullptr; + } + + // Allocate the trie. + UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie)); + if (trie == nullptr) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); +#ifdef UCPTRIE_DEBUG + trie->name = "fromSerialized"; +#endif + + // Set the pointers to its index and data arrays. + const uint16_t *p16 = (const uint16_t *)(header + 1); + trie->index = p16; + p16 += trie->indexLength; + + // Get the data. 
+ int32_t nullValueOffset = trie->dataNullOffset; + if (nullValueOffset >= trie->dataLength) { + nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + } + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + trie->data.ptr16 = p16; + trie->nullValue = trie->data.ptr16[nullValueOffset]; + break; + case UCPTRIE_VALUE_BITS_32: + trie->data.ptr32 = (const uint32_t *)p16; + trie->nullValue = trie->data.ptr32[nullValueOffset]; + break; + case UCPTRIE_VALUE_BITS_8: + trie->data.ptr8 = (const uint8_t *)p16; + trie->nullValue = trie->data.ptr8[nullValueOffset]; + break; + default: + // Unreachable because valueWidth was checked above. + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + if (pActualLength != nullptr) { + *pActualLength = actualLength; + } + return trie; +} + +U_CAPI void U_EXPORT2 +ucptrie_close(UCPTrie *trie) { + uprv_free(trie); +} + +U_CAPI UCPTrieType U_EXPORT2 +ucptrie_getType(const UCPTrie *trie) { + return (UCPTrieType)trie->type; +} + +U_CAPI UCPTrieValueWidth U_EXPORT2 +ucptrie_getValueWidth(const UCPTrie *trie) { + return (UCPTrieValueWidth)trie->valueWidth; +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) { + int32_t i1 = c >> UCPTRIE_SHIFT_1; + if (trie->type == UCPTRIE_TYPE_FAST) { + U_ASSERT(0xffff < c && c < trie->highStart); + i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; + } else { + U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); + i1 += UCPTRIE_SMALL_INDEX_LENGTH; + } + int32_t i3Block = trie->index[ + (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; + int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; + int32_t dataBlock; + if ((i3Block & 0x8000) == 0) { + // 16-bit indexes + dataBlock = trie->index[i3Block + i3]; + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. 
+ i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); + i3 &= 7; + dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000; + dataBlock |= trie->index[i3Block + i3]; + } + return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK); +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) { + UChar32 c = (lt1 << 12) | (t2 << 6) | t3; + if (c >= trie->highStart) { + // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher. + return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + } + return ucptrie_internalSmallIndex(trie, c); +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, + const uint8_t *start, const uint8_t *src) { + int32_t i, length; + // Support 64-bit pointers by avoiding cast of arbitrary difference. + if ((src - start) <= 7) { + i = length = (int32_t)(src - start); + } else { + i = length = 7; + start = src - 7; + } + c = utf8_prevCharSafeBody(start, 0, &i, c, -1); + i = length - i; // Number of bytes read backward from src. + int32_t idx = _UCPTRIE_CP_INDEX(trie, 0xffff, c); + return (idx << 3) | i; +} + +namespace { + +inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) { + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + return data.ptr16[dataIndex]; + case UCPTRIE_VALUE_BITS_32: + return data.ptr32[dataIndex]; + case UCPTRIE_VALUE_BITS_8: + return data.ptr8[dataIndex]; + default: + // Unreachable if the trie is properly initialized. + return 0xffffffff; + } +} + +} // namespace + +U_CAPI uint32_t U_EXPORT2 +ucptrie_get(const UCPTrie *trie, UChar32 c) { + int32_t dataIndex; + if ((uint32_t)c <= 0x7f) { + // linear ASCII + dataIndex = c; + } else { + UChar32 fastMax = trie->type == UCPTRIE_TYPE_FAST ? 
0xffff : UCPTRIE_SMALL_MAX; + dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c); + } + return getValue(trie->data, (UCPTrieValueWidth)trie->valueWidth, dataIndex); +} + +namespace { + +constexpr int32_t MAX_UNICODE = 0x10ffff; + +inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue, + UCPTrieValueFilter *filter, const void *context) { + if (value == trieNullValue) { + value = nullValue; + } else if (filter != nullptr) { + value = filter(context, value); + } + return value; +} + +UChar32 getRange(const void *t, UChar32 start, + UCPTrieValueFilter *filter, const void *context, uint32_t *pValue) { + if ((uint32_t)start > MAX_UNICODE) { + return U_SENTINEL; + } + const UCPTrie *trie = reinterpret_cast(t); + UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; + if (start >= trie->highStart) { + if (pValue != nullptr) { + int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + uint32_t value = getValue(trie->data, valueWidth, di); + if (filter != nullptr) { value = filter(context, value); } + *pValue = value; + } + return MAX_UNICODE; + } + + uint32_t nullValue = trie->nullValue; + if (filter != nullptr) { nullValue = filter(context, nullValue); } + const uint16_t *index = trie->index; + + int32_t prevI3Block = -1; + int32_t prevBlock = -1; + UChar32 c = start; + uint32_t value; + bool haveValue = false; + do { + int32_t i3Block; + int32_t i3; + int32_t i3BlockLength; + int32_t dataBlockLength; + if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) { + i3Block = 0; + i3 = c >> UCPTRIE_FAST_SHIFT; + i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ? + UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; + dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; + } else { + // Use the multi-stage index. 
+ int32_t i1 = c >> UCPTRIE_SHIFT_1; + if (trie->type == UCPTRIE_TYPE_FAST) { + U_ASSERT(0xffff < c && c < trie->highStart); + i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; + } else { + U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); + i1 += UCPTRIE_SMALL_INDEX_LENGTH; + } + i3Block = trie->index[ + (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; + if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) { + // The index-3 block is the same as the previous one, and filled with value. + U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); + c += UCPTRIE_CP_PER_INDEX_2_ENTRY; + continue; + } + prevI3Block = i3Block; + if (i3Block == trie->index3NullOffset) { + // This is the index-3 null block. + if (haveValue) { + if (nullValue != value) { + return c - 1; + } + } else { + value = nullValue; + if (pValue != nullptr) { *pValue = nullValue; } + haveValue = true; + } + prevBlock = trie->dataNullOffset; + c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); + continue; + } + i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; + i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH; + dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + } + // Enumerate data blocks for one index-3 block. + do { + int32_t block; + if ((i3Block & 0x8000) == 0) { + block = index[i3Block + i3]; + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); + int32_t gi = i3 & 7; + block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000; + block |= index[group + gi]; + } + if (block == prevBlock && (c - start) >= dataBlockLength) { + // The block is the same as the previous one, and filled with value. 
+ U_ASSERT((c & (dataBlockLength - 1)) == 0); + c += dataBlockLength; + } else { + int32_t dataMask = dataBlockLength - 1; + prevBlock = block; + if (block == trie->dataNullOffset) { + // This is the data null block. + if (haveValue) { + if (nullValue != value) { + return c - 1; + } + } else { + value = nullValue; + if (pValue != nullptr) { *pValue = nullValue; } + haveValue = true; + } + c = (c + dataBlockLength) & ~dataMask; + } else { + int32_t di = block + (c & dataMask); + uint32_t value2 = getValue(trie->data, valueWidth, di); + value2 = maybeFilterValue(value2, trie->nullValue, nullValue, + filter, context); + if (haveValue) { + if (value2 != value) { + return c - 1; + } + } else { + value = value2; + if (pValue != nullptr) { *pValue = value; } + haveValue = true; + } + while ((++c & dataMask) != 0) { + if (maybeFilterValue(getValue(trie->data, valueWidth, ++di), + trie->nullValue, nullValue, + filter, context) != value) { + return c - 1; + } + } + } + } + } while (++i3 < i3BlockLength); + } while (c < trie->highStart); + U_ASSERT(haveValue); + int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + uint32_t highValue = getValue(trie->data, valueWidth, di); + if (maybeFilterValue(highValue, trie->nullValue, nullValue, + filter, context) != value) { + return c - 1; + } else { + return MAX_UNICODE; + } +} + +} // namespace + +U_CFUNC UChar32 +ucptrie_internalGetRange(UCPTrieGetRange *getRange, + const void *trie, UChar32 start, + UCPTrieRangeOption option, uint32_t surrogateValue, + UCPTrieValueFilter *filter, const void *context, uint32_t *pValue) { + if (option == UCPTRIE_RANGE_NORMAL) { + return getRange(trie, start, filter, context, pValue); + } + uint32_t value; + if (pValue == nullptr) { + // We need to examine the range value even if the caller does not want it. + pValue = &value; + } + UChar32 surrEnd = option == UCPTRIE_RANGE_FIXED_ALL_SURROGATES ? 
0xdfff : 0xdbff; + UChar32 end = getRange(trie, start, filter, context, pValue); + if (end < 0xd7ff || start > surrEnd) { + return end; + } + // The range overlaps with surrogates, or ends just before the first one. + if (*pValue == surrogateValue) { + if (end >= surrEnd) { + // Surrogates followed by a non-surrogateValue range, + // or surrogates are part of a larger surrogateValue range. + return end; + } + } else { + if (start <= 0xd7ff) { + return 0xd7ff; // Non-surrogateValue range ends before surrogateValue surrogates. + } + // Start is a surrogate with a non-surrogateValue code *unit* value. + // Return a surrogateValue code *point* range. + *pValue = surrogateValue; + if (end > surrEnd) { + return surrEnd; // Surrogate range ends before non-surrogateValue rest of range. + } + } + // See if the surrogateValue surrogate range can be merged with + // an immediately following range. + uint32_t value2; + UChar32 end2 = getRange(trie, surrEnd + 1, filter, context, &value2); + if (value2 == surrogateValue) { + return end2; + } + return surrEnd; +} + +U_CAPI UChar32 U_EXPORT2 +ucptrie_getRange(const UCPTrie *trie, UChar32 start, + UCPTrieRangeOption option, uint32_t surrogateValue, + UCPTrieValueFilter *filter, const void *context, uint32_t *pValue) { + return ucptrie_internalGetRange(getRange, trie, start, + option, surrogateValue, + filter, context, pValue); +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_toBinary(const UCPTrie *trie, + void *data, int32_t capacity, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + + UCPTrieType type = (UCPTrieType)trie->type; + UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; + if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || + valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth || + capacity < 0 || + (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)))) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + int32_t length = 
(int32_t)sizeof(UCPTrieHeader) + trie->indexLength * 2; + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + length += trie->dataLength * 2; + break; + case UCPTRIE_VALUE_BITS_32: + length += trie->dataLength * 4; + break; + case UCPTRIE_VALUE_BITS_8: + length += trie->dataLength; + break; + default: + // unreachable + break; + } + if (capacity < length) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return length; + } + + char *bytes = (char *)data; + UCPTrieHeader *header = (UCPTrieHeader *)bytes; + header->signature = UCPTRIE_SIG; // "Tri3" + header->options = (uint16_t)( + ((trie->dataLength & 0xf0000) >> 4) | + ((trie->dataNullOffset & 0xf0000) >> 8) | + (trie->type << 6) | + valueWidth); + header->indexLength = (uint16_t)trie->indexLength; + header->dataLength = (uint16_t)trie->dataLength; + header->index3NullOffset = trie->index3NullOffset; + header->dataNullOffset = (uint16_t)trie->dataNullOffset; + header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2; + bytes += sizeof(UCPTrieHeader); + + uprv_memcpy(bytes, trie->index, trie->indexLength * 2); + bytes += trie->indexLength * 2; + + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2); + break; + case UCPTRIE_VALUE_BITS_32: + uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4); + break; + case UCPTRIE_VALUE_BITS_8: + uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength); + break; + default: + // unreachable + break; + } + return length; +} + +namespace { + +#ifdef UCPTRIE_DEBUG +long countNull(const UCPTrie *trie) { + uint32_t nullValue=trie->nullValue; + int32_t length=trie->dataLength; + long count=0; + switch (trie->valueWidth) { + case UCPTRIE_VALUE_BITS_16: + for(int32_t i=0; idata.ptr16[i]==nullValue) { ++count; } + } + break; + case UCPTRIE_VALUE_BITS_32: + for(int32_t i=0; idata.ptr32[i]==nullValue) { ++count; } + } + break; + case UCPTRIE_VALUE_BITS_8: + for(int32_t i=0; idata.ptr8[i]==nullValue) { ++count; } + } + break; + 
default: + // unreachable + break; + } + return count; +} + +U_CFUNC void +ucptrie_printLengths(const UCPTrie *trie, const char *which) { + long indexLength=trie->indexLength; + long dataLength=(long)trie->dataLength; + long totalLength=(long)sizeof(UCPTrieHeader)+indexLength*2+ + dataLength*(trie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 2 : + trie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 4 : 1); + printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n", + which, trie->name, indexLength, dataLength, countNull(trie), totalLength); +} +#endif + +} // namespace diff --git a/icu4c/source/common/ucptrie_impl.h b/icu4c/source/common/ucptrie_impl.h new file mode 100644 index 00000000000..00481a3a8a8 --- /dev/null +++ b/icu4c/source/common/ucptrie_impl.h @@ -0,0 +1,284 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucptrie_impl.h (modified from utrie2_impl.h) +// created: 2017dec29 Markus W. Scherer + +#ifndef __UCPTRIE_IMPL_H__ +#define __UCPTRIE_IMPL_H__ + +#include "unicode/ucptrie.h" +#ifdef UCPTRIE_DEBUG +#include "unicode/umutablecptrie.h" +#endif + +// UCPTrie signature values, in platform endianness and opposite endianness. +// The UCPTrie signature ASCII byte values spell "Tri3". +#define UCPTRIE_SIG 0x54726933 +#define UCPTRIE_OE_SIG 0x33697254 + +/** + * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie. + * @internal + */ +struct UCPTrieHeader { + /** "Tri3" in big-endian US-ASCII (0x54726933) */ + uint32_t signature; + + /** + * Options bit field: + * Bits 15..12: Data length bits 19..16. + * Bits 11..8: Data null block offset bits 19..16. + * Bits 7..6: UCPTrieType + * Bits 5..3: Reserved (0). + * Bits 2..0: UCPTrieValueWidth + */ + uint16_t options; + + /** Total length of the index tables. */ + uint16_t indexLength; + + /** Data length bits 15..0. 
*/ + uint16_t dataLength; + + /** Index-3 null block offset, 0x7fff or 0xffff if none. */ + uint16_t index3NullOffset; + + /** Data null block offset bits 15..0, 0xfffff if none. */ + uint16_t dataNullOffset; + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by UCPTRIE_SHIFT_2. + */ + uint16_t shiftedHighStart; +}; + +/** + * Constants for use with UCPTrieHeader.options. + * @internal + */ +enum { + UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000, + UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00, + UCPTRIE_OPTIONS_RESERVED_MASK = 0x38, + UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7, + /** + * Value for index3NullOffset which indicates that there is no index-3 null block. + * Bit 15 is unused for this value because this bit is used if the index-3 contains + * 18-bit indexes. + */ + UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff, + UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff +}; + +// Internal constants. +enum { + /** The length of the BMP index table. 1024=0x400 */ + UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT, + + UCPTRIE_SMALL_LIMIT = 0x1000, + UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT, + + /** Shift size for getting the index-3 table offset. */ + UCPTRIE_SHIFT_3 = 4, + + /** Shift size for getting the index-2 table offset. */ + UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3, + + /** Shift size for getting the index-1 table offset. */ + UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2, + + /** + * Difference between two shift sizes, + * for getting an index-2 offset from an index-3 offset. 5=9-4 + */ + UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3, + + /** + * Difference between two shift sizes, + * for getting an index-1 offset from an index-2 offset. 5=14-9 + */ + UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2, + + /** + * Number of index-1 entries for the BMP. (4) + * This part of the index-1 table is omitted from the serialized form. 
+ */ + UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1, + + /** Number of entries in an index-2 block. 32=0x20 */ + UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2, + + /** Mask for getting the lower bits for the in-index-2-block offset. */ + UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1, + + /** Number of code points per index-2 table entry. 512=0x200 */ + UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2, + + /** Number of entries in an index-3 block. 32=0x20 */ + UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3, + + /** Mask for getting the lower bits for the in-index-3-block offset. */ + UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1, + + /** Number of entries in a small data block. 16=0x10 */ + UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3, + + /** Mask for getting the lower bits for the in-small-data-block offset. */ + UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1 +}; + +typedef UChar32 +UCPTrieGetRange(const void *trie, UChar32 start, + UCPTrieValueFilter *filter, const void *context, uint32_t *pValue); + +U_CFUNC UChar32 +ucptrie_internalGetRange(UCPTrieGetRange *getRange, + const void *trie, UChar32 start, + UCPTrieRangeOption option, uint32_t surrogateValue, + UCPTrieValueFilter *filter, const void *context, uint32_t *pValue); + +#ifdef UCPTRIE_DEBUG +U_CFUNC void +ucptrie_printLengths(const UCPTrie *trie, const char *which); + +U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name); +#endif + +/* + * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie. + * For overview information see http://site.icu-project.org/design/struct/utrie + * + * The binary trie data should be 32-bit-aligned. 
+ * The overall layout is: + * + * UCPTrieHeader header; -- 16 bytes, see struct definition above + * uint16_t index[header.indexLength]; + * uintXY_t data[header.dataLength]; + * + * The trie data array is an array of uint16_t, uint32_t, or uint8_t, + * specified via the UCPTrieValueWidth when building the trie. + * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned. + * The overall length of the trie data is a multiple of 4 bytes. + * (Padding is added at the end of the index array and/or near the end of the data array as needed.) + * + * The length of the data array (dataLength) is stored as an integer split across two fields + * of the header struct (high bits in header.options). + * + * The trie type can be "fast" or "small" which determines the index structure, + * specified via the UCPTrieType when building the trie. + * + * The type and valueWidth are stored in the header.options. + * There are reserved type and valueWidth values, and reserved header.options bits. + * They could be used in future format extensions. + * Code reading the trie structure must fail with an error when unknown values or options are set. + * + * Values for ASCII characters (U+0000..U+007F) can always be found at the start of the data array. + * + * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup. + * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000. + * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000. + * + * All code points in the range highStart..U+10FFFF map to a single highValue + * which is stored at the second-to-last position of the data array. + * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.) + * The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2. + * (UCPTRIE_SHIFT_2=9) + * + * Values for code points fast_limit..highStart-1 are found via four-stage lookup. + * The data block size is UCPTRIE_SMALL_DATA_BLOCK_LENGTH=16. + * (UCPTRIE_SMALL_DATA_BLOCK_LENGTH=1<<UCPTRIE_SHIFT_3=16) + * + * There is also a trie error value stored at the last position of the data array. + * It is intended to be returned for inputs that are not Unicode code points + * (outside U+0000..U+10FFFF), or in string processing for ill-formed input + * (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence). + * + * For a "fast" trie: + * + * The index array starts with the BMP index table for BMP code point lookup. + * Its length is 1024=0x400. + * + * The supplementary index-1 table follows the BMP index table. + * Variable length, for code points up to highStart-1. + * Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1. + * (For 0x100000 supplementary code points U+10000..U+10ffff.) + * + * After this index-1 table follow the variable-length index-3 and index-2 tables.
+ * + * The supplementary index tables are omitted completely + * if there is only BMP data (highStart<=U+10000). + * + * For a "small" trie: + * + * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF. + * + * The "supplementary" index tables are always stored. + * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1. + * + * For both trie types: + * + * The last index-2 block may be a partial block, storing indexes only for code points + * below highStart. + * + * Lookup for ASCII code point c: + * + * Linear access from the start of the data array. + * + * value = data[c]; + * + * Lookup for fast-range code point c: + * + * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits, + * fetch the index array value at that offset, + * add the lower code point bits, index into the data array. + * + * value = data[index[c>>6] + (c&0x3f)]; + * + * (This works for ASCII as well.) + * + * Lookup for small-range code point c below highStart: + * + * Split the code point into four bit fields using several sets of shifts & masks + * to read consecutive values from the index-1, index-2, index-3 and data tables. + * + * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff), + * then the data block offsets are stored directly as uint16_t. + * + * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block + * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by + * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored + * in the additional word. + * + * See ucptrie_internalSmallIndex() for details. + * + * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.) 
+ * + * Compaction: + * + * Multiple code point ranges ("blocks") that are aligned on certain boundaries + * (determined by the shifting/bit fields of code points) and + * map to the same data values normally share a single subsequence of the data array. + * Data blocks can also overlap partially. + * (Depending on the builder code finding duplicate and overlapping blocks.) + * + * Iteration over same-value ranges: + * + * Range iteration (ucptrie_getRange()) walks the structure from a start code point + * until some code point is found that maps to a different value; + * the end of the returned range is just before that. + * + * The header.dataNullOffset (split across two header fields, high bits in header.options) + * is the offset of a widely shared data block filled with one single value. + * It helps quickly skip over large ranges of data with that value. + * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block + * where all index entries point to the dataNullOffset. + * If there is no such data or index-3 block, then these offsets are set to + * values that cannot be reached (data offset out of range/reserved index offset), + * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively. + */ + +#endif diff --git a/icu4c/source/common/udataswp.h b/icu4c/source/common/udataswp.h index 5303870b1d3..5e7b043c4c9 100644 --- a/icu4c/source/common/udataswp.h +++ b/icu4c/source/common/udataswp.h @@ -333,6 +333,43 @@ uprv_compareInvEbcdic(const UDataSwapper *ds, # error Unknown charset family! #endif +// utrie_swap.cpp -----------------------------------------------------------*** + +/** + * Swaps a serialized UTrie. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swaps a serialized UTrie2. 
+ * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swaps a serialized UCPTrie. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ucptrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swaps a serialized UTrie, UTrie2, or UCPTrie. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie_swapAnyVersion(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); /* material... -------------------------------------------------------------- */ diff --git a/icu4c/source/common/umutablecptrie.cpp b/icu4c/source/common/umutablecptrie.cpp new file mode 100644 index 00000000000..892ee3fd3c7 --- /dev/null +++ b/icu4c/source/common/umutablecptrie.cpp @@ -0,0 +1,1605 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// umutablecptrie.cpp (inspired by utrie2_builder.cpp) +// created: 2017dec29 Markus W. Scherer + +// #define UCPTRIE_DEBUG +#ifdef UCPTRIE_DEBUG +# include +#endif + +#include "unicode/utypes.h" +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "unicode/uobject.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "uassert.h" +#include "ucptrie_impl.h" + +U_NAMESPACE_BEGIN + +namespace { + +constexpr int32_t MAX_UNICODE = 0x10ffff; + +constexpr int32_t UNICODE_LIMIT = 0x110000; +constexpr int32_t BMP_LIMIT = 0x10000; +constexpr int32_t ASCII_LIMIT = 0x80; + +constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3; +constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3; +constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3; + +constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3)); + +// Flag values for data blocks. 
+constexpr uint8_t ALL_SAME = 0; +constexpr uint8_t MIXED = 1; +constexpr uint8_t SAME_AS = 2; + +/** Start with allocation of 16k data entries. */ +constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14); + +/** Grow about 8x each time. */ +constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17); + +/** + * Maximum length of the build-time data array. + * One entry per 0x110000 code points. + */ +constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT; + +// Flag values for index-3 blocks while compacting/building. +constexpr uint8_t I3_NULL = 0; +constexpr uint8_t I3_BMP = 1; +constexpr uint8_t I3_16 = 2; +constexpr uint8_t I3_18 = 3; + +constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8; + +class AllSameBlocks; + +class MutableCodePointTrie : public UMemory { +public: + MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode); + MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode); + MutableCodePointTrie(const MutableCodePointTrie &other) = delete; + ~MutableCodePointTrie(); + + MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete; + + static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode); + + uint32_t get(UChar32 c) const; + int32_t getRange(UChar32 start, UCPTrieValueFilter *filter, const void *context, + uint32_t *pValue) const; + + void set(UChar32 c, uint32_t value, UErrorCode &errorCode); + void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode); + + UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode); + +private: + void clear(); + + bool ensureHighStart(UChar32 c); + int32_t allocDataBlock(int32_t blockLength); + int32_t getDataBlock(int32_t i); + + void maskValues(uint32_t mask); + UChar32 findHighStart() const; + int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks); + int32_t compactData(int32_t 
fastILimit, uint32_t *newData); + int32_t compactIndex(int32_t fastILimit, UErrorCode &errorCode); + int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode); + + uint32_t *index = nullptr; + int32_t indexCapacity = 0; + int32_t index3NullOffset = -1; + uint32_t *data = nullptr; + int32_t dataCapacity = 0; + int32_t dataLength = 0; + int32_t dataNullOffset = -1; + + uint32_t origInitialValue; + uint32_t initialValue; + uint32_t errorValue; + UChar32 highStart; + uint32_t highValue; +#ifdef UCPTRIE_DEBUG +public: + const char *name; +#endif +private: + /** Temporary array while building the final data. */ + uint16_t *index16 = nullptr; + uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3]; +}; + +MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) : + origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue), + highStart(0), highValue(initialValue) +#ifdef UCPTRIE_DEBUG + , name("open") +#endif + { + if (U_FAILURE(errorCode)) { return; } + index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4); + data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4); + if (index == nullptr || data == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + indexCapacity = BMP_I_LIMIT; + dataCapacity = INITIAL_DATA_LENGTH; +} + +MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) : + index3NullOffset(other.index3NullOffset), + dataNullOffset(other.dataNullOffset), + origInitialValue(other.origInitialValue), initialValue(other.initialValue), + errorValue(other.errorValue), + highStart(other.highStart), highValue(other.highValue) +#ifdef UCPTRIE_DEBUG + , name("mutable clone") +#endif + { + if (U_FAILURE(errorCode)) { return; } + int32_t iCapacity = highStart <= BMP_LIMIT ? 
BMP_I_LIMIT : I_LIMIT; + index = (uint32_t *)uprv_malloc(iCapacity * 4); + data = (uint32_t *)uprv_malloc(other.dataCapacity * 4); + if (index == nullptr || data == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + indexCapacity = iCapacity; + dataCapacity = other.dataCapacity; + + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + uprv_memcpy(flags, other.flags, iLimit); + uprv_memcpy(index, other.index, iLimit * 4); + uprv_memcpy(data, other.data, (size_t)other.dataLength * 4); + dataLength = other.dataLength; + U_ASSERT(other.index16 == nullptr); +} + +MutableCodePointTrie::~MutableCodePointTrie() { + uprv_free(index); + uprv_free(data); + uprv_free(index16); +} + +MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) { + // Use the highValue as the initialValue to reduce the highStart. + uint32_t errorValue; + uint32_t initialValue; + switch (trie->valueWidth) { + case UCPTRIE_VALUE_BITS_16: + errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; + initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; + break; + case UCPTRIE_VALUE_BITS_32: + errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; + initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; + break; + case UCPTRIE_VALUE_BITS_8: + errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; + initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; + break; + default: + // Unreachable if the trie is properly initialized. 
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + LocalPointer mutableTrie( + new MutableCodePointTrie(initialValue, errorValue, errorCode), + errorCode); + if (U_FAILURE(errorCode)) { + return nullptr; + } + UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(trie, start, UCPTRIE_RANGE_NORMAL, 0, + nullptr, nullptr, &value)) >= 0) { + if (value != initialValue) { + if (start == end) { + mutableTrie->set(start, value, errorCode); + } else { + mutableTrie->setRange(start, end, value, errorCode); + } + } + start = end + 1; + } + if (U_SUCCESS(errorCode)) { + return mutableTrie.orphan(); + } else { + return nullptr; + } +} + +void MutableCodePointTrie::clear() { + index3NullOffset = dataNullOffset = -1; + dataLength = 0; + highValue = initialValue = origInitialValue; + highStart = 0; + uprv_free(index16); + index16 = nullptr; +} + +uint32_t MutableCodePointTrie::get(UChar32 c) const { + if ((uint32_t)c > MAX_UNICODE) { + return errorValue; + } + if (c >= highStart) { + return highValue; + } + int32_t i = c >> UCPTRIE_SHIFT_3; + if (flags[i] == ALL_SAME) { + return index[i]; + } else { + return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)]; + } +} + +inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue, + UCPTrieValueFilter *filter, const void *context) { + if (value == initialValue) { + value = nullValue; + } else if (filter != nullptr) { + value = filter(context, value); + } + return value; +} + +UChar32 MutableCodePointTrie::getRange( + UChar32 start, UCPTrieValueFilter *filter, const void *context, + uint32_t *pValue) const { + if ((uint32_t)start > MAX_UNICODE) { + return U_SENTINEL; + } + if (start >= highStart) { + if (pValue != nullptr) { + uint32_t value = highValue; + if (filter != nullptr) { value = filter(context, value); } + *pValue = value; + } + return MAX_UNICODE; + } + uint32_t nullValue = initialValue; + if (filter != nullptr) { nullValue = filter(context, nullValue); } + 
UChar32 c = start; + uint32_t value; + bool haveValue = false; + int32_t i = c >> UCPTRIE_SHIFT_3; + do { + if (flags[i] == ALL_SAME) { + uint32_t value2 = maybeFilterValue(index[i], initialValue, nullValue, + filter, context); + if (haveValue) { + if (value2 != value) { + return c - 1; + } + } else { + value = value2; + if (pValue != nullptr) { *pValue = value; } + haveValue = true; + } + c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK; + } else /* MIXED */ { + int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK); + uint32_t value2 = maybeFilterValue(data[di], initialValue, nullValue, + filter, context); + if (haveValue) { + if (value2 != value) { + return c - 1; + } + } else { + value = value2; + if (pValue != nullptr) { *pValue = value; } + haveValue = true; + } + while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) { + if (maybeFilterValue(data[++di], initialValue, nullValue, + filter, context) != value) { + return c - 1; + } + } + } + ++i; + } while (c < highStart); + U_ASSERT(haveValue); + if (maybeFilterValue(highValue, initialValue, nullValue, + filter, context) != value) { + return c - 1; + } else { + return MAX_UNICODE; + } +} + +void +writeBlock(uint32_t *block, uint32_t value) { + uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + while (block < limit) { + *block++ = value; + } +} + +bool MutableCodePointTrie::ensureHighStart(UChar32 c) { + if (c >= highStart) { + // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction. 
+ c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); + int32_t i = highStart >> UCPTRIE_SHIFT_3; + int32_t iLimit = c >> UCPTRIE_SHIFT_3; + if (iLimit > indexCapacity) { + uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4); + if (newIndex == nullptr) { return false; } + uprv_memcpy(newIndex, index, i * 4); + uprv_free(index); + index = newIndex; + indexCapacity = I_LIMIT; + } + do { + flags[i] = ALL_SAME; + index[i] = initialValue; + } while(++i < iLimit); + highStart = c; + } + return true; +} + +int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) { + int32_t newBlock = dataLength; + int32_t newTop = newBlock + blockLength; + if (newTop > dataCapacity) { + int32_t capacity; + if (dataCapacity < MEDIUM_DATA_LENGTH) { + capacity = MEDIUM_DATA_LENGTH; + } else if (dataCapacity < MAX_DATA_LENGTH) { + capacity = MAX_DATA_LENGTH; + } else { + // Should never occur. + // Either MAX_DATA_LENGTH is incorrect, + // or the code writes more values than should be possible. + return -1; + } + uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4); + if (newData == nullptr) { + return -1; + } + uprv_memcpy(newData, data, (size_t)dataLength * 4); + uprv_free(data); + data = newData; + dataCapacity = capacity; + } + dataLength = newTop; + return newBlock; +} + +/** + * No error checking for illegal arguments. 
+ *
+ * @return -1 if no new data block available (out of memory in data array)
+ * @internal
+ */
+int32_t MutableCodePointTrie::getDataBlock(int32_t i) {
+    // A MIXED entry already has its own data block; index[i] is its offset in data[].
+    if (flags[i] == MIXED) {
+        return index[i];
+    }
+    if (i < BMP_I_LIMIT) {
+        // Below BMP_I_LIMIT, one fast-length allocation is shared by the whole aligned
+        // group of SMALL_DATA_BLOCKS_PER_BMP_BLOCK entries, and all of them become MIXED.
+        int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH);
+        if (newBlock < 0) { return newBlock; }
+        int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);
+        int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+        do {
+            U_ASSERT(flags[iStart] == ALL_SAME);
+            // For an ALL_SAME entry index[] holds the value itself (setRange() stores it there).
+            // NOTE(review): writeBlock() is defined outside this excerpt; presumably it
+            // fills one small block with that value -- confirm.
+            writeBlock(data + newBlock, index[iStart]);
+            flags[iStart] = MIXED;
+            index[iStart++] = newBlock;
+            newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
+        } while (iStart < iLimit);
+        return index[i];
+    } else {
+        // At or above BMP_I_LIMIT each entry gets its own small-length block.
+        int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH);
+        if (newBlock < 0) { return newBlock; }
+        writeBlock(data + newBlock, index[i]);
+        flags[i] = MIXED;
+        index[i] = newBlock;
+        return newBlock;
+    }
+}
+
+// Stores value for the single code point c.
+// Does nothing if errorCode already indicates a failure.
+// Sets U_ILLEGAL_ARGUMENT_ERROR if c is negative or greater than MAX_UNICODE,
+// and U_MEMORY_ALLOCATION_ERROR if ensureHighStart() or getDataBlock() fails.
+void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    // The unsigned cast makes negative c fail this single comparison as well.
+    if ((uint32_t)c > MAX_UNICODE) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    int32_t block;
+    if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // block is the data[] offset of c's small block; the low bits of c select the slot.
+    data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value;
+}
+
+// Writes value into block[start..limit[ (limit is exclusive).
+void
+fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) {
+    uint32_t *pLimit = block + limit;
+    block += start;
+    while (block < pLimit) {
+        *block++ = value;
+    }
+}
+
+// Stores value for every code point in start..end (both inclusive).
+// Does nothing if errorCode already indicates a failure.
+// Sets U_ILLEGAL_ARGUMENT_ERROR if either end point is outside 0..MAX_UNICODE or start > end,
+// and U_MEMORY_ALLOCATION_ERROR if ensureHighStart() or getDataBlock() fails.
+void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    if (!ensureHighStart(end)) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // From here on the range is handled as the half-open interval [start..limit[.
+    UChar32 limit = end + 1;
+    if (start & UCPTRIE_SMALL_DATA_MASK) {
+        // Set partial block at [start..following block boundary[.
+        int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
+        if (block < 0) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK;
+        if (nextStart <= limit) {
+            // The range reaches the end of this block: fill its tail and continue after it.
+            fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH,
+                      value);
+            start = nextStart;
+        } else {
+            // The whole range lies inside this one block.
+            fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK,
+                      value);
+            return;
+        }
+    }
+
+    // Number of positions in the last, partial block.
+    int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK;
+
+    // Round down limit to a block boundary.
+    limit &= ~UCPTRIE_SMALL_DATA_MASK;
+
+    // Iterate over all-value blocks.
+    while (start < limit) {
+        int32_t i = start >> UCPTRIE_SHIFT_3;
+        if (flags[i] == ALL_SAME) {
+            // No data block needed: the index entry itself carries the value.
+            index[i] = value;
+        } else /* MIXED */ {
+            fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value);
+        }
+        start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
+    }
+
+    if (rest > 0) {
+        // Set partial block at [last block boundary..limit[.
+        int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
+        if (block < 0) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        fillBlock(data + block, 0, rest, value);
+    }
+}
+
+/* compaction --------------------------------------------------------------- */
+
+// ANDs mask into initialValue, errorValue and highValue, into every ALL_SAME
+// index entry below highStart (those entries hold values, not offsets),
+// and into every value in data[0..dataLength[.
+void MutableCodePointTrie::maskValues(uint32_t mask) {
+    initialValue &= mask;
+    errorValue &= mask;
+    highValue &= mask;
+    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
+    for (int32_t i = 0; i < iLimit; ++i) {
+        if (flags[i] == ALL_SAME) {
+            index[i] &= mask;
+        }
+    }
+    for (int32_t i = 0; i < dataLength; ++i) {
+        data[i] &= mask;
+    }
+}
+
+// Returns true if the first length elements of s and t compare equal pairwise.
+inline bool
+equalBlocks(const uint32_t *s, const uint32_t *t, int32_t length) {
+    while (length > 0 && *s == *t) {
+        ++s;
+        ++t;
+        --length;
+    }
+    return length == 0;
+}
+
+// Same as above, comparing 16-bit elements of s with 32-bit elements of t.
+inline bool
+equalBlocks(const uint16_t *s, const uint32_t *t, int32_t length) {
+    while (length > 0 && *s == *t) {
+        ++s;
+        ++t;
+        --length;
+    }
+    return length == 0;
+}
+
+// Same as above, for two 16-bit arrays.
+inline bool
+equalBlocks(const uint16_t *s, const uint16_t *t, int32_t length) {
+    while (length > 0 && *s == *t) {
+        ++s;
+        ++t;
+        --length;
+    }
+    return length == 0;
+}
+
+// Returns true if all of p[0..length[ equal value (also true for length <= 0).
+bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) {
+    const uint32_t *pLimit = p + length;
+    while (p < pLimit && *p == value) { ++p; }
+    return p == pLimit;
+}
+
+/** Search for an identical block. */
+// Scans offsets pStart, pStart+1, ... of p[0..length[ for blockLength elements equal to
+// those at q[qStart]; returns the first matching offset, or -1 if there is none.
+int32_t findSameBlock(const uint32_t *p, int32_t pStart, int32_t length,
+                      const uint32_t *q, int32_t qStart, int32_t blockLength) {
+    // Ensure that we do not even partially get past length.
+    length -= blockLength;
+
+    q += qStart;
+    while (pStart <= length) {
+        if (equalBlocks(p + pStart, q, blockLength)) {
+            return pStart;
+        }
+        ++pStart;
+    }
+    return -1;
+}
+
+// Same search with a 16-bit haystack p and a 32-bit needle q.
+int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
+                      const uint32_t *q, int32_t qStart, int32_t blockLength) {
+    // Ensure that we do not even partially get past length.
+    length -= blockLength;
+
+    q += qStart;
+    while (pStart <= length) {
+        if (equalBlocks(p + pStart, q, blockLength)) {
+            return pStart;
+        }
+        ++pStart;
+    }
+    return -1;
+}
+
+// Same search with 16-bit haystack and needle.
+int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
+                      const uint16_t *q, int32_t qStart, int32_t blockLength) {
+    // Ensure that we do not even partially get past length.
+    length -= blockLength;
+
+    q += qStart;
+    while (pStart <= length) {
+        if (equalBlocks(p + pStart, q, blockLength)) {
+            return pStart;
+        }
+        ++pStart;
+    }
+    return -1;
+}
+
+// Returns the first offset in p[0..length[ at which blockLength consecutive
+// elements all equal value, or -1 if there is no such run.
+int32_t findAllSameBlock(const uint32_t *p, int32_t length, uint32_t value,
+                         int32_t blockLength) {
+    // Ensure that we do not even partially get past length.
+    length -= blockLength;
+
+    for (int32_t block = 0; block <= length; ++block) {
+        if (p[block] == value) {
+            for (int32_t i = 1;; ++i) {
+                if (i == blockLength) {
+                    return block;
+                }
+                if (p[block + i] != value) {
+                    // No run can start at or before the mismatch; together with the
+                    // loop's ++block the search resumes just after it.
+                    block += i;
+                    break;
+                }
+            }
+        }
+    }
+    return -1;
+}
+
+/**
+ * Look for maximum overlap of the beginning of the other block
+ * with the previous, adjacent block.
+ */
+int32_t getOverlap(const uint32_t *p, int32_t length,
+                   const uint32_t *q, int32_t qStart, int32_t blockLength) {
+    // Try the longest candidate first (blockLength - 1; a full-length match is not tried here)
+    // and shrink until the last `overlap` elements of p[0..length[ equal the first
+    // `overlap` elements at q[qStart]. Returns 0 if nothing overlaps.
+    int32_t overlap = blockLength - 1;
+    U_ASSERT(overlap <= length);
+    q += qStart;
+    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
+        --overlap;
+    }
+    return overlap;
+}
+
+// Same as above, with 16-bit already-written data p and a 32-bit new block q.
+int32_t getOverlap(const uint16_t *p, int32_t length,
+                   const uint32_t *q, int32_t qStart, int32_t blockLength) {
+    int32_t overlap = blockLength - 1;
+    U_ASSERT(overlap <= length);
+    q += qStart;
+    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
+        --overlap;
+    }
+    return overlap;
+}
+
+// Same as above, with 16-bit p and q.
+int32_t getOverlap(const uint16_t *p, int32_t length,
+                   const uint16_t *q, int32_t qStart, int32_t blockLength) {
+    int32_t overlap = blockLength - 1;
+    U_ASSERT(overlap <= length);
+    q += qStart;
+    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
+        --overlap;
+    }
+    return overlap;
+}
+
+// Returns how many trailing elements of p[0..length[ equal value,
+// looking back over at most blockLength - 1 elements.
+int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,
+                          int32_t blockLength) {
+    // min is the lowest position that may still count toward the overlap.
+    int32_t min = length - (blockLength - 1);
+    int32_t i = length;
+    while (min < i && p[i - 1] == value) { --i; }
+    return length - i;
+}
+
+/**
+ * Finds the start of the last range in the trie by enumerating backward.
+ * Indexes for code points higher than this will be omitted.
+ */ +UChar32 MutableCodePointTrie::findHighStart() const { + int32_t i = highStart >> UCPTRIE_SHIFT_3; + while (i > 0) { + bool match; + if (flags[--i] == ALL_SAME) { + match = index[i] == highValue; + } else /* MIXED */ { + const uint32_t *p = data + index[i]; + for (int32_t j = 0;; ++j) { + if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) { + match = true; + break; + } + if (p[j] != highValue) { + match = false; + break; + } + } + } + if (!match) { + return (i + 1) << UCPTRIE_SHIFT_3; + } + } + return 0; +} + +class AllSameBlocks { +public: + static constexpr int32_t NEW_UNIQUE = -1; + static constexpr int32_t OVERFLOW = -2; + + AllSameBlocks() : length(0), mostRecent(-1) {} + + int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) { + if (mostRecent >= 0 && values[mostRecent] == value) { + refCounts[mostRecent] += count; + return indexes[mostRecent]; + } + for (int32_t i = 0; i < length; ++i) { + if (values[i] == value) { + mostRecent = i; + refCounts[i] += count; + return indexes[i]; + } + } + if (length == CAPACITY) { + return OVERFLOW; + } + mostRecent = length; + indexes[length] = index; + values[length] = value; + refCounts[length++] = count; + return NEW_UNIQUE; + } + + /** Replaces the block which has the lowest reference count. 
*/ + void add(int32_t index, int32_t count, uint32_t value) { + U_ASSERT(length == CAPACITY); + int32_t least = -1; + int32_t leastCount = I_LIMIT; + for (int32_t i = 0; i < length; ++i) { + U_ASSERT(values[i] != value); + if (refCounts[i] < leastCount) { + least = i; + leastCount = refCounts[i]; + } + } + U_ASSERT(least >= 0); + mostRecent = least; + indexes[least] = index; + values[least] = value; + refCounts[least] = count; + } + + int32_t findMostUsed() const { + if (length == 0) { return -1; } + int32_t max = -1; + int32_t maxCount = 0; + for (int32_t i = 0; i < length; ++i) { + if (refCounts[i] > maxCount) { + max = i; + maxCount = refCounts[i]; + } + } + return indexes[max]; + } + +private: + static constexpr int32_t CAPACITY = 32; + + int32_t length; + int32_t mostRecent; + + int32_t indexes[CAPACITY]; + uint32_t values[CAPACITY]; + int32_t refCounts[CAPACITY]; +}; + +int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) { +#ifdef UCPTRIE_DEBUG + bool overflow = false; +#endif + + // ASCII data will be stored as a linear table, even if the following code + // does not yet count it that way. + int32_t newDataCapacity = ASCII_LIMIT; + // Add room for special values (errorValue, highValue) and padding. + newDataCapacity += 4; + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; + int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + for (int32_t i = 0; i < iLimit; i += inc) { + if (i == fastILimit) { + blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + inc = 1; + } + uint32_t value = index[i]; + if (flags[i] == MIXED) { + // Really mixed? + const uint32_t *p = data + value; + value = *p; + if (allValuesSameAs(p + 1, blockLength - 1, value)) { + flags[i] = ALL_SAME; + index[i] = value; + // Fall through to ALL_SAME handling. 
+ } else { + newDataCapacity += blockLength; + continue; + } + } else { + U_ASSERT(flags[i] == ALL_SAME); + if (inc > 1) { + // Do all of the fast-range data block's ALL_SAME parts have the same value? + bool allSame = true; + int32_t next_i = i + inc; + for (int32_t j = i + 1; j < next_i; ++j) { + U_ASSERT(flags[j] == ALL_SAME); + if (index[j] != value) { + allSame = false; + break; + } + } + if (!allSame) { + // Turn it into a MIXED block. + if (getDataBlock(i) < 0) { + return -1; + } + continue; + } + } + } + // Is there another ALL_SAME block with the same value? + int32_t other = allSameBlocks.findOrAdd(i, inc, value); + if (other == AllSameBlocks::OVERFLOW) { + // The fixed-size array overflowed. Slow check for a duplicate block. +#ifdef UCPTRIE_DEBUG + if (!overflow) { + puts("UCPTrie AllSameBlocks overflow"); + overflow = true; + } +#endif + int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + for (int32_t j = 0;; j += jInc) { + if (j == i) { + allSameBlocks.add(i, inc, value); + break; + } + if (j == fastILimit) { + jInc = 1; + } + if (flags[j] == ALL_SAME && index[j] == value) { + allSameBlocks.add(j, jInc + inc, value); + other = j; + break; + // We could keep counting blocks with the same value + // before we add the first one, which may improve compaction in rare cases, + // but it would make it slower. + } + } + } + if (other >= 0) { + flags[i] = SAME_AS; + index[i] = other; + } else { + // New unique same-value block. 
+ newDataCapacity += blockLength; + } + } + return newDataCapacity; +} + +#ifdef UCPTRIE_DEBUG +# define DEBUG_DO(expr) expr +#else +# define DEBUG_DO(expr) +#endif + +#ifdef UCPTRIE_DEBUG +// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF +int32_t appendValue(char s[], int32_t length, uint32_t value) { + value ^= value >> 16; + value ^= value >> 8; + s[length] = 0xE2; + s[length + 1] = (char)(0xA0 + ((value >> 6) & 3)); + s[length + 2] = (char)(0x80 + (value & 0x3F)); + return length + 3; +} + +void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value, + UChar32 start, int32_t overlap, uint32_t initialValue) { + char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3]; + int32_t length = 0; + int32_t i; + for (i = 0; i < overlap; ++i) { + length = appendValue(s, length, 0); // Braille blank + } + s[length++] = '|'; + for (; i < blockLength; ++i) { + if (block != nullptr) { + value = block[i]; + } + if (value == initialValue) { + value = 0x40; // Braille lower left dot + } + length = appendValue(s, length, value); + } + s[length] = 0; + start += overlap; + if (start <= 0xffff) { + printf(" %04lX %s|\n", (long)start, s); + } else if (start <= 0xfffff) { + printf(" %5lX %s|\n", (long)start, s); + } else { + printf(" %6lX %s|\n", (long)start, s); + } +} +#endif + +/** + * Compacts a build-time trie. + * + * The compaction + * - removes blocks that are identical with earlier ones + * - overlaps each new non-duplicate block as much as possible with the previously-written one + * - works with fast-range data blocks whose length is a multiple of that of + * higher-code-point data blocks + * + * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks. 
+ */ +int32_t MutableCodePointTrie::compactData(int32_t fastILimit, uint32_t *newData) { +#ifdef UCPTRIE_DEBUG + int32_t countSame=0, sumOverlaps=0; + bool printData = dataLength == 29088 /* line.brk */ || + // dataLength == 30048 /* CanonIterData */ || + dataLength == 50400 /* zh.txt~stroke */; +#endif + + // The linear ASCII data has been copied into newData already. + int32_t newDataLength = 0; + for (int32_t i = 0; newDataLength < ASCII_LIMIT; + newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { + index[i] = newDataLength; +#ifdef UCPTRIE_DEBUG + if (printData) { + printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue); + } +#endif + } + + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; + int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) { + if (i == fastILimit) { + blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + inc = 1; + } + if (flags[i] == ALL_SAME) { + uint32_t value = index[i]; + int32_t n = findAllSameBlock(newData, newDataLength, value, blockLength); + if (n >= 0) { + DEBUG_DO(++countSame); + index[i] = n; + } else { + n = getAllSameOverlap(newData, newDataLength, value, blockLength); + DEBUG_DO(sumOverlaps += n); +#ifdef UCPTRIE_DEBUG + if (printData) { + printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue); + } +#endif + index[i] = newDataLength - n; + while (n < blockLength) { + newData[newDataLength++] = value; + ++n; + } + } + } else if (flags[i] == MIXED) { + const uint32_t *block = data + index[i]; + int32_t n = findSameBlock(newData, 0, newDataLength, block, 0, blockLength); + if (n >= 0) { + DEBUG_DO(++countSame); + index[i] = n; + } else { + n = getOverlap(newData, newDataLength, block, 0, blockLength); + DEBUG_DO(sumOverlaps += n); +#ifdef UCPTRIE_DEBUG + if (printData) { + printBlock(block, blockLength, 0, i << 
UCPTRIE_SHIFT_3, n, initialValue); + } +#endif + index[i] = newDataLength - n; + while (n < blockLength) { + newData[newDataLength++] = block[n++]; + } + } + } else /* SAME_AS */ { + uint32_t j = index[i]; + index[i] = index[j]; + } + } + +#ifdef UCPTRIE_DEBUG + /* we saved some space */ + printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", + (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps); +#endif + return newDataLength; +} + +int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, UErrorCode &errorCode) { + int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3); + if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) { + // Only the linear fast index, no multi-stage index tables. + index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; + return fastIndexLength; + } + + // Condense the fast index table. + // Also, does it contain an index-3 block with all dataNullOffset? + uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength + int32_t i3FirstNull = -1; + for (int32_t i = 0, j = 0; i < fastILimit; ++j) { + uint32_t i3 = index[i]; + fastIndex[j] = (uint16_t)i3; + if (i3 == (uint32_t)dataNullOffset) { + if (i3FirstNull < 0) { + i3FirstNull = j; + } else if (index3NullOffset < 0 && + (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) { + index3NullOffset = i3FirstNull; + } + } else { + i3FirstNull = -1; + } + // Set the index entries that compactData() skipped. + // Needed when the multi-stage index covers the fast index range as well. + int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + while (++i < iNext) { + i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + index[i] = i3; + } + } + + // Examine index-3 blocks. For each determine one of: + // - same as the index-3 null block + // - same as a fast-index block + // - 16-bit indexes + // - 18-bit indexes + // We store this in the first flags entry for the index-3 block. 
+ // + // Also determine an upper limit for the index-3 table length. + int32_t index3Capacity = 0; + i3FirstNull = index3NullOffset; + // If the fast index covers the whole BMP, then + // the multi-stage index is only for supplementary code points. + // Otherwise, the multi-stage index covers all of Unicode. + int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT; + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + for (int32_t i = iStart; i < iLimit;) { + int32_t j = i; + int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; + uint32_t oredI3 = 0; + bool isNull = true; + do { + uint32_t i3 = index[j]; + oredI3 |= i3; + if (i3 != (uint32_t)dataNullOffset) { + isNull = false; + } + } while (++j < jLimit); + if (isNull) { + flags[i] = I3_NULL; + if (i3FirstNull < 0) { + if (oredI3 <= 0xffff) { + index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; + } else { + index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; + } + i3FirstNull = 0; + } + } else { + if (oredI3 <= 0xffff) { + int32_t n = findSameBlock(fastIndex, 0, fastIndexLength, + index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); + if (n >= 0) { + flags[i] = I3_BMP; + index[i] = n; + } else { + flags[i] = I3_16; + index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; + } + } else { + flags[i] = I3_18; + index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; + } + } + i = j; + } + + int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3; + + // Length of the index-1 table, rounded up. + int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2; + + // Index table: Fast index, index-1, index-3, index-2. + // +1 for possible index table padding. 
+ int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1; + index16 = (uint16_t *)uprv_malloc(index16Capacity * 2); + if (index16 == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uprv_memcpy(index16, fastIndex, fastIndexLength * 2); + + // Compact the index-3 table and write an uncompacted version of the index-2 table. + uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity + int32_t i2Length = 0; + i3FirstNull = index3NullOffset; + int32_t index3Start = fastIndexLength + index1Length; + int32_t indexLength = index3Start; + for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) { + int32_t i3; + uint8_t f = flags[i]; + if (f == I3_NULL && i3FirstNull < 0) { + // First index-3 null block. Write & overlap it like a normal block, then remember it. + f = dataNullOffset <= 0xffff ? I3_16 : I3_18; + i3FirstNull = 0; + } + if (f == I3_NULL) { + i3 = index3NullOffset; + } else if (f == I3_BMP) { + i3 = index[i]; + } else if (f == I3_16) { + int32_t n = findSameBlock(index16, index3Start, indexLength, + index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); + if (n >= 0) { + i3 = n; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, + index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); + } + i3 = indexLength - n; + while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) { + index16[indexLength++] = index[i + n++]; + } + } + } else { + U_ASSERT(f == I3_18); + // Encode an index-3 block that contains one or more data indexes exceeding 16 bits. 
+ int32_t j = i; + int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; + int32_t k = indexLength; + do { + ++k; + uint32_t v = index[j++]; + uint32_t upperBits = (v & 0x30000) >> 2; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 4; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 6; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 8; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 10; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 12; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 14; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 16; + index16[k++] = v; + index16[k - 9] = upperBits; + } while (j < jLimit); + int32_t n = findSameBlock(index16, index3Start, indexLength, + index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); + if (n >= 0) { + i3 = n | 0x8000; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, + index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); + } + i3 = (indexLength - n) | 0x8000; + if (n > 0) { + int32_t start = indexLength; + while (n < INDEX_3_18BIT_BLOCK_LENGTH) { + index16[indexLength++] = index16[start + n++]; + } + } else { + indexLength += INDEX_3_18BIT_BLOCK_LENGTH; + } + } + } + if (index3NullOffset < 0 && i3FirstNull >= 0) { + index3NullOffset = i3; + } + // Set the index-2 table entry. + index2[i2Length++] = i3; + } + U_ASSERT(i2Length == index2Capacity); + U_ASSERT(indexLength <= index3Start + index3Capacity); + + if (index3NullOffset < 0) { + index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; + } + if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) { + // The index-3 offsets exceed 15 bits, or + // the last one cannot be distinguished from the no-null-block value. 
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + // Compact the index-2 table and write the index-1 table. + int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH; + int32_t i1 = fastIndexLength; + for (int32_t i = 0; i < i2Length; i += blockLength) { + if ((i2Length - i) < blockLength) { + // highStart is inside the last index-2 block. Shorten it. + blockLength = i2Length - i; + } + int32_t i2; + int32_t n = findSameBlock(index16, index3Start, indexLength, + index2, i, blockLength); + if (n >= 0) { + i2 = n; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3/2 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, index2, i, blockLength); + } + i2 = indexLength - n; + while (n < blockLength) { + index16[indexLength++] = index2[i + n++]; + } + } + // Set the index-1 table entry. + index16[i1++] = i2; + } + U_ASSERT(i1 == index3Start); + U_ASSERT(indexLength <= index16Capacity); + +#ifdef UCPTRIE_DEBUG + /* we saved some space */ + printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n", + (long)iLimit, (long)indexLength); +#endif + + return indexLength; +} + +int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) { + // Find the real highStart and round it up. + U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); + highValue = get(MAX_UNICODE); + int32_t realHighStart = findHighStart(); + realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) & + ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); + if (realHighStart == UNICODE_LIMIT) { + highValue = initialValue; + } + +#ifdef UCPTRIE_DEBUG + printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", + (long)realHighStart, (long)highValue, (long)initialValue); +#endif + + // We always store indexes and data values for the fast range. + // Pin highStart to the top of that range while building. 
+ UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3; + if (realHighStart < fastLimit) { + for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) { + flags[i] = ALL_SAME; + index[i] = highValue; + } + highStart = fastLimit; + } else { + highStart = realHighStart; + } + + uint32_t asciiData[ASCII_LIMIT]; + for (int32_t i = 0; i < ASCII_LIMIT; ++i) { + asciiData[i] = get(i); + } + + // First we look for which data blocks have the same value repeated over the whole block, + // deduplicate such blocks, find a good null data block (for faster enumeration), + // and get an upper bound for the necessary data array length. + AllSameBlocks allSameBlocks; + int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks); + if (newDataCapacity < 0) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4); + if (newData == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uprv_memcpy(newData, asciiData, sizeof(asciiData)); + + int32_t newDataLength = compactData(fastILimit, newData); + U_ASSERT(newDataLength <= newDataCapacity); + uprv_free(data); + data = newData; + dataCapacity = newDataCapacity; + dataLength = newDataLength; + if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) { + // The offset of the last data block is too high to be stored in the index table. 
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + int32_t dataNullIndex = allSameBlocks.findMostUsed(); + if (dataNullIndex >= 0) { + dataNullOffset = index[dataNullIndex]; +#ifdef UCPTRIE_DEBUG + if (data[dataNullOffset] != initialValue) { + printf("UCPTrie initialValue %lx -> more common nullValue %lx\n", + (long)initialValue, (long)data[dataNullOffset]); + } +#endif + initialValue = data[dataNullOffset]; + } else { + dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET; + } + + int32_t indexLength = compactIndex(fastILimit, errorCode); + highStart = realHighStart; + return indexLength; +} + +UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || + valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + + // The mutable trie always stores 32-bit values. + // When we build a UCPTrie for a smaller value width, we first mask off unused bits + // before compacting the data. + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_32: + break; + case UCPTRIE_VALUE_BITS_16: + maskValues(0xffff); + break; + case UCPTRIE_VALUE_BITS_8: + maskValues(0xff); + break; + default: + break; + } + + UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT; + int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode); + if (U_FAILURE(errorCode)) { + clear(); + return nullptr; + } + + // Ensure data table alignment: The index length must be even for uint32_t data. + if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) { + index16[indexLength++] = 0xffee; // arbitrary value + } + + // Make the total trie structure length a multiple of 4 bytes by padding the data table, + // and store special values as the last two data values. 
+ int32_t length = indexLength * 2; + if (valueWidth == UCPTRIE_VALUE_BITS_16) { + if (((indexLength ^ dataLength) & 1) != 0) { + // padding + data[dataLength++] = errorValue; + } + if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { + data[dataLength++] = highValue; + data[dataLength++] = errorValue; + } + length += dataLength * 2; + } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { + // 32-bit data words never need padding to a multiple of 4 bytes. + if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { + if (data[dataLength - 1] != highValue) { + data[dataLength++] = highValue; + } + data[dataLength++] = errorValue; + } + length += dataLength * 4; + } else { + int32_t and3 = (length + dataLength) & 3; + if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) { + // all set + } else if(and3 == 3 && data[dataLength - 1] == highValue) { + data[dataLength++] = errorValue; + } else { + while (and3 != 2) { + data[dataLength++] = highValue; + and3 = (and3 + 1) & 3; + } + data[dataLength++] = highValue; + data[dataLength++] = errorValue; + } + length += dataLength; + } + + // Calculate the total length of the UCPTrie as a single memory block. + length += sizeof(UCPTrie); + U_ASSERT((length & 3) == 0); + + uint8_t *bytes = (uint8_t *)uprv_malloc(length); + if (bytes == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + clear(); + return nullptr; + } + UCPTrie *trie = reinterpret_cast(bytes); + uprv_memset(trie, 0, sizeof(UCPTrie)); + trie->indexLength = indexLength; + trie->dataLength = dataLength; + + trie->highStart = highStart; + // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes. + // Runtime code needs to then test for the real highStart as well. 
+ trie->shifted12HighStart = (highStart + 0xfff) >> 12; + trie->type = type; + trie->valueWidth = valueWidth; + + trie->index3NullOffset = index3NullOffset; + trie->dataNullOffset = dataNullOffset; + trie->nullValue = initialValue; + + bytes += sizeof(UCPTrie); + + // Fill the index and data arrays. + uint16_t *dest16 = (uint16_t *)bytes; + trie->index = dest16; + + if (highStart <= fastLimit) { + // Condense only the fast index from the mutable-trie index. + for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) { + *dest16++ = (uint16_t)index[i]; // dest16[j] + } + } else { + uprv_memcpy(dest16, index16, indexLength * 2); + dest16 += indexLength; + } + bytes += indexLength * 2; + + // Write the data array. + const uint32_t *p = data; + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + // Write 16-bit data values. + trie->data.ptr16 = dest16; + for (int32_t i = dataLength; i > 0; --i) { + *dest16++ = (uint16_t)*p++; + } + break; + case UCPTRIE_VALUE_BITS_32: + // Write 32-bit data values. + trie->data.ptr32 = (uint32_t *)bytes; + uprv_memcpy(bytes, p, (size_t)dataLength * 4); + break; + case UCPTRIE_VALUE_BITS_8: + // Write 8-bit data values. + trie->data.ptr8 = bytes; + for (int32_t i = dataLength; i > 0; --i) { + *bytes++ = (uint8_t)*p++; + } + break; + default: + // Will not occur, valueWidth checked at the beginning. 
+ break; + } + +#ifdef UCPTRIE_DEBUG + trie->name = name; + + ucptrie_printLengths(trie, ""); +#endif + + clear(); + return trie; +} + +} // namespace + +U_NAMESPACE_END + +U_NAMESPACE_USE + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + MutableCodePointTrie *trie = new MutableCodePointTrie(initialValue, errorValue, *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + delete trie; + return nullptr; + } + return reinterpret_cast(trie); +} + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + if (other == nullptr) { + return nullptr; + } + MutableCodePointTrie *clone = new MutableCodePointTrie( + *reinterpret_cast(other), *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + delete clone; + return nullptr; + } + return reinterpret_cast(clone); +} + +U_CAPI void U_EXPORT2 +umutablecptrie_close(UMutableCPTrie *trie) { + delete reinterpret_cast(trie); +} + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + if (trie == nullptr) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + return reinterpret_cast(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode)); +} + +U_CAPI uint32_t U_EXPORT2 +umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) { + return reinterpret_cast(trie)->get(c); +} + +namespace { + +UChar32 getRange(const void *trie, UChar32 start, + UCPTrieValueFilter *filter, const void *context, uint32_t *pValue) { + return reinterpret_cast(trie)-> + getRange(start, filter, context, pValue); +} + +} // namespace + +U_CAPI UChar32 U_EXPORT2 +umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, + UCPTrieRangeOption option, uint32_t surrogateValue, + UCPTrieValueFilter *filter, const 
void *context, uint32_t *pValue) { + return ucptrie_internalGetRange(getRange, trie, start, + option, surrogateValue, + filter, context, pValue); +} + +U_CAPI void U_EXPORT2 +umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return; + } + reinterpret_cast(trie)->set(c, value, *pErrorCode); +} + +U_CAPI void U_EXPORT2 +umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end, + uint32_t value, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return; + } + reinterpret_cast(trie)->setRange(start, end, value, *pErrorCode); +} + +/* Compact and internally serialize the trie. */ +U_CAPI UCPTrie * U_EXPORT2 +umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + return reinterpret_cast(trie)->build(type, valueWidth, *pErrorCode); +} + +#ifdef UCPTRIE_DEBUG +U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) { + reinterpret_cast(trie)->name = name; +} +#endif diff --git a/icu4c/source/common/unicode/ucptrie.h b/icu4c/source/common/unicode/ucptrie.h new file mode 100644 index 00000000000..e9b61df0c9b --- /dev/null +++ b/icu4c/source/common/unicode/ucptrie.h @@ -0,0 +1,695 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucptrie.h (modified from utrie2.h) +// created: 2017dec29 Markus W. Scherer + +#ifndef __UCPTRIE_H__ +#define __UCPTRIE_H__ + +#include "unicode/utypes.h" +#include "unicode/localpointer.h" +#include "unicode/utf8.h" +#include "putilimp.h" +#include "udataswp.h" + +U_CDECL_BEGIN + +/** + * \file + * + * This file defines an immutable Unicode code point trie. + * + * @see UCPTrie + * @see UMutableCPTrie + */ + +/** + * Immutable Unicode code point trie structure. 
+ * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values. + * For details see http://site.icu-project.org/design/struct/utrie + * + * Do not access UCPTrie fields directly; use public functions and macros. + * Functions are easy to use: They support all trie types and value widths. + * + * When performance is really important, macros provide faster access. + * Most macros are specific to either "fast" or "small" tries, see UCPTrieType. + * There are "fast" macros for special optimized use cases. + * + * The macros will return bogus values, or may crash, if used on the wrong type or value width. + * + * @see UMutableCPTrie + * @draft ICU 63 + */ +struct UCPTrie; +typedef struct UCPTrie UCPTrie; + +/** + * Selectors for the type of a UCPTrie. + * Different trade-offs for size vs. speed. + * + * @see umutablecptrie_buildImmutable + * @see ucptrie_openFromBinary + * @see ucptrie_getType + * @draft ICU 63 + */ +enum UCPTrieType { + /** + * For ucptrie_openFromBinary() to accept any type. + * ucptrie_getType() will return the actual type. + * @draft ICU 63 + */ + UCPTRIE_TYPE_ANY = -1, + /** + * Fast/simple/larger BMP data structure. Use functions and "fast" macros. + * @draft ICU 63 + */ + UCPTRIE_TYPE_FAST, + /** + * Small/slower BMP data structure. Use functions and "small" macros. + * @draft ICU 63 + */ + UCPTRIE_TYPE_SMALL +}; +typedef enum UCPTrieType UCPTrieType; + +/** + * Selectors for the number of bits in a UCPTrie data value. + * + * @see umutablecptrie_buildImmutable + * @see ucptrie_openFromBinary + * @see ucptrie_getValueWidth + * @draft ICU 63 + */ +enum UCPTrieValueWidth { + /** + * For ucptrie_openFromBinary() to accept any data value width. + * ucptrie_getValueWidth() will return the actual data value width. + * @draft ICU 63 + */ + UCPTRIE_VALUE_BITS_ANY = -1, + /** + * 16 bits per UCPTrie data value. + * @draft ICU 63 + */ + UCPTRIE_VALUE_BITS_16, + /** + * 32 bits per UCPTrie data value. 
+ * @draft ICU 63 + */ + UCPTRIE_VALUE_BITS_32, + /** + * 8 bits per UCPTrie data value. + * @draft ICU 63 + */ + UCPTRIE_VALUE_BITS_8 +}; +typedef enum UCPTrieValueWidth UCPTrieValueWidth; + +/** + * Selectors for how ucptrie_getRange() should report value ranges overlapping with surrogates. + * Most users should use UCPTRIE_RANGE_NORMAL. + * + * @see ucptrie_getRange + * @draft ICU 63 + */ +enum UCPTrieRangeOption { + /** + * ucptrie_getRange() enumerates all same-value ranges as stored in the trie. + * Most users should use this option. + */ + UCPTRIE_RANGE_NORMAL, + /** + * ucptrie_getRange() enumerates all same-value ranges as stored in the trie, + * except that lead surrogates (U+D800..U+DBFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See U_IS_LEAD(c). + * + * Most users should use UCPTRIE_RANGE_NORMAL instead. + * + * This option is useful for tries that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + */ + UCPTRIE_RANGE_FIXED_LEAD_SURROGATES, + /** + * ucptrie_getRange() enumerates all same-value ranges as stored in the trie, + * except that all surrogates (U+D800..U+DFFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See U_IS_SURROGATE(c). + * + * Most users should use UCPTRIE_RANGE_NORMAL instead. + * + * This option is useful for tries that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the surrogate code *points*.
+ */ + UCPTRIE_RANGE_FIXED_ALL_SURROGATES +}; +typedef enum UCPTrieRangeOption UCPTrieRangeOption; + +/** + * Opens a trie from its binary form, stored in 32-bit-aligned memory. + * Inverse of ucptrie_toBinary(). + * + * The memory must remain valid and unchanged as long as the trie is used. + * You must ucptrie_close() the trie once you are done using it. + * + * @param type selects the trie type; results in an + * U_INVALID_FORMAT_ERROR if it does not match the binary data; + * use UCPTRIE_TYPE_ANY to accept any type + * @param valueWidth selects the number of bits in a data value; results in an + * U_INVALID_FORMAT_ERROR if it does not match the binary data; + * use UCPTRIE_VALUE_BITS_ANY to accept any data value width + * @param data a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie + * @param length the number of bytes available at data; + * can be more than necessary + * @param pActualLength receives the actual number of bytes at data taken up by the trie data; + * can be NULL + * @param pErrorCode an in/out ICU UErrorCode + * @return the trie + * + * @see umutablecptrie_open + * @see umutablecptrie_buildImmutable + * @see ucptrie_toBinary + * @draft ICU 63 + */ +U_CAPI UCPTrie * U_EXPORT2 +ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, + const void *data, int32_t length, int32_t *pActualLength, + UErrorCode *pErrorCode); + +/** + * Closes a trie and releases associated memory. + * + * @param trie the trie + * @draft ICU 63 + */ +U_CAPI void U_EXPORT2 +ucptrie_close(UCPTrie *trie); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUCPTriePointer + * "Smart pointer" class, closes a UCPTrie via ucptrie_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 63 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close); + +U_NAMESPACE_END + +#endif + +/** + * Returns the trie type. 
+ * + * @param trie the trie + * @return the trie type + * @see ucptrie_openFromBinary + * @see UCPTRIE_TYPE_ANY + * @draft ICU 63 + */ +U_CAPI UCPTrieType U_EXPORT2 +ucptrie_getType(const UCPTrie *trie); + +/** + * Returns the number of bits in a trie data value. + * + * @param trie the trie + * @return the number of bits in a trie data value + * @see ucptrie_openFromBinary + * @see UCPTRIE_VALUE_BITS_ANY + * @draft ICU 63 + */ +U_CAPI UCPTrieValueWidth U_EXPORT2 +ucptrie_getValueWidth(const UCPTrie *trie); + +/** + * Returns the value for a code point as stored in the trie, with range checking. + * Returns the trie error value if c is not in the range 0..U+10FFFF. + * + * Easier to use than UCPTRIE_FAST_GET() and similar macros but slower. + * Easier to use because, unlike the macros, this function works on all UCPTrie + * objects, for all types and value widths. + * + * @param trie the trie + * @param c the code point + * @return the trie value, + * or the trie error value if the code point is not in the range 0..U+10FFFF + * @draft ICU 63 + */ +U_CAPI uint32_t U_EXPORT2 +ucptrie_get(const UCPTrie *trie, UChar32 c); + +/** + * Callback function type: Modifies a trie value. + * Optionally called by ucptrie_getRange() or umutablecptrie_getRange(). + * The modified value will be returned by the getRange function. + * + * Can be used to ignore some of the value bits, + * make a filter for one of several values, + * return a value index computed from the trie value, etc. + * + * @param context an opaque pointer, as passed into the getRange function + * @param value a value from the trie + * @return the modified value + * @draft ICU 63 + */ +typedef uint32_t U_CALLCONV +UCPTrieValueFilter(const void *context, uint32_t value); + +/** + * Returns the last code point such that all those from start to there have the same value. + * Can be used to efficiently iterate over all same-value ranges in a trie. 
+ * + * If the UCPTrieValueFilter function pointer is not NULL, then + * the value to be delivered is passed through that function, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that function pointer is NULL. + * + * Example: + * \code + * UChar32 start = 0, end; + * uint32_t value; + * while ((end = ucptrie_getRange(trie, start, UCPTRIE_RANGE_NORMAL, 0, + * NULL, NULL, &value)) >= 0) { + * // Work with the range start..end and its value. + * start = end + 1; + * } + * \endcode + * + * @param trie the trie + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually UCPTRIE_RANGE_NORMAL + * @param surrogateValue value for surrogates; ignored if option==UCPTRIE_RANGE_NORMAL + * @param filter a pointer to a function that may modify the trie data value, + * or NULL if the values from the trie are to be used unmodified + * @param context an opaque pointer that is passed on to the filter function + * @param pValue if not NULL, receives the value that every code point start..end has; + * may have been modified by filter(context, trie value) + * if that function pointer is not NULL + * @return the range end code point, or -1 if start is not a valid code point + * @draft ICU 63 + */ +U_CAPI UChar32 U_EXPORT2 +ucptrie_getRange(const UCPTrie *trie, UChar32 start, + UCPTrieRangeOption option, uint32_t surrogateValue, + UCPTrieValueFilter *filter, const void *context, uint32_t *pValue); + +/** + * Writes a memory-mappable form of the trie into 32-bit aligned memory. + * Inverse of ucptrie_openFromBinary(). 
+ * + * @param trie the trie + * @param data a pointer to 32-bit-aligned memory to be filled with the trie data; + * can be NULL if capacity==0 + * @param capacity the number of bytes available at data, or 0 for pure preflighting + * @param pErrorCode an in/out ICU UErrorCode; + * U_BUFFER_OVERFLOW_ERROR if the capacity is too small + * @return the number of bytes written or (if buffer overflow) needed for the trie + * + * @see ucptrie_openFromBinary() + * @draft ICU 63 + */ +U_CAPI int32_t U_EXPORT2 +ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode); + +/** + * Macro parameter value for a trie with 16-bit data values. + * Use the name of this macro as a "dataAccess" parameter in other macros. + * Do not use this macro in any other way. + * + * @see UCPTRIE_VALUE_BITS_16 + * @draft ICU 63 + */ +#define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i]) + +/** + * Macro parameter value for a trie with 32-bit data values. + * Use the name of this macro as a "dataAccess" parameter in other macros. + * Do not use this macro in any other way. + * + * @see UCPTRIE_VALUE_BITS_32 + * @draft ICU 63 + */ +#define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i]) + +/** + * Macro parameter value for a trie with 8-bit data values. + * Use the name of this macro as a "dataAccess" parameter in other macros. + * Do not use this macro in any other way. + * + * @see UCPTRIE_VALUE_BITS_8 + * @draft ICU 63 + */ +#define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i]) + +/** + * Returns a trie value for a code point, with range checking. + * Returns the trie error value if c is not in the range 0..U+10FFFF. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point + * @return The code point's trie value. 
+ * @draft ICU 63 + */ +#define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c)) + +/** + * Returns a trie value for a code point, with range checking. + * Returns the trie error value if c is not in the range U+0000..U+10FFFF. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point + * @return The code point's trie value. + * @draft ICU 63 + */ +#define UCPTRIE_SMALL_GET(trie, dataAccess, c) \ + dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c)) + +/** + * UTF-16: Reads the next code point (UChar32 c, out), post-increments src, + * and gets a value from the trie. + * Sets the trie error value if c is an unpaired surrogate. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param src (const UChar *, in/out) the source text pointer + * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated + * @param c (UChar32, out) variable for the code point + * @param result (out) variable for the trie lookup result + * @draft ICU 63 + */ +#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) { \ + (c) = *(src)++; \ + int32_t __index; \ + if (!U16_IS_SURROGATE(c)) { \ + __index = _UCPTRIE_FAST_INDEX(trie, c); \ + } else { \ + uint16_t __c2; \ + if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \ + ++(src); \ + (c) = U16_GET_SUPPLEMENTARY((c), __c2); \ + __index = _UCPTRIE_SMALL_INDEX(trie, c); \ + } else { \ + __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \ + } \ + } \ + (result) = dataAccess(trie, __index); \ +} + +/** + * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src, + * and gets
a value from the trie. + * Sets the trie error value if c is an unpaired surrogate. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param start (const UChar *, in) the start pointer for the text + * @param src (const UChar *, in/out) the source text pointer + * @param c (UChar32, out) variable for the code point + * @param result (out) variable for the trie lookup result + * @draft ICU 63 + */ +#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) { \ + (c) = *--(src); \ + int32_t __index; \ + if (!U16_IS_SURROGATE(c)) { \ + __index = _UCPTRIE_FAST_INDEX(trie, c); \ + } else { \ + uint16_t __c2; \ + if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \ + --(src); \ + (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \ + __index = _UCPTRIE_SMALL_INDEX(trie, c); \ + } else { \ + __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \ + } \ + } \ + (result) = dataAccess(trie, __index); \ +} + +/** + * UTF-8: Post-increments src and gets a value from the trie. + * Sets the trie error value for an ill-formed byte sequence. + * + * Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point + * because it would be more work to do so and is often not needed. + * If the trie value differs from the error value, then the byte sequence is well-formed, + * and the code point can be assembled without revalidation. 
+ * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param src (const char *, in/out) the source text pointer + * @param limit (const char *, in) the limit pointer for the text (must not be NULL) + * @param result (out) variable for the trie lookup result + * @draft ICU 63 + */ +#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) { \ + int32_t __lead = (uint8_t)*(src)++; \ + if (!U8_IS_SINGLE(__lead)) { \ + uint8_t __t1, __t2, __t3; \ + if ((src) != (limit) && \ + (__lead >= 0xe0 ? \ + __lead < 0xf0 ? /* U+0800..U+FFFF except surrogates */ \ + U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \ + ++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \ + (__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \ + : /* U+10000..U+10FFFF */ \ + (__lead -= 0xf0) <= 4 && \ + U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \ + (__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \ + (__t2 = *(src) - 0x80) <= 0x3f && \ + ++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \ + (__lead = __lead >= (trie)->shifted12HighStart ? \ + (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \ + ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \ + : /* U+0080..U+07FF */ \ + __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \ + (__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \ + ++(src); \ + } else { \ + __lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* ill-formed*/ \ + } \ + } \ + (result) = dataAccess(trie, __lead); \ +} + +/** + * UTF-8: Pre-decrements src and gets a value from the trie. + * Sets the trie error value for an ill-formed byte sequence. + * + * Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point + * because it would be more work to do so and is often not needed. 
+ * If the trie value differs from the error value, then the byte sequence is well-formed, + * and the code point can be assembled without revalidation. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param start (const char *, in) the start pointer for the text + * @param src (const char *, in/out) the source text pointer + * @param result (out) variable for the trie lookup result + * @draft ICU 63 + */ +#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) { \ + int32_t __index = (uint8_t)*--(src); \ + if (!U8_IS_SINGLE(__index)) { \ + __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \ + (const uint8_t *)(src)); \ + (src) -= __index & 7; \ + __index >>= 3; \ + } \ + (result) = dataAccess(trie, __index); \ +} + +/** + * Returns a trie value for an ASCII code point, without range checking. + * + * @param trie (const UCPTrie *, in) the trie (of either fast or small type) + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point; must be U+0000..U+007F + * @return The ASCII code point's trie value. + * @draft ICU 63 + */ +#define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c) + +/** + * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. + * Can be used to look up a value for a UTF-16 code unit if other parts of + * the string processing check for surrogates. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point, must be U+0000..U+FFFF + * @return The BMP code point's trie value. 
+ * @draft ICU 63 + */ +#define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c)) + +/** + * Returns a trie value for a supplementary code point (U+10000..U+10FFFF), + * without range checking. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF + * @return The supplementary code point's trie value. + * @draft ICU 63 + */ +#define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c)) + +/* Internal definitions ----------------------------------------------------- */ + +/** @internal */ +typedef union UCPTrieData { + /** @internal */ + const void *ptr0; + /** @internal */ + const uint16_t *ptr16; + /** @internal */ + const uint32_t *ptr32; + /** @internal */ + const uint8_t *ptr8; +} UCPTrieData; + +/** + * Internal trie structure definition. + * Visible only for use by API macros. + * @internal + */ +struct UCPTrie { + /** @internal */ + const uint16_t *index; + /** @internal */ + UCPTrieData data; + + /** @internal */ + int32_t indexLength; + /** @internal */ + int32_t dataLength; + /** Start of the last range which ends at U+10FFFF. @internal */ + UChar32 highStart; + /** highStart>>12 @internal */ + uint16_t shifted12HighStart; + + /** @internal */ + int8_t type; // UCPTrieType + /** @internal */ + int8_t valueWidth; // UCPTrieValueWidth + + /** padding/reserved @internal */ + uint32_t reserved32; + /** padding/reserved @internal */ + uint16_t reserved16; + + /** + * Internal index-3 null block offset. + * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. + * @internal + */ + uint16_t index3NullOffset; + /** + * Internal data null block offset, not shifted. 
+ * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. + * @internal + */ + int32_t dataNullOffset; + /** @internal */ + uint32_t nullValue; + +#ifdef UCPTRIE_DEBUG + /** @internal */ + const char *name; +#endif +}; + +/** + * Internal implementation constants. + * These are needed for the API macros, but users should not use these directly. + * @internal + */ +enum { + /** @internal */ + UCPTRIE_FAST_SHIFT = 6, + + /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ + UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT, + + /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ + UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1, + + /** @internal */ + UCPTRIE_SMALL_MAX = 0xfff, + + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for out-of-range code points and ill-formed UTF-8/16. + * @internal + */ + UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1, + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for code points highStart..U+10FFFF. + * @internal + */ + UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2 +}; + +/* Internal functions and macros -------------------------------------------- */ + +/** @internal */ +U_INTERNAL int32_t U_EXPORT2 +ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c); + +/** @internal */ +U_INTERNAL int32_t U_EXPORT2 +ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3); + +/** + * Internal function for part of the UCPTRIE_FAST_U8_PREV() macro implementation. + * Do not call directly. + * @internal + */ +U_INTERNAL int32_t U_EXPORT2 +ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, + const uint8_t *start, const uint8_t *src); + +/** Internal trie getter for a code point below the fast limit. Returns the data index.
@internal */ +#define _UCPTRIE_FAST_INDEX(trie, c) \ + ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)) + +/** Internal trie getter for a code point at or above the fast limit. Returns the data index. @internal */ +#define _UCPTRIE_SMALL_INDEX(trie, c) \ + ((c) >= (trie)->highStart ? \ + (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \ + ucptrie_internalSmallIndex(trie, c)) + +/** + * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF. + * Returns the data index. + * @internal + */ +#define _UCPTRIE_CP_INDEX(trie, fastMax, c) \ + ((uint32_t)(c) <= (uint32_t)(fastMax) ? \ + _UCPTRIE_FAST_INDEX(trie, c) : \ + (uint32_t)(c) <= 0x10ffff ? \ + _UCPTRIE_SMALL_INDEX(trie, c) : \ + (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET) + +U_CDECL_END + +#endif diff --git a/icu4c/source/common/unicode/umutablecptrie.h b/icu4c/source/common/unicode/umutablecptrie.h new file mode 100644 index 00000000000..811471ca571 --- /dev/null +++ b/icu4c/source/common/unicode/umutablecptrie.h @@ -0,0 +1,215 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// umutablecptrie.h (split out of ucptrie.h) +// created: 2018jan24 Markus W. Scherer + +#ifndef __UMUTABLECPTRIE_H__ +#define __UMUTABLECPTRIE_H__ + +#include "unicode/utypes.h" +#include "unicode/localpointer.h" +#include "unicode/ucptrie.h" +#include "unicode/utf8.h" +#include "putilimp.h" +#include "udataswp.h" + +U_CDECL_BEGIN + +/** + * \file + * + * This file defines a mutable Unicode code point trie. + * + * @see UCPTrie + * @see UMutableCPTrie + */ + +/** + * Mutable Unicode code point trie. + * Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit integer values. + * For details see http://site.icu-project.org/design/struct/utrie + * + * Setting values (especially ranges) and lookup is fast. + * The mutable trie is only somewhat space-efficient. 
+ * It builds a compacted, immutable UCPTrie. + * + * This trie can be modified while iterating over its contents. + * For example, it is possible to merge its values with those from another + * set of ranges (e.g., another mutable or immutable trie): + * Iterate over those source ranges; for each of them iterate over this trie; + * add the source value into the value of each trie range. + * + * @see UCPTrie + * @see umutablecptrie_buildImmutable + * @draft ICU 63 + */ +struct UMutableCPTrie; +typedef struct UMutableCPTrie UMutableCPTrie; + +/** + * Creates a mutable trie that initially maps each Unicode code point to the same value. + * It uses 32-bit data values until umutablecptrie_buildImmutable() is called. + * umutablecptrie_buildImmutable() takes a valueWidth parameter which + * determines the number of bits in the data value in the resulting UCPTrie. + * You must umutablecptrie_close() the trie once you are done using it. + * + * @param initialValue the initial value that is set for all code points + * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16 + * @param pErrorCode an in/out ICU UErrorCode + * @return the trie + * @draft ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode); + +/** + * Clones a mutable trie. + * You must umutablecptrie_close() the clone once you are done using it. + * + * @param other the trie to clone + * @param pErrorCode an in/out ICU UErrorCode + * @return the trie clone + * @draft ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode); + +/** + * Closes a mutable trie and releases associated memory. 
+ * + * @param trie the trie + * @draft ICU 63 + */ +U_CAPI void U_EXPORT2 +umutablecptrie_close(UMutableCPTrie *trie); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUMutableCPTriePointer + * "Smart pointer" class, closes a UMutableCPTrie via umutablecptrie_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 63 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close); + +U_NAMESPACE_END + +#endif + +/** + * Creates a mutable trie with the same contents as the immutable one. + * You must umutablecptrie_close() the mutable trie once you are done using it. + * + * @param trie the immutable trie + * @param pErrorCode an in/out ICU UErrorCode + * @return the mutable trie + * @draft ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode); + +/** + * Returns the value for a code point as stored in the trie. + * + * @param trie the trie + * @param c the code point + * @return the value + * @draft ICU 63 + */ +U_CAPI uint32_t U_EXPORT2 +umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c); + +/** + * Returns the last code point such that all those from start to there have the same value. + * Can be used to efficiently iterate over all same-value ranges in a trie. + * The trie can be modified between calls to this function. + * + * If the UCPTrieValueFilter function pointer is not NULL, then + * the value to be delivered is passed through that function, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that function pointer is NULL. + * + * See the same-signature ucptrie_getRange() for a code sample. 
+ * + * @param trie the trie + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually UCPTRIE_RANGE_NORMAL + * @param surrogateValue value for surrogates; ignored if option==UCPTRIE_RANGE_NORMAL + * @param filter a pointer to a function that may modify the trie data value, + * or NULL if the values from the trie are to be used unmodified + * @param context an opaque pointer that is passed on to the filter function + * @param pValue if not NULL, receives the value that every code point start..end has; + * may have been modified by filter(context, trie value) + * if that function pointer is not NULL + * @return the range end code point, or -1 if start is not a valid code point + * @draft ICU 63 + */ +U_CAPI UChar32 U_EXPORT2 +umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, + UCPTrieRangeOption option, uint32_t surrogateValue, + UCPTrieValueFilter *filter, const void *context, uint32_t *pValue); + +/** + * Sets a value for a code point. + * + * @param trie the trie + * @param c the code point + * @param value the value + * @param pErrorCode an in/out ICU UErrorCode + * @draft ICU 63 + */ +U_CAPI void U_EXPORT2 +umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode); + +/** + * Sets a value for each code point [start..end]. + * Faster and more space-efficient than setting the value for each code point separately. + * + * @param trie the trie + * @param start the first code point to get the value + * @param end the last code point to get the value (inclusive) + * @param value the value + * @param pErrorCode an in/out ICU UErrorCode + * @draft ICU 63 + */ +U_CAPI void U_EXPORT2 +umutablecptrie_setRange(UMutableCPTrie *trie, + UChar32 start, UChar32 end, + uint32_t value, UErrorCode *pErrorCode); + +/** + * Compacts the data and builds an immutable UCPTrie according to the parameters. + * After this, the mutable trie will be empty. 
+ * + * Not every possible set of mappings can be built into a UCPTrie, + * because of limitations resulting from speed and space optimizations. + * Every Unicode assigned character can be mapped to a unique value. + * Typical data yields data structures far smaller than the limitations. + * + * It is possible to construct extremely unusual mappings that exceed the data structure limits. + * In such a case this function will fail with a U_INDEX_OUTOFBOUNDS_ERROR. + * + * @param trie the trie + * @param type selects the trie type + * @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits, + * then the values stored in the trie will be truncated first + * @param pErrorCode an in/out ICU UErrorCode + * + * @see umutablecptrie_fromUCPTrie + * @draft ICU 63 + */ +U_CAPI UCPTrie * U_EXPORT2 +umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, + UErrorCode *pErrorCode); + +U_CDECL_END + +#endif diff --git a/icu4c/source/common/utrie.h b/icu4c/source/common/utrie.h index 641027a1a3f..3e2197eda6c 100644 --- a/icu4c/source/common/utrie.h +++ b/icu4c/source/common/utrie.h @@ -21,7 +21,6 @@ #include "unicode/utypes.h" #include "unicode/utf16.h" -#include "udataswp.h" U_CDECL_BEGIN @@ -732,17 +731,13 @@ utrie_serialize(UNewTrie *trie, void *data, int32_t capacity, UBool reduceTo16Bits, UErrorCode *pErrorCode); -/** - * Swap a serialized UTrie. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - /* serialization ------------------------------------------------------------ */ +// UTrie signature values, in platform endianness and opposite endianness. +// The UTrie signature ASCII byte values spell "Trie". 
+#define UTRIE_SIG 0x54726965 +#define UTRIE_OE_SIG 0x65697254 + /** * Trie data structure in serialized form: * diff --git a/icu4c/source/common/utrie2.cpp b/icu4c/source/common/utrie2.cpp index fb6c67f0397..24ef5782c90 100644 --- a/icu4c/source/common/utrie2.cpp +++ b/icu4c/source/common/utrie2.cpp @@ -24,11 +24,10 @@ * This file contains only the runtime and enumeration code, for read-only access. * See utrie2_builder.c for the builder code. */ -#ifdef UTRIE2_DEBUG -# include -#endif - #include "unicode/utypes.h" +#ifdef UCPTRIE_DEBUG +#include "unicode/umutablecptrie.h" +#endif #include "unicode/utf.h" #include "unicode/utf8.h" #include "unicode/utf16.h" @@ -202,6 +201,9 @@ utrie2_openFromSerialized(UTrie2ValueBits valueBits, trie->memory=(uint32_t *)data; trie->length=actualLength; trie->isMemoryOwned=FALSE; +#ifdef UTRIE2_DEBUG + trie->name="fromSerialized"; +#endif /* set the pointers to its index and data arrays */ p16=(const uint16_t *)(header+1); @@ -294,6 +296,9 @@ utrie2_openDummy(UTrie2ValueBits valueBits, trie->errorValue=errorValue; trie->highStart=0; trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; +#ifdef UTRIE2_DEBUG + trie->name="dummy"; +#endif /* set the header fields */ header=(UTrie2Header *)trie->memory; @@ -373,34 +378,15 @@ utrie2_close(UTrie2 *trie) { } if(trie->newTrie!=NULL) { uprv_free(trie->newTrie->data); +#ifdef UCPTRIE_DEBUG + umutablecptrie_close(trie->newTrie->t3); +#endif uprv_free(trie->newTrie); } uprv_free(trie); } } -U_CAPI int32_t U_EXPORT2 -utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) { - uint32_t signature; - if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) { - return 0; - } - signature=*(const uint32_t *)data; - if(signature==UTRIE2_SIG) { - return 2; - } - if(anyEndianOk && signature==UTRIE2_OE_SIG) { - return 2; - } - if(signature==UTRIE_SIG) { - return 1; - } - if(anyEndianOk && signature==UTRIE_OE_SIG) { - return 1; - } - return 0; -} - U_CAPI UBool U_EXPORT2 
utrie2_isFrozen(const UTrie2 *trie) { return (UBool)(trie->newTrie==NULL); @@ -430,96 +416,6 @@ utrie2_serialize(const UTrie2 *trie, return trie->length; } -U_CAPI int32_t U_EXPORT2 -utrie2_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UTrie2Header *inTrie; - UTrie2Header trie; - int32_t dataLength, size; - UTrie2ValueBits valueBits; - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - inTrie=(const UTrie2Header *)inData; - trie.signature=ds->readUInt32(inTrie->signature); - trie.options=ds->readUInt16(inTrie->options); - trie.indexLength=ds->readUInt16(inTrie->indexLength); - trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); - - valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); - dataLength=(int32_t)trie.shiftedDataLength<=0) { - UTrie2Header *outTrie; - - if(lengthswapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); - ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); - - /* swap the index and the data */ - switch(valueBits) { - case UTRIE2_16_VALUE_BITS: - ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); - break; - case UTRIE2_32_VALUE_BITS: - ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); - ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, - (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); - break; - default: - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - } - - return size; -} - -// utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c -// to avoid a dependency from utrie2.cpp on utrie.c. 
- /* enumeration -------------------------------------------------------------- */ #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b)) diff --git a/icu4c/source/common/utrie2.h b/icu4c/source/common/utrie2.h index 8e1caa5e90b..75028ee23ac 100644 --- a/icu4c/source/common/utrie2.h +++ b/icu4c/source/common/utrie2.h @@ -22,7 +22,6 @@ #include "unicode/utypes.h" #include "unicode/utf8.h" #include "putilimp.h" -#include "udataswp.h" U_CDECL_BEGIN @@ -330,40 +329,6 @@ utrie2_serialize(const UTrie2 *trie, /* Public UTrie2 API: miscellaneous functions ------------------------------- */ -/** - * Get the UTrie version from 32-bit-aligned memory containing the serialized form - * of either a UTrie (version 1) or a UTrie2 (version 2). - * - * @param data a pointer to 32-bit-aligned memory containing the serialized form - * of a UTrie, version 1 or 2 - * @param length the number of bytes available at data; - * can be more than necessary (see return value) - * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. - * If TRUE, opposite-endian serialized forms are recognized as well. - * @return the UTrie version of the serialized form, or 0 if it is not - * recognized as a serialized UTrie - */ -U_CAPI int32_t U_EXPORT2 -utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk); - -/** - * Swap a serialized UTrie2. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie2_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Swap a serialized UTrie or UTrie2. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie2_swapAnyVersion(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - /** * Build a UTrie2 (version 2) from a UTrie (version 1). * Enumerates all values in the UTrie and builds a UTrie2 with the same values. 
@@ -709,6 +674,10 @@ struct UTrie2 { UBool padding1; int16_t padding2; UNewTrie2 *newTrie; /* builder object; NULL when frozen */ + +#ifdef UTRIE2_DEBUG + const char *name; +#endif }; /** diff --git a/icu4c/source/common/utrie2_builder.cpp b/icu4c/source/common/utrie2_builder.cpp index 6a92b442c31..80e09c9c26b 100644 --- a/icu4c/source/common/utrie2_builder.cpp +++ b/icu4c/source/common/utrie2_builder.cpp @@ -24,16 +24,23 @@ * This file contains only the builder code. * See utrie2.c for the runtime and enumeration code. */ +// #define UTRIE2_DEBUG #ifdef UTRIE2_DEBUG # include #endif +// #define UCPTRIE_DEBUG #include "unicode/utypes.h" +#ifdef UCPTRIE_DEBUG +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "ucptrie_impl.h" +#endif #include "cmemory.h" #include "utrie2.h" #include "utrie2_impl.h" -#include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */ +#include "utrie.h" // for utrie2_fromUTrie() /* Implementation notes ----------------------------------------------------- */ @@ -132,8 +139,14 @@ utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) trie->errorValue=errorValue; trie->highStart=0x110000; trie->newTrie=newTrie; +#ifdef UTRIE2_DEBUG + trie->name="open"; +#endif newTrie->data=data; +#ifdef UCPTRIE_DEBUG + newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode); +#endif newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH; newTrie->initialValue=initialValue; newTrie->errorValue=errorValue; @@ -246,6 +259,14 @@ cloneBuilder(const UNewTrie2 *other) { uprv_free(trie); return NULL; } +#ifdef UCPTRIE_DEBUG + if(other->t3==nullptr) { + trie->t3=nullptr; + } else { + UErrorCode errorCode=U_ZERO_ERROR; + trie->t3=umutablecptrie_clone(other->t3, &errorCode); + } +#endif trie->dataCapacity=other->dataCapacity; /* clone data */ @@ -343,6 +364,22 @@ copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { } #ifdef UTRIE2_DEBUG +static long countInitial(const UTrie2 
*trie) { + uint32_t initialValue=trie->initialValue; + int32_t length=trie->dataLength; + long count=0; + if(trie->data16!=nullptr) { + for(int32_t i=0; idata16[i]==initialValue) { ++count; } + } + } else { + for(int32_t i=0; idata32[i]==initialValue) { ++count; } + } + } + return count; +} + static void utrie_printLengths(const UTrie *trie) { long indexLength=trie->indexLength; @@ -357,8 +394,8 @@ utrie2_printLengths(const UTrie2 *trie, const char *which) { long indexLength=trie->indexLength; long dataLength=(long)trie->dataLength; long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); - printf("**UTrie2Lengths(%s)** index:%6ld data:%6ld serialized:%6ld\n", - which, indexLength, dataLength, totalLength); + printf("**UTrie2Lengths(%s %s)** index:%6ld data:%6ld countInitial:%6ld serialized:%6ld\n", + which, trie->name, indexLength, dataLength, countInitial(trie), totalLength); } #endif @@ -622,6 +659,9 @@ set32(UNewTrie2 *trie, *pErrorCode=U_NO_WRITE_PERMISSION; return; } +#ifdef UCPTRIE_DEBUG + umutablecptrie_set(trie->t3, c, value, pErrorCode); +#endif block=getDataBlock(trie, c, forLSCP); if(block<0) { @@ -717,6 +757,9 @@ utrie2_setRange32(UTrie2 *trie, *pErrorCode=U_NO_WRITE_PERMISSION; return; } +#ifdef UCPTRIE_DEBUG + umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode); +#endif if(!overwrite && value==newTrie->initialValue) { return; /* nothing to do */ } @@ -732,7 +775,7 @@ utrie2_setRange32(UTrie2 *trie, return; } - nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK; + nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK; if(nextStart<=limit) { fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, value, newTrie->initialValue, overwrite); @@ -983,6 +1026,10 @@ findHighStart(UNewTrie2 *trie, uint32_t highValue) { */ static void compactData(UNewTrie2 *trie) { +#ifdef UTRIE2_DEBUG + int32_t countSame=0, sumOverlaps=0; +#endif + int32_t start, newStart, movedStart; 
int32_t blockLength, overlap; int32_t i, mapIndex, blockCount; @@ -1023,6 +1070,9 @@ compactData(UNewTrie2 *trie) { if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength)) >=0 ) { +#ifdef UTRIE2_DEBUG + ++countSame; +#endif /* found an identical block, set the other block's index value for the current block */ for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { trie->map[mapIndex++]=movedStart; @@ -1042,6 +1092,9 @@ compactData(UNewTrie2 *trie) { overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap); overlap-=UTRIE2_DATA_GRANULARITY) {} +#ifdef UTRIE2_DEBUG + sumOverlaps+=overlap; +#endif if(overlap>0 || newStart%lu\n", - (long)trie->dataLength, (long)newStart); + printf("compacting UTrie2: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", + (long)trie->dataLength, (long)newStart, (long)countSame, (long)sumOverlaps); #endif trie->dataLength=newStart; @@ -1163,7 +1216,7 @@ compactIndex2(UNewTrie2 *trie) { #ifdef UTRIE2_DEBUG /* we saved some space */ - printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n", + printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n", (long)trie->index2Length, (long)newStart); #endif @@ -1193,7 +1246,7 @@ compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { trie->highStart=newTrie->highStart=highStart; #ifdef UTRIE2_DEBUG - printf("UTrie2: highStart U+%04lx highValue 0x%lx initialValue 0x%lx\n", + printf("UTrie2: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", (long)highStart, (long)highValue, (long)trie->initialValue); #endif @@ -1211,7 +1264,7 @@ compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { compactIndex2(newTrie); #ifdef UTRIE2_DEBUG } else { - printf("UTrie2: highStart U+%04lx count of 16-bit index-2 words %lu->%lu\n", + printf("UTrie2: highStart U+%04lx count of 16-bit index words %lu->%lu\n", (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET); #endif } @@ -1411,31 +1464,18 @@ 
utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) { return; } +#ifdef UTRIE2_DEBUG + utrie2_printLengths(trie, ""); +#endif + +#ifdef UCPTRIE_DEBUG + umutablecptrie_setName(newTrie->t3, trie->name); + ucptrie_close( + umutablecptrie_buildImmutable( + newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode)); +#endif /* Delete the UNewTrie2. */ uprv_free(newTrie->data); uprv_free(newTrie); trie->newTrie=NULL; } - -/* - * This is here to avoid a dependency from utrie2.cpp on utrie.c. - * This file already depends on utrie.c. - * Otherwise, this should be in utrie2.cpp right after utrie2_swap(). - */ -U_CAPI int32_t U_EXPORT2 -utrie2_swapAnyVersion(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - if(U_SUCCESS(*pErrorCode)) { - switch(utrie2_getVersion(inData, length, TRUE)) { - case 1: - return utrie_swap(ds, inData, length, outData, pErrorCode); - case 2: - return utrie2_swap(ds, inData, length, outData, pErrorCode); - default: - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - } - return 0; -} diff --git a/icu4c/source/common/utrie2_impl.h b/icu4c/source/common/utrie2_impl.h index b7dc9d3fb45..2a14db3a6bd 100644 --- a/icu4c/source/common/utrie2_impl.h +++ b/icu4c/source/common/utrie2_impl.h @@ -22,22 +22,20 @@ #ifndef __UTRIE2_IMPL_H__ #define __UTRIE2_IMPL_H__ +#ifdef UCPTRIE_DEBUG +#include "unicode/umutablecptrie.h" +#endif #include "utrie2.h" /* Public UTrie2 API implementation ----------------------------------------- */ /* - * These definitions are mostly needed by utrie2.c, + * These definitions are mostly needed by utrie2.cpp, * but also by utrie2_serialize() and utrie2_swap(). */ -/* - * UTrie and UTrie2 signature values, - * in platform endianness and opposite endianness. - */ -#define UTRIE_SIG 0x54726965 -#define UTRIE_OE_SIG 0x65697254 - +// UTrie2 signature values, in platform endianness and opposite endianness. 
+// The UTrie2 signature ASCII byte values spell "Tri2". #define UTRIE2_SIG 0x54726932 #define UTRIE2_OE_SIG 0x32697254 @@ -145,6 +143,9 @@ struct UNewTrie2 { int32_t index1[UNEWTRIE2_INDEX_1_LENGTH]; int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH]; uint32_t *data; +#ifdef UCPTRIE_DEBUG + UMutableCPTrie *t3; +#endif uint32_t initialValue, errorValue; int32_t index2Length, dataCapacity, dataLength; diff --git a/icu4c/source/common/utrie_swap.cpp b/icu4c/source/common/utrie_swap.cpp new file mode 100644 index 00000000000..5abe7bd5d77 --- /dev/null +++ b/icu4c/source/common/utrie_swap.cpp @@ -0,0 +1,344 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// utrie_swap.cpp +// created: 2018aug08 Markus W. Scherer + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "ucptrie_impl.h" +#include "udataswp.h" +#include "utrie.h" +#include "utrie2_impl.h" + +// These functions for swapping different generations of ICU code point tries are here +// so that their implementation files need not depend on swapper code, +// need not depend on each other, and so that other swapper code +// need not depend on other trie code. 
+ +namespace { + +constexpr int32_t ASCII_LIMIT = 0x80; + +} // namespace + +U_CAPI int32_t U_EXPORT2 +utrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UTrieHeader *inTrie; + UTrieHeader trie; + int32_t size; + UBool dataIs32; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && (uint32_t)lengthreadUInt32(inTrie->signature); + trie.options=ds->readUInt32(inTrie->options); + trie.indexLength=udata_readInt32(ds, inTrie->indexLength); + trie.dataLength=udata_readInt32(ds, inTrie->dataLength); + + if( trie.signature!=0x54726965 || + (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || + ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || + trie.indexLength=0) { + UTrieHeader *outTrie; + + if(lengthswapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); + + /* swap the index and the data */ + if(dataIs32) { + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); + } else { + ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); + } + } + + return size; +} + +U_CAPI int32_t U_EXPORT2 +utrie2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UTrie2Header *inTrie; + UTrie2Header trie; + int32_t dataLength, size; + UTrie2ValueBits valueBits; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { + 
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inTrie=(const UTrie2Header *)inData; + trie.signature=ds->readUInt32(inTrie->signature); + trie.options=ds->readUInt16(inTrie->options); + trie.indexLength=ds->readUInt16(inTrie->indexLength); + trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); + + valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); + dataLength=(int32_t)trie.shiftedDataLength<=0) { + UTrie2Header *outTrie; + + if(lengthswapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); + ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); + + /* swap the index and the data */ + switch(valueBits) { + case UTRIE2_16_VALUE_BITS: + ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); + break; + case UTRIE2_32_VALUE_BITS: + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + } + + return size; +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UCPTrieHeader *inTrie; + UCPTrieHeader trie; + int32_t dataLength, size; + UCPTrieValueWidth valueWidth; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inTrie=(const UCPTrieHeader *)inData; + trie.signature=ds->readUInt32(inTrie->signature); + trie.options=ds->readUInt16(inTrie->options); + trie.indexLength=ds->readUInt16(inTrie->indexLength); + trie.dataLength = 
ds->readUInt16(inTrie->dataLength); + + UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3); + valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK); + dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength; + + int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ? + UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; + if( trie.signature!=UCPTRIE_SIG || + type > UCPTRIE_TYPE_SMALL || + (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 || + valueWidth > UCPTRIE_VALUE_BITS_8 || + trie.indexLength < minIndexLength || + dataLength < ASCII_LIMIT + ) { + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */ + return 0; + } + + size=sizeof(UCPTrieHeader)+trie.indexLength*2; + switch(valueWidth) { + case UCPTRIE_VALUE_BITS_16: + size+=dataLength*2; + break; + case UCPTRIE_VALUE_BITS_32: + size+=dataLength*4; + break; + case UCPTRIE_VALUE_BITS_8: + size+=dataLength; + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + if(length>=0) { + UCPTrieHeader *outTrie; + + if(lengthswapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); + ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); + + /* swap the index and the data */ + switch(valueWidth) { + case UCPTRIE_VALUE_BITS_16: + ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); + break; + case UCPTRIE_VALUE_BITS_32: + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); + break; + case UCPTRIE_VALUE_BITS_8: + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + if(inTrie!=outTrie) { + uprv_memmove((outTrie+1)+trie.indexLength, (inTrie+1)+trie.indexLength, dataLength); + } + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + } + + return size; +} + 
+namespace { + +/** + * Gets the trie version from 32-bit-aligned memory containing the serialized form + * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3). + * + * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie + * @param length the number of bytes available at data; + * can be more than necessary (see return value) + * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. + * If TRUE, opposite-endian serialized forms are recognized as well. + * @return the trie version of the serialized form, or 0 if it is not + * recognized as a serialized trie + */ +int32_t +getVersion(const void *data, int32_t length, UBool anyEndianOk) { + uint32_t signature; + if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) { + return 0; + } + signature=*(const uint32_t *)data; + if(signature==UCPTRIE_SIG) { + return 3; + } + if(anyEndianOk && signature==UCPTRIE_OE_SIG) { + return 3; + } + if(signature==UTRIE2_SIG) { + return 2; + } + if(anyEndianOk && signature==UTRIE2_OE_SIG) { + return 2; + } + if(signature==UTRIE_SIG) { + return 1; + } + if(anyEndianOk && signature==UTRIE_OE_SIG) { + return 1; + } + return 0; +} + +} // namespace + +U_CAPI int32_t U_EXPORT2 +utrie_swapAnyVersion(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { return 0; } + switch(getVersion(inData, length, TRUE)) { + case 1: + return utrie_swap(ds, inData, length, outData, pErrorCode); + case 2: + return utrie2_swap(ds, inData, length, outData, pErrorCode); + case 3: + return ucptrie_swap(ds, inData, length, outData, pErrorCode); + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } +} diff --git a/icu4c/source/common/uts46.cpp b/icu4c/source/common/uts46.cpp index 5a23572eb64..b9e6cb023bb 100644 --- a/icu4c/source/common/uts46.cpp +++ b/icu4c/source/common/uts46.cpp @@ -557,7 +557,10 @@ 
UTS46::processUnicode(const UnicodeString &src, destArray=dest.getBuffer(); destLength+=newLength-labelLength; labelLimit=labelStart+=newLength+1; - } else if(0xdf<=c && c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) { + continue; + } else if(c<0xdf) { + // pass + } else if(c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) { info.isTransDiff=TRUE; if(doMapDevChars) { destLength=mapDevChars(dest, labelStart, labelLimit, errorCode); @@ -565,15 +568,23 @@ UTS46::processUnicode(const UnicodeString &src, return dest; } destArray=dest.getBuffer(); - // Do not increment labelLimit in case c was removed. // All deviation characters have been mapped, no need to check for them again. doMapDevChars=FALSE; - } else { - ++labelLimit; + // Do not increment labelLimit in case c was removed. + continue; + } + } else if(U16_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c) ? + (labelLimit+1)==destLength || !U16_IS_TRAIL(destArray[labelLimit+1]) : + labelLimit==labelStart || !U16_IS_LEAD(destArray[labelLimit-1])) { + // Map an unpaired surrogate to U+FFFD before normalization so that when + // that removes characters we do not turn two unpaired ones into a pair. + info.labelErrors|=UIDNA_ERROR_DISALLOWED; + dest.setCharAt(labelLimit, 0xfffd); + destArray=dest.getBuffer(); } - } else { - ++labelLimit; } + ++labelLimit; } // Permit an empty label at the end (0FFFD # NA .. D7C7..D7CA >FFFD # NA .. # D7CB..D7FB valid # 5.2 HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH D7FC..D7FF >FFFD # NA .. -D800..DFFF >FFFD # 2.0 .. +# D800..DFFF >FFFD # 2.0 .. E000..F8FF >FFFD # 1.1 .. 
F900 >8C48 # 1.1 CJK COMPATIBILITY IDEOGRAPH-F900 F901 >66F4 # 1.1 CJK COMPATIBILITY IDEOGRAPH-F901 diff --git a/icu4c/source/i18n/uspoof_impl.h b/icu4c/source/i18n/uspoof_impl.h index 2985c7317f4..0eee0e58a8a 100644 --- a/icu4c/source/i18n/uspoof_impl.h +++ b/icu4c/source/i18n/uspoof_impl.h @@ -20,7 +20,7 @@ #include "unicode/uspoof.h" #include "unicode/uscript.h" #include "unicode/udata.h" - +#include "udataswp.h" #include "utrie2.h" #if !UCONFIG_NO_NORMALIZATION diff --git a/icu4c/source/test/cintltst/Makefile.in b/icu4c/source/test/cintltst/Makefile.in index e7bf69e18d3..208fb787321 100644 --- a/icu4c/source/test/cintltst/Makefile.in +++ b/icu4c/source/test/cintltst/Makefile.in @@ -48,7 +48,7 @@ cnmdptst.o cnormtst.o cnumtst.o crelativedateformattest.o crestst.o creststn.o c cucdapi.o cucdtst.o custrtst.o cstrcase.o cutiltst.o nucnvtst.o nccbtst.o bocu1tst.o \ cbiditst.o cbididat.o eurocreg.o udatatst.o utf16tst.o utransts.o \ ncnvfbts.o ncnvtst.o putiltst.o cstrtest.o udatpg_test.o utf8tst.o \ -stdnmtst.o usrchtst.o custrtrn.o sorttest.o trietest.o trie2test.o usettest.o \ +stdnmtst.o usrchtst.o custrtrn.o sorttest.o trietest.o trie2test.o ucptrietest.o usettest.o \ uenumtst.o utmstest.o currtest.o \ idnatest.o nfsprep.o spreptst.o sprpdata.o \ hpmufn.o tracetst.o reapits.o uregiontest.o ulistfmttest.o\ diff --git a/icu4c/source/test/cintltst/cintltst.vcxproj b/icu4c/source/test/cintltst/cintltst.vcxproj index 143e9176a1a..6388eb84785 100644 --- a/icu4c/source/test/cintltst/cintltst.vcxproj +++ b/icu4c/source/test/cintltst/cintltst.vcxproj @@ -182,6 +182,7 @@ + @@ -284,4 +285,4 @@ - \ No newline at end of file + diff --git a/icu4c/source/test/cintltst/cintltst.vcxproj.filters b/icu4c/source/test/cintltst/cintltst.vcxproj.filters index be69d72a8a7..ff185c66e6a 100644 --- a/icu4c/source/test/cintltst/cintltst.vcxproj.filters +++ b/icu4c/source/test/cintltst/cintltst.vcxproj.filters @@ -123,6 +123,9 @@ collections + + collections + collections @@ -417,4 +420,4 @@ 
sprep & idna - \ No newline at end of file + diff --git a/icu4c/source/test/cintltst/cutiltst.c b/icu4c/source/test/cintltst/cutiltst.c index aa52970604d..5f43c48ad28 100644 --- a/icu4c/source/test/cintltst/cutiltst.c +++ b/icu4c/source/test/cintltst/cutiltst.c @@ -27,6 +27,7 @@ void addHashtableTest(TestNode** root); void addCStringTest(TestNode** root); void addTrieTest(TestNode** root); void addTrie2Test(TestNode** root); +void addUCPTrieTest(TestNode** root); void addEnumerationTest(TestNode** root); void addPosixTest(TestNode** root); void addSortTest(TestNode** root); @@ -38,6 +39,7 @@ void addUtility(TestNode** root) addCStringTest(root); addTrieTest(root); addTrie2Test(root); + addUCPTrieTest(root); addLocaleTest(root); addCLDRTest(root); addUnicodeTest(root); diff --git a/icu4c/source/test/cintltst/trie2test.c b/icu4c/source/test/cintltst/trie2test.c index 9444159beda..aa7e7c64671 100644 --- a/icu4c/source/test/cintltst/trie2test.c +++ b/icu4c/source/test/cintltst/trie2test.c @@ -421,7 +421,7 @@ testTrieUTF8(const char *testName, prevCP=c; --c; /* end of the range */ U8_APPEND_UNSAFE(s, length, c); - if(U_IS_SURROGATE(prevCP)) { + if(U_IS_SURROGATE(c)) { // A surrogate byte sequence counts as 3 single-byte errors. 
values[countValues++]=errorValue; values[countValues++]=errorValue; @@ -1287,31 +1287,6 @@ GrowDataArrayTest(void) { /* versions 1 and 2 --------------------------------------------------------- */ -static void -GetVersionTest(void) { - uint32_t data[4]; - if( /* version 1 */ - (data[0]=0x54726965, 1!=utrie2_getVersion(data, sizeof(data), FALSE)) || - (data[0]=0x54726965, 1!=utrie2_getVersion(data, sizeof(data), TRUE)) || - (data[0]=0x65697254, 0!=utrie2_getVersion(data, sizeof(data), FALSE)) || - (data[0]=0x65697254, 1!=utrie2_getVersion(data, sizeof(data), TRUE)) || - /* version 2 */ - (data[0]=0x54726932, 2!=utrie2_getVersion(data, sizeof(data), FALSE)) || - (data[0]=0x54726932, 2!=utrie2_getVersion(data, sizeof(data), TRUE)) || - (data[0]=0x32697254, 0!=utrie2_getVersion(data, sizeof(data), FALSE)) || - (data[0]=0x32697254, 2!=utrie2_getVersion(data, sizeof(data), TRUE)) || - /* illegal arguments */ - (data[0]=0x54726932, 0!=utrie2_getVersion(NULL, sizeof(data), FALSE)) || - (data[0]=0x54726932, 0!=utrie2_getVersion(data, 3, FALSE)) || - (data[0]=0x54726932, 0!=utrie2_getVersion((char *)data+1, sizeof(data), FALSE)) || - /* unknown signature values */ - (data[0]=0x11223344, 0!=utrie2_getVersion(data, sizeof(data), FALSE)) || - (data[0]=0x54726933, 0!=utrie2_getVersion(data, sizeof(data), FALSE)) - ) { - log_err("error: utrie2_getVersion() is not working as expected\n"); - } -} - static UNewTrie * makeNewTrie1WithRanges(const char *testName, const SetRange setRanges[], int32_t countSetRanges, @@ -1455,6 +1430,5 @@ addTrie2Test(TestNode** root) { addTest(root, &DummyTrieTest, "tsutil/trie2test/DummyTrieTest"); addTest(root, &FreeBlocksTest, "tsutil/trie2test/FreeBlocksTest"); addTest(root, &GrowDataArrayTest, "tsutil/trie2test/GrowDataArrayTest"); - addTest(root, &GetVersionTest, "tsutil/trie2test/GetVersionTest"); addTest(root, &Trie12ConversionTest, "tsutil/trie2test/Trie12ConversionTest"); } diff --git a/icu4c/source/test/cintltst/ucptrietest.c 
b/icu4c/source/test/cintltst/ucptrietest.c new file mode 100644 index 00000000000..b92f2ed1814 --- /dev/null +++ b/icu4c/source/test/cintltst/ucptrietest.c @@ -0,0 +1,1506 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucptrietest.c (modified from trie2test.c) +// created: 2017dec29 Markus W. Scherer + +#include +#include "unicode/utypes.h" +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "unicode/utf.h" +#include "unicode/utf16.h" +#include "unicode/utf8.h" +#include "uassert.h" +#include "ucptrie_impl.h" +#include "utrie.h" +#include "cstring.h" +#include "cmemory.h" +#include "udataswp.h" +#include "cintltst.h" + +void addUCPTrieTest(TestNode** root); + +/* Values for setting possibly overlapping, out-of-order ranges of values */ +typedef struct SetRange { + UChar32 start, limit; + uint32_t value; +} SetRange; + +/* + * Values for testing: + * value is set from the previous boundary's limit to before + * this boundary's limit + * + * There must be an entry with limit 0 and the intialValue. + * It may be preceded by an entry with negative limit and the errorValue. 
+ */ +typedef struct CheckRange { + UChar32 limit; + uint32_t value; +} CheckRange; + +static int32_t +skipSpecialValues(const CheckRange checkRanges[], int32_t countCheckRanges) { + int32_t i; + for(i=0; i= 0) { + log_err("error: %s getRanges (%s) fails to deliver range [U+%04lx..U+%04lx].0x%lx\n", + name, variant, (long)start, (long)expEnd, (long)expValue); + } + return FALSE; + } + if (expEnd < 0) { + log_err("error: %s getRanges (%s) delivers unexpected range [U+%04lx..U+%04lx].0x%lx\n", + name, variant, (long)start, (long)end, (long)value); + return FALSE; + } + if (end != expEnd || value != expValue) { + log_err("error: %s getRanges (%s) delivers wrong range [U+%04lx..U+%04lx].0x%lx " + "instead of [U+%04lx..U+%04lx].0x%lx\n", + name, variant, (long)start, (long)end, (long)value, + (long)start, (long)expEnd, (long)expValue); + return FALSE; + } + return TRUE; +} + +// Test iteration starting from various UTF-8/16 and trie structure boundaries. +// Also test starting partway through lead & trail surrogates for fixed-surrogate-value options, +// and partway through supplementary code points. +static UChar32 iterStarts[] = { + 0, 0x7f, 0x80, 0x7ff, 0x800, 0xfff, 0x1000, + 0xd7ff, 0xd800, 0xd888, 0xdddd, 0xdfff, 0xe000, + 0xffff, 0x10000, 0x12345, 0x10ffff, 0x110000 +}; + +static void +testTrieGetRanges(const char *testName, const UCPTrie *trie, const UMutableCPTrie *mutableTrie, + UCPTrieRangeOption option, uint32_t surrValue, + const CheckRange checkRanges[], int32_t countCheckRanges) { + const char *const typeName = trie == NULL ? "mutableTrie" : "trie"; + const char *const optionName = option == UCPTRIE_RANGE_NORMAL ? "normal" : + option == UCPTRIE_RANGE_FIXED_LEAD_SURROGATES ? "fixedLeadSurr" : "fixedAllSurr"; + char name[80]; + int32_t s; + for (s = 0; s < UPRV_LENGTHOF(iterStarts); ++s) { + UChar32 start = iterStarts[s]; + int32_t i, i0; + UChar32 end, expEnd; + uint32_t value, expValue; + // No need to go from each iteration start to the very end. 
+ int32_t innerLoopCount; + + sprintf(name, "%s/%s(%s) min=U+%04lx", typeName, optionName, testName, (long)start); + + // Skip over special values and low ranges. + for (i = 0; i < countCheckRanges && checkRanges[i].limit <= start; ++i) {} + i0 = i; + // without value handler + for (innerLoopCount = 0;; ++i, start = end + 1) { + if (i < countCheckRanges) { + expEnd = checkRanges[i].limit - 1; + expValue = checkRanges[i].value; + } else { + expEnd = -1; + expValue = value = 0x5005; + } + end = trie != NULL ? + ucptrie_getRange(trie, start, option, surrValue, NULL, NULL, &value) : + umutablecptrie_getRange(mutableTrie, start, option, surrValue, NULL, NULL, &value); + if (!doCheckRange(name, "without value handler", start, end, value, expEnd, expValue)) { + break; + } + if (s != 0 && ++innerLoopCount == 5) { break; } + } + // with value handler + for (i = i0, start = iterStarts[s], innerLoopCount = 0;; ++i, start = end + 1) { + if (i < countCheckRanges) { + expEnd = checkRanges[i].limit - 1; + expValue = checkRanges[i].value ^ 0x5555; + } else { + expEnd = -1; + expValue = value = 0x5005; + } + end = trie != NULL ? + ucptrie_getRange(trie, start, option, surrValue ^ 0x5555, testFilter, NULL, &value) : + umutablecptrie_getRange(mutableTrie, start, option, surrValue ^ 0x5555, + testFilter, NULL, &value); + if (!doCheckRange(name, "with value handler", start, end, value, expEnd, expValue)) { + break; + } + if (s != 0 && ++innerLoopCount == 5) { break; } + } + // without value + for (i = i0, start = iterStarts[s], innerLoopCount = 0;; ++i, start = end + 1) { + if (i < countCheckRanges) { + expEnd = checkRanges[i].limit - 1; + } else { + expEnd = -1; + } + end = trie != NULL ? 
+ ucptrie_getRange(trie, start, option, surrValue, NULL, NULL, NULL) : + umutablecptrie_getRange(mutableTrie, start, option, surrValue, NULL, NULL, NULL); + if (!doCheckRange(name, "without value", start, end, 0, expEnd, 0)) { + break; + } + if (s != 0 && ++innerLoopCount == 5) { break; } + } + } +} + +static void +testTrieGetters(const char *testName, const UCPTrie *trie, + UCPTrieType type, UCPTrieValueWidth valueWidth, + const CheckRange checkRanges[], int32_t countCheckRanges) { + uint32_t initialValue, errorValue; + uint32_t value, value2; + UChar32 start, limit; + int32_t i, countSpecials; + int32_t countErrors=0; + + const char *const typeName = "trie"; + + countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); + + start=0; + for(i=countSpecials; i10) { + return; + } + } + } + + /* test linear ASCII range from the data array pointer (access to "internal" field) */ + start=0; + for(i=countSpecials; idata.ptr16[start]; + } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { + value2=trie->data.ptr32[start]; + } else { + value2=trie->data.ptr8[start]; + } + if(value!=value2) { + log_err("error: %s(%s).asciiData[U+%04lx]==0x%lx instead of 0x%lx\n", + typeName, testName, (long)start, (long)value2, (long)value); + ++countErrors; + } + ++start; + if(countErrors>10) { + return; + } + } + } + + /* test errorValue */ + if (type == UCPTRIE_TYPE_FAST) { + if(valueWidth==UCPTRIE_VALUE_BITS_16) { + value = UCPTRIE_FAST_GET(trie, UCPTRIE_16, -1); + value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_16, 0x110000); + } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { + value = UCPTRIE_FAST_GET(trie, UCPTRIE_32, -1); + value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_32, 0x110000); + } else { + value = UCPTRIE_FAST_GET(trie, UCPTRIE_8, -1); + value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_8, 0x110000); + } + } else { + if(valueWidth==UCPTRIE_VALUE_BITS_16) { + value = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, -1); + value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, 0x110000); + } else 
if(valueWidth==UCPTRIE_VALUE_BITS_32) { + value = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, -1); + value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, 0x110000); + } else { + value = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, -1); + value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, 0x110000); + } + } + if(value!=errorValue || value2!=errorValue) { + log_err("error: %s(%s).get(out of range) != errorValue\n", + typeName, testName); + } + value=ucptrie_get(trie, -1); + value2=ucptrie_get(trie, 0x110000); + if(value!=errorValue || value2!=errorValue) { + log_err("error: %s(%s).get(out of range) != errorValue\n", + typeName, testName); + } +} + +static void +testBuilderGetters(const char *testName, const UMutableCPTrie *mutableTrie, + const CheckRange checkRanges[], int32_t countCheckRanges) { + uint32_t initialValue, errorValue; + uint32_t value, value2; + UChar32 start, limit; + int32_t i, countSpecials; + int32_t countErrors=0; + + const char *const typeName = "mutableTrie"; + + countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); + + start=0; + for(i=countSpecials; i10) { + return; + } + } + } + + /* test errorValue */ + value=umutablecptrie_get(mutableTrie, -1); + value2=umutablecptrie_get(mutableTrie, 0x110000); + if(value!=errorValue || value2!=errorValue) { + log_err("error: %s(%s).get(out of range) != errorValue\n", + typeName, testName); + } +} + +#define ACCIDENTAL_SURROGATE_PAIR(s, length, cp) (length > 0 && U16_IS_LEAD(s[length-1]) && U_IS_TRAIL(cp)) + +static void +testTrieUTF16(const char *testName, + const UCPTrie *trie, UCPTrieValueWidth valueWidth, + const CheckRange checkRanges[], int32_t countCheckRanges) { + UChar s[30000]; + uint32_t values[16000]; + + const UChar *p, *limit; + + uint32_t errorValue = ucptrie_get(trie, -1); + uint32_t value, expected; + UChar32 prevCP, c, c2; + int32_t i, length, sIndex, countValues; + + /* write a string */ + prevCP=0; + length=countValues=0; + for(i=skipSpecialValues(checkRanges, countCheckRanges); 
iUPRV_LENGTHOF(s)) { + log_err("UTF-16 test string length %d > capacity %d\n", (int)length, (int)UPRV_LENGTHOF(s)); + return; + } + if(countValues>UPRV_LENGTHOF(values)) { + log_err("UTF-16 test values length %d > capacity %d\n", (int)countValues, (int)UPRV_LENGTHOF(values)); + return; + } + + /* try forward */ + p=s; + i=0; + while(pUPRV_LENGTHOF(s)) { + log_err("UTF-8 test string length %d > capacity %d\n", (int)length, (int)UPRV_LENGTHOF(s)); + return; + } + if(countValues>UPRV_LENGTHOF(values)) { + log_err("UTF-8 test values length %d > capacity %d\n", (int)countValues, (int)UPRV_LENGTHOF(values)); + return; + } + + /* try forward */ + p=s; + i=0; + while(pU+%04lx) (read %d bytes): " + "0x%lx instead of 0x%lx (from bytes %lx)\n", + testName, (int)prev8, (unsigned long)actualBytes, (long)c, (int)((p-s)-prev8), + (long)value, (long)values[i], (unsigned long)expectedBytes); + } + if(i8!=(p-s)) { + log_err("error: wrong end index from UCPTRIE_FAST_U8_NEXT(%s)(from %d %lx->U+%04lx): " + "%ld != %ld (bytes %lx)\n", + testName, (int)prev8, (unsigned long)actualBytes, (long)c, + (long)(p-s), (long)i8, (unsigned long)expectedBytes); + break; + } + ++i; + } + + /* try backward */ + p=limit; + i=countValues; + while(sU+%04lx) (read %d bytes): " + "0x%lx instead of 0x%lx (from bytes %lx)\n", + testName, (int)prev8, (unsigned long)actualBytes, (long)c, (int)(prev8-(p-s)), + (long)value, (long)values[i], (unsigned long)expectedBytes); + } + if(i8!=(p-s)) { + log_err("error: wrong end index from UCPTRIE_FAST_U8_PREV(%s)(from %d %lx->U+%04lx): " + "%ld != %ld (bytes %lx)\n", + testName, (int)prev8, (unsigned long)actualBytes, (long)c, + (long)(p-s), (long)i8, (unsigned long)expectedBytes); + break; + } + } +} + +static void +testTrie(const char *testName, const UCPTrie *trie, + UCPTrieType type, UCPTrieValueWidth valueWidth, + const CheckRange checkRanges[], int32_t countCheckRanges) { + testTrieGetters(testName, trie, type, valueWidth, checkRanges, countCheckRanges); + 
testTrieGetRanges(testName, trie, NULL, UCPTRIE_RANGE_NORMAL, 0, checkRanges, countCheckRanges); + if (type == UCPTRIE_TYPE_FAST) { + testTrieUTF16(testName, trie, valueWidth, checkRanges, countCheckRanges); + testTrieUTF8(testName, trie, valueWidth, checkRanges, countCheckRanges); + } +} + +static void +testBuilder(const char *testName, const UMutableCPTrie *mutableTrie, + const CheckRange checkRanges[], int32_t countCheckRanges) { + testBuilderGetters(testName, mutableTrie, checkRanges, countCheckRanges); + testTrieGetRanges(testName, NULL, mutableTrie, UCPTRIE_RANGE_NORMAL, 0, checkRanges, countCheckRanges); +} + +static uint32_t storage[120000]; +static uint32_t swapped[120000]; + +static void +testTrieSerialize(const char *testName, UMutableCPTrie *mutableTrie, + UCPTrieType type, UCPTrieValueWidth valueWidth, UBool withSwap, + const CheckRange checkRanges[], int32_t countCheckRanges) { + UCPTrie *trie; + int32_t length1, length2, length3; + UErrorCode errorCode; + + /* clone the trie so that the caller can reuse the original */ + errorCode=U_ZERO_ERROR; + mutableTrie = umutablecptrie_clone(mutableTrie, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_clone(%s) failed - %s\n", + testName, u_errorName(errorCode)); + return; + } + + /* + * This is not a loop, but simply a block that we can exit with "break" + * when something goes wrong. 
+ */ + do { + errorCode=U_ZERO_ERROR; + trie = umutablecptrie_buildImmutable(mutableTrie, type, valueWidth, &errorCode); + if (U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_buildImmutable(%s) failed: %s\n", + testName, u_errorName(errorCode)); + break; + } + errorCode=U_ZERO_ERROR; + length1=ucptrie_toBinary(trie, NULL, 0, &errorCode); + if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { + log_err("error: ucptrie_toBinary(%s) preflighting set %s != U_BUFFER_OVERFLOW_ERROR\n", + testName, u_errorName(errorCode)); + break; + } + errorCode=U_ZERO_ERROR; + length2=ucptrie_toBinary(trie, storage, sizeof(storage), &errorCode); + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + log_err("error: ucptrie_toBinary(%s) needs more memory\n", testName); + break; + } + if(U_FAILURE(errorCode)) { + log_err("error: ucptrie_toBinary(%s) failed: %s\n", testName, u_errorName(errorCode)); + break; + } + if(length1!=length2) { + log_err("error: trie serialization (%s) lengths different: " + "preflight vs. serialize\n", testName); + break; + } + + testTrie(testName, trie, type, valueWidth, checkRanges, countCheckRanges); + ucptrie_close(trie); + trie=NULL; + + if(withSwap) { + int32_t swappedLength; + + UDataSwapper *ds; + + /* swap to opposite-endian */ + uprv_memset(swapped, 0x55, length2); + ds=udata_openSwapper(U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, + !U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); + swappedLength=ucptrie_swap(ds, storage, -1, NULL, &errorCode); + if(U_FAILURE(errorCode) || swappedLength!=length2) { + log_err("error: ucptrie_swap(%s to OE preflighting) failed (%s) " + "or before/after lengths different\n", + testName, u_errorName(errorCode)); + udata_closeSwapper(ds); + break; + } + swappedLength=ucptrie_swap(ds, storage, length2, swapped, &errorCode); + udata_closeSwapper(ds); + if(U_FAILURE(errorCode) || swappedLength!=length2) { + log_err("error: ucptrie_swap(%s to OE) failed (%s) or before/after lengths different\n", + testName, u_errorName(errorCode)); + break; + } + + 
/* swap back to platform-endian */ + uprv_memset(storage, 0xaa, length2); + ds=udata_openSwapper(!U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, + U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); + swappedLength=ucptrie_swap(ds, swapped, -1, NULL, &errorCode); + if(U_FAILURE(errorCode) || swappedLength!=length2) { + log_err("error: ucptrie_swap(%s to PE preflighting) failed (%s) " + "or before/after lengths different\n", + testName, u_errorName(errorCode)); + udata_closeSwapper(ds); + break; + } + swappedLength=ucptrie_swap(ds, swapped, length2, storage, &errorCode); + udata_closeSwapper(ds); + if(U_FAILURE(errorCode) || swappedLength!=length2) { + log_err("error: ucptrie_swap(%s to PE) failed (%s) or before/after lengths different\n", + testName, u_errorName(errorCode)); + break; + } + } + + trie = ucptrie_openFromBinary(type, valueWidth, storage, length2, &length3, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("error: ucptrie_openFromBinary(%s) failed, %s\n", testName, u_errorName(errorCode)); + break; + } + if(type != ucptrie_getType(trie)) { + log_err("error: trie serialization (%s) did not preserve trie type\n", testName); + break; + } + if(valueWidth != ucptrie_getValueWidth(trie)) { + log_err("error: trie serialization (%s) did not preserve data value width\n", testName); + break; + } + if(length2!=length3) { + log_err("error: trie serialization (%s) lengths different: " + "serialize vs. 
unserialize\n", testName); + break; + } + /* overwrite the storage that is not supposed to be needed */ + uprv_memset((char *)storage+length3, 0xfa, (int32_t)(sizeof(storage)-length3)); + + { + errorCode=U_ZERO_ERROR; + UCPTrie *any = ucptrie_openFromBinary(UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY, + storage, length3, NULL, &errorCode); + if (U_SUCCESS(errorCode)) { + if (type != ucptrie_getType(any)) { + log_err("error: ucptrie_openFromBinary(" + "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getType() wrong\n"); + } + if (valueWidth != ucptrie_getValueWidth(any)) { + log_err("error: ucptrie_openFromBinary(" + "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getValueWidth() wrong\n"); + } + ucptrie_close(any); + } else { + log_err("error: ucptrie_openFromBinary(" + "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY) failed - %s\n", + u_errorName(errorCode)); + } + } + + errorCode=U_ZERO_ERROR; + testTrie(testName, trie, type, valueWidth, checkRanges, countCheckRanges); + { + /* make a mutable trie from an immutable one */ + uint32_t value, value2; + UMutableCPTrie *mutable2 = umutablecptrie_fromUCPTrie(trie, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_fromUCPTrie(unserialized %s) failed - %s\n", + testName, u_errorName(errorCode)); + break; + } + + value=umutablecptrie_get(mutable2, 0xa1); + umutablecptrie_set(mutable2, 0xa1, 789, &errorCode); + value2=umutablecptrie_get(mutable2, 0xa1); + umutablecptrie_set(mutable2, 0xa1, value, &errorCode); + if(U_FAILURE(errorCode) || value2!=789) { + log_err("error: modifying a mutableTrie-from-UCPTrie (%s) failed - %s\n", + testName, u_errorName(errorCode)); + } + testBuilder(testName, mutable2, checkRanges, countCheckRanges); + umutablecptrie_close(mutable2); + } + } while(0); + + umutablecptrie_close(mutableTrie); + ucptrie_close(trie); +} + +static UMutableCPTrie * +testTrieSerializeAllValueWidth(const char *testName, + UMutableCPTrie *mutableTrie, UBool withClone, + const CheckRange checkRanges[], int32_t 
countCheckRanges) { + char name[40]; + uint32_t oredValues = 0; + int32_t i; + for (i = 0; i < countCheckRanges; ++i) { + oredValues |= checkRanges[i].value; + } + + testBuilder(testName, mutableTrie, checkRanges, countCheckRanges); + + if (oredValues <= 0xffff) { + uprv_strcpy(name, testName); + uprv_strcat(name, ".16"); + testTrieSerialize(name, mutableTrie, + UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, withClone, + checkRanges, countCheckRanges); + } + + uprv_strcpy(name, testName); + uprv_strcat(name, ".32"); + testTrieSerialize(name, mutableTrie, + UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_32, withClone, + checkRanges, countCheckRanges); + + if (oredValues <= 0xff) { + uprv_strcpy(name, testName); + uprv_strcat(name, ".8"); + testTrieSerialize(name, mutableTrie, + UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8, withClone, + checkRanges, countCheckRanges); + } + + if (oredValues <= 0xffff) { + uprv_strcpy(name, testName); + uprv_strcat(name, ".small16"); + testTrieSerialize(name, mutableTrie, + UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_16, withClone, + checkRanges, countCheckRanges); + } + + return mutableTrie; +} + +static UMutableCPTrie * +makeTrieWithRanges(const char *testName, UBool withClone, + const SetRange setRanges[], int32_t countSetRanges, + const CheckRange checkRanges[], int32_t countCheckRanges) { + UMutableCPTrie *mutableTrie; + uint32_t initialValue, errorValue; + uint32_t value; + UChar32 start, limit; + int32_t i; + UErrorCode errorCode; + + log_verbose("\ntesting Trie '%s'\n", testName); + errorCode=U_ZERO_ERROR; + getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); + mutableTrie = umutablecptrie_open(initialValue, errorValue, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); + return NULL; + } + + /* set values from setRanges[] */ + for(i=0; i>4)/2; ++i) { // 4=UCPTRIE_SHIFT_3 + umutablecptrie_setRange(mutableTrie, 0x740, 0x840-1, 1, &errorCode); + 
umutablecptrie_setRange(mutableTrie, 0x780, 0x880-1, 1, &errorCode); + umutablecptrie_setRange(mutableTrie, 0x740, 0x840-1, 2, &errorCode); + umutablecptrie_setRange(mutableTrie, 0x780, 0x880-1, 3, &errorCode); + } + /* make blocks that will be free during compaction */ + umutablecptrie_setRange(mutableTrie, 0x1000, 0x3000-1, 2, &errorCode); + umutablecptrie_setRange(mutableTrie, 0x2000, 0x4000-1, 3, &errorCode); + umutablecptrie_setRange(mutableTrie, 0x1000, 0x4000-1, 1, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("error: setting lots of ranges into a mutable trie (%s) failed - %s\n", + testName, u_errorName(errorCode)); + umutablecptrie_close(mutableTrie); + return; + } + + mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE, + checkRanges, UPRV_LENGTHOF(checkRanges)); + umutablecptrie_close(mutableTrie); +} + +static void +GrowDataArrayTest(void) { + static const CheckRange + checkRanges[]={ + { 0, 1 }, + { 0x720, 2 }, + { 0x7a0, 3 }, + { 0x8a0, 4 }, + { 0x110000, 5 } + }; + static const char *const testName="grow-data"; + + UMutableCPTrie *mutableTrie; + int32_t i; + UErrorCode errorCode; + + errorCode=U_ZERO_ERROR; + mutableTrie=umutablecptrie_open(1, 0xad, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); + return; + } + + /* + * Use umutablecptrie_set() not umutablecptrie_setRange() to write non-initialValue-data. + * Should grow/reallocate the data array to a sufficient length. 
+ */ + for(i=0; i<0x1000; ++i) { + umutablecptrie_set(mutableTrie, i, 2, &errorCode); + } + for(i=0x720; i<0x1100; ++i) { /* some overlap */ + umutablecptrie_set(mutableTrie, i, 3, &errorCode); + } + for(i=0x7a0; i<0x900; ++i) { + umutablecptrie_set(mutableTrie, i, 4, &errorCode); + } + for(i=0x8a0; i<0x110000; ++i) { + umutablecptrie_set(mutableTrie, i, 5, &errorCode); + } + if(U_FAILURE(errorCode)) { + log_err("error: setting lots of values into a mutable trie (%s) failed - %s\n", + testName, u_errorName(errorCode)); + umutablecptrie_close(mutableTrie); + return; + } + + mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE, + checkRanges, UPRV_LENGTHOF(checkRanges)); + umutablecptrie_close(mutableTrie); +} + +static void +ManyAllSameBlocksTest(void) { + static const char *const testName="many-all-same"; + + UMutableCPTrie *mutableTrie; + int32_t i; + UErrorCode errorCode; + CheckRange checkRanges[(0x110000 >> 12) + 1]; + + errorCode = U_ZERO_ERROR; + mutableTrie = umutablecptrie_open(0xff33, 0xad, &errorCode); + if (U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); + return; + } + checkRanges[0].limit = 0; + checkRanges[0].value = 0xff33; // initialValue + + // Many all-same-value blocks. 
+ for (i = 0; i < 0x110000; i += 0x1000) { + uint32_t value = i >> 12; + umutablecptrie_setRange(mutableTrie, i, i + 0xfff, value, &errorCode); + checkRanges[value + 1].limit = i + 0x1000; + checkRanges[value + 1].value = value; + } + for (i = 0; i < 0x110000; i += 0x1000) { + uint32_t expected = i >> 12; + uint32_t v0 = umutablecptrie_get(mutableTrie, i); + uint32_t vfff = umutablecptrie_get(mutableTrie, i + 0xfff); + if (v0 != expected || vfff != expected) { + log_err("error: UMutableCPTrie U+%04lx unexpected value\n", (long)i); + } + } + + mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE, + checkRanges, UPRV_LENGTHOF(checkRanges)); + umutablecptrie_close(mutableTrie); +} + +static void +MuchDataTest(void) { + static const char *const testName="much-data"; + + UMutableCPTrie *mutableTrie; + int32_t r, c; + UErrorCode errorCode = U_ZERO_ERROR; + CheckRange checkRanges[(0x10000 >> 6) + (0x10240 >> 4) + 10]; + + mutableTrie = umutablecptrie_open(0xff33, 0xad, &errorCode); + if (U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); + return; + } + checkRanges[0].limit = 0; + checkRanges[0].value = 0xff33; // initialValue + r = 1; + + // Add much data that does not compact well, + // to get more than 128k data values after compaction. 
+ for (c = 0; c < 0x10000; c += 0x40) { + uint32_t value = c >> 4; + umutablecptrie_setRange(mutableTrie, c, c + 0x3f, value, &errorCode); + checkRanges[r].limit = c + 0x40; + checkRanges[r++].value = value; + } + checkRanges[r].limit = 0x20000; + checkRanges[r++].value = 0xff33; + for (c = 0x20000; c < 0x30230; c += 0x10) { + uint32_t value = c >> 4; + umutablecptrie_setRange(mutableTrie, c, c + 0xf, value, &errorCode); + checkRanges[r].limit = c + 0x10; + checkRanges[r++].value = value; + } + umutablecptrie_setRange(mutableTrie, 0x30230, 0x30233, 0x3023, &errorCode); + checkRanges[r].limit = 0x30234; + checkRanges[r++].value = 0x3023; + umutablecptrie_setRange(mutableTrie, 0x30234, 0xdffff, 0x5005, &errorCode); + checkRanges[r].limit = 0xe0000; + checkRanges[r++].value = 0x5005; + umutablecptrie_setRange(mutableTrie, 0xe0000, 0x10ffff, 0x9009, &errorCode); + checkRanges[r].limit = 0x110000; + checkRanges[r++].value = 0x9009; + if (U_FAILURE(errorCode)) { + log_err("error: setting lots of values into a mutable trie (%s) failed - %s\n", + testName, u_errorName(errorCode)); + umutablecptrie_close(mutableTrie); + return; + } + U_ASSERT(r <= UPRV_LENGTHOF(checkRanges)); + + testBuilder(testName, mutableTrie, checkRanges, r); + testTrieSerialize("much-data.16", mutableTrie, + UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, FALSE, checkRanges, r); + umutablecptrie_close(mutableTrie); +} + +static void testGetRangesFixedSurr(const char *testName, const UMutableCPTrie *mutableTrie, + UCPTrieRangeOption option, + const CheckRange checkRanges[], int32_t countCheckRanges) { + testTrieGetRanges(testName, NULL, mutableTrie, option, 5, checkRanges, countCheckRanges); + UErrorCode errorCode = U_ZERO_ERROR; + UMutableCPTrie *clone = umutablecptrie_clone(mutableTrie, &errorCode); + UCPTrie *trie; + if (U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_clone(%s) failed: %s\n", testName, u_errorName(errorCode)); + return; + } + trie = umutablecptrie_buildImmutable(clone, 
UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, &errorCode); + umutablecptrie_close(clone); + if (U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_buildImmutable(%s) failed: %s\n", testName, u_errorName(errorCode)); + return; + } + testTrieGetRanges(testName, trie, NULL, option, 5, checkRanges, countCheckRanges); + ucptrie_close(trie); +} + +static void +TrieTestGetRangesFixedSurr(void) { + static const SetRange + setRangesFixedSurr[]={ + { 0xd000, 0xd7ff, 5 }, + { 0xd7ff, 0xe001, 3 }, + { 0xe001, 0xf900, 5 }, + }; + + static const CheckRange + checkRangesFixedLeadSurr1[]={ + { 0, 0 }, + { 0xd000, 0 }, + { 0xd7ff, 5 }, + { 0xd800, 3 }, + { 0xdc00, 5 }, + { 0xe001, 3 }, + { 0xf900, 5 }, + { 0x110000, 0 } + }; + + static const CheckRange + checkRangesFixedAllSurr1[]={ + { 0, 0 }, + { 0xd000, 0 }, + { 0xd7ff, 5 }, + { 0xd800, 3 }, + { 0xe000, 5 }, + { 0xe001, 3 }, + { 0xf900, 5 }, + { 0x110000, 0 } + }; + + static const CheckRange + checkRangesFixedLeadSurr3[]={ + { 0, 0 }, + { 0xd000, 0 }, + { 0xdc00, 5 }, + { 0xe001, 3 }, + { 0xf900, 5 }, + { 0x110000, 0 } + }; + + static const CheckRange + checkRangesFixedAllSurr3[]={ + { 0, 0 }, + { 0xd000, 0 }, + { 0xe000, 5 }, + { 0xe001, 3 }, + { 0xf900, 5 }, + { 0x110000, 0 } + }; + + static const CheckRange + checkRangesFixedSurr4[]={ + { 0, 0 }, + { 0xd000, 0 }, + { 0xf900, 5 }, + { 0x110000, 0 } + }; + + UMutableCPTrie *mutableTrie = makeTrieWithRanges( + "fixedSurr", FALSE, setRangesFixedSurr, UPRV_LENGTHOF(setRangesFixedSurr), + checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1)); + UErrorCode errorCode = U_ZERO_ERROR; + if (mutableTrie == NULL) { + return; + } + testGetRangesFixedSurr("fixedLeadSurr1", mutableTrie, UCPTRIE_RANGE_FIXED_LEAD_SURROGATES, + checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1)); + testGetRangesFixedSurr("fixedAllSurr1", mutableTrie, UCPTRIE_RANGE_FIXED_ALL_SURROGATES, + checkRangesFixedAllSurr1, UPRV_LENGTHOF(checkRangesFixedAllSurr1)); + // Setting a range 
in the middle of lead surrogates makes no difference. + umutablecptrie_setRange(mutableTrie, 0xd844, 0xd899, 5, &errorCode); + if (U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_setRange(fixedSurr2) failed: %s\n", u_errorName(errorCode)); + umutablecptrie_close(mutableTrie); + return; + } + testGetRangesFixedSurr("fixedLeadSurr2", mutableTrie, UCPTRIE_RANGE_FIXED_LEAD_SURROGATES, + checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1)); + // Bridge the gap before the lead surrogates. + umutablecptrie_set(mutableTrie, 0xd7ff, 5, &errorCode); + if (U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_set(fixedSurr3) failed: %s\n", u_errorName(errorCode)); + umutablecptrie_close(mutableTrie); + return; + } + testGetRangesFixedSurr("fixedLeadSurr3", mutableTrie, UCPTRIE_RANGE_FIXED_LEAD_SURROGATES, + checkRangesFixedLeadSurr3, UPRV_LENGTHOF(checkRangesFixedLeadSurr3)); + testGetRangesFixedSurr("fixedAllSurr3", mutableTrie, UCPTRIE_RANGE_FIXED_ALL_SURROGATES, + checkRangesFixedAllSurr3, UPRV_LENGTHOF(checkRangesFixedAllSurr3)); + // Bridge the gap after the trail surrogates. 
+ umutablecptrie_set(mutableTrie, 0xe000, 5, &errorCode); + if (U_FAILURE(errorCode)) { + log_err("error: umutablecptrie_set(fixedSurr4) failed: %s\n", u_errorName(errorCode)); + umutablecptrie_close(mutableTrie); + return; + } + testGetRangesFixedSurr("fixedSurr4", mutableTrie, UCPTRIE_RANGE_FIXED_ALL_SURROGATES, + checkRangesFixedSurr4, UPRV_LENGTHOF(checkRangesFixedSurr4)); + umutablecptrie_close(mutableTrie); +} + +void +addUCPTrieTest(TestNode** root) { + addTest(root, &TrieTestSet1, "tsutil/ucptrietest/TrieTestSet1"); + addTest(root, &TrieTestSet2Overlap, "tsutil/ucptrietest/TrieTestSet2Overlap"); + addTest(root, &TrieTestSet3Initial9, "tsutil/ucptrietest/TrieTestSet3Initial9"); + addTest(root, &TrieTestSetEmpty, "tsutil/ucptrietest/TrieTestSetEmpty"); + addTest(root, &TrieTestSetSingleValue, "tsutil/ucptrietest/TrieTestSetSingleValue"); + addTest(root, &TrieTestSet2OverlapWithClone, "tsutil/ucptrietest/TrieTestSet2OverlapWithClone"); + addTest(root, &FreeBlocksTest, "tsutil/ucptrietest/FreeBlocksTest"); + addTest(root, &GrowDataArrayTest, "tsutil/ucptrietest/GrowDataArrayTest"); + addTest(root, &ManyAllSameBlocksTest, "tsutil/ucptrietest/ManyAllSameBlocksTest"); + addTest(root, &MuchDataTest, "tsutil/ucptrietest/MuchDataTest"); + addTest(root, &TrieTestGetRangesFixedSurr, "tsutil/ucptrietest/TrieTestGetRangesFixedSurr"); +} diff --git a/icu4c/source/test/intltest/tstnorm.cpp b/icu4c/source/test/intltest/tstnorm.cpp index 45e9db8cb1d..741211b170f 100644 --- a/icu4c/source/test/intltest/tstnorm.cpp +++ b/icu4c/source/test/intltest/tstnorm.cpp @@ -633,6 +633,29 @@ BasicNormalizerTest::TestPreviousNext(const UChar *src, int32_t srcLength, const char *moves, UNormalizationMode mode, const char *name) { + // Sanity check non-iterative normalization. 
+ { + IcuTestErrorCode errorCode(*this, "TestPreviousNext"); + UnicodeString result; + Normalizer::normalize(UnicodeString(src, srcLength), mode, 0, result, errorCode); + if (errorCode.isFailure()) { + dataerrln("error: non-iterative normalization of %s failed: %s", + name, errorCode.errorName()); + errorCode.reset(); + return; + } + // UnicodeString::fromUTF32(expect, expectLength) + // would turn unpaired surrogates into U+FFFD. + for (int32_t i = 0, j = 0; i < result.length(); ++j) { + UChar32 c = result.char32At(i); + if (c != expect[j]) { + errln("error: non-iterative normalization of %s did not yield the expected result", + name); + } + i += U16_LENGTH(c); + } + } + // iterators Normalizer iter(src, srcLength, mode); @@ -1432,9 +1455,14 @@ struct StringPair { const char *input, *expected; }; void BasicNormalizerTest::TestCustomComp() { static const StringPair pairs[]={ - { "\\uD801\\uE000\\uDFFE", "" }, - { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, - { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, + // ICU 63 normalization with UCPTrie requires inert surrogate code points. 
+ // { "\\uD801\\uE000\\uDFFE", "" }, + // { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, + // { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, + { "\\uD801\\uE000\\uDFFE", "\\uD801\\uDFFE" }, + { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD800\\uD801\\uDFFE\\uDFFF" }, + { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD800\\U000107FE\\uDFFF" }, + { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE002\\U000110B9\\u0327\\u0345" }, { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" }, { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" }, @@ -1462,9 +1490,14 @@ BasicNormalizerTest::TestCustomComp() { void BasicNormalizerTest::TestCustomFCC() { static const StringPair pairs[]={ - { "\\uD801\\uE000\\uDFFE", "" }, - { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, - { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, + // ICU 63 normalization with UCPTrie requires inert surrogate code points. + // { "\\uD801\\uE000\\uDFFE", "" }, + // { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, + // { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, + { "\\uD801\\uE000\\uDFFE", "\\uD801\\uDFFE" }, + { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD800\\uD801\\uDFFE\\uDFFF" }, + { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD800\\U000107FE\\uDFFF" }, + // The following expected result is different from CustomComp // because of only-contiguous composition. 
{ "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE001\\U000110B9\\u0327\\u0308\\u0345" }, diff --git a/icu4c/source/test/perf/normperf/Makefile.in b/icu4c/source/test/perf/normperf/Makefile.in index 1655c78cd66..bc63e822686 100644 --- a/icu4c/source/test/perf/normperf/Makefile.in +++ b/icu4c/source/test/perf/normperf/Makefile.in @@ -17,17 +17,20 @@ include $(top_builddir)/icudefs.mk subdir = test/perf/normperf ## Extra files to remove for 'make clean' -CLEANFILES = *~ $(DEPS) +CLEANFILES = *~ $(DEPS) $(SIMPLE_DEPS) ## Target information TARGET = normperf +SIMPLE = simplenormperf CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/tools/ctestfw LIBS = $(LIBCTESTFW) $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M) OBJECTS = normperf.o +SIMPLE_OBJ = simplenormperf.o DEPS = $(OBJECTS:.o=.d) +SIMPLE_DEPS = $(SIMPLE_OBJ:.o=.d) ## List of phony targets .PHONY : all all-local install install-local clean clean-local \ @@ -44,7 +47,7 @@ distclean : distclean-local dist: dist-local check: all check-local -all-local: $(TARGET) +all-local: $(TARGET) $(SIMPLE) install-local: @@ -52,7 +55,7 @@ dist-local: clean-local: test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES) - $(RMV) $(OBJECTS) $(TARGET) + $(RMV) $(OBJECTS) $(SIMPLE_OBJ) $(TARGET) $(SIMPLE) distclean-local: clean-local $(RMV) Makefile @@ -67,16 +70,21 @@ $(TARGET) : $(OBJECTS) $(LINK.cc) -o $@ $^ $(LIBS) $(POST_BUILD_STEP) +$(SIMPLE) : $(SIMPLE_OBJ) + $(LINK.cc) -o $@ $^ $(LIBS) + $(POST_BUILD_STEP) + invoke: ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION) ifeq (,$(MAKECMDGOALS)) -include $(DEPS) +-include $(SIMPLE_DEPS) else ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),) ifneq ($(patsubst %install,,$(MAKECMDGOALS)),) -include $(DEPS) +-include $(SIMPLE_DEPS) endif endif endif - diff --git a/icu4c/source/test/perf/normperf/simplenormperf.cpp b/icu4c/source/test/perf/normperf/simplenormperf.cpp new file mode 100644 index 00000000000..b6865ba1eca 
--- /dev/null +++ b/icu4c/source/test/perf/normperf/simplenormperf.cpp @@ -0,0 +1,352 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// simplenormperf.cpp +// created: 2018mar15 Markus W. Scherer + +#include +#include + +#include "unicode/utypes.h" +#include "unicode/bytestream.h" +#include "unicode/normalizer2.h" +#include "unicode/stringpiece.h" +#include "unicode/unistr.h" +#include "unicode/utf8.h" +#include "unicode/utimer.h" +#include "cmemory.h" + +using icu::Normalizer2; +using icu::UnicodeString; + +namespace { + +// Strings with commonly occurring BMP characters. +class CommonChars { +public: + static UnicodeString getMixed(int32_t minLength) { + return extend(UnicodeString(latin1).append(japanese).append(arabic), minLength); + } + static UnicodeString getLatin1(int32_t minLength) { return extend(latin1, minLength); } + static UnicodeString getLowercaseLatin1(int32_t minLength) { return extend(lowercaseLatin1, minLength); } + static UnicodeString getASCII(int32_t minLength) { return extend(ascii, minLength); } + static UnicodeString getJapanese(int32_t minLength) { return extend(japanese, minLength); } + + // Returns an array of UTF-8 offsets, one per code point. + // Assumes all BMP characters. 
+ static int32_t *toUTF8WithOffsets(const UnicodeString &s16, std::string &s8, int32_t &numCodePoints) { + s8.clear(); + s8.reserve(s16.length()); + s16.toUTF8String(s8); + const char *s = s8.data(); + int32_t length = s8.length(); + int32_t *offsets = new int32_t[length + 1]; + int32_t numCP = 0; + for (int32_t i = 0; i < length;) { + offsets[numCP++] = i; + U8_FWD_1(s, i, length); + } + offsets[numCP] = length; + numCodePoints = numCP; + return offsets; + } + +private: + static UnicodeString extend(const UnicodeString &s, int32_t minLength) { + UnicodeString result(s); + while (result.length() < minLength) { + UnicodeString twice = result + result; + result = std::move(twice); + } + return result; + } + + static const UChar *const latin1; + static const UChar *const lowercaseLatin1; + static const UChar *const ascii; + static const UChar *const japanese; + static const UChar *const arabic; +}; + +const UChar *const CommonChars::latin1 = + // Goethe’s Bergschloß in normal sentence case. 
+ u"Da droben auf jenem Berge, da steht ein altes Schloß, " + u"wo hinter Toren und Türen sonst lauerten Ritter und Roß.\n" + u"Verbrannt sind Türen und Tore, und überall ist es so still; " + u"das alte verfallne Gemäuer durchklettr ich, wie ich nur will.\n" + u"Hierneben lag ein Keller, so voll von köstlichem Wein; " + u"nun steiget nicht mehr mit Krügen die Kellnerin heiter hinein.\n" + u"Sie setzt den Gästen im Saale nicht mehr die Becher umher, " + u"sie füllt zum Heiligen Mahle dem Pfaffen das Fläschchen nicht mehr.\n" + u"Sie reicht dem lüsternen Knappen nicht mehr auf dem Gange den Trank, " + u"und nimmt für flüchtige Gabe nicht mehr den flüchtigen Dank.\n" + u"Denn alle Balken und Decken, sie sind schon lange verbrannt, " + u"und Trepp und Gang und Kapelle in Schutt und Trümmer verwandt.\n" + u"Doch als mit Zither und Flasche nach diesen felsigen Höhn " + u"ich an dem heitersten Tage mein Liebchen steigen gesehn,\n" + u"da drängte sich frohes Behagen hervor aus verödeter Ruh, " + u"da gings wie in alten Tagen recht feierlich wieder zu.\n" + u"Als wären für stattliche Gäste die weitesten Räume bereit, " + u"als käm ein Pärchen gegangen aus jener tüchtigen Zeit.\n" + u"Als stünd in seiner Kapelle der würdige Pfaffe schon da " + u"und fragte: Wollt ihr einander? 
Wir aber lächelten: Ja!\n" + u"Und tief bewegten Gesänge des Herzens innigsten Grund, " + u"Es zeugte, statt der Menge, der Echo schallender Mund.\n" + u"Und als sich gegen Abend im stillen alles verlor," + u"da blickte die glühende Sonne zum schroffen Gipfel empor.\n" + u"Und Knapp und Kellnerin glänzen als Herren weit und breit; " + u"sie nimmt sich zum Kredenzen und er zum Danke sich Zeit.\n"; + +const UChar *const CommonChars::lowercaseLatin1 = + // Goethe’s Bergschloß in all lowercase + u"da droben auf jenem berge, da steht ein altes schloß, " + u"wo hinter toren und türen sonst lauerten ritter und roß.\n" + u"verbrannt sind türen und tore, und überall ist es so still; " + u"das alte verfallne gemäuer durchklettr ich, wie ich nur will.\n" + u"hierneben lag ein keller, so voll von köstlichem wein; " + u"nun steiget nicht mehr mit krügen die kellnerin heiter hinein.\n" + u"sie setzt den gästen im saale nicht mehr die becher umher, " + u"sie füllt zum heiligen mahle dem pfaffen das fläschchen nicht mehr.\n" + u"sie reicht dem lüsternen knappen nicht mehr auf dem gange den trank, " + u"und nimmt für flüchtige gabe nicht mehr den flüchtigen dank.\n" + u"denn alle balken und decken, sie sind schon lange verbrannt, " + u"und trepp und gang und kapelle in schutt und trümmer verwandt.\n" + u"doch als mit zither und flasche nach diesen felsigen höhn " + u"ich an dem heitersten tage mein liebchen steigen gesehn,\n" + u"da drängte sich frohes behagen hervor aus verödeter ruh, " + u"da gings wie in alten tagen recht feierlich wieder zu.\n" + u"als wären für stattliche gäste die weitesten räume bereit, " + u"als käm ein pärchen gegangen aus jener tüchtigen zeit.\n" + u"als stünd in seiner kapelle der würdige pfaffe schon da " + u"und fragte: wollt ihr einander? 
wir aber lächelten: ja!\n" + u"und tief bewegten gesänge des herzens innigsten grund, " + u"es zeugte, statt der menge, der echo schallender mund.\n" + u"und als sich gegen abend im stillen alles verlor," + u"da blickte die glühende sonne zum schroffen gipfel empor.\n" + u"und knapp und kellnerin glänzen als herren weit und breit; " + u"sie nimmt sich zum kredenzen und er zum danke sich zeit.\n"; + +const UChar *const CommonChars::ascii = + // Goethe’s Bergschloß in normal sentence case but ASCII-fied + u"Da droben auf jenem Berge, da steht ein altes Schloss, " + u"wo hinter Toren und Tueren sonst lauerten Ritter und Ross.\n" + u"Verbrannt sind Tueren und Tore, und ueberall ist es so still; " + u"das alte verfallne Gemaeuer durchklettr ich, wie ich nur will.\n" + u"Hierneben lag ein Keller, so voll von koestlichem Wein; " + u"nun steiget nicht mehr mit Kruegen die Kellnerin heiter hinein.\n" + u"Sie setzt den Gaesten im Saale nicht mehr die Becher umher, " + u"sie fuellt zum Heiligen Mahle dem Pfaffen das Flaeschchen nicht mehr.\n" + u"Sie reicht dem luesternen Knappen nicht mehr auf dem Gange den Trank, " + u"und nimmt fuer fluechtige Gabe nicht mehr den fluechtigen Dank.\n" + u"Denn alle Balken und Decken, sie sind schon lange verbrannt, " + u"und Trepp und Gang und Kapelle in Schutt und Truemmer verwandt.\n" + u"Doch als mit Zither und Flasche nach diesen felsigen Hoehn " + u"ich an dem heitersten Tage mein Liebchen steigen gesehn,\n" + u"da draengte sich frohes Behagen hervor aus veroedeter Ruh, " + u"da gings wie in alten Tagen recht feierlich wieder zu.\n" + u"Als waeren fuer stattliche Gaeste die weitesten Raeume bereit, " + u"als kaem ein Paerchen gegangen aus jener tuechtigen Zeit.\n" + u"Als stuend in seiner Kapelle der wuerdige Pfaffe schon da " + u"und fragte: Wollt ihr einander? 
Wir aber laechelten: Ja!\n" + u"Und tief bewegten Gesaenge des Herzens innigsten Grund, " + u"Es zeugte, statt der Menge, der Echo schallender Mund.\n" + u"Und als sich gegen Abend im stillen alles verlor," + u"da blickte die gluehende Sonne zum schroffen Gipfel empor.\n" + u"Und Knapp und Kellnerin glaenzen als Herren weit und breit; " + u"sie nimmt sich zum Kredenzen und er zum Danke sich Zeit.\n"; + +const UChar *const CommonChars::japanese = + // Ame ni mo makezu = Be not Defeated by the Rain, by Kenji Miyazawa. + u"雨にもまけず風にもまけず雪にも夏の暑さにもまけぬ" + u"丈夫なからだをもち慾はなく決して瞋らず" + u"いつもしずかにわらっている一日に玄米四合と" + u"味噌と少しの野菜をたべあらゆることを" + u"じぶんをかんじょうにいれずによくみききしわかり" + u"そしてわすれず野原の松の林の蔭の" + u"小さな萱ぶきの小屋にいて東に病気のこどもあれば" + u"行って看病してやり西につかれた母あれば" + u"行ってその稲の束を負い南に死にそうな人あれば" + u"行ってこわがらなくてもいいといい" + u"北にけんかやそしょうがあれば" + u"つまらないからやめろといいひでりのときはなみだをながし" + u"さむさのなつはおろおろあるきみんなにでくのぼうとよばれ" + u"ほめられもせずくにもされずそういうものにわたしはなりたい"; + +const UChar *const CommonChars::arabic = + // Some Arabic for variety. "What is Unicode?" + // http://www.unicode.org/standard/translations/arabic.html + u"تتعامل الحواسيب بالأسام مع الأرقام فقط، " + u"و تخزن الحروف و المحارف " + u"الأخرى بتخصيص رقم لكل واحد " + u"منها. قبل اختراع يونيكود كان هناك "; + +// TODO: class BenchmarkPerCodePoint? + +class Operation { +public: + Operation() {} + virtual ~Operation(); + virtual double call(int32_t iterations, int32_t pieceLength) = 0; + +protected: + UTimer startTime; +}; + +Operation::~Operation() {} + +const int32_t kLengths[] = { 5, 12, 30, 100, 1000, 10000 }; + +int32_t getMaxLength() { return kLengths[UPRV_LENGTHOF(kLengths) - 1]; } + +// Returns seconds per code point. +double measure(Operation &op, int32_t pieceLength) { + // Increase the number of iterations until we use at least one second. 
+ int32_t iterations = 1; + for (;;) { + double seconds = op.call(iterations, pieceLength); + if (seconds >= 1) { + if (iterations > 1) { + return seconds / (iterations * pieceLength); + } else { + // Run it once more, to avoid measuring only the warm-up. + return op.call(1, pieceLength) / (iterations * pieceLength); + } + } + if (seconds < 0.01) { + iterations *= 10; + } else if (seconds < 0.55) { + iterations *= 1.1 / seconds; + } else { + iterations *= 2; + } + } +} + +void benchmark(const char *name, Operation &op) { + for (int32_t i = 0; i < UPRV_LENGTHOF(kLengths); ++i) { + int32_t pieceLength = kLengths[i]; + double secPerCp = measure(op, pieceLength); + printf("%s %6d %12f ns/cp\n", name, (int)pieceLength, secPerCp * 1000000000); + } + puts(""); +} + +class NormalizeUTF16 : public Operation { +public: + NormalizeUTF16(const Normalizer2 &n2, const UnicodeString &text) : + norm2(n2), src(text), s(src.getBuffer()) {} + virtual ~NormalizeUTF16(); + virtual double call(int32_t iterations, int32_t pieceLength); + +private: + const Normalizer2 &norm2; + UnicodeString src; + const UChar *s; + UnicodeString dest; +}; + +NormalizeUTF16::~NormalizeUTF16() {} + +// Assumes all BMP characters. 
+double NormalizeUTF16::call(int32_t iterations, int32_t pieceLength) { + int32_t start = 0; + int32_t limit = src.length() - pieceLength; + UnicodeString piece; + UErrorCode errorCode = U_ZERO_ERROR; + utimer_getTime(&startTime); + for (int32_t i = 0; i < iterations; ++i) { + piece.setTo(FALSE, s + start, pieceLength); + norm2.normalize(piece, dest, errorCode); + start = (start + pieceLength) % limit; + } + return utimer_getElapsedSeconds(&startTime); +} + +class NormalizeUTF8 : public Operation { +public: + NormalizeUTF8(const Normalizer2 &n2, const UnicodeString &text) : norm2(n2), sink(&dest) { + offsets = CommonChars::toUTF8WithOffsets(text, src, numCodePoints); + s = src.data(); + } + virtual ~NormalizeUTF8(); + virtual double call(int32_t iterations, int32_t pieceLength); + +private: + const Normalizer2 &norm2; + std::string src; + const char *s; + int32_t *offsets; + int32_t numCodePoints; + std::string dest; + icu::StringByteSink sink; +}; + +NormalizeUTF8::~NormalizeUTF8() { + delete[] offsets; +} + +double NormalizeUTF8::call(int32_t iterations, int32_t pieceLength) { + int32_t start = 0; + int32_t limit = numCodePoints - pieceLength; + UErrorCode errorCode = U_ZERO_ERROR; + utimer_getTime(&startTime); + for (int32_t i = 0; i < iterations; ++i) { + int32_t start8 = offsets[start]; + int32_t limit8 = offsets[start + pieceLength]; + icu::StringPiece piece(s + start8, limit8 - start8); + norm2.normalizeUTF8(0, piece, sink, nullptr, errorCode); + start = (start + pieceLength) % limit; + } + return utimer_getElapsedSeconds(&startTime); +} + +} // namespace + +extern int main(int /*argc*/, const char * /*argv*/[]) { + // More than the longest piece length so that we read from different parts of the string + // for that piece length. 
+ int32_t maxLength = getMaxLength() * 10; + UErrorCode errorCode = U_ZERO_ERROR; + const Normalizer2 *nfc = Normalizer2::getNFCInstance(errorCode); + const Normalizer2 *nfkc_cf = Normalizer2::getNFKCCasefoldInstance(errorCode); + if (U_FAILURE(errorCode)) { + fprintf(stderr, + "simplenormperf: failed to get Normalizer2 instances - %s\n", + u_errorName(errorCode)); + } + { + // Base line: Should remain in the fast loop without trie lookups. + NormalizeUTF16 op(*nfc, CommonChars::getLatin1(maxLength)); + benchmark("NFC/UTF-16/latin1", op); + } + { + // Base line 2: Read UTF-8, trie lookups, but should have nothing to do. + NormalizeUTF8 op(*nfc, CommonChars::getJapanese(maxLength)); + benchmark("NFC/UTF-8/japanese", op); + } + { + NormalizeUTF16 op(*nfkc_cf, CommonChars::getMixed(maxLength)); + benchmark("NFKC_CF/UTF-16/mixed", op); + } + { + NormalizeUTF16 op(*nfkc_cf, CommonChars::getLowercaseLatin1(maxLength)); + benchmark("NFKC_CF/UTF-16/lowercaseLatin1", op); + } + { + NormalizeUTF16 op(*nfkc_cf, CommonChars::getJapanese(maxLength)); + benchmark("NFKC_CF/UTF-16/japanese", op); + } + { + NormalizeUTF8 op(*nfkc_cf, CommonChars::getMixed(maxLength)); + benchmark("NFKC_CF/UTF-8/mixed", op); + } + { + NormalizeUTF8 op(*nfkc_cf, CommonChars::getLowercaseLatin1(maxLength)); + benchmark("NFKC_CF/UTF-8/lowercaseLatin1", op); + } + { + NormalizeUTF8 op(*nfkc_cf, CommonChars::getJapanese(maxLength)); + benchmark("NFKC_CF/UTF-8/japanese", op); + } + return 0; +} diff --git a/icu4c/source/test/testdata/testnorm.txt b/icu4c/source/test/testdata/testnorm.txt index 0e2ae119f78..55994d4f29b 100644 --- a/icu4c/source/test/testdata/testnorm.txt +++ b/icu4c/source/test/testdata/testnorm.txt @@ -44,9 +44,10 @@ 0360..0361:234 0362:233 0363..036F:230 -D802:2 # surrogates with non-zero combining classes -D803:3 -D804:4 +# ICU 63 normalization with UCPTrie requires inert surrogate code points. 
+# D802:2 # surrogates with non-zero combining classes +# D803:3 +# D804:4 110B9:9 110BA:7 @@ -58,10 +59,11 @@ D804:4 00C4=0041 0308 00C5=0041 030A 00C7=0043 0327 -D800>D7FF # surrogates with mappings, and mappings to empty strings -D801> -DFFE> -DFFF>FFFF +# ICU 63 normalization with UCPTrie requires inert surrogate code points. +# D800>D7FF # surrogates with mappings, and mappings to empty strings +# D801> +# DFFE> +# DFFF>FFFF E000> E001=61 338 # composition with trail<=33FF and composite>7FFF E002=E001 308 # recursive mapping needs reordering diff --git a/icu4c/source/tools/gennorm2/gennorm2.cpp b/icu4c/source/tools/gennorm2/gennorm2.cpp index 2d24d61071b..bce5336be62 100644 --- a/icu4c/source/tools/gennorm2/gennorm2.cpp +++ b/icu4c/source/tools/gennorm2/gennorm2.cpp @@ -266,6 +266,11 @@ void parseFile(std::ifstream &f, Normalizer2DataBuilder &builder) { fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line); exit(errorCode.reset()); } + if (endCP >= 0xd800 && startCP <= 0xdfff) { + fprintf(stderr, "gennorm2 error: value or mapping for surrogate code points: %s\n", + line); + exit(U_ILLEGAL_ARGUMENT_ERROR); + } delimiter=u_skipWhitespace(delimiter); if(*delimiter==':') { const char *s=u_skipWhitespace(delimiter+1); diff --git a/icu4c/source/tools/gennorm2/n2builder.cpp b/icu4c/source/tools/gennorm2/n2builder.cpp index 7b8920d0d3a..49ebcf32385 100644 --- a/icu4c/source/tools/gennorm2/n2builder.cpp +++ b/icu4c/source/tools/gennorm2/n2builder.cpp @@ -29,7 +29,9 @@ #include "unicode/errorcode.h" #include "unicode/localpointer.h" #include "unicode/putil.h" +#include "unicode/ucptrie.h" #include "unicode/udata.h" +#include "unicode/umutablecptrie.h" #include "unicode/uniset.h" #include "unicode/unistr.h" #include "unicode/usetiter.h" @@ -41,7 +43,6 @@ #include "norms.h" #include "toolutil.h" #include "unewdata.h" -#include "utrie2.h" #include "uvectr32.h" #include "writesrc.h" @@ -58,8 +59,8 @@ static UDataInfo dataInfo={ 0, { 0x4e, 0x72, 0x6d, 
0x32 }, /* dataFormat="Nrm2" */ - { 3, 0, 0, 0 }, /* formatVersion */ - { 10, 0, 0, 0 } /* dataVersion (Unicode version) */ + { 4, 0, 0, 0 }, /* formatVersion */ + { 11, 0, 0, 0 } /* dataVersion (Unicode version) */ }; U_NAMESPACE_BEGIN @@ -94,14 +95,14 @@ const HangulIterator::Range HangulIterator::ranges[4]={ Normalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) : norms(errorCode), phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL), - norm16Trie(nullptr), norm16TrieLength(0) { + norm16TrieBytes(nullptr), norm16TrieLength(0) { memset(unicodeVersion, 0, sizeof(unicodeVersion)); memset(indexes, 0, sizeof(indexes)); memset(smallFCD, 0, sizeof(smallFCD)); } Normalizer2DataBuilder::~Normalizer2DataBuilder() { - utrie2_close(norm16Trie); + delete[] norm16TrieBytes; } void @@ -407,11 +408,13 @@ void Normalizer2DataBuilder::postProcess(Norm &norm) { class Norm16Writer : public Norms::Enumerator { public: - Norm16Writer(Norms &n, Normalizer2DataBuilder &b) : Norms::Enumerator(n), builder(b) {} + Norm16Writer(UMutableCPTrie *trie, Norms &n, Normalizer2DataBuilder &b) : + Norms::Enumerator(n), builder(b), norm16Trie(trie) {} void rangeHandler(UChar32 start, UChar32 end, Norm &norm) U_OVERRIDE { - builder.writeNorm16(start, end, norm); + builder.writeNorm16(norm16Trie, start, end, norm); } Normalizer2DataBuilder &builder; + UMutableCPTrie *norm16Trie; }; void Normalizer2DataBuilder::setSmallFCD(UChar32 c) { @@ -419,7 +422,7 @@ void Normalizer2DataBuilder::setSmallFCD(UChar32 c) { smallFCD[lead>>8]|=(uint8_t)1<<((lead>>5)&7); } -void Normalizer2DataBuilder::writeNorm16(UChar32 start, UChar32 end, Norm &norm) { +void Normalizer2DataBuilder::writeNorm16(UMutableCPTrie *norm16Trie, UChar32 start, UChar32 end, Norm &norm) { if((norm.leadCC|norm.trailCC)!=0) { for(UChar32 c=start; c<=end; ++c) { setSmallFCD(c); @@ -484,7 +487,7 @@ void Normalizer2DataBuilder::writeNorm16(UChar32 start, UChar32 end, Norm &norm) 
norm16|=Normalizer2Impl::HAS_COMP_BOUNDARY_AFTER; } IcuToolErrorCode errorCode("gennorm2/writeNorm16()"); - utrie2_setRange32(norm16Trie, start, end, (uint32_t)norm16, TRUE, errorCode); + umutablecptrie_setRange(norm16Trie, start, end, (uint32_t)norm16, errorCode); // Set the minimum code points for real data lookups in the quick check loops. UBool isDecompNo= @@ -502,13 +505,13 @@ void Normalizer2DataBuilder::writeNorm16(UChar32 start, UChar32 end, Norm &norm) } } -void Normalizer2DataBuilder::setHangulData() { +void Normalizer2DataBuilder::setHangulData(UMutableCPTrie *norm16Trie) { HangulIterator hi; const HangulIterator::Range *range; // Check that none of the Hangul/Jamo code points have data. while((range=hi.nextRange())!=NULL) { for(UChar32 c=range->start; c<=range->end; ++c) { - if(utrie2_get32(norm16Trie, c)>Normalizer2Impl::INERT) { + if(umutablecptrie_get(norm16Trie, c)>Normalizer2Impl::INERT) { fprintf(stderr, "gennorm2 error: " "illegal mapping/composition/ccc data for Hangul or Jamo U+%04lX\n", @@ -524,13 +527,13 @@ void Normalizer2DataBuilder::setHangulData() { if(Hangul::JAMO_V_BASEp->maxNorm16) { - p->maxNorm16=value; - } - p->andedNorm16&=value; - return TRUE; -} - -U_CDECL_END - -void Normalizer2DataBuilder::processData() { - IcuToolErrorCode errorCode("gennorm2/processData()"); - norm16Trie=utrie2_open(Normalizer2Impl::INERT, Normalizer2Impl::INERT, errorCode); - errorCode.assertSuccess(); - +LocalUCPTriePointer Normalizer2DataBuilder::processData() { // Build composition lists before recursive decomposition, // so that we still have the raw, pair-wise mappings. 
CompositionBuilder compBuilder(norms); @@ -652,13 +622,19 @@ void Normalizer2DataBuilder::processData() { indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=0x110000; indexes[Normalizer2Impl::IX_MIN_LCCC_CP]=0x110000; + IcuToolErrorCode errorCode("gennorm2/processData()"); + UMutableCPTrie *norm16Trie = umutablecptrie_open( + Normalizer2Impl::INERT, Normalizer2Impl::INERT, errorCode); + errorCode.assertSuccess(); + // Map each code point to its norm16 value, // including the properties that fit directly, // and the offset to the "extra data" if necessary. - Norm16Writer norm16Writer(norms, *this); + Norm16Writer norm16Writer(norm16Trie, norms, *this); norms.enumRanges(norm16Writer); + // TODO: iterate via getRange() instead of callback? - setHangulData(); + setHangulData(norm16Trie); // Look for the "worst" norm16 value of any supplementary code point // corresponding to a lead surrogate, and set it as that surrogate's value. @@ -670,22 +646,63 @@ void Normalizer2DataBuilder::processData() { // and select the best value that only breaks the composition and/or decomposition // inner loops if necessary. // However, that seems like overkill for an optimization for supplementary characters. - for(UChar lead=0xd800; lead<0xdc00; ++lead) { - uint32_t surrogateCPNorm16=utrie2_get32(norm16Trie, lead); - Norm16Summary summary={ surrogateCPNorm16, surrogateCPNorm16 }; - utrie2_enumForLeadSurrogate(norm16Trie, lead, NULL, enumRangeMaxValue, &summary); - uint32_t norm16=summary.maxNorm16; - if(norm16>=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO] && - norm16>(uint32_t)indexes[Normalizer2Impl::IX_MIN_NO_NO]) { - // Set noNo ("worst" value) if it got into "less-bad" maybeYes or ccc!=0. - // Otherwise it might end up at something like JAMO_VT which stays in - // the inner decomposition quick check loop. - norm16=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]-1; + // + // First check that surrogate code *points* are inert. 
+ // The parser should have rejected values/mappings for them. + uint32_t value; + UChar32 end = umutablecptrie_getRange(norm16Trie, 0xd800, UCPTRIE_RANGE_NORMAL, 0, + nullptr, nullptr, &value); + if (value != Normalizer2Impl::INERT || end < 0xdfff) { + fprintf(stderr, + "gennorm2 error: not all surrogate code points are inert: U+d800..U+%04x=%lx\n", + (int)end, (long)value); + exit(U_INTERNAL_PROGRAM_ERROR); + } + uint32_t maxNorm16 = 0; + // ANDing values yields 0 bits where any value has a 0. + // Used for worst-case HAS_COMP_BOUNDARY_AFTER. + uint32_t andedNorm16 = 0; + end = 0; + for (UChar32 start = 0x10000;;) { + if (start > end) { + end = umutablecptrie_getRange(norm16Trie, start, UCPTRIE_RANGE_NORMAL, 0, + nullptr, nullptr, &value); + if (end < 0) { break; } + } + if ((start & 0x3ff) == 0) { + // Data for a new lead surrogate. + maxNorm16 = andedNorm16 = value; + } else { + if (value > maxNorm16) { + maxNorm16 = value; + } + andedNorm16 &= value; + } + // Intersect each range with the code points for one lead surrogate. + UChar32 leadEnd = start | 0x3ff; + if (leadEnd <= end) { + // End of the supplementary block for a lead surrogate. + if (maxNorm16 >= (uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]) { + // Set noNo ("worst" value) if it got into "less-bad" maybeYes or ccc!=0. + // Otherwise it might end up at something like JAMO_VT which stays in + // the inner decomposition quick check loop. + maxNorm16 = (uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]; + } + maxNorm16 = + (maxNorm16 & ~Normalizer2Impl::HAS_COMP_BOUNDARY_AFTER)| + (andedNorm16 & Normalizer2Impl::HAS_COMP_BOUNDARY_AFTER); + if (maxNorm16 != Normalizer2Impl::INERT) { + umutablecptrie_set(norm16Trie, U16_LEAD(start), maxNorm16, errorCode); + } + if (value == Normalizer2Impl::INERT) { + // Potentially skip inert supplementary blocks for several lead surrogates. 
+ start = (end + 1) & ~0x3ff; + } else { + start = leadEnd + 1; + } + } else { + start = end + 1; } - norm16= - (norm16&~Normalizer2Impl::HAS_COMP_BOUNDARY_AFTER)| - (summary.andedNorm16&Normalizer2Impl::HAS_COMP_BOUNDARY_AFTER); - utrie2_set32ForLeadSurrogateCodeUnit(norm16Trie, lead, norm16, errorCode); } // Adjust supplementary minimum code points to break quick check loops at their lead surrogates. @@ -705,14 +722,19 @@ void Normalizer2DataBuilder::processData() { indexes[Normalizer2Impl::IX_MIN_LCCC_CP]=U16_LEAD(minCP); } - utrie2_freeze(norm16Trie, UTRIE2_16_VALUE_BITS, errorCode); - norm16TrieLength=utrie2_serialize(norm16Trie, NULL, 0, errorCode); + LocalUCPTriePointer builtTrie( + umutablecptrie_buildImmutable(norm16Trie, UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, errorCode)); + norm16TrieLength=ucptrie_toBinary(builtTrie.getAlias(), nullptr, 0, errorCode); if(errorCode.get()!=U_BUFFER_OVERFLOW_ERROR) { - fprintf(stderr, "gennorm2 error: unable to freeze/serialize the normalization trie - %s\n", + fprintf(stderr, "gennorm2 error: unable to build/serialize the normalization trie - %s\n", errorCode.errorName()); exit(errorCode.reset()); } + umutablecptrie_close(norm16Trie); errorCode.reset(); + norm16TrieBytes=new uint8_t[norm16TrieLength]; + ucptrie_toBinary(builtTrie.getAlias(), norm16TrieBytes, norm16TrieLength, errorCode); + errorCode.assertSuccess(); int32_t offset=(int32_t)sizeof(indexes); indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]=offset; @@ -750,16 +772,13 @@ void Normalizer2DataBuilder::processData() { u_versionFromString(unicodeVersion, U_UNICODE_VERSION); } memcpy(dataInfo.dataVersion, unicodeVersion, 4); + return builtTrie; } void Normalizer2DataBuilder::writeBinaryFile(const char *filename) { processData(); IcuToolErrorCode errorCode("gennorm2/writeBinaryFile()"); - LocalArray norm16TrieBytes(new uint8_t[norm16TrieLength]); - utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode); - errorCode.assertSuccess(); - 
UNewDataMemory *pData= udata_create(NULL, NULL, filename, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, errorCode); @@ -769,7 +788,7 @@ void Normalizer2DataBuilder::writeBinaryFile(const char *filename) { exit(errorCode.reset()); } udata_writeBlock(pData, indexes, sizeof(indexes)); - udata_writeBlock(pData, norm16TrieBytes.getAlias(), norm16TrieLength); + udata_writeBlock(pData, norm16TrieBytes, norm16TrieLength); udata_writeUString(pData, toUCharPtr(extraData.getBuffer()), extraData.length()); udata_writeBlock(pData, smallFCD, sizeof(smallFCD)); int32_t writtenSize=udata_finish(pData, errorCode); @@ -787,7 +806,7 @@ void Normalizer2DataBuilder::writeBinaryFile(const char *filename) { void Normalizer2DataBuilder::writeCSourceFile(const char *filename) { - processData(); + LocalUCPTriePointer norm16Trie = processData(); IcuToolErrorCode errorCode("gennorm2/writeCSourceFile()"); const char *basename=findBasename(filename); @@ -797,10 +816,7 @@ Normalizer2DataBuilder::writeCSourceFile(const char *filename) { if(extension!=NULL) { dataName.truncate((int32_t)(extension-basename)); } - errorCode.assertSuccess(); - - LocalArray norm16TrieBytes(new uint8_t[norm16TrieLength]); - utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode); + const char *name=dataName.data(); errorCode.assertSuccess(); FILE *f=usrc_create(path.data(), basename, "icu/source/tools/gennorm2/n2builder.cpp"); @@ -808,43 +824,31 @@ Normalizer2DataBuilder::writeCSourceFile(const char *filename) { fprintf(stderr, "gennorm2/writeCSourceFile() error: unable to create the output file %s\n", filename); exit(U_FILE_ACCESS_ERROR); - return; } fputs("#ifdef INCLUDED_FROM_NORMALIZER2_CPP\n\n", f); - char line[100]; - sprintf(line, "static const UVersionInfo %s_formatVersion={", dataName.data()); + + char line[100], line2[100], line3[100]; + sprintf(line, "static const UVersionInfo %s_formatVersion={", name); usrc_writeArray(f, line, dataInfo.formatVersion, 8, 4, "};\n"); - 
sprintf(line, "static const UVersionInfo %s_dataVersion={", dataName.data()); + sprintf(line, "static const UVersionInfo %s_dataVersion={", name); usrc_writeArray(f, line, dataInfo.dataVersion, 8, 4, "};\n\n"); - sprintf(line, "static const int32_t %s_indexes[Normalizer2Impl::IX_COUNT]={\n", - dataName.data()); - usrc_writeArray(f, - line, - indexes, 32, Normalizer2Impl::IX_COUNT, - "\n};\n\n"); - sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", dataName.data()); - usrc_writeUTrie2Arrays(f, - line, NULL, - norm16Trie, - "\n};\n\n"); - sprintf(line, "static const uint16_t %s_extraData[%%ld]={\n", dataName.data()); - usrc_writeArray(f, - line, - extraData.getBuffer(), 16, extraData.length(), - "\n};\n\n"); - sprintf(line, "static const uint8_t %s_smallFCD[%%ld]={\n", dataName.data()); - usrc_writeArray(f, - line, - smallFCD, 8, sizeof(smallFCD), - "\n};\n\n"); - sprintf(line, "static const UTrie2 %s_trie={\n", dataName.data()); - char line2[100]; - sprintf(line2, "%s_trieIndex", dataName.data()); - usrc_writeUTrie2Struct(f, - line, - norm16Trie, line2, NULL, - "};\n"); - fputs("\n#endif // INCLUDED_FROM_NORMALIZER2_CPP\n", f); + sprintf(line, "static const int32_t %s_indexes[Normalizer2Impl::IX_COUNT]={\n", name); + usrc_writeArray(f, line, indexes, 32, Normalizer2Impl::IX_COUNT, "\n};\n\n"); + + sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", name); + sprintf(line2, "static const uint16_t %s_trieData[%%ld]={\n", name); + usrc_writeUCPTrieArrays(f, line, line2, norm16Trie.getAlias(), "\n};\n\n"); + sprintf(line, "static const UCPTrie %s_trie={\n", name); + sprintf(line2, "%s_trieIndex", name); + sprintf(line3, "%s_trieData", name); + usrc_writeUCPTrieStruct(f, line, norm16Trie.getAlias(), line2, line3, "};\n\n"); + + sprintf(line, "static const uint16_t %s_extraData[%%ld]={\n", name); + usrc_writeArray(f, line, extraData.getBuffer(), 16, extraData.length(), "\n};\n\n"); + sprintf(line, "static const uint8_t %s_smallFCD[%%ld]={\n", name); + 
usrc_writeArray(f, line, smallFCD, 8, sizeof(smallFCD), "\n};\n\n"); + + fputs("#endif // INCLUDED_FROM_NORMALIZER2_CPP\n", f); fclose(f); } diff --git a/icu4c/source/tools/gennorm2/n2builder.h b/icu4c/source/tools/gennorm2/n2builder.h index 61b36be0044..77b4fef533a 100644 --- a/icu4c/source/tools/gennorm2/n2builder.h +++ b/icu4c/source/tools/gennorm2/n2builder.h @@ -24,10 +24,10 @@ #if !UCONFIG_NO_NORMALIZATION #include "unicode/errorcode.h" +#include "unicode/umutablecptrie.h" #include "unicode/unistr.h" #include "normalizer2impl.h" // for IX_COUNT #include "toolutil.h" -#include "utrie2.h" #include "norms.h" U_NAMESPACE_BEGIN @@ -95,9 +95,9 @@ private: return indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]- ((2*Normalizer2Impl::MAX_DELTA+1)< #include #include "unicode/errorcode.h" +#include "unicode/umutablecptrie.h" #include "unicode/unistr.h" #include "unicode/utf16.h" #include "normalizer2impl.h" #include "norms.h" #include "toolutil.h" -#include "utrie2.h" #include "uvectr32.h" U_NAMESPACE_BEGIN @@ -67,7 +67,7 @@ UChar32 Norm::combine(UChar32 trail) const { } Norms::Norms(UErrorCode &errorCode) { - normTrie=utrie2_open(0, 0, &errorCode); + normTrie = umutablecptrie_open(0, 0, &errorCode); normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm)); // Default "inert" Norm struct at index 0. Practically immutable. 
norms=allocNorm(); @@ -75,7 +75,7 @@ Norms::Norms(UErrorCode &errorCode) { } Norms::~Norms() { - utrie2_close(normTrie); + umutablecptrie_close(normTrie); int32_t normsLength=utm_countItems(normMem); for(int32_t i=1; irangeHandler(start, end, value); -} - -U_CDECL_END - void Norms::enumRanges(Enumerator &e) { - utrie2_enum(normTrie, nullptr, enumRangeHandler, &e); + UChar32 start = 0, end; + uint32_t i; + while ((end = umutablecptrie_getRange(normTrie, start, UCPTRIE_RANGE_NORMAL, 0, + nullptr, nullptr, &i)) >= 0) { + if (i > 0) { + e.rangeHandler(start, end, norms[i]); + } + start = end + 1; + } } Norms::Enumerator::~Enumerator() {} -UBool Norms::Enumerator::rangeHandler(UChar32 start, UChar32 end, uint32_t value) { - if(value!=0) { - rangeHandler(start, end, norms.getNormRefByIndex(value)); - } - return TRUE; -} - void CompositionBuilder::rangeHandler(UChar32 start, UChar32 end, Norm &norm) { if(norm.mappingType!=Norm::ROUND_TRIP) { return; } if(start!=end) { diff --git a/icu4c/source/tools/gennorm2/norms.h b/icu4c/source/tools/gennorm2/norms.h index 4bf6e760e20..5684e65e672 100644 --- a/icu4c/source/tools/gennorm2/norms.h +++ b/icu4c/source/tools/gennorm2/norms.h @@ -15,12 +15,12 @@ #if !UCONFIG_NO_NORMALIZATION #include "unicode/errorcode.h" +#include "unicode/umutablecptrie.h" #include "unicode/uniset.h" #include "unicode/unistr.h" #include "unicode/utf16.h" #include "normalizer2impl.h" #include "toolutil.h" -#include "utrie2.h" #include "uvectr32.h" U_NAMESPACE_BEGIN @@ -176,8 +176,6 @@ public: virtual ~Enumerator(); /** Called for enumerated value!=0. */ virtual void rangeHandler(UChar32 start, UChar32 end, Norm &norm) = 0; - /** @internal Public only for C callback. 
*/ - UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value); protected: Norms &norms; }; @@ -190,7 +188,7 @@ private: Norms(const Norms &other) = delete; Norms &operator=(const Norms &other) = delete; - UTrie2 *normTrie; + UMutableCPTrie *normTrie; UToolMemory *normMem; Norm *norms; }; diff --git a/icu4c/source/tools/genrb/parse.cpp b/icu4c/source/tools/genrb/parse.cpp index 465c099baba..44eb0c082bc 100644 --- a/icu4c/source/tools/genrb/parse.cpp +++ b/icu4c/source/tools/genrb/parse.cpp @@ -1018,6 +1018,11 @@ addCollation(ParseState* state, TableResource *result, const char *collationTyp icu::CollationInfo::printReorderRanges( *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength); } +#if 0 // debugging output + } else { + printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType); + icu::CollationInfo::printSizes(totalSize, indexes); +#endif } struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status); result->add(collationBin, line, *status); diff --git a/icu4c/source/tools/toolutil/swapimpl.cpp b/icu4c/source/tools/toolutil/swapimpl.cpp index f3f333a005e..a9bee15bcf9 100644 --- a/icu4c/source/tools/toolutil/swapimpl.cpp +++ b/icu4c/source/tools/toolutil/swapimpl.cpp @@ -243,7 +243,7 @@ uprops_swap(const UDataSwapper *ds, * swap the main properties UTrie * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) */ - utrie2_swapAnyVersion(ds, + utrie_swapAnyVersion(ds, inData32+UPROPS_INDEX_COUNT, 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), outData32+UPROPS_INDEX_COUNT, @@ -274,7 +274,7 @@ uprops_swap(const UDataSwapper *ds, * swap the additional UTrie * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties */ - utrie2_swapAnyVersion(ds, + utrie_swapAnyVersion(ds, inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), 
outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], @@ -391,7 +391,7 @@ ucase_swap(const UDataSwapper *ds, /* swap the UTrie */ count=indexes[UCASE_IX_TRIE_SIZE]; - utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); offset+=count; /* swap the uint16_t exceptions[] and unfold[] */ @@ -493,7 +493,7 @@ ubidi_swap(const UDataSwapper *ds, /* swap the UTrie */ count=indexes[UBIDI_IX_TRIE_SIZE]; - utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); offset+=count; /* swap the uint32_t mirrors[] */ diff --git a/icu4c/source/tools/toolutil/writesrc.cpp b/icu4c/source/tools/toolutil/writesrc.cpp index edff1f9e544..3e37864923f 100644 --- a/icu4c/source/tools/toolutil/writesrc.cpp +++ b/icu4c/source/tools/toolutil/writesrc.cpp @@ -22,6 +22,7 @@ #include #include "unicode/utypes.h" #include "unicode/putil.h" +#include "unicode/ucptrie.h" #include "utrie2.h" #include "cstring.h" #include "writesrc.h" @@ -228,6 +229,52 @@ usrc_writeUTrie2Struct(FILE *f, } } +U_CAPI void U_EXPORT2 +usrc_writeUCPTrieArrays(FILE *f, + const char *indexPrefix, const char *dataPrefix, + const UCPTrie *pTrie, + const char *postfix) { + usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, postfix); + int32_t width= + pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : + pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : + pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 
8 : 0; + usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, postfix); +} + +U_CAPI void U_EXPORT2 +usrc_writeUCPTrieStruct(FILE *f, + const char *prefix, + const UCPTrie *pTrie, + const char *indexName, const char *dataName, + const char *postfix) { + if(prefix!=NULL) { + fputs(prefix, f); + } + fprintf( + f, + " %s,\n" // index + " { %s },\n", // data (union) + indexName, + dataName); + fprintf( + f, + " %ld, %ld,\n" // indexLength, dataLength + " 0x%lx, 0x%x,\n" // highStart, shifted12HighStart + " %d, %d,\n" // type, valueWidth + " 0, 0,\n" // reserved32, reserved16 + " 0x%x, 0x%lx,\n" // index3NullOffset, dataNullOffset + " 0x%lx,\n", // nullValue + (long)pTrie->indexLength, (long)pTrie->dataLength, + (long)pTrie->highStart, pTrie->shifted12HighStart, + pTrie->type, pTrie->valueWidth, + pTrie->index3NullOffset, (long)pTrie->dataNullOffset, + (long)pTrie->nullValue); + if(postfix!=NULL) { + fputs(postfix, f); + } +} + U_CAPI void U_EXPORT2 usrc_writeArrayOfMostlyInvChars(FILE *f, const char *prefix, diff --git a/icu4c/source/tools/toolutil/writesrc.h b/icu4c/source/tools/toolutil/writesrc.h index fdcf1f9a6b4..4547b3e9277 100644 --- a/icu4c/source/tools/toolutil/writesrc.h +++ b/icu4c/source/tools/toolutil/writesrc.h @@ -23,6 +23,7 @@ #include #include "unicode/utypes.h" +#include "unicode/ucptrie.h" #include "utrie2.h" /** @@ -75,6 +76,27 @@ usrc_writeUTrie2Struct(FILE *f, const char *indexName, const char *dataName, const char *postfix); +/** + * Calls usrc_writeArray() for the index and data arrays of a UCPTrie. + */ +U_CAPI void U_EXPORT2 +usrc_writeUCPTrieArrays(FILE *f, + const char *indexPrefix, const char *dataPrefix, + const UCPTrie *pTrie, + const char *postfix); + +/** + * Writes the UCPTrie struct values. + * The {} and declaration etc. need to be included in prefix/postfix or + * printed before and after the array contents. 
+ */ +U_CAPI void U_EXPORT2 +usrc_writeUCPTrieStruct(FILE *f, + const char *prefix, + const UCPTrie *pTrie, + const char *indexName, const char *dataName, + const char *postfix); + /** * Writes the contents of an array of mostly invariant characters. * Characters 0..0x1f are printed as numbers, diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java index 76326bb075d..8a8852bbac0 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java @@ -652,6 +652,15 @@ public final class ICUBinary { } } + public static byte[] getBytes(ByteBuffer bytes, int length, int additionalSkipLength) { + byte[] dest = new byte[length]; + bytes.get(dest); + if (additionalSkipLength > 0) { + skipBytes(bytes, additionalSkipLength); + } + return dest; + } + public static String getString(ByteBuffer bytes, int length, int additionalSkipLength) { CharSequence cs = bytes.asCharBuffer(); String s = cs.subSequence(0, length).toString(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java index 735d89e9853..1f9a8a33c5a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java @@ -12,11 +12,13 @@ package com.ibm.icu.impl; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Iterator; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.CodePointMap; +import com.ibm.icu.util.CodePointTrie; import com.ibm.icu.util.ICUUncheckedIOException; +import com.ibm.icu.util.MutableCodePointTrie; import com.ibm.icu.util.VersionInfo; /** @@ -180,8 +182,7 @@ public final class Normalizer2Impl { insert(c, cc); } } - // s must be in NFD, otherwise change the implementation. 
- public void append(CharSequence s, int start, int limit, + public void append(CharSequence s, int start, int limit, boolean isNFD, int leadCC, int trailCC) { if(start==limit) { return; @@ -202,8 +203,11 @@ public final class Normalizer2Impl { c=Character.codePointAt(s, start); start+=Character.charCount(c); if(start(nextOffset-offset)) { throw new ICUUncheckedIOException("Normalizer2 data: not enough bytes for normTrie"); } @@ -487,46 +510,46 @@ public final class Normalizer2Impl { return load(ICUBinary.getRequiredData(name)); } - private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) { - if (norm16 > MIN_NORMAL_MAYBE_YES && norm16 != JAMO_VT) { - set.add(start, end); - } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) { - int fcd16=getFCD16(start); - if(fcd16>0xff) { set.add(start, end); } - } - } - - private void enumNorm16PropertyStartsRange(int start, int end, int value, UnicodeSet set) { - /* add the start code point to the USet */ - set.add(start); - if(start!=end && isAlgorithmicNoNo(value) && (value & DELTA_TCCC_MASK) > DELTA_TCCC_1) { - // Range of code points with same-norm16-value algorithmic decompositions. - // They might have different non-zero FCD16 values. 
- int prevFCD16=getFCD16(start); - while(++start<=end) { - int fcd16=getFCD16(start); - if(fcd16!=prevFCD16) { - set.add(start); - prevFCD16=fcd16; - } - } - } - } - public void addLcccChars(UnicodeSet set) { - Iterator trieIterator=normTrie.iterator(); - Trie2.Range range; - while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { - enumLcccRange(range.startCodePoint, range.endCodePoint, range.value, set); + int start = 0; + CodePointMap.Range range = new CodePointMap.Range(); + while (normTrie.getRange(start, CodePointMap.RangeOption.FIXED_LEAD_SURROGATES, INERT, + null, range)) { + int end = range.getEnd(); + int norm16 = range.getValue(); + if (norm16 > MIN_NORMAL_MAYBE_YES && norm16 != JAMO_VT) { + set.add(start, end); + } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) { + int fcd16 = getFCD16(start); + if (fcd16 > 0xff) { set.add(start, end); } + } + start = end + 1; } } public void addPropertyStarts(UnicodeSet set) { - /* add the start code point of each same-value range of each trie */ - Iterator trieIterator=normTrie.iterator(); - Trie2.Range range; - while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { - enumNorm16PropertyStartsRange(range.startCodePoint, range.endCodePoint, range.value, set); + // Add the start code point of each same-value range of the trie. + int start = 0; + CodePointMap.Range range = new CodePointMap.Range(); + while (normTrie.getRange(start, CodePointMap.RangeOption.FIXED_LEAD_SURROGATES, INERT, + null, range)) { + int end = range.getEnd(); + int value = range.getValue(); + set.add(start); + if (start != end && isAlgorithmicNoNo(value) && + (value & DELTA_TCCC_MASK) > DELTA_TCCC_1) { + // Range of code points with same-norm16-value algorithmic decompositions. + // They might have different non-zero FCD16 values. 
+ int prevFCD16 = getFCD16(start); + while (++start <= end) { + int fcd16 = getFCD16(start); + if (fcd16 != prevFCD16) { + set.add(start); + prevFCD16 = fcd16; + } + } + } + start = end + 1; } /* add Hangul LV syllables and LV+1 because of skippables */ @@ -538,20 +561,21 @@ public final class Normalizer2Impl { } public void addCanonIterPropertyStarts(UnicodeSet set) { - /* add the start code point of each same-value range of the canonical iterator data trie */ + // Add the start code point of each same-value range of the canonical iterator data trie. ensureCanonIterData(); - // currently only used for the SEGMENT_STARTER property - Iterator trieIterator=canonIterData.iterator(segmentStarterMapper); - Trie2.Range range; - while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { - /* add the start code point to the USet */ - set.add(range.startCodePoint); + // Currently only used for the SEGMENT_STARTER property. + int start = 0; + CodePointMap.Range range = new CodePointMap.Range(); + while (canonIterData.getRange(start, segmentStarterMapper, range)) { + set.add(start); + start = range.getEnd() + 1; } } - private static final Trie2.ValueMapper segmentStarterMapper=new Trie2.ValueMapper() { + private static final CodePointMap.ValueFilter segmentStarterMapper = + new CodePointMap.ValueFilter() { @Override - public int map(int in) { - return in&CANON_NOT_SEGMENT_STARTER; + public int apply(int value) { + return value & CANON_NOT_SEGMENT_STARTER; } }; @@ -574,12 +598,14 @@ public final class Normalizer2Impl { */ public synchronized Normalizer2Impl ensureCanonIterData() { if(canonIterData==null) { - Trie2Writable newData=new Trie2Writable(0, 0); + MutableCodePointTrie mutableTrie = new MutableCodePointTrie(0, 0); canonStartSets=new ArrayList(); - Iterator trieIterator=normTrie.iterator(); - Trie2.Range range; - while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { - final int norm16=range.value; + int start = 0; + 
CodePointMap.Range range = new CodePointMap.Range(); + while (normTrie.getRange(start, CodePointMap.RangeOption.FIXED_LEAD_SURROGATES, INERT, + null, range)) { + final int end = range.getEnd(); + final int norm16 = range.getValue(); if(isInert(norm16) || (minYesNo<=norm16 && norm16=minNoNo) { while((mapping+=Character.charCount(c2))>1; if((compositeAndFwd&1)!=0) { - addComposites(getCompositionsListForComposite(getNorm16(composite)), set); + addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set); } set.add(composite); } while((firstUnit&COMP_1_LAST_TUPLE)==0); @@ -2045,7 +2056,7 @@ public final class Normalizer2Impl { // Is the composite a starter that combines forward? if((compositeAndFwd&1)!=0) { compositionsList= - getCompositionsListForComposite(getNorm16(composite)); + getCompositionsListForComposite(getRawNorm16(composite)); } else { compositionsList=-1; } @@ -2083,7 +2094,7 @@ public final class Normalizer2Impl { } public int composePair(int a, int b) { - int norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16=0 + int norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16 int list; if(isInert(norm16)) { return -1; @@ -2220,19 +2231,19 @@ public final class Normalizer2Impl { return getFCD16(Character.codePointBefore(s, p)); } - private void addToStartSet(Trie2Writable newData, int origin, int decompLead) { - int canonValue=newData.get(decompLead); + private void addToStartSet(MutableCodePointTrie mutableTrie, int origin, int decompLead) { + int canonValue = mutableTrie.get(decompLead); if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) { // origin is the first character whose decomposition starts with // the character for which we are setting the value. - newData.set(decompLead, canonValue|origin); + mutableTrie.set(decompLead, canonValue|origin); } else { // origin is not the first character, or it is U+0000. 
UnicodeSet set; if((canonValue&CANON_HAS_SET)==0) { int firstOrigin=canonValue&CANON_VALUE_MASK; canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|canonStartSets.size(); - newData.set(decompLead, canonValue); + mutableTrie.set(decompLead, canonValue); canonStartSets.add(set=new UnicodeSet()); if(firstOrigin!=0) { set.add(firstOrigin); @@ -2263,12 +2274,12 @@ public final class Normalizer2Impl { private int centerNoNoDelta; private int minMaybeYes; - private Trie2_16 normTrie; + private CodePointTrie.Fast16 normTrie; private String maybeYesCompositions; private String extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters private byte[] smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0 - private Trie2_32 canonIterData; + private CodePointTrie canonIterData; private ArrayList canonStartSets; // bits in canonIterData diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java index eccf638338a..c830bc3c75e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java @@ -10,6 +10,7 @@ package com.ibm.icu.impl; import java.util.EnumSet; +import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UCharacterCategory; import com.ibm.icu.lang.UCharacterDirection; @@ -223,19 +224,31 @@ public final class UTS46 extends IDNA { promoteAndResetLabelErrors(info); destLength+=newLength-labelLength; labelLimit=labelStart+=newLength+1; - } else if(0xdf<=c && c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) { + continue; + } else if(c<0xdf) { + // pass + } else if(c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) { setTransitionalDifferent(info); if(doMapDevChars) { destLength=mapDevChars(dest, labelStart, labelLimit); - // Do not increment labelLimit in case c was removed. 
// All deviation characters have been mapped, no need to check for them again. doMapDevChars=false; - } else { - ++labelLimit; + // Do not increment labelLimit in case c was removed. + continue; + } + } else if(Character.isSurrogate(c)) { + if(UTF16Plus.isSurrogateLead(c) ? + (labelLimit+1)==destLength || + !Character.isLowSurrogate(dest.charAt(labelLimit+1)) : + labelLimit==labelStart || + !Character.isHighSurrogate(dest.charAt(labelLimit-1))) { + // Map an unpaired surrogate to U+FFFD before normalization so that when + // that removes characters we do not turn two unpaired ones into a pair. + addLabelError(info, Error.DISALLOWED); + dest.setCharAt(labelLimit, '\ufffd'); } - } else { - ++labelLimit; } + ++labelLimit; } // Permit an empty label at the end (0 { + /** + * Selectors for how getRange() should report value ranges overlapping with surrogates. + * Most users should use NORMAL. + * + * @see #getRange + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public enum RangeOption { + /** + * getRange() enumerates all same-value ranges as stored in the trie. + * Most users should use this option. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + NORMAL, + /** + * getRange() enumerates all same-value ranges as stored in the trie, + * except that lead surrogates (U+D800..U+DBFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See {@link Character#isHighSurrogate}. + * + *

<p>Most users should use NORMAL instead. + * + *

<p>This option is useful for tries that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + FIXED_LEAD_SURROGATES, + /** + * getRange() enumerates all same-value ranges as stored in the trie, + * except that all surrogates (U+D800..U+DFFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See {@link Character#isSurrogate}. + * + *

<p>Most users should use NORMAL instead. + * + *

<p>This option is useful for tries that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + FIXED_ALL_SURROGATES + } + + /** + * Callback function interface: Modifies a trie value. + * Optionally called by getRange(). + * The modified value will be returned by the getRange() function. + * + *

Can be used to ignore some of the value bits, + * make a filter for one of several values, + * return a value index computed from the trie value, etc. + * + * @see #getRange + * @see #iterator + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public interface ValueFilter { + /** + * Modifies the trie value. + * + * @param value trie value + * @return modified value + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public int apply(int value); + } + + /** + * Range iteration result data. + * Code points from start to end map to the same value. + * The value may have been modified by {@link ValueFilter#apply(int)}, + * or it may be the surrogateValue if a RangeOption other than "normal" was used. + * + * @see #getRange + * @see #iterator + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static final class Range { + private int start; + private int end; + private int value; + + /** + * Constructor. Sets start and end to -1 and value to 0. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public Range() { + start = end = -1; + value = 0; + } + + /** + * @return the start code point + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public int getStart() { return start; } + /** + * @return the (inclusive) end code point + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public int getEnd() { return end; } + /** + * @return the range value + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public int getValue() { return value; } + /** + * Sets the range. When using {@link #iterator()}, + * iteration will resume after the newly set end. 
+ * + * @param start new start code point + * @param end new end code point + * @param value new value + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public void set(int start, int end, int value) { + this.start = start; + this.end = end; + this.value = value; + } + } + + private final class RangeIterator implements Iterator { + private Range range = new Range(); + + @Override + public boolean hasNext() { + return -1 <= range.end && range.end < 0x10ffff; + } + + @Override + public Range next() { + if (getRange(range.end + 1, null, range)) { + return range; + } else { + throw new NoSuchElementException(); + } + } + + @Override + public final void remove() { + throw new UnsupportedOperationException(); + } + } + + /** + * Iterates over code points of a string and fetches trie values. + * This does not implement java.util.Iterator. + * + *

+     * void onString(CodePointMap map, CharSequence s, int start) {
+     *     CodePointMap.StringIterator iter = map.stringIterator(s, start);
+     *     while (iter.next()) {
+     *         int end = iter.getIndex();  // code point from between start and end
+     *         useValue(s, start, end, iter.getCodePoint(), iter.getValue());
+     *         start = end;
+     *     }
+     * }
+     * 
+ * + *

This class is not intended for public subclassing. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public class StringIterator { + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected CharSequence s; + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected int sIndex; + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected int c; + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected int value; + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected StringIterator(CharSequence s, int sIndex) { + this.s = s; + this.sIndex = sIndex; + c = -1; + value = 0; + } + + /** + * Resets the iterator to a new string and/or a new string index. + * + * @param s string to iterate over + * @param sIndex string index where the iteration will start + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public void reset(CharSequence s, int sIndex) { + this.s = s; + this.sIndex = sIndex; + c = -1; + value = 0; + } + + /** + * Reads the next code point, post-increments the string index, + * and gets a value from the trie. + * Sets the trie error value if the code point is an unpaired surrogate. + * + * @return true if the string index was not yet at the end of the string; + * otherwise the iterator did not advance + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public boolean next() { + if (sIndex >= s.length()) { + return false; + } + c = Character.codePointAt(s, sIndex); + sIndex += Character.charCount(c); + value = get(c); + return true; + } + + /** + * Reads the previous code point, pre-decrements the string index, + * and gets a value from the trie. + * Sets the trie error value if the code point is an unpaired surrogate. 
+ * + * @return true if the string index was not yet at the start of the string; + * otherwise the iterator did not advance + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public boolean previous() { + if (sIndex <= 0) { + return false; + } + c = Character.codePointBefore(s, sIndex); + sIndex -= Character.charCount(c); + value = get(c); + return true; + } + /** + * @return the string index + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public final int getIndex() { return sIndex; } + /** + * @return the code point + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public final int getCodePoint() { return c; } + /** + * @return the trie value, + * or the trie error value if the code point is an unpaired surrogate + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public final int getValue() { return value; } + } + + /** + * Returns the value for a code point as stored in the trie, with range checking. + * Returns the trie error value if c is not in the range 0..U+10FFFF. + * + * @param c the code point + * @return the trie value, + * or the trie error value if the code point is not in the range 0..U+10FFFF + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public abstract int get(int c); + + /** + * Sets the range object to a range of code points beginning with the start parameter. + * The range end is the last code point such that + * all those from start to there have the same value. + * Returns false if start is not 0..U+10FFFF. + * Can be used to efficiently iterate over all same-value ranges in a trie. + * + *

If the {@link ValueFilter} parameter is not null, then + * the value to be delivered is passed through that filter, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that parameter is null. + * + *

Example: + *

+     * int start = 0;
+     * CodePointMap.Range range = new CodePointMap.Range();
+     * while (trie.getRange(start, null, range)) {
+     *     int end = range.getEnd();
+     *     int value = range.getValue();
+     *     // Work with the range start..end and its value.
+     *     start = end + 1;
+     * }
+     * 
+ * + * @param start range start + * @param filter an object that may modify the trie data value, + * or null if the values from the trie are to be used unmodified + * @param range the range object that will be set to the code point range and value + * @return true if start is 0..U+10FFFF; otherwise no new range is fetched + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public abstract boolean getRange(int start, ValueFilter filter, Range range); + + /** + * Sets the range object to a range of code points beginning with the start parameter. + * The range end is the last code point such that + * all those from start to there have the same value. + * Returns false if start is not 0..U+10FFFF. + * + *

Same as the simpler {@link #getRange(int, ValueFilter, Range)} but optionally + * modifies the range if it overlaps with surrogate code points. + * + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually {@link RangeOption#NORMAL} + * @param surrogateValue value for surrogates; ignored if option=={@link RangeOption#NORMAL} + * @param filter an object that may modify the trie data value, + * or null if the values from the trie are to be used unmodified + * @param range the range object that will be set to the code point range and value + * @return true if start is 0..U+10FFFF; otherwise no new range is fetched + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public boolean getRange(int start, RangeOption option, int surrogateValue, + ValueFilter filter, Range range) { + assert option != null; + if (!getRange(start, filter, range)) { + return false; + } + if (option == RangeOption.NORMAL) { + return true; + } + int surrEnd = option == RangeOption.FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff; + int end = range.end; + if (end < 0xd7ff || start > surrEnd) { + return true; + } + // The range overlaps with surrogates, or ends just before the first one. + if (range.value == surrogateValue) { + if (end >= surrEnd) { + // Surrogates followed by a non-surrValue range, + // or surrogates are part of a larger surrValue range. + return true; + } + } else { + if (start <= 0xd7ff) { + range.end = 0xd7ff; // Non-surrValue range ends before surrValue surrogates. + return true; + } + // Start is a surrogate with a non-surrValue code *unit* value. + // Return a surrValue code *point* range. + range.value = surrogateValue; + if (end > surrEnd) { + range.end = surrEnd; // Surrogate range ends before non-surrValue rest of range. + return true; + } + } + // See if the surrValue surrogate range can be merged with + // an immediately following range.
+ if (getRange(surrEnd + 1, filter, range) && range.value == surrogateValue) { + range.start = start; + return true; + } + range.start = start; + range.end = surrEnd; + range.value = surrogateValue; + return true; + } + + /** + * Convenience iterator over same-trie-value code point ranges. + * Same as looping over all ranges with {@link #getRange(int, ValueFilter, Range)} + * without filtering. + * Adjacent ranges have different trie values. + * + *

The iterator always returns the same Range object. + * + * @return a Range iterator + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public Iterator iterator() { + return new RangeIterator(); + } + + /** + * Returns an iterator (not a java.util.Iterator) over code points of a string + * for fetching trie values. + * + * @param s string to iterate over + * @param sIndex string index where the iteration will start + * @return the iterator + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public StringIterator stringIterator(CharSequence s, int sIndex) { + return new StringIterator(s, sIndex); + } +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointTrie.java new file mode 100644 index 00000000000..71de6fc76d0 --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointTrie.java @@ -0,0 +1,1271 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// created: 2018may04 Markus W. Scherer + +package com.ibm.icu.util; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import com.ibm.icu.impl.ICUBinary; +import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus; + +/** + * Immutable Unicode code point trie. + * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values. + * For details see http://site.icu-project.org/design/struct/utrie + * + *

This class is not intended for public subclassing. + * + * @see MutableCodePointTrie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ +public abstract class CodePointTrie extends CodePointMap { + /** + * Selectors for the type of a CodePointTrie. + * Different trade-offs for size vs. speed. + * + *

Use null for {@link #fromBinary} to accept any type; + * {@link #getType} will return the actual type. + * + * @see MutableCodePointTrie#buildImmutable(Type, ValueWidth) + * @see #fromBinary + * @see #getType + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public enum Type { + /** + * Fast/simple/larger BMP data structure. + * The {@link Fast} subclasses have additional functions for lookup for BMP and supplementary code points. + * + * @see Fast + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + FAST, + /** + * Small/slower BMP data structure. + * + * @see Small + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + SMALL + } + + /** + * Selectors for the number of bits in a CodePointTrie data value. + * + *

Use null for {@link #fromBinary} to accept any data value width; + * {@link #getValueWidth} will return the actual data value width. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public enum ValueWidth { + /** + * 16 bits per CodePointTrie data value. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + BITS_16, + /** + * 32 bits per CodePointTrie data value. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + BITS_32, + /** + * 8 bits per CodePointTrie data value. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + BITS_8 + } + + private CodePointTrie(char[] index, Data data, int highStart, + int index3NullOffset, int dataNullOffset) { + this.ascii = new int[ASCII_LIMIT]; + this.index = index; + this.data = data; + this.dataLength = data.getDataLength(); + this.highStart = highStart; + this.index3NullOffset = index3NullOffset; + this.dataNullOffset = dataNullOffset; + + for (int c = 0; c < ASCII_LIMIT; ++c) { + ascii[c] = data.getFromIndex(c); + } + + int nullValueOffset = dataNullOffset; + if (nullValueOffset >= dataLength) { + nullValueOffset = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; + } + nullValue = data.getFromIndex(nullValueOffset); + } + + /** + * Creates a trie from its binary form, + * stored in the ByteBuffer starting at the current position. + * Advances the buffer position to just after the trie data. + * Inverse of {@link #toBinary(OutputStream)}. + * + *

The data is copied from the buffer; + * later modification of the buffer will not affect the trie. + * + * @param type selects the trie type; this method throws an exception + * if the type does not match the binary data; + * use null to accept any type + * @param valueWidth selects the number of bits in a data value; this method throws an exception + * if the valueWidth does not match the binary data; + * use null to accept any data value width + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @see MutableCodePointTrie#MutableCodePointTrie(int, int) + * @see MutableCodePointTrie#buildImmutable(Type, ValueWidth) + * @see #toBinary(OutputStream) + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static CodePointTrie fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes) { + ByteOrder outerByteOrder = bytes.order(); + try { + // Enough data for a trie header? + if (bytes.remaining() < 16 /* sizeof(UCPTrieHeader) */) { + throw new ICUUncheckedIOException("Buffer too short for a CodePointTrie header"); + } + + // struct UCPTrieHeader + /** "Tri3" in big-endian US-ASCII (0x54726933) */ + int signature = bytes.getInt(); + + // Check the signature. + switch (signature) { + case 0x54726933: + // The buffer is already set to the trie data byte order. + break; + case 0x33697254: + // Temporarily reverse the byte order. + boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN; + bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN); + signature = 0x54726933; + break; + default: + throw new ICUUncheckedIOException("Buffer does not contain a serialized CodePointTrie"); + } + + // struct UCPTrieHeader continued + /** + * Options bit field: + * Bits 15..12: Data length bits 19..16. + * Bits 11..8: Data null block offset bits 19..16. + * Bits 7..6: UCPTrieType + * Bits 5..3: Reserved (0). 
+ * Bits 2..0: UCPTrieValueWidth + */ + int options = bytes.getChar(); + + /** Total length of the index tables. */ + int indexLength = bytes.getChar(); + + /** Data length bits 15..0. */ + int dataLength = bytes.getChar(); + + /** Index-3 null block offset, 0x7fff or 0xffff if none. */ + int index3NullOffset = bytes.getChar(); + + /** Data null block offset bits 15..0, 0xfffff if none. */ + int dataNullOffset = bytes.getChar(); + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by SHIFT_2. + */ + int shiftedHighStart = bytes.getChar(); + // struct UCPTrieHeader end + + int typeInt = (options >> 6) & 3; + Type actualType; + switch (typeInt) { + case 0: actualType = Type.FAST; break; + case 1: actualType = Type.SMALL; break; + default: + throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported type"); + } + + int valueWidthInt = options & OPTIONS_VALUE_BITS_MASK; + ValueWidth actualValueWidth; + switch (valueWidthInt) { + case 0: actualValueWidth = ValueWidth.BITS_16; break; + case 1: actualValueWidth = ValueWidth.BITS_32; break; + case 2: actualValueWidth = ValueWidth.BITS_8; break; + default: + throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported value width"); + } + + if ((options & OPTIONS_RESERVED_MASK) != 0) { + throw new ICUUncheckedIOException("CodePointTrie data header has unsupported options"); + } + + if (type == null) { + type = actualType; + } + if (valueWidth == null) { + valueWidth = actualValueWidth; + } + if (type != actualType || valueWidth != actualValueWidth) { + throw new ICUUncheckedIOException("CodePointTrie data header has a different type or value width than required"); + } + + // Get the length values and offsets. 
+ dataLength |= ((options & OPTIONS_DATA_LENGTH_MASK) << 4); + dataNullOffset |= ((options & OPTIONS_DATA_NULL_OFFSET_MASK) << 8); + + int highStart = shiftedHighStart << SHIFT_2; + + // Calculate the actual length, minus the header. + int actualLength = indexLength * 2; + if (valueWidth == ValueWidth.BITS_16) { + actualLength += dataLength * 2; + } else if (valueWidth == ValueWidth.BITS_32) { + actualLength += dataLength * 4; + } else { + actualLength += dataLength; + } + if (bytes.remaining() < actualLength) { + throw new ICUUncheckedIOException("Buffer too short for the CodePointTrie data"); + } + + char[] index = ICUBinary.getChars(bytes, indexLength, 0); + switch (valueWidth) { + case BITS_16: { + char[] data16 = ICUBinary.getChars(bytes, dataLength, 0); + return type == Type.FAST ? + new Fast16(index, data16, highStart, index3NullOffset, dataNullOffset) : + new Small16(index, data16, highStart, index3NullOffset, dataNullOffset); + } + case BITS_32: { + int[] data32 = ICUBinary.getInts(bytes, dataLength, 0); + return type == Type.FAST ? + new Fast32(index, data32, highStart, index3NullOffset, dataNullOffset) : + new Small32(index, data32, highStart, index3NullOffset, dataNullOffset); + } + case BITS_8: { + byte[] data8 = ICUBinary.getBytes(bytes, dataLength, 0); + return type == Type.FAST ? + new Fast8(index, data8, highStart, index3NullOffset, dataNullOffset) : + new Small8(index, data8, highStart, index3NullOffset, dataNullOffset); + } + default: + throw new AssertionError("should be unreachable"); + } + } finally { + bytes.order(outerByteOrder); + } + } + + /** + * Returns the trie type. + * + * @return the trie type + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public abstract Type getType(); + /** + * Returns the number of bits in a trie data value. + * + * @return the number of bits in a trie data value + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. 
+ */ + public final ValueWidth getValueWidth() { return data.getValueWidth(); } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public int get(int c) { + return data.getFromIndex(cpIndex(c)); + } + + /** + * Returns a trie value for an ASCII code point, without range checking. + * + * @param c the input code point; must be U+0000..U+007F + * @return The ASCII code point's trie value. + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public final int asciiGet(int c) { + return ascii[c]; + } + + private static final int MAX_UNICODE = 0x10ffff; + + private static final int ASCII_LIMIT = 0x80; + + private static final int maybeFilterValue(int value, int trieNullValue, int nullValue, + ValueFilter filter) { + if (value == trieNullValue) { + value = nullValue; + } else if (filter != null) { + value = filter.apply(value); + } + return value; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final boolean getRange(int start, ValueFilter filter, Range range) { + if (start < 0 || MAX_UNICODE < start) { + return false; + } + if (start >= highStart) { + int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; + int value = data.getFromIndex(di); + if (filter != null) { value = filter.apply(value); } + range.set(start, MAX_UNICODE, value); + return true; + } + + int nullValue = this.nullValue; + if (filter != null) { nullValue = filter.apply(nullValue); } + Type type = getType(); + + int prevI3Block = -1; + int prevBlock = -1; + int c = start; + int value = 0; // Initialize to make compiler happy. Real value when haveValue is true. 
+ boolean haveValue = false; + do { + int i3Block; + int i3; + int i3BlockLength; + int dataBlockLength; + if (c <= 0xffff && (type == Type.FAST || c <= SMALL_MAX)) { + i3Block = 0; + i3 = c >> FAST_SHIFT; + i3BlockLength = type == Type.FAST ? BMP_INDEX_LENGTH : SMALL_INDEX_LENGTH; + dataBlockLength = FAST_DATA_BLOCK_LENGTH; + } else { + // Use the multi-stage index. + int i1 = c >> SHIFT_1; + if (type == Type.FAST) { + assert(0xffff < c && c < highStart); + i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH; + } else { + assert(c < highStart && highStart > SMALL_LIMIT); + i1 += SMALL_INDEX_LENGTH; + } + i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)]; + if (i3Block == prevI3Block && (c - start) >= CP_PER_INDEX_2_ENTRY) { + // The index-3 block is the same as the previous one, and filled with value. + assert((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0); + c += CP_PER_INDEX_2_ENTRY; + continue; + } + prevI3Block = i3Block; + if (i3Block == index3NullOffset) { + // This is the index-3 null block. + if (haveValue) { + if (nullValue != value) { + range.set(start, c - 1, value); + return true; + } + } else { + value = nullValue; + haveValue = true; + } + prevBlock = dataNullOffset; + c = (c + CP_PER_INDEX_2_ENTRY) & ~(CP_PER_INDEX_2_ENTRY - 1); + continue; + } + i3 = (c >> SHIFT_3) & INDEX_3_MASK; + i3BlockLength = INDEX_3_BLOCK_LENGTH; + dataBlockLength = SMALL_DATA_BLOCK_LENGTH; + } + // Enumerate data blocks for one index-3 block. + do { + int block; + if ((i3Block & 0x8000) == 0) { + block = index[i3Block + i3]; + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); + int gi = i3 & 7; + block = (index[group++] << (2 + (2 * gi))) & 0x30000; + block |= index[group + gi]; + } + if (block == prevBlock && (c - start) >= dataBlockLength) { + // The block is the same as the previous one, and filled with value. 
+ assert((c & (dataBlockLength - 1)) == 0); + c += dataBlockLength; + } else { + int dataMask = dataBlockLength - 1; + prevBlock = block; + if (block == dataNullOffset) { + // This is the data null block. + if (haveValue) { + if (nullValue != value) { + range.set(start, c - 1, value); + return true; + } + } else { + value = nullValue; + haveValue = true; + } + c = (c + dataBlockLength) & ~dataMask; + } else { + int di = block + (c & dataMask); + int value2 = data.getFromIndex(di); + value2 = maybeFilterValue(value2, this.nullValue, nullValue, filter); + if (haveValue) { + if (value2 != value) { + range.set(start, c - 1, value); + return true; + } + } else { + value = value2; + haveValue = true; + } + while ((++c & dataMask) != 0) { + if (maybeFilterValue(data.getFromIndex(++di), + this.nullValue, nullValue, + filter) != value) { + range.set(start, c - 1, value); + return true; + } + } + } + } + } while (++i3 < i3BlockLength); + } while (c < highStart); + assert(haveValue); + int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; + int highValue = data.getFromIndex(di); + if (maybeFilterValue(highValue, this.nullValue, nullValue, filter) != value) { + --c; + } else { + c = MAX_UNICODE; + } + range.set(start, c, value); + return true; + } + + /** + * Writes a representation of the trie to the output stream. + * Inverse of {@link #fromBinary}. + * + * @param os the output stream + * @return the number of bytes written + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. 
+ */ + public final int toBinary(OutputStream os) { + try { + DataOutputStream dos = new DataOutputStream(os); + + // Write the UCPTrieHeader + dos.writeInt(0x54726933); // signature="Tri3" + dos.writeChar( // options + ((dataLength & 0xf0000) >> 4) | + ((dataNullOffset & 0xf0000) >> 8) | + (getType().ordinal() << 6) | + getValueWidth().ordinal()); + dos.writeChar(index.length); + dos.writeChar(dataLength); + dos.writeChar(index3NullOffset); + dos.writeChar(dataNullOffset); + dos.writeChar(highStart >> SHIFT_2); // shiftedHighStart + int length = 16; // sizeof(UCPTrieHeader) + + for (char i : index) { dos.writeChar(i); } + length += index.length * 2; + length += data.write(dos); + return length; + } catch (IOException e) { + throw new ICUUncheckedIOException(e); + } + } + + /** @internal */ + static final int FAST_SHIFT = 6; + + /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ + static final int FAST_DATA_BLOCK_LENGTH = 1 << FAST_SHIFT; + + /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ + private static final int FAST_DATA_MASK = FAST_DATA_BLOCK_LENGTH - 1; + + /** @internal */ + private static final int SMALL_MAX = 0xfff; + + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for out-of-range code points and ill-formed UTF-8/16. + * @internal + */ + private static final int ERROR_VALUE_NEG_DATA_OFFSET = 1; + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for code points highStart..U+10FFFF. + * @internal + */ + private static final int HIGH_VALUE_NEG_DATA_OFFSET = 2; + + // ucptrie_impl.h + + /** The length of the BMP index table. 1024=0x400 */ + private static final int BMP_INDEX_LENGTH = 0x10000 >> FAST_SHIFT; + + static final int SMALL_LIMIT = 0x1000; + private static final int SMALL_INDEX_LENGTH = SMALL_LIMIT >> FAST_SHIFT; + + /** Shift size for getting the index-3 table offset. 
*/ + static final int SHIFT_3 = 4; + + /** Shift size for getting the index-2 table offset. */ + private static final int SHIFT_2 = 5 + SHIFT_3; + + /** Shift size for getting the index-1 table offset. */ + private static final int SHIFT_1 = 5 + SHIFT_2; + + /** + * Difference between two shift sizes, + * for getting an index-2 offset from an index-3 offset. 5=9-4 + */ + static final int SHIFT_2_3 = SHIFT_2 - SHIFT_3; + + /** + * Difference between two shift sizes, + * for getting an index-1 offset from an index-2 offset. 5=14-9 + */ + static final int SHIFT_1_2 = SHIFT_1 - SHIFT_2; + + /** + * Number of index-1 entries for the BMP. (4) + * This part of the index-1 table is omitted from the serialized form. + */ + private static final int OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1; + + /** Number of entries in an index-2 block. 32=0x20 */ + static final int INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2; + + /** Mask for getting the lower bits for the in-index-2-block offset. */ + static final int INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1; + + /** Number of code points per index-2 table entry. 512=0x200 */ + static final int CP_PER_INDEX_2_ENTRY = 1 << SHIFT_2; + + /** Number of entries in an index-3 block. 32=0x20 */ + static final int INDEX_3_BLOCK_LENGTH = 1 << SHIFT_2_3; + + /** Mask for getting the lower bits for the in-index-3-block offset. */ + private static final int INDEX_3_MASK = INDEX_3_BLOCK_LENGTH - 1; + + /** Number of entries in a small data block. 16=0x10 */ + static final int SMALL_DATA_BLOCK_LENGTH = 1 << SHIFT_3; + + /** Mask for getting the lower bits for the in-small-data-block offset. */ + static final int SMALL_DATA_MASK = SMALL_DATA_BLOCK_LENGTH - 1; + + // ucptrie_impl.h: Constants for use with UCPTrieHeader.options. 
+ private static final int OPTIONS_DATA_LENGTH_MASK = 0xf000; + private static final int OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00; + private static final int OPTIONS_RESERVED_MASK = 0x38; + private static final int OPTIONS_VALUE_BITS_MASK = 7; + /** + * Value for index3NullOffset which indicates that there is no index-3 null block. + * Bit 15 is unused for this value because this bit is used if the index-3 contains + * 18-bit indexes. + */ + static final int NO_INDEX3_NULL_OFFSET = 0x7fff; + static final int NO_DATA_NULL_OFFSET = 0xfffff; + + private static abstract class Data { + abstract ValueWidth getValueWidth(); + abstract int getDataLength(); + abstract int getFromIndex(int index); + abstract int write(DataOutputStream dos) throws IOException; + } + + private static final class Data16 extends Data { + char[] array; + Data16(char[] a) { array = a; } + @Override ValueWidth getValueWidth() { return ValueWidth.BITS_16; } + @Override int getDataLength() { return array.length; } + @Override int getFromIndex(int index) { return array[index]; } + @Override int write(DataOutputStream dos) throws IOException { + for (char v : array) { dos.writeChar(v); } + return array.length * 2; + } + } + + private static final class Data32 extends Data { + int[] array; + Data32(int[] a) { array = a; } + @Override ValueWidth getValueWidth() { return ValueWidth.BITS_32; } + @Override int getDataLength() { return array.length; } + @Override int getFromIndex(int index) { return array[index]; } + @Override int write(DataOutputStream dos) throws IOException { + for (int v : array) { dos.writeInt(v); } + return array.length * 4; + } + } + + private static final class Data8 extends Data { + byte[] array; + Data8(byte[] a) { array = a; } + @Override ValueWidth getValueWidth() { return ValueWidth.BITS_8; } + @Override int getDataLength() { return array.length; } + @Override int getFromIndex(int index) { return array[index] & 0xff; } + @Override int write(DataOutputStream dos) throws IOException 
{ + for (byte v : array) { dos.writeByte(v); } + return array.length; + } + } + + /** @internal */ + private final int[] ascii; + + /** @internal */ + private final char[] index; + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected final Data data; + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected final int dataLength; + /** + * Start of the last range which ends at U+10FFFF. + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected final int highStart; + + /** + * Internal index-3 null block offset. + * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. + * @internal + */ + private final int index3NullOffset; + /** + * Internal data null block offset, not shifted. + * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. + * @internal + */ + private final int dataNullOffset; + /** @internal */ + private final int nullValue; + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected final int fastIndex(int c) { + return index[c >> FAST_SHIFT] + (c & FAST_DATA_MASK); + } + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected final int smallIndex(Type type, int c) { + // Split into two methods to make this part inline-friendly. + // In C, this part is a macro. 
+ if (c >= highStart) { + return dataLength - HIGH_VALUE_NEG_DATA_OFFSET; + } + return internalSmallIndex(type, c); + } + + private final int internalSmallIndex(Type type, int c) { + int i1 = c >> SHIFT_1; + if (type == Type.FAST) { + assert(0xffff < c && c < highStart); + i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH; + } else { + assert(0 <= c && c < highStart && highStart > SMALL_LIMIT); + i1 += SMALL_INDEX_LENGTH; + } + int i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)]; + int i3 = (c >> SHIFT_3) & INDEX_3_MASK; + int dataBlock; + if ((i3Block & 0x8000) == 0) { + // 16-bit indexes + dataBlock = index[i3Block + i3]; + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); + i3 &= 7; + dataBlock = (index[i3Block++] << (2 + (2 * i3))) & 0x30000; + dataBlock |= index[i3Block + i3]; + } + return dataBlock + (c & SMALL_DATA_MASK); + } + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected abstract int cpIndex(int c); + + /** + * A CodePointTrie with {@link Type#FAST}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static abstract class Fast extends CodePointTrie { + private Fast(char[] index, Data data, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, data, highStart, index3NullOffset, dataNullOffset); + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@link Type#FAST}. + * + * @param valueWidth selects the number of bits in a data value; this method throws an exception + * if the valueWidth does not match the binary data; + * use null to accept any data value width + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release.
+ */ + public static Fast fromBinary(ValueWidth valueWidth, ByteBuffer bytes) { + return (Fast) CodePointTrie.fromBinary(Type.FAST, valueWidth, bytes); + } + + /** + * @return {@link Type#FAST} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final Type getType() { return Type.FAST; } + + /** + * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. + * Can be used to look up a value for a UTF-16 code unit if other parts of + * the string processing check for surrogates. + * + * @param c the input code point, must be U+0000..U+FFFF + * @return The BMP code point's trie value. + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public abstract int bmpGet(int c); + + /** + * Returns a trie value for a supplementary code point (U+10000..U+10FFFF), + * without range checking. + * + * @param c the input code point, must be U+10000..U+10FFFF + * @return The supplementary code point's trie value. + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public abstract int suppGet(int c); + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + @Override + protected final int cpIndex(int c) { + if (c >= 0) { + if (c <= 0xffff) { + return fastIndex(c); + } else if (c <= 0x10ffff) { + return smallIndex(Type.FAST, c); + } + } + return dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release.
+ */ + @Override + public final StringIterator stringIterator(CharSequence s, int sIndex) { + return new FastStringIterator(s, sIndex); + } + + private final class FastStringIterator extends StringIterator { + private FastStringIterator(CharSequence s, int sIndex) { + super(s, sIndex); + } + + @Override + public boolean next() { + if (sIndex >= s.length()) { + return false; + } + char lead = s.charAt(sIndex++); + c = lead; + int dataIndex; + if (!Character.isSurrogate(lead)) { + dataIndex = fastIndex(c); + } else { + char trail; + if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() && + Character.isLowSurrogate(trail = s.charAt(sIndex))) { + ++sIndex; + c = Character.toCodePoint(lead, trail); + dataIndex = smallIndex(Type.FAST, c); + } else { + dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + } + value = data.getFromIndex(dataIndex); + return true; + } + + @Override + public boolean previous() { + if (sIndex <= 0) { + return false; + } + char trail = s.charAt(--sIndex); + c = trail; + int dataIndex; + if (!Character.isSurrogate(trail)) { + dataIndex = fastIndex(c); + } else { + char lead; + if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 && + Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) { + --sIndex; + c = Character.toCodePoint(lead, trail); + dataIndex = smallIndex(Type.FAST, c); + } else { + dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + } + value = data.getFromIndex(dataIndex); + return true; + } + } + } + + /** + * A CodePointTrie with {@link Type#SMALL}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static abstract class Small extends CodePointTrie { + private Small(char[] index, Data data, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, data, highStart, index3NullOffset, dataNullOffset); + } + + /** + * Creates a trie from its binary form.
+ * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@link Type#SMALL}. + * + * @param valueWidth selects the number of bits in a data value; this method throws an exception + * if the valueWidth does not match the binary data; + * use null to accept any data value width + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Small fromBinary(ValueWidth valueWidth, ByteBuffer bytes) { + return (Small) CodePointTrie.fromBinary(Type.SMALL, valueWidth, bytes); + } + + /** + * @return {@link Type#SMALL} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final Type getType() { return Type.SMALL; } + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + @Override + protected final int cpIndex(int c) { + if (c >= 0) { + if (c <= SMALL_MAX) { + return fastIndex(c); + } else if (c <= 0x10ffff) { + return smallIndex(Type.SMALL, c); + } + } + return dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release.
+ */ + @Override + public final StringIterator stringIterator(CharSequence s, int sIndex) { + return new SmallStringIterator(s, sIndex); + } + + private final class SmallStringIterator extends StringIterator { + private SmallStringIterator(CharSequence s, int sIndex) { + super(s, sIndex); + } + + @Override + public boolean next() { + if (sIndex >= s.length()) { + return false; + } + char lead = s.charAt(sIndex++); + c = lead; + int dataIndex; + if (!Character.isSurrogate(lead)) { + dataIndex = cpIndex(c); + } else { + char trail; + if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() && + Character.isLowSurrogate(trail = s.charAt(sIndex))) { + ++sIndex; + c = Character.toCodePoint(lead, trail); + dataIndex = smallIndex(Type.SMALL, c); + } else { + dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + } + value = data.getFromIndex(dataIndex); + return true; + } + + @Override + public boolean previous() { + if (sIndex <= 0) { + return false; + } + char trail = s.charAt(--sIndex); + c = trail; + int dataIndex; + if (!Character.isSurrogate(trail)) { + dataIndex = cpIndex(c); + } else { + char lead; + if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 && + Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) { + --sIndex; + c = Character.toCodePoint(lead, trail); + dataIndex = smallIndex(Type.SMALL, c); + } else { + dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + } + value = data.getFromIndex(dataIndex); + return true; + } + } + } + + /** + * A CodePointTrie with {@value Type#FAST} and {@value ValueWidth#BITS_16}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. 
+ */ + public static final class Fast16 extends Fast { + private final char[] dataArray; + + Fast16(char[] index, char[] data16, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset); + this.dataArray = data16; + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@value Type#FAST} and {@value ValueWidth#BITS_16}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Fast16 fromBinary(ByteBuffer bytes) { + return (Fast16) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_16, bytes); + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int get(int c) { + return dataArray[cpIndex(c)]; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int bmpGet(int c) { + assert 0 <= c && c <= 0xffff; + return dataArray[fastIndex(c)]; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int suppGet(int c) { + assert 0x10000 <= c && c <= 0x10ffff; + return dataArray[smallIndex(Type.FAST, c)]; + } + } + + /** + * A CodePointTrie with {@value Type#FAST} and {@value ValueWidth#BITS_32}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. 
+ */ + public static final class Fast32 extends Fast { + private final int[] dataArray; + + Fast32(char[] index, int[] data32, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset); + this.dataArray = data32; + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@value Type#FAST} and {@value ValueWidth#BITS_32}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Fast32 fromBinary(ByteBuffer bytes) { + return (Fast32) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_32, bytes); + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int get(int c) { + return dataArray[cpIndex(c)]; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int bmpGet(int c) { + assert 0 <= c && c <= 0xffff; + return dataArray[fastIndex(c)]; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int suppGet(int c) { + assert 0x10000 <= c && c <= 0x10ffff; + return dataArray[smallIndex(Type.FAST, c)]; + } + } + + /** + * A CodePointTrie with {@value Type#FAST} and {@value ValueWidth#BITS_8}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. 
+ */ + public static final class Fast8 extends Fast { + private final byte[] dataArray; + + Fast8(char[] index, byte[] data8, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset); + this.dataArray = data8; + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@value Type#FAST} and {@value ValueWidth#BITS_8}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Fast8 fromBinary(ByteBuffer bytes) { + return (Fast8) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_8, bytes); + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int get(int c) { + return dataArray[cpIndex(c)] & 0xff; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int bmpGet(int c) { + assert 0 <= c && c <= 0xffff; + return dataArray[fastIndex(c)] & 0xff; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int suppGet(int c) { + assert 0x10000 <= c && c <= 0x10ffff; + return dataArray[smallIndex(Type.FAST, c)] & 0xff; + } + } + + /** + * A CodePointTrie with {@value Type#SMALL} and {@value ValueWidth#BITS_16}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. 
+ */ + public static final class Small16 extends Small { + Small16(char[] index, char[] data16, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset); + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@value Type#SMALL} and {@value ValueWidth#BITS_16}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Small16 fromBinary(ByteBuffer bytes) { + return (Small16) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_16, bytes); + } + } + + /** + * A CodePointTrie with {@value Type#SMALL} and {@value ValueWidth#BITS_32}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static final class Small32 extends Small { + Small32(char[] index, int[] data32, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset); + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@value Type#SMALL} and {@value ValueWidth#BITS_32}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Small32 fromBinary(ByteBuffer bytes) { + return (Small32) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_32, bytes); + } + } + + /** + * A CodePointTrie with {@value Type#SMALL} and {@value ValueWidth#BITS_8}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. 
+ */ + public static final class Small8 extends Small { + Small8(char[] index, byte[] data8, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset); + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@value Type#SMALL} and {@value ValueWidth#BITS_8}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Small8 fromBinary(ByteBuffer bytes) { + return (Small8) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_8, bytes); + } + } +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/MutableCodePointTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/util/MutableCodePointTrie.java new file mode 100644 index 00000000000..c5497818182 --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/MutableCodePointTrie.java @@ -0,0 +1,1289 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// created: 2018may04 Markus W. Scherer + +package com.ibm.icu.util; + +import java.util.Arrays; + +/** + * Mutable Unicode code point trie. + * Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit integer values. + * For details see http://site.icu-project.org/design/struct/utrie + * + *

<p>Setting values (especially ranges) and lookup is fast. + * The mutable trie is only somewhat space-efficient. + * It builds a compacted, immutable {@link CodePointTrie}. + * + *

<p>This trie can be modified while iterating over its contents. + * For example, it is possible to merge its values with those from another + * set of ranges (e.g., another {@link CodePointMap}): + * Iterate over those source ranges; for each of them iterate over this trie; + * add the source value into the value of each trie range. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ +public final class MutableCodePointTrie extends CodePointMap implements Cloneable { + /** + * Constructs a mutable trie that initially maps each Unicode code point to the same value. + * It uses 32-bit data values until + * {@link #buildImmutable(com.ibm.icu.util.CodePointTrie.Type, com.ibm.icu.util.CodePointTrie.ValueWidth)} + * is called. + * buildImmutable() takes a valueWidth parameter which + * determines the number of bits in the data value in the resulting {@link CodePointTrie}. + * + * @param initialValue the initial value that is set for all code points + * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16 + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public MutableCodePointTrie(int initialValue, int errorValue) { + index = new int[BMP_I_LIMIT]; + index3NullOffset = -1; + data = new int[INITIAL_DATA_LENGTH]; + dataNullOffset = -1; + origInitialValue = initialValue; + this.initialValue = initialValue; + this.errorValue = errorValue; + highValue = initialValue; + } + + /** + * Clones this mutable trie. + * + * @return the clone + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public MutableCodePointTrie clone() { + try { + MutableCodePointTrie builder = (MutableCodePointTrie) super.clone(); + int iCapacity = highStart <= BMP_LIMIT ?
BMP_I_LIMIT : I_LIMIT;
+            builder.index = new int[iCapacity];  // separate index array for the clone
+            builder.flags = new byte[UNICODE_LIMIT >> CodePointTrie.SHIFT_3];  // separate flags array for the clone
+            for (int i = 0, iLimit = highStart >> CodePointTrie.SHIFT_3; i < iLimit; ++i) {  // copy only the entries below highStart
+                builder.index[i] = index[i];
+                builder.flags[i] = flags[i];
+            }
+            builder.index3NullOffset = index3NullOffset;
+            builder.data = data.clone();  // separate copy of the data array
+            builder.dataLength = dataLength;
+            builder.dataNullOffset = dataNullOffset;
+            builder.origInitialValue = origInitialValue;
+            builder.initialValue = initialValue;
+            builder.errorValue = errorValue;
+            builder.highStart = highStart;
+            builder.highValue = highValue;
+            assert index16 == null;
+            return builder;
+        } catch (CloneNotSupportedException ignored) {
+            // Unreachable: Cloning *is* supported.
+            return null;
+        }
+    }
+
+    /**
+     * Creates a mutable trie with the same contents as the {@link CodePointMap}.
+     * The map's value for U+10FFFF becomes the initial value of the new trie and
+     * {@code map.get(-1)} becomes its error value; only ranges whose value differs
+     * from that initial value are written into the new trie.
+     *
+     * @param map the source map or trie
+     * @return the mutable trie
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static MutableCodePointTrie fromCodePointMap(CodePointMap map) {
+        // TODO: Consider special code branch for map instanceof CodePointTrie?
+        // Use the highValue as the initialValue to reduce the highStart.
+        int errorValue = map.get(-1);
+        int initialValue = map.get(MAX_UNICODE);
+        MutableCodePointTrie mutableTrie = new MutableCodePointTrie(initialValue, errorValue);
+        CodePointMap.Range range = new CodePointMap.Range();
+        int start = 0;
+        while (map.getRange(start, null, range)) {  // null filter: enumerate the unfiltered values
+            int end = range.getEnd();
+            int value = range.getValue();
+            if (value != initialValue) {  // ranges with the initial value need not be stored
+                if (start == end) {
+                    mutableTrie.set(start, value);
+                } else {
+                    mutableTrie.setRange(start, end, value);
+                }
+            }
+            start = end + 1;  // continue right after this range
+        }
+        return mutableTrie;
+    }
+
+    private void clear() {  // resets the builder state; called from buildImmutable() in a finally block
+        index3NullOffset = dataNullOffset = -1;
+        dataLength = 0;  // the data array itself stays allocated
+        highValue = initialValue = origInitialValue;  // undo the masking applied by maskValues()
+        highStart = 0;  // no index entries in use
+        index16 = null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    @Override
+    public int get(int c) {
+        if (c < 0 || MAX_UNICODE < c) {
+            return errorValue;
+        }
+        if (c >= highStart) {
+            return highValue;  // no index entries are kept at or above highStart
+        }
+        int i = c >> CodePointTrie.SHIFT_3;
+        if (flags[i] == ALL_SAME) {
+            return index[i];  // ALL_SAME: the index entry holds the value itself
+        } else {
+            return data[index[i] + (c & CodePointTrie.SMALL_DATA_MASK)];  // otherwise index[i] is the start of the block in data
+        }
+    }
+
+    private static final int maybeFilterValue(int value, int initialValue, int nullValue,
+            ValueFilter filter) {
+        if (value == initialValue) {
+            value = nullValue;  // getRange() passes the already-filtered initial value as nullValue
+        } else if (filter != null) {
+            value = filter.apply(value);
+        }
+        return value;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    @Override
+    public boolean getRange(int start, CodePointTrie.ValueFilter filter,
+            CodePointTrie.Range range) {
+        if (start < 0 || MAX_UNICODE < start) {
+            return false;  // no range starts outside 0..0x10FFFF
+        }
+        if (start >= highStart) {  // everything from highStart up shares highValue
+            int value = highValue;
+            if (filter != null) { value = filter.apply(value); }
+            range.set(start, MAX_UNICODE, value);
+            return true;
+        }
+        int nullValue = initialValue;
+        if (filter != null) { nullValue = filter.apply(nullValue); }  // filter the initial value once, up front
+        int c = start;
+        int value = 0;  // Initialize to make compiler happy. Real value when haveValue is true.
+        boolean haveValue = false;
+        int i = c >> CodePointTrie.SHIFT_3;
+        do {  // one small block per iteration
+            if (flags[i] == ALL_SAME) {
+                int value2 = maybeFilterValue(index[i], initialValue, nullValue, filter);
+                if (haveValue) {
+                    if (value2 != value) {
+                        range.set(start, c - 1, value);  // the range ends just before this block
+                        return true;
+                    }
+                } else {
+                    value = value2;
+                    haveValue = true;
+                }
+                c = (c + CodePointTrie.SMALL_DATA_BLOCK_LENGTH) & ~CodePointTrie.SMALL_DATA_MASK;  // skip to the start of the next block
+            } else /* MIXED */ {
+                int di = index[i] + (c & CodePointTrie.SMALL_DATA_MASK);  // data position of c inside its block
+                int value2 = maybeFilterValue(data[di], initialValue, nullValue, filter);
+                if (haveValue) {
+                    if (value2 != value) {
+                        range.set(start, c - 1, value);
+                        return true;
+                    }
+                } else {
+                    value = value2;
+                    haveValue = true;
+                }
+                while ((++c & CodePointTrie.SMALL_DATA_MASK) != 0) {  // scan the rest of this block
+                    if (maybeFilterValue(data[++di], initialValue, nullValue,
+                                         filter) != value) {
+                        range.set(start, c - 1, value);
+                        return true;
+                    }
+                }
+            }
+            ++i;
+        } while (c < highStart);
+        assert(haveValue);
+        if (maybeFilterValue(highValue, initialValue, nullValue, filter) != value) {
+            range.set(start, c - 1, value);  // the high range has a different value: stop before it
+        } else {
+            range.set(start, MAX_UNICODE, value);  // the range merges with the high range
+        }
+        return true;
+    }
+
+    private void writeBlock(int block, int value) {  // fills one small data block starting at data[block] with value
+        int limit = block + CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
+        Arrays.fill(data, block, limit, value);
+    }
+
+    /**
+     * Sets a value for a code point.
+     *
+     * @param c the code point
+     * @param value the value
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public void set(int c, int value) {
+        if (c < 0 || MAX_UNICODE < c) {
+            throw new IllegalArgumentException("invalid code point");
+        }
+
+        ensureHighStart(c);  // make sure the index covers c
+        int block = getDataBlock(c >> CodePointTrie.SHIFT_3);  // data block for c; an ALL_SAME block is turned into a MIXED one
+        data[block + (c & CodePointTrie.SMALL_DATA_MASK)] = value;
+    }
+
+    private void fillBlock(int block, int start, int limit, int value) {  // sets data[block+start .. block+limit-1] to value
+        Arrays.fill(data, block + start, block + limit, value);
+    }
+
+    /**
+     * Sets a value for each code point [start..end].
+     * Faster and more space-efficient than setting the value for each code point separately.
+     *
+     * @param start the first code point to get the value
+     * @param end the last code point to get the value (inclusive)
+     * @param value the value
+     * @throws IllegalArgumentException if start or end is outside 0..0x10FFFF, or if start &gt; end
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public void setRange(int start, int end, int value) {
+        if (start < 0 || MAX_UNICODE < start || end < 0 || MAX_UNICODE < end || start > end) {
+            throw new IllegalArgumentException("invalid code point range");
+        }
+        ensureHighStart(end);
+
+        int limit = end + 1;  // exclusive end of the range
+        if ((start & CodePointTrie.SMALL_DATA_MASK) != 0) {
+            // Set partial block at [start..following block boundary[.
+            int block = getDataBlock(start >> CodePointTrie.SHIFT_3);
+            int nextStart = (start + CodePointTrie.SMALL_DATA_MASK) & ~CodePointTrie.SMALL_DATA_MASK;
+            if (nextStart <= limit) {
+                fillBlock(block, start & CodePointTrie.SMALL_DATA_MASK,
+                          CodePointTrie.SMALL_DATA_BLOCK_LENGTH, value);
+                start = nextStart;
+            } else {
+                fillBlock(block, start & CodePointTrie.SMALL_DATA_MASK,
+                          limit & CodePointTrie.SMALL_DATA_MASK, value);  // the whole range lies inside this one block
+                return;
+            }
+        }
+
+        // Number of positions in the last, partial block.
+        int rest = limit & CodePointTrie.SMALL_DATA_MASK;
+
+        // Round down limit to a block boundary.
+        limit &= ~CodePointTrie.SMALL_DATA_MASK;
+
+        // Iterate over all-value blocks.
+        while (start < limit) {
+            int i = start >> CodePointTrie.SHIFT_3;
+            if (flags[i] == ALL_SAME) {
+                index[i] = value;  // ALL_SAME: the index entry stores the value, no data block needed
+            } else /* MIXED */ {
+                fillBlock(index[i], 0, CodePointTrie.SMALL_DATA_BLOCK_LENGTH, value);
+            }
+            start += CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
+        }
+
+        if (rest > 0) {
+            // Set partial block at [last block boundary..limit[.
+            int block = getDataBlock(start >> CodePointTrie.SHIFT_3);
+            fillBlock(block, 0, rest, value);
+        }
+    }
+
+    /**
+     * Compacts the data and builds an immutable {@link CodePointTrie} according to the parameters.
+     * After this, the mutable trie will be empty.
+     * The trie is also emptied when building fails, because clear() runs in a finally block.
+     *
+     * <p>Not every possible set of mappings can be built into a CodePointTrie,
+     * because of limitations resulting from speed and space optimizations.
+     * Every Unicode assigned character can be mapped to a unique value.
+     * Typical data yields data structures far smaller than the limitations.
+     *
+     * <p>It is possible to construct extremely unusual mappings that exceed the
+     * data structure limits.
+     * In such a case this function will throw an exception.
+     *
+     * @param type selects the trie type
+     * @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits,
+     *                   then the values stored in the trie will be truncated first
+     * @return the immutable trie built from the current contents
+     * @throws IllegalArgumentException if type or valueWidth is null
+     *
+     * @see #fromCodePointMap(CodePointMap)
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public CodePointTrie buildImmutable(CodePointTrie.Type type, CodePointTrie.ValueWidth valueWidth) {
+        if (type == null || valueWidth == null) {
+            throw new IllegalArgumentException("The type and valueWidth must be specified.");
+        }
+
+        try {
+            return build(type, valueWidth);
+        } finally {
+            clear();  // runs whether build() returns or throws
+        }
+    }
+
+    private static final int MAX_UNICODE = 0x10ffff;
+
+    private static final int UNICODE_LIMIT = 0x110000;
+    private static final int BMP_LIMIT = 0x10000;
+    private static final int ASCII_LIMIT = 0x80;
+
+    private static final int I_LIMIT = UNICODE_LIMIT >> CodePointTrie.SHIFT_3;  // index entries (small blocks) for all of Unicode
+    private static final int BMP_I_LIMIT = BMP_LIMIT >> CodePointTrie.SHIFT_3;  // index entries for the BMP
+    private static final int ASCII_I_LIMIT = ASCII_LIMIT >> CodePointTrie.SHIFT_3;  // index entries for ASCII
+
+    private static final int SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (CodePointTrie.FAST_SHIFT - CodePointTrie.SHIFT_3));
+
+    // Flag values for data blocks.
+    private static final byte ALL_SAME = 0;  // index[i] holds the value shared by the whole block
+    private static final byte MIXED = 1;  // index[i] is the start of the block in the data array
+    private static final byte SAME_AS = 2;  // set by compactWholeDataBlocks(): index[i] is the index of an earlier block with the same value
+
+    /** Start with allocation of 16k data entries. */
+    private static final int INITIAL_DATA_LENGTH = (1 << 14);
+
+    /** Grow about 8x each time. */
+    private static final int MEDIUM_DATA_LENGTH = (1 << 17);
+
+    /**
+     * Maximum length of the build-time data array.
+     * One entry per 0x110000 code points.
+     */
+    private static final int MAX_DATA_LENGTH = UNICODE_LIMIT;
+
+    // Flag values for index-3 blocks while compacting/building.
+    private static final byte I3_NULL = 0;
+    private static final byte I3_BMP = 1;
+    private static final byte I3_16 = 2;
+    private static final byte I3_18 = 3;
+
+    private static final int INDEX_3_18BIT_BLOCK_LENGTH = CodePointTrie.INDEX_3_BLOCK_LENGTH + CodePointTrie.INDEX_3_BLOCK_LENGTH / 8;
+
+    private int[] index;  // one entry per small block: the value (ALL_SAME) or the block start in data (MIXED)
+    private int index3NullOffset;
+    private int[] data;  // build-time data blocks
+    private int dataLength;  // number of data entries in use; the next block is allocated here
+    private int dataNullOffset;
+
+    private int origInitialValue;  // initialValue as passed to the constructor; restored by clear()
+    private int initialValue;
+    private int errorValue;  // returned by get() for code points outside 0..0x10FFFF
+    private int highStart;  // code points at or above highStart all map to highValue
+    private int highValue;
+
+    /** Temporary array while building the final data. */
+    private char[] index16;
+    private byte[] flags = new byte[UNICODE_LIMIT >> CodePointTrie.SHIFT_3];  // ALL_SAME / MIXED / SAME_AS per small block
+
+    private void ensureHighStart(int c) {  // extends the index so that it covers code point c
+        if (c >= highStart) {
+            // Round up to a CodePointTrie.CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
+            c = (c + CodePointTrie.CP_PER_INDEX_2_ENTRY) & ~(CodePointTrie.CP_PER_INDEX_2_ENTRY - 1);
+            int i = highStart >> CodePointTrie.SHIFT_3;
+            int iLimit = c >> CodePointTrie.SHIFT_3;
+            if (iLimit > index.length) {  // the index array is too small: grow it to full Unicode size
+                int[] newIndex = new int[I_LIMIT];
+                for (int j = 0; j < i; ++j) { newIndex[j] = index[j]; }
+                index = newIndex;
+            }
+            do {  // the newly covered blocks start out as ALL_SAME with the initial value
+                flags[i] = ALL_SAME;
+                index[i] = initialValue;
+            } while(++i < iLimit);
+            highStart = c;
+        }
+    }
+
+    private int allocDataBlock(int blockLength) {  // reserves blockLength entries at the end of data and returns their start
+        int newBlock = dataLength;
+        int newTop = newBlock + blockLength;
+        if (newTop > data.length) {  // grow in two steps: to MEDIUM_DATA_LENGTH, then to MAX_DATA_LENGTH
+            int capacity;
+            if (data.length < MEDIUM_DATA_LENGTH) {
+                capacity = MEDIUM_DATA_LENGTH;
+            } else if (data.length < MAX_DATA_LENGTH) {
+                capacity = MAX_DATA_LENGTH;
+            } else {
+                // Should never occur.
+                // Either MAX_DATA_LENGTH is incorrect,
+                // or the code writes more values than should be possible.
+                throw new AssertionError();
+            }
+            int[] newData = new int[capacity];
+            for (int j = 0; j < dataLength; ++j) { newData[j] = data[j]; }  // copy only the entries in use
+            data = newData;
+        }
+        dataLength = newTop;
+        return newBlock;
+    }
+
+    /**
+     * Returns the start in the data array of the block for index entry i,
+     * first turning an ALL_SAME block into a MIXED block filled with its value.
+     * Below BMP_I_LIMIT, all SMALL_DATA_BLOCKS_PER_BMP_BLOCK entries of the enclosing
+     * group are converted together into one allocation of FAST_DATA_BLOCK_LENGTH entries.
+     *
+     * No error checking for illegal arguments.
+     * The Java version always returns non-negative values.
+     */
+    private int getDataBlock(int i) {
+        if (flags[i] == MIXED) {
+            return index[i];  // already has a data block
+        }
+        if (i < BMP_I_LIMIT) {
+            int newBlock = allocDataBlock(CodePointTrie.FAST_DATA_BLOCK_LENGTH);
+            int iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);  // first index entry of the enclosing group
+            int iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+            do {
+                assert(flags[iStart] == ALL_SAME);
+                writeBlock(newBlock, index[iStart]);  // materialize the block's single value
+                flags[iStart] = MIXED;
+                index[iStart++] = newBlock;
+                newBlock += CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
+            } while (iStart < iLimit);
+            return index[i];
+        } else {
+            int newBlock = allocDataBlock(CodePointTrie.SMALL_DATA_BLOCK_LENGTH);
+            if (newBlock < 0) { return newBlock; }  // never true here: allocDataBlock() returns the old dataLength or throws
+            writeBlock(newBlock, index[i]);
+            flags[i] = MIXED;
+            index[i] = newBlock;
+            return newBlock;
+        }
+    }
+
+    // compaction --------------------------------------------------------------
+
+    private void maskValues(int mask) {  // ANDs mask into every stored value
+        initialValue &= mask;
+        errorValue &= mask;
+        highValue &= mask;
+        int iLimit = highStart >> CodePointTrie.SHIFT_3;
+        for (int i = 0; i < iLimit; ++i) {
+            if (flags[i] == ALL_SAME) {
+                index[i] &= mask;  // only ALL_SAME entries hold values; other entries hold offsets
+            }
+        }
+        for (int i = 0; i < dataLength; ++i) {
+            data[i] &= mask;
+        }
+    }
+
+    private static boolean equalBlocks(int[] s, int si, int[] t, int ti, int length) {  // true if s[si..] and t[ti..] agree in length entries
+        while (length > 0 && s[si] == t[ti]) {
+            ++si;
+            ++ti;
+            --length;
+        }
+        return length == 0;
+    }
+
+    private static boolean equalBlocks(char[] s, int si, int[] t, int ti, int length) {  // same comparison, char[] against int[]
+        while (length > 0 && s[si] == t[ti]) {
+            ++si;
+            ++ti;
+            --length;
+        }
+        return length == 0;
+    }
+
+    private static boolean equalBlocks(char[] s, int si, char[] t, int ti, int length) {  // same comparison, char[] against char[]
+        while (length > 0 && s[si] == t[ti]) {
+            ++si;
+            ++ti;
+            --length;
+        }
+        return length == 0;
+    }
+
+    private static boolean allValuesSameAs(int[] p, int pi, int length, int value) {  // true if p[pi..pi+length-1] all equal value
+        int pLimit = pi + length;
+        while (pi < pLimit && p[pi] == value) { ++pi; }
+        return pi == pLimit;
+    }
+
+    /**
+     * Search for an identical block.
+     * Looks in p[pStart..length-1] for blockLength entries equal to q[qStart..];
+     * returns the first matching start in p, or -1 if there is none.
+     */
+    private static int findSameBlock(int[] p, int pStart, int length,
+            int[] q, int qStart, int blockLength) {
+        // Ensure that we do not even partially get past length.
+        length -= blockLength;
+
+        while (pStart <= length) {
+            if (equalBlocks(p, pStart, q, qStart, blockLength)) {
+                return pStart;
+            }
+            ++pStart;
+        }
+        return -1;
+    }
+
+    private static int findSameBlock(char[] p, int pStart, int length,
+            int[] q, int qStart, int blockLength) {
+        // Ensure that we do not even partially get past length.
+        length -= blockLength;
+
+        while (pStart <= length) {
+            if (equalBlocks(p, pStart, q, qStart, blockLength)) {
+                return pStart;
+            }
+            ++pStart;
+        }
+        return -1;
+    }
+
+    private static int findSameBlock(char[] p, int pStart, int length,
+            char[] q, int qStart, int blockLength) {
+        // Ensure that we do not even partially get past length.
+        length -= blockLength;
+
+        while (pStart <= length) {
+            if (equalBlocks(p, pStart, q, qStart, blockLength)) {
+                return pStart;
+            }
+            ++pStart;
+        }
+        return -1;
+    }
+
+    private static int findAllSameBlock(int[] p, int length, int value, int blockLength) {  // start of the first run of blockLength entries equal to value, or -1
+        // Ensure that we do not even partially get past length.
+        length -= blockLength;
+
+        for (int block = 0; block <= length; ++block) {
+            if (p[block] == value) {
+                for (int i = 1;; ++i) {
+                    if (i == blockLength) {
+                        return block;
+                    }
+                    if (p[block + i] != value) {
+                        block += i;  // no run can start at or before the mismatch: resume after it
+                        break;
+                    }
+                }
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Look for maximum overlap of the beginning of the other block
+     * with the previous, adjacent block.
+     */
+    private static int getOverlap(int[] p, int length, int[] q, int qStart, int blockLength) {
+        int overlap = blockLength - 1;  // try the longest partial overlap first
+        assert(overlap <= length);
+        while (overlap > 0 && !equalBlocks(p, length - overlap, q, qStart, overlap)) {  // compare the end of p[0..length-1] with the start of q[qStart..]
+            --overlap;
+        }
+        return overlap;  // 0 if nothing overlaps
+    }
+
+    private static int getOverlap(char[] p, int length, int[] q, int qStart, int blockLength) {  // same search, char[] against int[]
+        int overlap = blockLength - 1;
+        assert(overlap <= length);
+        while (overlap > 0 && !equalBlocks(p, length - overlap, q, qStart, overlap)) {
+            --overlap;
+        }
+        return overlap;
+    }
+
+    private static int getOverlap(char[] p, int length, char[] q, int qStart, int blockLength) {  // same search, char[] against char[]
+        int overlap = blockLength - 1;
+        assert(overlap <= length);
+        while (overlap > 0 && !equalBlocks(p, length - overlap, q, qStart, overlap)) {
+            --overlap;
+        }
+        return overlap;
+    }
+
+    private static int getAllSameOverlap(int[] p, int length, int value, int blockLength) {  // number of trailing entries of p[0..length-1] equal to value, at most blockLength - 1
+        int min = length - (blockLength - 1);
+        int i = length;
+        while (min < i && p[i - 1] == value) { --i; }
+        return length - i;
+    }
+
+    /**
+     * Finds the start of the last range in the trie by enumerating backward.
+     * Indexes for code points higher than this will be omitted.
+     */
+    private int findHighStart() {
+        int i = highStart >> CodePointTrie.SHIFT_3;
+        while (i > 0) {  // walk the small blocks backward from highStart
+            boolean match;
+            if (flags[--i] == ALL_SAME) {
+                match = index[i] == highValue;
+            } else /* MIXED */ {
+                int p = index[i];
+                for (int j = 0;; ++j) {
+                    if (j == CodePointTrie.SMALL_DATA_BLOCK_LENGTH) {
+                        match = true;  // every entry of the block equals highValue
+                        break;
+                    }
+                    if (data[p + j] != highValue) {
+                        match = false;
+                        break;
+                    }
+                }
+            }
+            if (!match) {
+                return (i + 1) << CodePointTrie.SHIFT_3;  // the high range starts right after this block
+            }
+        }
+        return 0;  // every block consists of highValue only
+    }
+
+    private static final class AllSameBlocks {  // fixed-capacity table of (block index, value, reference count) entries for ALL_SAME blocks
+        static final int NEW_UNIQUE = -1;  // findOrAdd(): the value was not in the table and has been added
+        static final int OVERFLOW = -2;  // findOrAdd(): the value was not in the table and the table is full
+
+        AllSameBlocks() {
+            mostRecent = -1;
+        }
+
+        int findOrAdd(int index, int count, int value) {  // returns the block index stored for value, or NEW_UNIQUE / OVERFLOW
+            if (mostRecent >= 0 && values[mostRecent] == value) {  // shortcut: same value as the last hit
+                refCounts[mostRecent] += count;
+                return indexes[mostRecent];
+            }
+            for (int i = 0; i < length; ++i) {
+                if (values[i] == value) {
+                    mostRecent = i;
+                    refCounts[i] += count;
+                    return indexes[i];
+                }
+            }
+            if (length == CAPACITY) {
+                return OVERFLOW;
+            }
+            mostRecent = length;
+            indexes[length] = index;
+            values[length] = value;
+            refCounts[length++] = count;
+            return NEW_UNIQUE;
+        }
+
+        /** Replaces the block which has the lowest reference count. */
+        void add(int index, int count, int value) {
+            assert(length == CAPACITY);  // only used when the table is full
+            int least = -1;
+            int leastCount = I_LIMIT;
+            for (int i = 0; i < length; ++i) {
+                assert(values[i] != value);
+                if (refCounts[i] < leastCount) {
+                    least = i;
+                    leastCount = refCounts[i];
+                }
+            }
+            assert(least >= 0);
+            mostRecent = least;
+            indexes[least] = index;
+            values[least] = value;
+            refCounts[least] = count;
+        }
+
+        int findMostUsed() {  // block index of the entry with the highest reference count, or -1 if the table is empty
+            if (length == 0) { return -1; }
+            int max = -1;
+            int maxCount = 0;
+            for (int i = 0; i < length; ++i) {
+                if (refCounts[i] > maxCount) {
+                    max = i;
+                    maxCount = refCounts[i];
+                }
+            }
+            return indexes[max];
+        }
+
+        private static final int CAPACITY = 32;
+
+        private int length;  // number of table entries in use
+        private int mostRecent;  // slot of the entry found or added last, or -1
+
+        private int[] indexes = new int[CAPACITY];
+        private int[] values = new int[CAPACITY];
+        private int[] refCounts = new int[CAPACITY];
+    }
+
+    private int compactWholeDataBlocks(int fastILimit, AllSameBlocks allSameBlocks) {  // classifies whole blocks; returns the capacity to reserve for the compacted data
+        // ASCII data will be stored as a linear table, even if the following code
+        // does not yet count it that way.
+        int newDataCapacity = ASCII_LIMIT;
+        // Add room for special values (errorValue, highValue) and padding.
+        newDataCapacity += 4;
+        int iLimit = highStart >> CodePointTrie.SHIFT_3;
+        int blockLength = CodePointTrie.FAST_DATA_BLOCK_LENGTH;
+        int inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+        for (int i = 0; i < iLimit; i += inc) {
+            if (i == fastILimit) {  // from here on, work with single small blocks
+                blockLength = CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
+                inc = 1;
+            }
+            int value = index[i];
+            if (flags[i] == MIXED) {
+                // Really mixed?
+                int p = value;
+                value = data[p];
+                if (allValuesSameAs(data, p + 1, blockLength - 1, value)) {
+                    flags[i] = ALL_SAME;
+                    index[i] = value;
+                    // Fall through to ALL_SAME handling.
+                } else {
+                    newDataCapacity += blockLength;
+                    continue;
+                }
+            } else {
+                assert(flags[i] == ALL_SAME);
+                if (inc > 1) {
+                    // Do all of the fast-range data block's ALL_SAME parts have the same value?
+                    boolean allSame = true;
+                    int next_i = i + inc;
+                    for (int j = i + 1; j < next_i; ++j) {
+                        assert(flags[j] == ALL_SAME);
+                        if (index[j] != value) {
+                            allSame = false;
+                            break;
+                        }
+                    }
+                    if (!allSame) {
+                        // Turn it into a MIXED block.
+                        if (getDataBlock(i) < 0) {  // not expected: getDataBlock() is documented to return non-negative values
+                            return -1;
+                        }
+                        continue;
+                    }
+                }
+            }
+            // Is there another ALL_SAME block with the same value?
+            int other = allSameBlocks.findOrAdd(i, inc, value);
+            if (other == AllSameBlocks.OVERFLOW) {
+                // The fixed-size array overflowed. Slow check for a duplicate block.
+                int jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+                for (int j = 0;; j += jInc) {
+                    if (j == i) {
+                        allSameBlocks.add(i, inc, value);  // no earlier duplicate: register this block
+                        break;
+                    }
+                    if (j == fastILimit) {
+                        jInc = 1;
+                    }
+                    if (flags[j] == ALL_SAME && index[j] == value) {
+                        allSameBlocks.add(j, jInc + inc, value);
+                        other = j;
+                        break;
+                        // We could keep counting blocks with the same value
+                        // before we add the first one, which may improve compaction in rare cases,
+                        // but it would make it slower.
+                    }
+                }
+            }
+            if (other >= 0) {
+                flags[i] = SAME_AS;  // duplicate: refer to the earlier block
+                index[i] = other;
+            } else {
+                // New unique same-value block.
+                newDataCapacity += blockLength;
+            }
+        }
+        return newDataCapacity;
+    }
+
+    /**
+     * Compacts a build-time trie.
+     *
+     * The compaction
+     * - removes blocks that are identical with earlier ones
+     * - overlaps each new non-duplicate block as much as possible with the previously-written one
+     * - works with fast-range data blocks whose length is a multiple of that of
+     *   higher-code-point data blocks
+     *
+     * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
+     *
+     * Each visited index entry is rewritten to its block's offset in newData.
+     * The return value is the number of newData entries in use afterwards.
+     */
+    private int compactData(int fastILimit, int[] newData) {
+        // The linear ASCII data has been copied into newData already.
+        int newDataLength = 0;
+        for (int i = 0; newDataLength < ASCII_LIMIT;
+                newDataLength += CodePointTrie.FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
+            index[i] = newDataLength;  // the ASCII blocks sit at their linear positions
+        }
+
+        int iLimit = highStart >> CodePointTrie.SHIFT_3;
+        int blockLength = CodePointTrie.FAST_DATA_BLOCK_LENGTH;
+        int inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
+        for (int i = ASCII_I_LIMIT; i < iLimit; i += inc) {
+            if (i == fastILimit) {  // from here on, work with single small blocks
+                blockLength = CodePointTrie.SMALL_DATA_BLOCK_LENGTH;
+                inc = 1;
+            }
+            if (flags[i] == ALL_SAME) {
+                int value = index[i];
+                int n = findAllSameBlock(newData, newDataLength, value, blockLength);
+                if (n >= 0) {
+                    index[i] = n;  // reuse an existing run of this value
+                } else {
+                    n = getAllSameOverlap(newData, newDataLength, value, blockLength);
+                    index[i] = newDataLength - n;  // the block starts inside the overlapping tail
+                    while (n < blockLength) {
+                        newData[newDataLength++] = value;
+                        ++n;
+                    }
+                }
+            } else if (flags[i] == MIXED) {
+                int block = index[i];
+                int n = findSameBlock(newData, 0, newDataLength, data, block, blockLength);
+                if (n >= 0) {
+                    index[i] = n;  // an identical block has been written already
+                } else {
+                    n = getOverlap(newData, newDataLength, data, block, blockLength);
+                    index[i] = newDataLength - n;
+                    while (n < blockLength) {
+                        newData[newDataLength++] = data[block + n++];  // append only the part that does not overlap
+                    }
+                }
+            } else /* SAME_AS */ {
+                int j = index[i];
+                index[i] = index[j];  // take over the offset of the block this one refers to
+            }
+        }
+
+        return newDataLength;
+    }
+
+    private int compactIndex(int fastILimit) {
+        int fastIndexLength = fastILimit >> (CodePointTrie.FAST_SHIFT - CodePointTrie.SHIFT_3);
+        if ((highStart >> CodePointTrie.FAST_SHIFT) <= fastIndexLength) {
+            // Only the linear fast index, no multi-stage index tables.
+            index3NullOffset = CodePointTrie.NO_INDEX3_NULL_OFFSET;
+            return fastIndexLength;
+        }
+
+        // Condense the fast index table.
+        // Also, does it contain an index-3 block with all dataNullOffset?
+ char[] fastIndex = new char[fastIndexLength]; + int i3FirstNull = -1; + for (int i = 0, j = 0; i < fastILimit; ++j) { + int i3 = index[i]; + fastIndex[j] = (char)i3; + if (i3 == dataNullOffset) { + if (i3FirstNull < 0) { + i3FirstNull = j; + } else if (index3NullOffset < 0 && + (j - i3FirstNull + 1) == CodePointTrie.INDEX_3_BLOCK_LENGTH) { + index3NullOffset = i3FirstNull; + } + } else { + i3FirstNull = -1; + } + // Set the index entries that compactData() skipped. + // Needed when the multi-stage index covers the fast index range as well. + int iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + while (++i < iNext) { + i3 += CodePointTrie.SMALL_DATA_BLOCK_LENGTH; + index[i] = i3; + } + } + + // Examine index-3 blocks. For each determine one of: + // - same as the index-3 null block + // - same as a fast-index block + // - 16-bit indexes + // - 18-bit indexes + // We store this in the first flags entry for the index-3 block. + // + // Also determine an upper limit for the index-3 table length. + int index3Capacity = 0; + i3FirstNull = index3NullOffset; + // If the fast index covers the whole BMP, then + // the multi-stage index is only for supplementary code points. + // Otherwise, the multi-stage index covers all of Unicode. + int iStart = fastILimit < BMP_I_LIMIT ? 
0 : BMP_I_LIMIT; + int iLimit = highStart >> CodePointTrie.SHIFT_3; + for (int i = iStart; i < iLimit;) { + int j = i; + int jLimit = i + CodePointTrie.INDEX_3_BLOCK_LENGTH; + int oredI3 = 0; + boolean isNull = true; + do { + int i3 = index[j]; + oredI3 |= i3; + if (i3 != dataNullOffset) { + isNull = false; + } + } while (++j < jLimit); + if (isNull) { + flags[i] = I3_NULL; + if (i3FirstNull < 0) { + if (oredI3 <= 0xffff) { + index3Capacity += CodePointTrie.INDEX_3_BLOCK_LENGTH; + } else { + index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; + } + i3FirstNull = 0; + } + } else { + if (oredI3 <= 0xffff) { + int n = findSameBlock(fastIndex, 0, fastIndexLength, + index, i, CodePointTrie.INDEX_3_BLOCK_LENGTH); + if (n >= 0) { + flags[i] = I3_BMP; + index[i] = n; + } else { + flags[i] = I3_16; + index3Capacity += CodePointTrie.INDEX_3_BLOCK_LENGTH; + } + } else { + flags[i] = I3_18; + index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; + } + } + i = j; + } + + int index2Capacity = (iLimit - iStart) >> CodePointTrie.SHIFT_2_3; + + // Length of the index-1 table, rounded up. + int index1Length = (index2Capacity + CodePointTrie.INDEX_2_MASK) >> CodePointTrie.SHIFT_1_2; + + // Index table: Fast index, index-1, index-3, index-2. + // +1 for possible index table padding. + int index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1; + index16 = Arrays.copyOf(fastIndex, index16Capacity); + + // Compact the index-3 table and write an uncompacted version of the index-2 table. + char[] index2 = new char[index2Capacity]; + int i2Length = 0; + i3FirstNull = index3NullOffset; + int index3Start = fastIndexLength + index1Length; + int indexLength = index3Start; + for (int i = iStart; i < iLimit; i += CodePointTrie.INDEX_3_BLOCK_LENGTH) { + int i3; + byte f = flags[i]; + if (f == I3_NULL && i3FirstNull < 0) { + // First index-3 null block. Write & overlap it like a normal block, then remember it. + f = dataNullOffset <= 0xffff ? 
I3_16 : I3_18; + i3FirstNull = 0; + } + if (f == I3_NULL) { + i3 = index3NullOffset; + } else if (f == I3_BMP) { + i3 = index[i]; + } else if (f == I3_16) { + int n = findSameBlock(index16, index3Start, indexLength, + index, i, CodePointTrie.INDEX_3_BLOCK_LENGTH); + if (n >= 0) { + i3 = n; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, + index, i, CodePointTrie.INDEX_3_BLOCK_LENGTH); + } + i3 = indexLength - n; + while (n < CodePointTrie.INDEX_3_BLOCK_LENGTH) { + index16[indexLength++] = (char)index[i + n++]; + } + } + } else { + assert(f == I3_18); + // Encode an index-3 block that contains one or more data indexes exceeding 16 bits. + int j = i; + int jLimit = i + CodePointTrie.INDEX_3_BLOCK_LENGTH; + int k = indexLength; + do { + ++k; + int v = index[j++]; + int upperBits = (v & 0x30000) >> 2; + index16[k++] = (char)v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 4; + index16[k++] = (char)v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 6; + index16[k++] = (char)v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 8; + index16[k++] = (char)v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 10; + index16[k++] = (char)v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 12; + index16[k++] = (char)v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 14; + index16[k++] = (char)v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 16; + index16[k++] = (char)v; + index16[k - 9] = (char)upperBits; + } while (j < jLimit); + int n = findSameBlock(index16, index3Start, indexLength, + index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); + if (n >= 0) { + i3 = n | 0x8000; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3 tables. 
+ n = 0; + } else { + n = getOverlap(index16, indexLength, + index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); + } + i3 = (indexLength - n) | 0x8000; + if (n > 0) { + int start = indexLength; + while (n < INDEX_3_18BIT_BLOCK_LENGTH) { + index16[indexLength++] = index16[start + n++]; + } + } else { + indexLength += INDEX_3_18BIT_BLOCK_LENGTH; + } + } + } + if (index3NullOffset < 0 && i3FirstNull >= 0) { + index3NullOffset = i3; + } + // Set the index-2 table entry. + index2[i2Length++] = (char)i3; + } + assert(i2Length == index2Capacity); + assert(indexLength <= index3Start + index3Capacity); + + if (index3NullOffset < 0) { + index3NullOffset = CodePointTrie.NO_INDEX3_NULL_OFFSET; + } + if (indexLength >= (CodePointTrie.NO_INDEX3_NULL_OFFSET + CodePointTrie.INDEX_3_BLOCK_LENGTH)) { + // The index-3 offsets exceed 15 bits, or + // the last one cannot be distinguished from the no-null-block value. + // TODO: review exceptions / error codes + throw new IndexOutOfBoundsException( + "The trie data exceeds limitations of the data structure."); + } + + // Compact the index-2 table and write the index-1 table. + int blockLength = CodePointTrie.INDEX_2_BLOCK_LENGTH; + int i1 = fastIndexLength; + for (int i = 0; i < i2Length; i += blockLength) { + if ((i2Length - i) < blockLength) { + // highStart is inside the last index-2 block. Shorten it. + blockLength = i2Length - i; + } + int i2; + int n = findSameBlock(index16, index3Start, indexLength, + index2, i, blockLength); + if (n >= 0) { + i2 = n; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3/2 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, index2, i, blockLength); + } + i2 = indexLength - n; + while (n < blockLength) { + index16[indexLength++] = index2[i + n++]; + } + } + // Set the index-1 table entry. 
+ index16[i1++] = (char)i2; + } + assert(i1 == index3Start); + assert(indexLength <= index16Capacity); + + return indexLength; + } + + private int compactTrie(int fastILimit) { + // Find the real highStart and round it up. + assert((highStart & (CodePointTrie.CP_PER_INDEX_2_ENTRY - 1)) == 0); + highValue = get(MAX_UNICODE); + int realHighStart = findHighStart(); + realHighStart = (realHighStart + (CodePointTrie.CP_PER_INDEX_2_ENTRY - 1)) & + ~(CodePointTrie.CP_PER_INDEX_2_ENTRY - 1); + if (realHighStart == UNICODE_LIMIT) { + highValue = initialValue; + } + + // We always store indexes and data values for the fast range. + // Pin highStart to the top of that range while building. + int fastLimit = fastILimit << CodePointTrie.SHIFT_3; + if (realHighStart < fastLimit) { + for (int i = (realHighStart >> CodePointTrie.SHIFT_3); i < fastILimit; ++i) { + flags[i] = ALL_SAME; + index[i] = highValue; + } + highStart = fastLimit; + } else { + highStart = realHighStart; + } + + int[] asciiData = new int[ASCII_LIMIT]; + for (int i = 0; i < ASCII_LIMIT; ++i) { + asciiData[i] = get(i); + } + + // First we look for which data blocks have the same value repeated over the whole block, + // deduplicate such blocks, find a good null data block (for faster enumeration), + // and get an upper bound for the necessary data array length. + AllSameBlocks allSameBlocks = new AllSameBlocks(); + int newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks); + int[] newData = Arrays.copyOf(asciiData, newDataCapacity); + + int newDataLength = compactData(fastILimit, newData); + assert(newDataLength <= newDataCapacity); + data = newData; + dataLength = newDataLength; + if (dataLength > (0x3ffff + CodePointTrie.SMALL_DATA_BLOCK_LENGTH)) { + // The offset of the last data block is too high to be stored in the index table. 
+ // TODO: review exceptions / error codes + throw new IndexOutOfBoundsException( + "The trie data exceeds limitations of the data structure."); + } + + int dataNullIndex = allSameBlocks.findMostUsed(); + if (dataNullIndex >= 0) { + dataNullOffset = index[dataNullIndex]; + initialValue = data[dataNullOffset]; + } else { + dataNullOffset = CodePointTrie.NO_DATA_NULL_OFFSET; + } + + int indexLength = compactIndex(fastILimit); + highStart = realHighStart; + return indexLength; + } + + private CodePointTrie build(CodePointTrie.Type type, CodePointTrie.ValueWidth valueWidth) { + // The mutable trie always stores 32-bit values. + // When we build a UCPTrie for a smaller value width, we first mask off unused bits + // before compacting the data. + switch (valueWidth) { + case BITS_32: + break; + case BITS_16: + maskValues(0xffff); + break; + case BITS_8: + maskValues(0xff); + break; + default: + // Should be unreachable. + throw new IllegalArgumentException(); + } + + int fastLimit = type == CodePointTrie.Type.FAST ? BMP_LIMIT : CodePointTrie.SMALL_LIMIT; + int indexLength = compactTrie(fastLimit >> CodePointTrie.SHIFT_3); + + // Ensure data table alignment: The index length must be even for uint32_t data. + if (valueWidth == CodePointTrie.ValueWidth.BITS_32 && (indexLength & 1) != 0) { + index16[indexLength++] = 0xffee; // arbitrary value + } + + // Make the total trie structure length a multiple of 4 bytes by padding the data table, + // and store special values as the last two data values. 
+ int length = indexLength * 2; + if (valueWidth == CodePointTrie.ValueWidth.BITS_16) { + if (((indexLength ^ dataLength) & 1) != 0) { + // padding + data[dataLength++] = errorValue; + } + if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { + data[dataLength++] = highValue; + data[dataLength++] = errorValue; + } + length += dataLength * 2; + } else if (valueWidth == CodePointTrie.ValueWidth.BITS_32) { + // 32-bit data words never need padding to a multiple of 4 bytes. + if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { + if (data[dataLength - 1] != highValue) { + data[dataLength++] = highValue; + } + data[dataLength++] = errorValue; + } + length += dataLength * 4; + } else { + int and3 = (length + dataLength) & 3; + if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) { + // all set + } else if(and3 == 3 && data[dataLength - 1] == highValue) { + data[dataLength++] = errorValue; + } else { + while (and3 != 2) { + data[dataLength++] = highValue; + and3 = (and3 + 1) & 3; + } + data[dataLength++] = highValue; + data[dataLength++] = errorValue; + } + length += dataLength; + } + assert((length & 3) == 0); + + // Fill the index and data arrays. + char[] trieIndex; + if (highStart <= fastLimit) { + // Condense only the fast index from the mutable-trie index. + trieIndex = new char[indexLength]; + for (int i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) { + trieIndex[j] = (char)index[i]; + } + } else { + if (indexLength == index16.length) { + trieIndex = index16; + index16 = null; + } else { + trieIndex = Arrays.copyOf(index16, indexLength); + } + } + + // Write the data array. + switch (valueWidth) { + case BITS_16: { + // Write 16-bit data values. + char[] data16 = new char[dataLength]; + for (int i = 0; i < dataLength; ++i) { data16[i] = (char)data[i]; } + return type == CodePointTrie.Type.FAST ? 
+ new CodePointTrie.Fast16(trieIndex, data16, highStart, + index3NullOffset, dataNullOffset) : + new CodePointTrie.Small16(trieIndex, data16, highStart, + index3NullOffset, dataNullOffset); + } + case BITS_32: { + // Write 32-bit data values. + int[] data32 = Arrays.copyOf(data, dataLength); + return type == CodePointTrie.Type.FAST ? + new CodePointTrie.Fast32(trieIndex, data32, highStart, + index3NullOffset, dataNullOffset) : + new CodePointTrie.Small32(trieIndex, data32, highStart, + index3NullOffset, dataNullOffset); + } + case BITS_8: { + // Write 8-bit data values. + byte[] data8 = new byte[dataLength]; + for (int i = 0; i < dataLength; ++i) { data8[i] = (byte)data[i]; } + return type == CodePointTrie.Type.FAST ? + new CodePointTrie.Fast8(trieIndex, data8, highStart, + index3NullOffset, dataNullOffset) : + new CodePointTrie.Small8(trieIndex, data8, highStart, + index3NullOffset, dataNullOffset); + } + default: + // Should be unreachable. + throw new IllegalArgumentException(); + } + } +} diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar old mode 100755 new mode 100644 index 93c64fdb11f..c864412f230 --- a/icu4j/main/shared/data/icudata.jar +++ b/icu4j/main/shared/data/icudata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70c249360d5cc010c75203f5add8040cbcc4f33229e1d82d34b6185d69832143 -size 12510210 +oid sha256:a8be41753876c867630b4e740d692e0ae7ced119086a22cd4844ea7bf174d6f7 +size 12509408 diff --git a/icu4j/main/shared/data/icutzdata.jar b/icu4j/main/shared/data/icutzdata.jar index 8b02fe62204..67e57e3ad38 100755 --- a/icu4j/main/shared/data/icutzdata.jar +++ b/icu4j/main/shared/data/icutzdata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93a0bf4221a173b33aeda78f4646092caad816a6832310a89278de249ec18634 +oid sha256:55923dda88f8bf3affc2cf6d774a92a49e5fbc4be5583769bfe90fc7f319d2b1 size 92857 diff --git a/icu4j/main/shared/data/testdata.jar 
b/icu4j/main/shared/data/testdata.jar old mode 100755 new mode 100644 index 96345bd896f..91438132198 --- a/icu4j/main/shared/data/testdata.jar +++ b/icu4j/main/shared/data/testdata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47978ca4c19730c3d4387d9058679115dbf1e21964b993a889a38680fd3dfe47 -size 813186 +oid sha256:0d399ead8487d2beff526c723212022ba354501bb3777481f16b53241d24a8d1 +size 813119 diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java index 0e7f9aef1b3..26c4191fd22 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java @@ -2632,9 +2632,14 @@ public class BasicTest extends TestFmwk { @Test public void TestCustomComp() { String [][] pairs={ - { "\\uD801\\uE000\\uDFFE", "" }, - { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, - { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, + // ICU 63 normalization with CodePointTrie requires inert surrogate code points. 
+ // { "\\uD801\\uE000\\uDFFE", "" }, + // { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, + // { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, + { "\\uD801\\uE000\\uDFFE", "\\uD801\\uDFFE" }, + { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD800\\uD801\\uDFFE\\uDFFF" }, + { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD800\\U000107FE\\uDFFF" }, + { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE002\\U000110B9\\u0327\\u0345" }, { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" }, { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" }, @@ -2661,9 +2666,14 @@ public class BasicTest extends TestFmwk { @Test public void TestCustomFCC() { String[][] pairs={ - { "\\uD801\\uE000\\uDFFE", "" }, - { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, - { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, + // ICU 63 normalization with CodePointTrie requires inert surrogate code points. + // { "\\uD801\\uE000\\uDFFE", "" }, + // { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, + // { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, + { "\\uD801\\uE000\\uDFFE", "\\uD801\\uDFFE" }, + { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD800\\uD801\\uDFFE\\uDFFF" }, + { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD800\\U000107FE\\uDFFF" }, + // The following expected result is different from CustomComp // because of only-contiguous composition. { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE001\\U000110B9\\u0327\\u0308\\u0345" }, diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CodePointTrieTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CodePointTrieTest.java new file mode 100644 index 00000000000..819800ad71f --- /dev/null +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/CodePointTrieTest.java @@ -0,0 +1,985 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// created: 2018jul10 Markus W. 
Scherer + +// This is a fairly straight port from cintltst/ucptrietest.c. +// It wants to remain close to the C code, rather than be completely colloquial Java. + +package com.ibm.icu.dev.test.util; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.util.Arrays; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import com.ibm.icu.dev.test.TestFmwk; +import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus; +import com.ibm.icu.util.CodePointMap; +import com.ibm.icu.util.CodePointTrie; +import com.ibm.icu.util.MutableCodePointTrie; + +@RunWith(JUnit4.class) +public final class CodePointTrieTest extends TestFmwk { + /* Values for setting possibly overlapping, out-of-order ranges of values */ + private static class SetRange { + SetRange(int start, int limit, int value) { + this.start = start; + this.limit = limit; + this.value = value; + } + + final int start, limit; + final int value; + } + + // Returned from getSpecialValues(). Values extracted from an array of CheckRange. + private static class SpecialValues { + SpecialValues(int i, int initialValue, int errorValue) { + this.i = i; + this.initialValue = initialValue; + this.errorValue = errorValue; + } + + final int i; + final int initialValue; + final int errorValue; + } + + /* + * Values for testing: + * value is set from the previous boundary's limit to before + * this boundary's limit + * + * There must be an entry with limit 0 and the initialValue. + * It may be preceded by an entry with negative limit and the errorValue.
+ */ + private static class CheckRange { + CheckRange(int limit, int value) { + this.limit = limit; + this.value = value; + } + + final int limit; + final int value; + } + + private static int skipSpecialValues(CheckRange checkRanges[]) { + int i; + for(i=0; i= 0) { + fail(String.format( // log_err( + "error: %s getRanges (%s) fails to deliver range [U+%04x..U+%04x].0x%x\n", + name, variant, start, expEnd, expValue)); + } + return false; + } + if (expEnd < 0) { + fail(String.format( + "error: %s getRanges (%s) delivers unexpected range [U+%04x..U+%04x].0x%x\n", + name, variant, range.getStart(), range.getEnd(), range.getValue())); + return false; + } + if (range.getStart() != start || range.getEnd() != expEnd || range.getValue() != expValue) { + fail(String.format( + "error: %s getRanges (%s) delivers wrong range [U+%04x..U+%04x].0x%x " + + "instead of [U+%04x..U+%04x].0x%x\n", + name, variant, range.getStart(), range.getEnd(), range.getValue(), + start, expEnd, expValue)); + return false; + } + return true; + } + + // Test iteration starting from various UTF-8/16 and trie structure boundaries. + // Also test starting partway through lead & trail surrogates for fixed-surrogate-value options, + // and partway through supplementary code points. + private static int iterStarts[] = { + 0, 0x7f, 0x80, 0x7ff, 0x800, 0xfff, 0x1000, + 0xd7ff, 0xd800, 0xd888, 0xdddd, 0xdfff, 0xe000, + 0xffff, 0x10000, 0x12345, 0x10ffff, 0x110000 + }; + + private void + testTrieGetRanges(String testName, CodePointMap trie, + CodePointMap.RangeOption option, int surrValue, + CheckRange checkRanges[]) { + String typeName = trie instanceof MutableCodePointTrie ? "mutableTrie" : "trie"; + CodePointMap.Range range = new CodePointMap.Range(); + for (int s = 0; s < iterStarts.length; ++s) { + int start = iterStarts[s]; + int i, i0; + int expEnd; + int expValue; + boolean getRangeResult; + // No need to go from each iteration start to the very end. 
+ int innerLoopCount; + + String name = String.format("%s/%s(%s) min=U+%04x", typeName, option, testName, start); + + // Skip over special values and low ranges. + for (i = 0; i < checkRanges.length && checkRanges[i].limit <= start; ++i) {} + i0 = i; + // without value handler + for (innerLoopCount = 0;; ++i, start = range.getEnd() + 1) { + if (i < checkRanges.length) { + expEnd = checkRanges[i].limit - 1; + expValue = checkRanges[i].value; + } else { + expEnd = -1; + expValue = 0x5005; + } + getRangeResult = option != CodePointMap.RangeOption.NORMAL ? + trie.getRange(start, option, surrValue, null, range) : + trie.getRange(start, null, range); + if (!doCheckRange(name, "without value handler", + start, getRangeResult, range, expEnd, expValue)) { + break; + } + if (s != 0 && ++innerLoopCount == 5) { break; } + } + // with value handler + for (i = i0, start = iterStarts[s], innerLoopCount = 0;; + ++i, start = range.getEnd() + 1) { + if (i < checkRanges.length) { + expEnd = checkRanges[i].limit - 1; + expValue = checkRanges[i].value ^ 0x5555; + } else { + expEnd = -1; + expValue = 0x5005; + } + getRangeResult = trie.getRange(start, option, surrValue ^ 0x5555, testFilter, range); + if (!doCheckRange(name, "with value handler", + start, getRangeResult, range, expEnd, expValue)) { + break; + } + if (s != 0 && ++innerLoopCount == 5) { break; } + } + // C also tests without value (with a NULL value pointer), + // but that does not apply to Java. + } + } + + // Note: There is much less to do here in polymorphic Java than in C + // where we have many specialized macros in addition to generic functions. + private void + testTrieGetters(String testName, CodePointTrie trie, + CodePointTrie.Type type, CodePointTrie.ValueWidth valueWidth, + CheckRange checkRanges[]) { + int value, value2; + int start, limit; + int i; + int countErrors=0; + + CodePointTrie.Fast fastTrie = + type == CodePointTrie.Type.FAST ? 
(CodePointTrie.Fast)trie : null; + String typeName = "trie"; + + SpecialValues specials = getSpecialValues(checkRanges); + + start=0; + for(i=specials.i; i10) { + return; + } + } + } + + /* test errorValue */ + value = trie.get(-1); + value2 = trie.get(0x110000); + if(value!=specials.errorValue || value2!=specials.errorValue) { + fail(String.format( + "error: %s(%s).get(out of range) != errorValue\n", + typeName, testName)); + } + } + + private void + testBuilderGetters(String testName, MutableCodePointTrie mutableTrie, CheckRange checkRanges[]) { + int value, value2; + int start, limit; + int i; + int countErrors=0; + + String typeName = "mutableTrie"; + + SpecialValues specials=getSpecialValues(checkRanges); + + start=0; + for(i=specials.i; i10) { + return; + } + } + } + + /* test errorValue */ + value=mutableTrie.get(-1); + value2=mutableTrie.get(0x110000); + if(value!=specials.errorValue || value2!=specials.errorValue) { + fail(String.format( + "error: %s(%s).get(out of range) != errorValue\n", + typeName, testName)); + } + } + + private static boolean ACCIDENTAL_SURROGATE_PAIR(CharSequence s, int cp) { + return s.length() > 0 && + Character.isHighSurrogate(s.charAt(s.length() - 1)) && + UTF16Plus.isTrailSurrogate(cp); + } + + private void + testTrieUTF16(String testName, + CodePointTrie trie, CodePointTrie.ValueWidth valueWidth, + CheckRange checkRanges[]) { + StringBuilder s = new StringBuilder(); + int[] values = new int[16000]; + + int errorValue = trie.get(-1); + int value, expected; + int prevCP, c, c2; + int i, sIndex, countValues; + + /* write a string */ + prevCP=0; + countValues=0; + for(i=skipSpecialValues(checkRanges); i 0) { + --i; + c2 = s.codePointBefore(sIndex); + sIndex -= Character.charCount(c2); + assertTrue("previous() at " + si.getIndex(), si.previous()); + c = si.getCodePoint(); + value = si.getValue(); + expected = UTF16Plus.isSurrogate(c) ? 
errorValue : values[i]; + if(value!=expected) { + fail(String.format( + "error: wrong value from UCPTRIE_PREV(%s)(U+%04x): 0x%x instead of 0x%x\n", + testName, c, value, expected)); + } + if(c!=c2) { + fail(String.format( + "error: wrong code point from UCPTRIE_PREV(%s): U+%04x != U+%04x\n", + testName, c, c2)); + } + } + assertFalse("previous() at the start", si.previous()); + } + + private void + testTrie(String testName, CodePointTrie trie, + CodePointTrie.Type type, CodePointTrie.ValueWidth valueWidth, + CheckRange checkRanges[]) { + testTrieGetters(testName, trie, type, valueWidth, checkRanges); + testTrieGetRanges(testName, trie, CodePointMap.RangeOption.NORMAL, 0, checkRanges); + if (type == CodePointTrie.Type.FAST) { + testTrieUTF16(testName, trie, valueWidth, checkRanges); + // Java: no testTrieUTF8(testName, trie, valueWidth, checkRanges); + } + } + + private void + testBuilder(String testName, MutableCodePointTrie mutableTrie, CheckRange checkRanges[]) { + testBuilderGetters(testName, mutableTrie, checkRanges); + testTrieGetRanges(testName, mutableTrie, CodePointMap.RangeOption.NORMAL, 0, checkRanges); + } + + private void + testTrieSerialize(String testName, MutableCodePointTrie mutableTrie, + CodePointTrie.Type type, CodePointTrie.ValueWidth valueWidth, boolean withSwap, + CheckRange checkRanges[]) { + CodePointTrie trie; + int length1; + + /* clone the trie so that the caller can reuse the original */ + mutableTrie = mutableTrie.clone(); + + /* + * This is not a loop, but simply a block that we can exit with "break" + * when something goes wrong. 
+ */ + do { + trie = mutableTrie.buildImmutable(type, valueWidth); + ByteArrayOutputStream os = new ByteArrayOutputStream(); + length1=trie.toBinary(os); + assertEquals(testName + ".toBinary() length", os.size(), length1); + ByteBuffer storage = ByteBuffer.wrap(os.toByteArray()); + // Java: no preflighting + + testTrie(testName, trie, type, valueWidth, checkRanges); + trie=null; + + // Java: There is no code for "swapping" the endianness of data. + // withSwap is unused. + + trie = CodePointTrie.fromBinary(type, valueWidth, storage); + if(type != trie.getType()) { + fail(String.format( + "error: trie serialization (%s) did not preserve trie type\n", testName)); + break; + } + if(valueWidth != trie.getValueWidth()) { + fail(String.format( + "error: trie serialization (%s) did not preserve data value width\n", testName)); + break; + } + if(os.size()!=storage.position()) { + fail(String.format( + "error: trie serialization (%s) lengths different: " + + "serialize vs. unserialize\n", testName)); + break; + } + + { + storage.rewind(); + CodePointTrie any = CodePointTrie.fromBinary(null, null, storage); + if (type != any.getType()) { + fail(String.format( + "error: ucptrie_openFromBinary(" + + "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getType() wrong\n")); + } + if (valueWidth != any.getValueWidth()) { + fail(String.format( + "error: ucptrie_openFromBinary(" + + "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getValueWidth() wrong\n")); + } + } + + testTrie(testName, trie, type, valueWidth, checkRanges); + { + /* make a mutable trie from an immutable one */ + int value, value2; + MutableCodePointTrie mutable2 = MutableCodePointTrie.fromCodePointMap(trie); + + value=mutable2.get(0xa1); + mutable2.set(0xa1, 789); + value2=mutable2.get(0xa1); + mutable2.set(0xa1, value); + if(value2!=789) { + fail(String.format( + "error: modifying a mutableTrie-from-UCPTrie (%s) failed\n", + testName)); + } + testBuilder(testName, mutable2, checkRanges); + } + } while(false); + } + + private 
MutableCodePointTrie + testTrieSerializeAllValueWidth(String testName, + MutableCodePointTrie mutableTrie, boolean withClone, + CheckRange checkRanges[]) { + int oredValues = 0; + int i; + for (i = 0; i < checkRanges.length; ++i) { + oredValues |= checkRanges[i].value; + } + + testBuilder(testName, mutableTrie, checkRanges); + + if (oredValues <= 0xffff) { + String name = testName + ".16"; + testTrieSerialize(name, mutableTrie, + CodePointTrie.Type.FAST, CodePointTrie.ValueWidth.BITS_16, withClone, + checkRanges); + } + + String name = testName + ".32"; + testTrieSerialize(name, mutableTrie, + CodePointTrie.Type.FAST, CodePointTrie.ValueWidth.BITS_32, withClone, + checkRanges); + + if (oredValues <= 0xff) { + name = testName + ".8"; + testTrieSerialize(name, mutableTrie, + CodePointTrie.Type.FAST, CodePointTrie.ValueWidth.BITS_8, withClone, + checkRanges); + } + + if (oredValues <= 0xffff) { + name = testName + ".small16"; + testTrieSerialize(name, mutableTrie, + CodePointTrie.Type.SMALL, CodePointTrie.ValueWidth.BITS_16, withClone, + checkRanges); + } + + return mutableTrie; + } + + private MutableCodePointTrie + makeTrieWithRanges(String testName, boolean withClone, + SetRange setRanges[], CheckRange checkRanges[]) { + MutableCodePointTrie mutableTrie; + int value; + int start, limit; + int i; + + System.out.println("\ntesting Trie " + testName); + SpecialValues specials = getSpecialValues(checkRanges); + mutableTrie = new MutableCodePointTrie(specials.initialValue, specials.errorValue); + + /* set values from setRanges[] */ + for(i=0; i>4)/2; ++i) { // 4=UCPTRIE_SHIFT_3 + mutableTrie.setRange(0x740, 0x840-1, 1); + mutableTrie.setRange(0x780, 0x880-1, 1); + mutableTrie.setRange(0x740, 0x840-1, 2); + mutableTrie.setRange(0x780, 0x880-1, 3); + } + /* make blocks that will be free during compaction */ + mutableTrie.setRange(0x1000, 0x3000-1, 2); + mutableTrie.setRange(0x2000, 0x4000-1, 3); + mutableTrie.setRange(0x1000, 0x4000-1, 1); + + mutableTrie = 
testTrieSerializeAllValueWidth(testName, mutableTrie, false, checkRanges); + } + + @Test + public void GrowDataArrayTest() { + final CheckRange + checkRanges[]={ + new CheckRange(0, 1), + new CheckRange(0x720, 2), + new CheckRange(0x7a0, 3), + new CheckRange(0x8a0, 4), + new CheckRange(0x110000, 5) + }; + String testName="grow-data"; + + MutableCodePointTrie mutableTrie; + int i; + + mutableTrie=new MutableCodePointTrie(1, 0xad); + + /* + * Use umutablecptrie_set() not umutablecptrie_setRange() to write non-initialValue-data. + * Should grow/reallocate the data array to a sufficient length. + */ + for(i=0; i<0x1000; ++i) { + mutableTrie.set(i, 2); + } + for(i=0x720; i<0x1100; ++i) { /* some overlap */ + mutableTrie.set(i, 3); + } + for(i=0x7a0; i<0x900; ++i) { + mutableTrie.set(i, 4); + } + for(i=0x8a0; i<0x110000; ++i) { + mutableTrie.set(i, 5); + } + + mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, false, checkRanges); + } + + @Test + public void ManyAllSameBlocksTest() { + String testName="many-all-same"; + + MutableCodePointTrie mutableTrie; + int i; + CheckRange[] checkRanges = new CheckRange[(0x110000 >> 12) + 1]; + + mutableTrie = new MutableCodePointTrie(0xff33, 0xad); + checkRanges[0] = new CheckRange(0, 0xff33); // initialValue + + // Many all-same-value blocks. 
+ for (i = 0; i < 0x110000; i += 0x1000) { + int value = i >> 12; + mutableTrie.setRange(i, i + 0xfff, value); + checkRanges[value + 1] = new CheckRange(i + 0x1000, value); + } + for (i = 0; i < 0x110000; i += 0x1000) { + int expected = i >> 12; + int v0 = mutableTrie.get(i); + int vfff = mutableTrie.get(i + 0xfff); + if (v0 != expected || vfff != expected) { + fail(String.format( + "error: MutableCodePointTrie U+%04x unexpected value\n", i)); + } + } + + mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, false, checkRanges); + } + + @Test + public void MuchDataTest() { + String testName="much-data"; + + MutableCodePointTrie mutableTrie; + int r, c; + CheckRange[] checkRanges = new CheckRange[(0x10000 >> 6) + (0x10240 >> 4) + 10]; + + mutableTrie = new MutableCodePointTrie(0xff33, 0xad); + checkRanges[0] = new CheckRange(0, 0xff33); // initialValue + r = 1; + + // Add much data that does not compact well, + // to get more than 128k data values after compaction. + for (c = 0; c < 0x10000; c += 0x40) { + int value = c >> 4; + mutableTrie.setRange(c, c + 0x3f, value); + checkRanges[r++] = new CheckRange(c + 0x40, value); + } + checkRanges[r++] = new CheckRange(0x20000, 0xff33); + for (c = 0x20000; c < 0x30230; c += 0x10) { + int value = c >> 4; + mutableTrie.setRange(c, c + 0xf, value); + checkRanges[r++] = new CheckRange(c + 0x10, value); + } + mutableTrie.setRange(0x30230, 0x30233, 0x3023); + checkRanges[r++] = new CheckRange(0x30234, 0x3023); + mutableTrie.setRange(0x30234, 0xdffff, 0x5005); + checkRanges[r++] = new CheckRange(0xe0000, 0x5005); + mutableTrie.setRange(0xe0000, 0x10ffff, 0x9009); + checkRanges[r++] = new CheckRange(0x110000, 0x9009); + + checkRanges = Arrays.copyOf(checkRanges, r); + testBuilder(testName, mutableTrie, checkRanges); + testTrieSerialize("much-data.16", mutableTrie, + CodePointTrie.Type.FAST, CodePointTrie.ValueWidth.BITS_16, false, + checkRanges); + } + + private void testGetRangesFixedSurr(String testName, 
MutableCodePointTrie mutableTrie, + CodePointMap.RangeOption option, CheckRange checkRanges[]) { + testTrieGetRanges(testName, mutableTrie, option, 5, checkRanges); + MutableCodePointTrie clone = mutableTrie.clone(); + CodePointTrie trie = + clone.buildImmutable(CodePointTrie.Type.FAST, CodePointTrie.ValueWidth.BITS_16); + testTrieGetRanges(testName, trie, option, 5, checkRanges); + } + + @Test + public void TrieTestGetRangesFixedSurr() { + final SetRange + setRangesFixedSurr[]={ + new SetRange(0xd000, 0xd7ff, 5), + new SetRange(0xd7ff, 0xe001, 3), + new SetRange(0xe001, 0xf900, 5), + }; + + final CheckRange + checkRangesFixedLeadSurr1[]={ + new CheckRange(0, 0), + new CheckRange(0xd000, 0), + new CheckRange(0xd7ff, 5), + new CheckRange(0xd800, 3), + new CheckRange(0xdc00, 5), + new CheckRange(0xe001, 3), + new CheckRange(0xf900, 5), + new CheckRange(0x110000, 0) + }; + + final CheckRange + checkRangesFixedAllSurr1[]={ + new CheckRange(0, 0), + new CheckRange(0xd000, 0), + new CheckRange(0xd7ff, 5), + new CheckRange(0xd800, 3), + new CheckRange(0xe000, 5), + new CheckRange(0xe001, 3), + new CheckRange(0xf900, 5), + new CheckRange(0x110000, 0) + }; + + final CheckRange + checkRangesFixedLeadSurr3[]={ + new CheckRange(0, 0), + new CheckRange(0xd000, 0), + new CheckRange(0xdc00, 5), + new CheckRange(0xe001, 3), + new CheckRange(0xf900, 5), + new CheckRange(0x110000, 0) + }; + + final CheckRange + checkRangesFixedAllSurr3[]={ + new CheckRange(0, 0), + new CheckRange(0xd000, 0), + new CheckRange(0xe000, 5), + new CheckRange(0xe001, 3), + new CheckRange(0xf900, 5), + new CheckRange(0x110000, 0) + }; + + final CheckRange + checkRangesFixedSurr4[]={ + new CheckRange(0, 0), + new CheckRange(0xd000, 0), + new CheckRange(0xf900, 5), + new CheckRange(0x110000, 0) + }; + + MutableCodePointTrie mutableTrie = makeTrieWithRanges( + "fixedSurr", false, setRangesFixedSurr, checkRangesFixedLeadSurr1); + testGetRangesFixedSurr("fixedLeadSurr1", mutableTrie, + 
CodePointMap.RangeOption.FIXED_LEAD_SURROGATES, checkRangesFixedLeadSurr1); + testGetRangesFixedSurr("fixedAllSurr1", mutableTrie, + CodePointMap.RangeOption.FIXED_ALL_SURROGATES, checkRangesFixedAllSurr1); + // Setting a range in the middle of lead surrogates makes no difference. + mutableTrie.setRange(0xd844, 0xd899, 5); + testGetRangesFixedSurr("fixedLeadSurr2", mutableTrie, + CodePointMap.RangeOption.FIXED_LEAD_SURROGATES, checkRangesFixedLeadSurr1); + // Bridge the gap before the lead surrogates. + mutableTrie.set(0xd7ff, 5); + testGetRangesFixedSurr("fixedLeadSurr3", mutableTrie, + CodePointMap.RangeOption.FIXED_LEAD_SURROGATES, checkRangesFixedLeadSurr3); + testGetRangesFixedSurr("fixedAllSurr3", mutableTrie, + CodePointMap.RangeOption.FIXED_ALL_SURROGATES, checkRangesFixedAllSurr3); + // Bridge the gap after the trail surrogates. + mutableTrie.set(0xe000, 5); + testGetRangesFixedSurr("fixedSurr4", mutableTrie, + CodePointMap.RangeOption.FIXED_ALL_SURROGATES, checkRangesFixedSurr4); + } +}