diff --git a/.gitattributes b/.gitattributes index 0d829743ac8..e402cb66255 100644 --- a/.gitattributes +++ b/.gitattributes @@ -49,6 +49,10 @@ README text !eol *.tri2 -text icu4c/icu4c.css -text +icu4c/source/data/in/nfc.nrm -text +icu4c/source/data/in/nfkc.nrm -text +icu4c/source/data/in/nfkc_cf.nrm -text +icu4c/source/data/in/unorm.icu -text icu4c/source/data/locales/pool.res -text icu4c/source/samples/ucnv/data02.bin -text icu4c/source/test/perf/README -text diff --git a/.gitignore b/.gitignore index 5d48f2b5bd4..fc3e8601f34 100644 --- a/.gitignore +++ b/.gitignore @@ -560,6 +560,20 @@ icu4c/source/tools/gennorm/gennorm.vcproj.*.*.user icu4c/source/tools/gennorm/release icu4c/source/tools/gennorm/x64 icu4c/source/tools/gennorm/x86 +icu4c/source/tools/gennorm2/*.d +icu4c/source/tools/gennorm2/*.o +icu4c/source/tools/gennorm2/*.pdb +icu4c/source/tools/gennorm2/*.plg +icu4c/source/tools/gennorm2/Debug +icu4c/source/tools/gennorm2/Makefile +icu4c/source/tools/gennorm2/Release +icu4c/source/tools/gennorm2/debug +icu4c/source/tools/gennorm2/gennorm2 +icu4c/source/tools/gennorm2/gennorm2.[0-9] +icu4c/source/tools/gennorm2/gennorm2.vcproj.*.*.user +icu4c/source/tools/gennorm2/release +icu4c/source/tools/gennorm2/x64 +icu4c/source/tools/gennorm2/x86 icu4c/source/tools/genpname/*.d icu4c/source/tools/genpname/*.o icu4c/source/tools/genpname/*.pdb diff --git a/icu4c/source/allinone/allinone.sln b/icu4c/source/allinone/allinone.sln index de34a09ffb4..cc9b808e375 100644 --- a/icu4c/source/allinone/allinone.sln +++ b/icu4c/source/allinone/allinone.sln @@ -1,5 +1,5 @@ Microsoft Visual Studio Solution File, Format Version 10.00 -# Visual Studio 2008 +# Visual C++ Express 2008 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}" ProjectSection(ProjectDependencies) = postProject {0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776} @@ -259,6 +259,12 @@ 
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencfu", "..\tools\gencfu\g {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gennorm2", "..\tools\gennorm2\gennorm2.vcproj", "{C7891A65-80AB-4245-912E-5F1E17B0E6C4}" + ProjectSection(ProjectDependencies) = postProject + {6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0} + {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -555,6 +561,14 @@ Global {691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.Build.0 = Release|Win32 {691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.ActiveCfg = Release|x64 {691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.Build.0 = Release|x64 + {C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|Win32.ActiveCfg = Debug|Win32 + {C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|Win32.Build.0 = Debug|Win32 + {C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|x64.ActiveCfg = Debug|Win32 + {C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|x64.Build.0 = Debug|Win32 + {C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|Win32.ActiveCfg = Release|Win32 + {C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|Win32.Build.0 = Release|Win32 + {C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|x64.ActiveCfg = Release|Win32 + {C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|x64.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index 3b39c378787..c2a94148d7d 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -1,6 +1,6 @@ #****************************************************************************** # -# Copyright (C) 1999-2009, International Business Machines +# Copyright 
(C) 1999-2010, International Business Machines # Corporation and others. All Rights Reserved. # #****************************************************************************** @@ -78,7 +78,8 @@ ucat.o locmap.o uloc.o locid.o locutil.o \ bytestream.o stringpiece.o \ ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \ utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \ -normlzr.o unorm.o unormcmp.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \ +normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \ +chariter.o schriter.o uchriter.o uiter.o \ uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \ uscript.o usc_impl.o unames.o \ utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \ diff --git a/icu4c/source/common/caniter.cpp b/icu4c/source/common/caniter.cpp index 04d48ba8fce..c11c31e2bd7 100644 --- a/icu4c/source/common/caniter.cpp +++ b/icu4c/source/common/caniter.cpp @@ -1,6 +1,6 @@ /* ***************************************************************************** - * Copyright (C) 1996-2006, International Business Machines Corporation and * + * Copyright (C) 1996-2010, International Business Machines Corporation and * * others. All Rights Reserved. 
* ***************************************************************************** */ @@ -12,6 +12,7 @@ #include "unicode/uset.h" #include "unicode/ustring.h" #include "hash.h" +#include "normalizer2impl.h" #include "unormimp.h" #include "unicode/caniter.h" #include "unicode/normlzr.h" @@ -68,7 +69,8 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode pieces_length(0), pieces_lengths(NULL), current(NULL), - current_length(0) + current_length(0), + nfd(*Normalizer2Factory::getNFDInstance(status)) { if(U_SUCCESS(status)) { setSource(sourceStr, status); @@ -499,73 +501,39 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con return NULL; } - const int32_t bufSize = 256; - int32_t bufLen = 0; - UChar temp[bufSize]; - - int32_t inputLen = 0, decompLen; - UChar stackBuffer[4]; - const UChar *decomp; - - U16_APPEND_UNSAFE(temp, inputLen, comp); - decomp = unorm_getCanonicalDecomposition(comp, stackBuffer, &decompLen); - if(decomp == NULL) { - /* copy temp */ - stackBuffer[0] = temp[0]; - if(inputLen > 1) { - stackBuffer[1] = temp[1]; - } - decomp = stackBuffer; - decompLen = inputLen; - } - - UChar *buff = temp+inputLen; + UnicodeString temp(comp); + int32_t inputLen=temp.length(); + UnicodeString decompString; + nfd.normalize(temp, decompString, status); + const UChar *decomp=decompString.getBuffer(); + int32_t decompLen=decompString.length(); // See if it matches the start of segment (at segmentPos) UBool ok = FALSE; UChar32 cp; int32_t decompPos = 0; UChar32 decompCp; - UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp); + U16_NEXT(decomp, decompPos, decompLen, decompCp); - int32_t i; - UBool overflow = FALSE; - - i = segmentPos; + int32_t i = segmentPos; while(i < segLen) { - UTF_NEXT_CHAR(segment, i, segLen, cp); + U16_NEXT(segment, i, segLen, cp); if (cp == decompCp) { // if equal, eat another cp from decomp //if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp)))); if (decompPos == decompLen) 
{ // done, have all decomp characters! - //u_strcat(buff+bufLen, segment+i); - uprv_memcpy(buff+bufLen, segment+i, (segLen-i)*sizeof(UChar)); - bufLen+=segLen-i; - + temp.append(segment+i, segLen-i); ok = TRUE; break; } - UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp); + U16_NEXT(decomp, decompPos, decompLen, decompCp); } else { //if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp)))); // brute force approach - - U16_APPEND(buff, bufLen, bufSize, cp, overflow); - - if(overflow) { - /* - * ### TODO handle buffer overflow - * The buffer is large, but an overflow may still happen with - * unusual input (many combining marks?). - * Reallocate buffer and continue. - * markus 20020929 - */ - - overflow = FALSE; - } + temp.append(cp); /* TODO: optimize // since we know that the classes are monotonically increasing, after zero @@ -585,25 +553,20 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con //if (PROGRESS) printf("Matches\n"); - if (bufLen == 0) { + if (inputLen == temp.length()) { fillinResult->put(UnicodeString(), new UnicodeString(), status); return fillinResult; // succeed, but no remainder } // brute force approach // check to make sure result is canonically equivalent - int32_t tempLen = inputLen + bufLen; - - UChar trial[bufSize]; - unorm_decompose(trial, bufSize, temp, tempLen, FALSE, 0, &status); - - if(U_FAILURE(status) - || uprv_memcmp(segment+segmentPos, trial, (segLen - segmentPos)*sizeof(UChar)) != 0) - { + UnicodeString trial; + nfd.normalize(temp, trial, status); + if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) { return NULL; } - return getEquivalents2(fillinResult, buff, bufLen, status); + return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status); } U_NAMESPACE_END diff --git a/icu4c/source/common/common.vcproj b/icu4c/source/common/common.vcproj index 44f05d7de5a..14ce8e78d6b 100644 --- a/icu4c/source/common/common.vcproj 
+++ b/icu4c/source/common/common.vcproj @@ -769,7 +769,7 @@ Name="collation" > + + @@ -3057,6 +3061,62 @@ /> + + + + + + + + + + + + + + + + + + + + + + @@ -3145,6 +3205,46 @@ /> + + + + + + + + + + + + + + @@ -3470,7 +3570,7 @@ > 0) { + return fInstance; // instance was created + } else if(haveInstance<0) { + errorCode=fErrorCode; // instance creation failed + return NULL; + } else /* haveInstance==0 */ { + void *instance=instantiator(context, errorCode); + Mutex mutex; + if(fHaveInstance==0) { + if(U_SUCCESS(errorCode)) { + fInstance=instance; + instance=NULL; + fHaveInstance=1; + } else { + fErrorCode=errorCode; + fHaveInstance=-1; + } + } else { + errorCode=fErrorCode; + } + duplicate=instance; + return fInstance; + } +} + +void TriStateSingleton::reset() { + fInstance=NULL; + fErrorCode=U_ZERO_ERROR; + fHaveInstance=0; +} #if UCONFIG_NO_SERVICE /* If UCONFIG_NO_SERVICE, then there is no invocation of Mutex elsewhere in common, so add one here to force an export */ -#include "mutex.h" static Mutex *aMutex = 0; /* UCONFIG_NO_SERVICE */ #endif + +U_NAMESPACE_END diff --git a/icu4c/source/common/mutex.h b/icu4c/source/common/mutex.h index aba0dd20645..ea2e3485d8d 100644 --- a/icu4c/source/common/mutex.h +++ b/icu4c/source/common/mutex.h @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 1997-2009, International Business Machines +* Copyright (C) 1997-2010, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -71,6 +71,128 @@ inline Mutex::~Mutex() umtx_unlock(fMutex); } +// common code for singletons ---------------------------------------------- *** + +/** + * Function pointer for the instantiator parameter of + * SimpleSingleton::getInstance() and TriStateSingleton::getInstance(). + * The function creates some object, optionally using the context parameter. 
+ * The function need not check for U_FAILURE(errorCode). + */ +typedef void *InstantiatorFn(const void *context, UErrorCode &errorCode); + +/** + * Singleton struct with shared instantiation/mutexing code. + * Simple: Does not remember if a previous instantiation failed. + * Best used if the instantiation can really only fail with an out-of-memory error, + * otherwise use a TriStateSingleton. + * Best used via SimpleSingletonWrapper or similar. + * Define a static SimpleSingleton instance via the STATIC_SIMPLE_SINGLETON macro. + */ +struct SimpleSingleton { + void *fInstance; + + /** + * Returns the singleton instance, or NULL if it could not be created. + * Calls the instantiator with the context if the instance has not been + * created yet. In a race condition, the duplicate may not be NULL. + * The caller must delete the duplicate. + * The caller need not initialize the duplicate before the call. + */ + void *getInstance(InstantiatorFn *instantiator, const void *context, + void *&duplicate, + UErrorCode &errorCode); + /** + * Resets the fields. The caller must have deleted the singleton instance. + * Not mutexed. + * Call this from a cleanup function. + */ + void reset() { fInstance=NULL; } +}; + +#define STATIC_SIMPLE_SINGLETON(name) static SimpleSingleton name={ NULL } + +/** + * Handy wrapper for an SimpleSingleton. + * Intended for temporary use on the stack, to make the SimpleSingleton easier to deal with. + * Takes care of the duplicate deletion and type casting. 
+ */ +template +class SimpleSingletonWrapper { +public: + SimpleSingletonWrapper(SimpleSingleton &s) : singleton(s) {} + void deleteInstance() { + delete (T *)singleton.fInstance; + singleton.reset(); + } + T *getInstance(InstantiatorFn *instantiator, const void *context, + UErrorCode &errorCode) { + void *duplicate; + T *instance=(T *)singleton.getInstance(instantiator, context, duplicate, errorCode); + delete (T *)duplicate; + return instance; + } +private: + SimpleSingleton &singleton; +}; + +/** + * Singleton struct with shared instantiation/mutexing code. + * Tri-state: Instantiation succeeded/failed/not attempted yet. + * Best used via TriStateSingletonWrapper or similar. + * Define a static TriStateSingleton instance via the STATIC_TRI_STATE_SINGLETON macro. + */ +struct TriStateSingleton { + void *fInstance; + UErrorCode fErrorCode; + int8_t fHaveInstance; + + /** + * Returns the singleton instance, or NULL if it could not be created. + * Calls the instantiator with the context if the instance has not been + * created yet. In a race condition, the duplicate may not be NULL. + * The caller must delete the duplicate. + * The caller need not initialize the duplicate before the call. + * The singleton creation is only attempted once. If it fails, + * the singleton will then always return NULL. + */ + void *getInstance(InstantiatorFn *instantiator, const void *context, + void *&duplicate, + UErrorCode &errorCode); + /** + * Resets the fields. The caller must have deleted the singleton instance. + * Not mutexed. + * Call this from a cleanup function. + */ + void reset(); +}; + +#define STATIC_TRI_STATE_SINGLETON(name) static TriStateSingleton name={ NULL, U_ZERO_ERROR, 0 } + +/** + * Handy wrapper for an TriStateSingleton. + * Intended for temporary use on the stack, to make the TriStateSingleton easier to deal with. + * Takes care of the duplicate deletion and type casting. 
+ */ +template +class TriStateSingletonWrapper { +public: + TriStateSingletonWrapper(TriStateSingleton &s) : singleton(s) {} + void deleteInstance() { + delete (T *)singleton.fInstance; + singleton.reset(); + } + T *getInstance(InstantiatorFn *instantiator, const void *context, + UErrorCode &errorCode) { + void *duplicate; + T *instance=(T *)singleton.getInstance(instantiator, context, duplicate, errorCode); + delete (T *)duplicate; + return instance; + } +private: + TriStateSingleton &singleton; +}; + U_NAMESPACE_END #endif //_MUTEX_ diff --git a/icu4c/source/common/normalizer2.cpp b/icu4c/source/common/normalizer2.cpp new file mode 100644 index 00000000000..d531e9c1aa5 --- /dev/null +++ b/icu4c/source/common/normalizer2.cpp @@ -0,0 +1,744 @@ +/* +******************************************************************************* +* +* Copyright (C) 2009-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: normalizer2.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov22 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/localpointer.h" +#include "unicode/normalizer2.h" +#include "unicode/unistr.h" +#include "unicode/unorm.h" +#include "cpputils.h" +#include "cstring.h" +#include "mutex.h" +#include "normalizer2impl.h" +#include "ucln_cmn.h" + +U_NAMESPACE_BEGIN + +// Public API dispatch via Normalizer2 subclasses -------------------------- *** + +// Normalizer2 implementation for the old UNORM_NONE. 
+class NoopNormalizer2 : public Normalizer2 {
+    // Pass-through normalizer: strings are copied/appended unchanged and every
+    // check reports "already normalized". As in the other Normalizer2
+    // implementations in this file, source and destination must be different
+    // objects; aliasing is reported as U_ILLEGAL_ARGUMENT_ERROR.
+
+    // Copies src into dest unchanged.
+    // Does nothing if errorCode already indicates a failure.
+    // Sets U_ILLEGAL_ARGUMENT_ERROR if dest and src are the same object.
+    // Returns dest.
+    virtual UnicodeString &
+    normalize(const UnicodeString &src,
+              UnicodeString &dest,
+              UErrorCode &errorCode) const {
+        if(U_SUCCESS(errorCode)) {
+            if(&dest!=&src) {
+                dest=src;
+            } else {
+                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            }
+        }
+        return dest;
+    }
+    // Appends second to first unchanged (there is nothing to normalize).
+    // Does nothing if errorCode already indicates a failure.
+    // Sets U_ILLEGAL_ARGUMENT_ERROR if first and second are the same object,
+    // matching append() below and Normalizer2WithImpl::normalizeSecondAndAppend().
+    // Returns first.
+    virtual UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UErrorCode &errorCode) const {
+        if(U_SUCCESS(errorCode)) {
+            if(&first!=&second) {
+                first.append(second);
+            } else {
+                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            }
+        }
+        return first;
+    }
+    // Appends second to first unchanged.
+    // Does nothing if errorCode already indicates a failure.
+    // Sets U_ILLEGAL_ARGUMENT_ERROR if first and second are the same object.
+    // Returns first.
+    virtual UnicodeString &
+    append(UnicodeString &first,
+           const UnicodeString &second,
+           UErrorCode &errorCode) const {
+        if(U_SUCCESS(errorCode)) {
+            if(&first!=&second) {
+                first.append(second);
+            } else {
+                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            }
+        }
+        return first;
+    }
+    // Always TRUE; neither s nor errorCode is examined.
+    virtual UBool
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
+        return TRUE;
+    }
+    // Always UNORM_YES; neither s nor errorCode is examined.
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
+        return UNORM_YES;
+    }
+    // Returns the full length of s; errorCode is not examined.
+    virtual int32_t
+    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
+        return s.length();
+    }
+    // Boundary/inertness queries return TRUE for every code point.
+    virtual UBool hasBoundaryBefore(UChar32 c) const { return TRUE; }
+    virtual UBool hasBoundaryAfter(UChar32 c) const { return TRUE; }
+    virtual UBool isInert(UChar32 c) const { return TRUE; }
+
+    static UClassID U_EXPORT2 getStaticClassID();
+    virtual UClassID getDynamicClassID() const;
+};
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoopNormalizer2)
+
+// Intermediate class:
+// Has Normalizer2Impl and does boilerplate argument checking and setup.
+class Normalizer2WithImpl : public Normalizer2 { +public: + Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} + + // normalize + virtual UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + dest.setToBogus(); + return dest; + } + const UChar *sArray=src.getBuffer(); + if(&dest==&src || sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + dest.setToBogus(); + return dest; + } + dest.remove(); + ReorderingBuffer buffer(impl, dest); + if(buffer.init(src.length(), errorCode)) { + normalize(sArray, sArray+src.length(), buffer, errorCode); + } + return dest; + } + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; + + // normalize and append + virtual UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const { + return normalizeSecondAndAppend(first, second, TRUE, errorCode); + } + virtual UnicodeString & + append(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const { + return normalizeSecondAndAppend(first, second, FALSE, errorCode); + } + UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UBool doNormalize, + UErrorCode &errorCode) const { + uprv_checkCanGetBuffer(first, errorCode); + if(U_FAILURE(errorCode)) { + return first; + } + const UChar *secondArray=second.getBuffer(); + if(&first==&second || secondArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return first; + } + ReorderingBuffer buffer(impl, first); + if(buffer.init(first.length()+second.length(), errorCode)) { + normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, + buffer, errorCode); + } + return first; + } + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 
+ + // quick checks + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return FALSE; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + const UChar *sLimit=sArray+s.length(); + return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); + } + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { + return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; + } + virtual int32_t + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; + + virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { + return UNORM_YES; + } + + static UClassID U_EXPORT2 getStaticClassID(); + virtual UClassID getDynamicClassID() const; + + const Normalizer2Impl &impl; +}; + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer2WithImpl) + +class DecomposeNormalizer2 : public Normalizer2WithImpl { +public: + DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} + + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.decompose(src, limit, &buffer, errorCode); + } + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode); + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { + return 
impl.decompose(src, limit, NULL, errorCode); + } + virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { + return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; + } + virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } + virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } + virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } +}; + +class ComposeNormalizer2 : public Normalizer2WithImpl { +public: + ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : + Normalizer2WithImpl(ni), onlyContiguous(fcc) {} + + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); + } + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode); + } + + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return FALSE; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + UnicodeString temp; + ReorderingBuffer buffer(impl, temp); + if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization + return FALSE; + } + return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); + } + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return UNORM_MAYBE; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return UNORM_MAYBE; + } + UNormalizationCheckResult qcResult=UNORM_YES; + impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, 
&qcResult); + return qcResult; + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { + return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); + } + virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { + return impl.getCompQuickCheck(impl.getNorm16(c)); + } + virtual UBool hasBoundaryBefore(UChar32 c) const { + return impl.hasCompBoundaryBefore(c); + } + virtual UBool hasBoundaryAfter(UChar32 c) const { + return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); + } + virtual UBool isInert(UChar32 c) const { + return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); + } +private: + UBool onlyContiguous; +}; + +class FCDNormalizer2 : public Normalizer2WithImpl { +public: + FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} + + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.makeFCD(src, limit, &buffer, errorCode); + } + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode); + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { + return impl.makeFCD(src, limit, NULL, errorCode); + } + virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } + virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } + virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } +}; + +// instance cache ---------------------------------------------------------- *** + +struct Norm2AllModes : public UMemory { + static Norm2AllModes *createInstance(const char *packageName, + const char *name, + UErrorCode &errorCode); + Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} + + 
Normalizer2Impl impl; + ComposeNormalizer2 comp; + DecomposeNormalizer2 decomp; + FCDNormalizer2 fcd; + ComposeNormalizer2 fcc; +}; + +Norm2AllModes * +Norm2AllModes::createInstance(const char *packageName, + const char *name, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return NULL; + } + LocalPointer allModes(new Norm2AllModes); + if(allModes.isNull()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + allModes->impl.load(packageName, name, errorCode); + return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; +} + +U_CDECL_BEGIN +static UBool U_CALLCONV uprv_normalizer2_cleanup(); +U_CDECL_END + +class Norm2AllModesSingleton : public TriStateSingletonWrapper { +public: + Norm2AllModesSingleton(TriStateSingleton &s, const char *n) : + TriStateSingletonWrapper(s), name(n) {} + Norm2AllModes *getInstance(UErrorCode &errorCode) { + return TriStateSingletonWrapper::getInstance(createInstance, name, errorCode); + } +private: + static void *createInstance(const void *context, UErrorCode &errorCode) { + ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); + return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode); + } + + const char *name; +}; + +STATIC_TRI_STATE_SINGLETON(nfcSingleton); +STATIC_TRI_STATE_SINGLETON(nfkcSingleton); +STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton); + +class Norm2Singleton : public SimpleSingletonWrapper { +public: + Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper(s) {} + Normalizer2 *getInstance(UErrorCode &errorCode) { + return SimpleSingletonWrapper::getInstance(createInstance, NULL, errorCode); + } +private: + static void *createInstance(const void *context, UErrorCode &errorCode) { + Normalizer2 *noop=new NoopNormalizer2; + if(noop==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); + return noop; + } +}; + +STATIC_SIMPLE_SINGLETON(noopSingleton); + +U_CDECL_BEGIN + +static 
UBool U_CALLCONV uprv_normalizer2_cleanup() { + Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance(); + Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance(); + Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance(); + Norm2Singleton(noopSingleton).deleteInstance(); + return TRUE; +} + +U_CDECL_END + +const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { + Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); + return allModes!=NULL ? &allModes->comp : NULL; +} + +const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { + Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); + return allModes!=NULL ? &allModes->decomp : NULL; +} + +const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { + Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); + if(allModes!=NULL) { + allModes->impl.getFCDTrie(errorCode); + return &allModes->fcd; + } else { + return NULL; + } +} + +const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { + Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); + return allModes!=NULL ? &allModes->fcc : NULL; +} + +const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { + Norm2AllModes *allModes= + Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); + return allModes!=NULL ? &allModes->comp : NULL; +} + +const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { + Norm2AllModes *allModes= + Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); + return allModes!=NULL ? &allModes->decomp : NULL; +} + +const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { + Norm2AllModes *allModes= + Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); + return allModes!=NULL ? 
&allModes->comp : NULL; +} + +const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { + return Norm2Singleton(noopSingleton).getInstance(errorCode); +} + +const Normalizer2 * +Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return NULL; + } + switch(mode) { + case UNORM_NFD: + return getNFDInstance(errorCode); + case UNORM_NFKD: + return getNFKDInstance(errorCode); + case UNORM_NFC: + return getNFCInstance(errorCode); + case UNORM_NFKC: + return getNFKCInstance(errorCode); + case UNORM_FCD: + return getFCDInstance(errorCode); + default: // UNORM_NONE + return getNoopInstance(errorCode); + } +} + +const Normalizer2Impl * +Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { + Norm2AllModes *allModes= + Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); + return allModes!=NULL ? &allModes->impl : NULL; +} + +const Normalizer2Impl * +Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { + Norm2AllModes *allModes= + Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); + return allModes!=NULL ? &allModes->impl : NULL; +} + +const Normalizer2Impl * +Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { + Norm2AllModes *allModes= + Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); + return allModes!=NULL ? 
&allModes->impl : NULL; +} + +const UTrie2 * +Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) { + Norm2AllModes *allModes= + Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); + if(allModes!=NULL) { + return allModes->impl.getFCDTrie(errorCode); + } else { + return NULL; + } +} + +const Normalizer2 * +Normalizer2::getInstance(const char *packageName, + const char *name, + UNormalization2Mode mode, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return NULL; + } + if(packageName==NULL) { + Norm2AllModes *allModes=NULL; + if(0==uprv_strcmp(name, "nfc")) { + allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); + } else if(0==uprv_strcmp(name, "nfkc")) { + allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); + } else if(0==uprv_strcmp(name, "nfkc_cf")) { + allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); + } + if(allModes!=NULL) { + switch(mode) { + case UNORM2_COMPOSE: + return &allModes->comp; + case UNORM2_DECOMPOSE: + return &allModes->decomp; + case UNORM2_FCD: + allModes->impl.getFCDTrie(errorCode); + return &allModes->fcd; + case UNORM2_COMPOSE_CONTIGUOUS: + return &allModes->fcc; + default: + break; // do nothing + } + } + } + if(U_SUCCESS(errorCode)) { + // TODO: Real loading and caching... 
+ errorCode=U_UNSUPPORTED_ERROR; + } + return NULL; +} + +UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Normalizer2) + +// C API ------------------------------------------------------------------- *** + +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getInstance(const char *packageName, + const char *name, + UNormalization2Mode mode, + UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); +} + +U_DRAFT void U_EXPORT2 +unorm2_close(UNormalizer2 *norm2) { + delete (Normalizer2 *)norm2; +} + +U_DRAFT int32_t U_EXPORT2 +unorm2_normalize(const UNormalizer2 *norm2, + const UChar *src, int32_t length, + UChar *dest, int32_t capacity, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(src==NULL || length<-1 || capacity<0 || (dest==NULL && capacity>0) || src==dest) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString destString(dest, 0, capacity); + const Normalizer2 *n2=(const Normalizer2 *)norm2; + if(n2->getDynamicClassID()==Normalizer2WithImpl::getStaticClassID()) { + // Avoid duplicate argument checking and support NUL-terminated src. + const Normalizer2WithImpl *n2wi=(const Normalizer2WithImpl *)n2; + ReorderingBuffer buffer(n2wi->impl, destString); + if(buffer.init(length, *pErrorCode)) { + n2wi->normalize(src, length>=0 ? 
src+length : NULL, buffer, *pErrorCode); + } + } else { + UnicodeString srcString(length<0, src, length); + n2->normalize(srcString, destString, *pErrorCode); + } + return destString.extract(dest, capacity, *pErrorCode); +} + +static int32_t +normalizeSecondAndAppend(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UBool doNormalize, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if( second==NULL || secondLength<-1 || + firstCapacity<0 || (first==NULL && firstCapacity>0) || firstLength<-1 || + first==second + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString firstString(first, firstLength, firstCapacity); + const Normalizer2 *n2=(const Normalizer2 *)norm2; + if(n2->getDynamicClassID()==Normalizer2WithImpl::getStaticClassID()) { + // Avoid duplicate argument checking and support NUL-terminated src. + const Normalizer2WithImpl *n2wi=(const Normalizer2WithImpl *)n2; + ReorderingBuffer buffer(n2wi->impl, firstString); + if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 + n2wi->normalizeAndAppend(second, secondLength>=0 ? 
second+secondLength : NULL, + doNormalize, buffer, *pErrorCode); + } + } else { + UnicodeString secondString(secondLength<0, second, secondLength); + if(doNormalize) { + n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); + } else { + n2->append(firstString, secondString, *pErrorCode); + } + } + return firstString.extract(first, firstCapacity, *pErrorCode); +} + +U_DRAFT int32_t U_EXPORT2 +unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UErrorCode *pErrorCode) { + return normalizeSecondAndAppend(norm2, + first, firstLength, firstCapacity, + second, secondLength, + TRUE, pErrorCode); +} + +U_DRAFT int32_t U_EXPORT2 +unorm2_append(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UErrorCode *pErrorCode) { + return normalizeSecondAndAppend(norm2, + first, firstLength, firstCapacity, + second, secondLength, + FALSE, pErrorCode); +} + +U_DRAFT UBool U_EXPORT2 +unorm2_isNormalized(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(s==NULL || length<-1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString sString(length<0, s, length); + return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); +} + +U_DRAFT UNormalizationCheckResult U_EXPORT2 +unorm2_quickCheck(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return UNORM_NO; + } + if(s==NULL || length<-1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return UNORM_NO; + } + UnicodeString sString(length<0, s, length); + return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); +} + +U_DRAFT int32_t U_EXPORT2 +unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + 
UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(s==NULL || length<-1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString sString(length<0, s, length); + return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); +} + +U_DRAFT UBool U_EXPORT2 +unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { + return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); +} + +U_DRAFT UBool U_EXPORT2 +unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { + return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); +} + +U_DRAFT UBool U_EXPORT2 +unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { + return ((const Normalizer2 *)norm2)->isInert(c); +} + +// Some properties APIs ---------------------------------------------------- *** + +U_CFUNC UNormalizationCheckResult U_EXPORT2 +unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { + if(mode<=UNORM_NONE || UNORM_FCD<=mode) { + return UNORM_YES; + } + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); + if(U_SUCCESS(errorCode)) { + return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); + } else { + return UNORM_MAYBE; + } +} + +U_CAPI const uint16_t * U_EXPORT2 +unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) { + const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode); + if(U_SUCCESS(*pErrorCode)) { + fcdHighStart=trie->highStart; + return trie->index; + } else { + return NULL; + } +} + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_NORMALIZATION diff --git a/icu4c/source/common/normalizer2impl.cpp b/icu4c/source/common/normalizer2impl.cpp new file mode 100644 index 00000000000..f303540490f --- /dev/null +++ b/icu4c/source/common/normalizer2impl.cpp @@ -0,0 +1,1727 @@ +/* +******************************************************************************* +* +* Copyright (C) 2009-2010, International Business Machines +* Corporation and others. 
All Rights Reserved. +* +******************************************************************************* +* file name: normalizer2impl.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov22 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/normalizer2.h" +#include "unicode/udata.h" +#include "unicode/ustring.h" +#include "cmemory.h" +#include "mutex.h" +#include "normalizer2impl.h" +#include "uassert.h" +#include "uset_imp.h" +#include "utrie2.h" + +U_NAMESPACE_BEGIN + +// ReorderingBuffer -------------------------------------------------------- *** + +UBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) { + int32_t length=str.length(); + start=str.getBuffer(destCapacity); + if(start==NULL) { + // getBuffer() already did str.setToBogus() + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + limit=start+length; + remainingCapacity=str.getCapacity()-length; + reorderStart=start; + if(start==limit) { + lastCC=0; + } else { + setIterator(); + lastCC=previousCC(); + // Set reorderStart after the last code point with cc<=1 if there is one. 
+ if(lastCC>1) { + while(previousCC()>1) {} + } + reorderStart=codePointLimit; + } + return TRUE; +} + +UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) { + if(remainingCapacity<2 && !resize(2, errorCode)) { + return FALSE; + } + if(lastCC<=cc || cc==0) { + limit[0]=U16_LEAD(c); + limit[1]=U16_TRAIL(c); + limit+=2; + lastCC=cc; + if(cc<=1) { + reorderStart=limit; + } + } else { + insert(c, cc); + } + remainingCapacity-=2; + return TRUE; +} + +UBool ReorderingBuffer::append(const UChar *s, int32_t length, + uint8_t leadCC, uint8_t trailCC, + UErrorCode &errorCode) { + if(length==0) { + return TRUE; + } + if(remainingCapacity=codePointStart) { + return 0; + } + UChar c=*--codePointStart; + if(ccc;) {} + // insert c at codePointLimit, after the character with prevCC<=cc + UChar *q=limit; + UChar *r=limit+=U16_LENGTH(c); + do { + *--r=*--q; + } while(codePointLimit!=q); + writeCodePoint(q, c); + if(cc<=1) { + reorderStart=r; + } +} + +// Normalizer2Impl --------------------------------------------------------- *** + +Normalizer2Impl::~Normalizer2Impl() { + udata_close(memory); + utrie2_close(normTrie); + UTrie2Singleton(fcdTrieSingleton).deleteInstance(); +} + +UBool U_CALLCONV +Normalizer2Impl::isAcceptable(void *context, + const char *type, const char *name, + const UDataInfo *pInfo) { + if( + pInfo->size>=20 && + pInfo->isBigEndian==U_IS_BIG_ENDIAN && + pInfo->charsetFamily==U_CHARSET_FAMILY && + pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ + pInfo->dataFormat[1]==0x72 && + pInfo->dataFormat[2]==0x6d && + pInfo->dataFormat[3]==0x32 && + pInfo->formatVersion[0]==1 + ) { + Normalizer2Impl *me=(Normalizer2Impl *)context; + uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); + return TRUE; + } else { + return FALSE; + } +} + +void +Normalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + memory=udata_openChoice(packageName, "nrm", name, 
isAcceptable, this, &errorCode); + if(U_FAILURE(errorCode)) { + return; + } + const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); + const int32_t *inIndexes=(const int32_t *)inBytes; + int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; + if(indexesLength<=IX_MIN_MAYBE_YES) { + errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. + return; + } + + minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP]; + minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP]; + + minYesNo=inIndexes[IX_MIN_YES_NO]; + minNoNo=inIndexes[IX_MIN_NO_NO]; + limitNoNo=inIndexes[IX_LIMIT_NO_NO]; + minMaybeYes=inIndexes[IX_MIN_MAYBE_YES]; + + int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; + int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; + normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, + inBytes+offset, nextOffset-offset, NULL, + &errorCode); + if(U_FAILURE(errorCode)) { + return; + } + + offset=nextOffset; + maybeYesCompositions=(const uint16_t *)(inBytes+offset); + extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes); +} + +uint8_t Normalizer2Impl::getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const { + uint16_t prevNorm16; + if(cpStart==(cpLimit-1)) { + prevNorm16=getNorm16FromBMP(*cpStart); + } else { + prevNorm16=getNorm16FromSurrogatePair(cpStart[0], cpStart[1]); + } + if(prevNorm16<=minYesNo) { + return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0 + } else { + return (uint8_t)(*getMapping(prevNorm16)>>8); // tccc from yesNo + } +} + +U_CDECL_BEGIN + +static UBool U_CALLCONV +enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) { + /* add the start code point to the USet */ + const USetAdder *sa=(const USetAdder *)context; + sa->add(sa->set, start); + return TRUE; +} + +U_CDECL_END + +void +Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const { + /* add the start code point of each same-value range of each trie */ + utrie2_enum(normTrie, NULL, 
enumPropertyStartsRange, sa); + + /* add Hangul LV syllables and LV+1 because of skippables */ + for(UChar c=Hangul::HANGUL_BASE; cadd(sa->set, c); + sa->add(sa->set, c+1); + } + sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */ +} + +const UChar * +Normalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src, + UChar32 minNeedDataCP, + ReorderingBuffer *buffer, + UErrorCode &errorCode) const { + // Make some effort to support NUL-terminated strings reasonably. + // Take the part of the fast quick check loop that does not look up + // data and check the first part of the string. + // After this prefix, determine the string length to simplify the rest + // of the code. + const UChar *prevSrc=src; + UChar c; + while((c=*src++)appendZeroCC(prevSrc, src, errorCode); + } + } + return src; +} + +const UChar * +Normalizer2Impl::decompose(const UChar *src, const UChar *limit, + ReorderingBuffer *buffer, + UErrorCode &errorCode) const { + UChar32 minNoCP=minDecompNoCP; + if(limit==NULL) { + src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode); + if(U_FAILURE(errorCode)) { + return src; + } + limit=u_strchr(src, 0); + } + + const UChar *prevSrc; + UChar32 c=0; + uint16_t norm16=0; + + // only for quick check + const UChar *prevBoundary=src; + uint8_t prevCC=0; + + for(;;) { + // count code units below the minimum or with irrelevant data for the quick check + for(prevSrc=src; src!=limit;) { + if( (c=*src)appendZeroCC(prevSrc, src, errorCode)) { + break; + } + } else { + prevCC=0; + prevBoundary=src; + } + } + if(src==limit) { + break; + } + + // Check one above-minimum, relevant code point. 
+ src+=U16_LENGTH(c); + if(buffer!=NULL) { + if(!decompose(c, norm16, *buffer, errorCode)) { + break; + } + } else { + if(isDecompYes(norm16)) { + uint8_t cc=getCCFromYesOrMaybe(norm16); + if(prevCC<=cc || cc==0) { + prevCC=cc; + if(cc<=1) { + prevBoundary=src; + } + continue; + } + } + return prevBoundary; // "no" or cc out of order + } + } + return src; +} + +// Decompose a short piece of text which is likely to contain characters that +// fail the quick check loop and/or where the quick check loop's overhead +// is unlikely to be amortized. +// Called by the compose() and makeFCD() implementations. +UBool Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const { + while(src>8); + if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { + leadCC=(uint8_t)(*mapping++>>8); + } else { + leadCC=0; + } + return buffer.append((const UChar *)mapping, length, leadCC, trailCC, errorCode); + } + } +} + +const UChar * +Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const { + const UChar *decomp=NULL; + uint16_t norm16; + for(;;) { + if(cMIN_NORMAL_MAYBE_YES) { + return FALSE; // ccc!=0 + } else if(isDecompNoAlgorithmic(norm16)) { + c=mapAlgorithmic(c, norm16); + } else { + // c decomposes, get everything from the variable-length extra data + const uint16_t *mapping=getMapping(norm16); + uint16_t firstUnit=*mapping++; + if((firstUnit&MAPPING_LENGTH_MASK)==0) { + return FALSE; + } + if(!before) { + // decomp after-boundary: same as hasFCDBoundaryAfter(), + // fcd16<=1 || trailCC==0 + if(firstUnit>0x1ff) { + return FALSE; // trailCC>1 + } + if(firstUnit<=0xff) { + return TRUE; // trailCC==0 + } + // if(trailCC==1) test leadCC==0, same as checking for before-boundary + } + // TRUE if leadCC==0 (hasFCDBoundaryBefore()) + return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*mapping&0xff00)==0; + } + } +} + +/* + * Finds the recomposition result for + * a forward-combining "lead" character, + 
* specified with a pointer to its compositions list, + * and a backward-combining "trail" character. + * + * If the lead and trail characters combine, then this function returns + * the following "compositeAndFwd" value: + * Bits 21..1 composite character + * Bit 0 set if the composite is a forward-combining starter + * otherwise it returns -1. + * + * The compositions list has (trail, compositeAndFwd) pair entries, + * encoded as either pairs or triples of 16-bit units. + * The last entry has the high bit of its first unit set. + * + * The list is sorted by ascending trail characters (there are no duplicates). + * A linear search is used. + * + * See normalizer2impl.h for a more detailed description + * of the compositions list format. + */ +int32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) { + uint16_t key1, firstUnit; + if(trail(firstUnit=*list)) { + list+=2+(firstUnit&COMP_1_TRIPLE); + } + if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { + if(firstUnit&COMP_1_TRIPLE) { + return ((int32_t)list[1]<<16)|list[2]; + } else { + return list[1]; + } + } + } else { + // trail character is 3400..10FFFF + // result entry has 3 units + key1=(uint16_t)(COMP_1_TRAIL_LIMIT+ + ((trail>>COMP_1_TRAIL_SHIFT))& + ~COMP_1_TRIPLE); + uint16_t key2=(uint16_t)(trail<(firstUnit=*list)) { + list+=2+(firstUnit&COMP_1_TRIPLE); + } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { + if(key2>(secondUnit=list[1])) { + if(firstUnit&COMP_1_LAST_TUPLE) { + break; + } else { + list+=3; + } + } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) { + return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2]; + } else { + break; + } + } else { + break; + } + } + } + return -1; +} + +/* + * Recomposes the text in [p..limit[ + * (which is in NFD - decomposed and canonically ordered), + * and returns how much shorter the string became. 
+ * + * Note that recomposition never lengthens the text: + * Any character consists of either one or two code units; + * a composition may contain at most one more code unit than the original starter, + * while the combining mark that is removed has at least one code unit. + */ +void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex, + UBool onlyContiguous) const { + UChar *p=buffer.getStart()+recomposeStartIndex; + UChar *limit=buffer.getLimit(); + if(p==limit) { + return; + } + + UChar *starter, *pRemove, *q, *r; + const uint16_t *compositionsList; + UChar32 c, compositeAndFwd; + uint16_t norm16; + uint8_t cc, prevCC; + UBool starterIsSupplementary; + + // Some of the following variables are not used until we have a forward-combining starter + // and are only initialized now to avoid compiler warnings. + compositionsList=NULL; // used as indicator for whether we have a forward-combining starter + starter=NULL; + starterIsSupplementary=FALSE; + prevCC=0; + + for(;;) { + UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16); + cc=getCCFromYesOrMaybe(norm16); + if( // this character combines backward and + isMaybe(norm16) && + // we have seen a starter that combines forward and + compositionsList!=NULL && + // the backward-combining character is not blocked + (prevCC=0) { + // The starter and the combining mark (c) do combine. + UChar32 composite=compositeAndFwd>>1; + + // Replace the starter with the composite, remove the combining mark. + pRemove=p-U16_LENGTH(c); // pRemove & p: start & limit of the combining mark + if(starterIsSupplementary) { + if(U_IS_SUPPLEMENTARY(composite)) { + // both are supplementary + starter[0]=U16_LEAD(composite); + starter[1]=U16_TRAIL(composite); + } else { + *starter=(UChar)composite; + // The composite is shorter than the starter, + // move the intermediate characters forward one. + starterIsSupplementary=FALSE; + q=starter+1; + r=q+1; + while(r=minNoNo. 
+ * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward) + * or has ccc!=0. + * Check for Jamo V/T, then for regular characters. + * c is not a Hangul syllable or Jamo L because those have "yes" properties. + */ + if(isJamoVT(norm16) && prevBoundary!=prevSrc) { + UChar prev=*(prevSrc-1); + UBool needToDecompose=FALSE; + if(c=MIN_YES_YES_WITH_CC) { + uint8_t cc=(uint8_t)norm16; // cc!=0 + if( onlyContiguous && // FCC + (doCompose ? buffer.getLastCC() : prevCC)==0 && + prevBoundarycc + ) { + // Fails FCD test, need to decompose and contiguously recompose. + if(!doCompose) { + return FALSE; + } + } else if(doCompose) { + if(!buffer.append(c, cc, errorCode)) { + break; + } + continue; + } else if(prevCC<=cc) { + prevCC=cc; + continue; + } else { + return FALSE; + } + } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) { + return FALSE; + } + + /* + * Find appropriate boundaries around this character, + * decompose the source text from between the boundaries, + * and recompose it. + * + * We may need to remove the last few characters from the ReorderingBuffer + * to account for source text that was copied or appended + * but needs to take part in the recomposition. + */ + + /* + * Find the last composition boundary in [prevBoundary..src[. + * It is either the decomposition of the current character (at prevSrc), + * or prevBoundary. + */ + if(hasCompBoundaryBefore(c, norm16)) { + prevBoundary=prevSrc; + } else if(doCompose) { + buffer.removeZeroCCSuffix((int32_t)(prevSrc-prevBoundary)); + } + + // Find the next composition boundary in [src..limit[ - + // modifies src to point to the next starter. + src=(UChar *)findNextCompBoundary(src, limit); + + // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it. 
+ int32_t recomposeStartIndex=buffer.length(); + if(!decomposeShort(prevBoundary, src, buffer, errorCode)) { + break; + } + recompose(buffer, recomposeStartIndex, onlyContiguous); + if(!doCompose) { + int32_t bufferLength=buffer.length(); + if( bufferLength!=(int32_t)(src-prevBoundary) || + 0!=u_memcmp(buffer.getStart(), prevBoundary, bufferLength) + ) { + return FALSE; + } + buffer.removeZeroCCSuffix(bufferLength); + prevCC=0; + } + + // Move to the next starter. We never need to look back before this point again. + prevBoundary=src; + } + return TRUE; +} + +// Very similar to compose(): Make the same changes in both places if relevant. +// pQCResult==NULL: spanQuickCheckYes +// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES) +const UChar * +Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit, + UBool onlyContiguous, + UNormalizationCheckResult *pQCResult) const { + UChar32 minNoMaybeCP=minCompNoMaybeCP; + if(limit==NULL) { + UErrorCode errorCode=U_ZERO_ERROR; + src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode); + limit=u_strchr(src, 0); + } + + /* + * prevBoundary points to the last character before the current one + * that has a composition boundary before it with ccc==0 and quick check "yes". + */ + const UChar *prevBoundary=src; + const UChar *prevSrc; + UChar32 c=0; + uint16_t norm16=0; + uint8_t prevCC=0; + + for(;;) { + // count code units below the minimum or with irrelevant data for the quick check + for(prevSrc=src;;) { + if(src==limit) { + return src; + } + if( (c=*src)=minNoNo. + * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward) + * or has ccc!=0. + */ + if(isMaybeOrNonZeroCC(norm16)) { + uint8_t cc=getCCFromYesOrMaybe(norm16); + if( onlyContiguous && // FCC + cc!=0 && + prevCC==0 && + prevBoundarycc + ) { + // Fails FCD test. + } else if(prevCC<=cc || cc==0) { + prevCC=cc; + if(norm16= (testInert ? 
minNoNo : minMaybeYes)) { + return FALSE; + } else if(isDecompNoAlgorithmic(norm16)) { + c=mapAlgorithmic(c, norm16); + } else { + // c decomposes, get everything from the variable-length extra data. + // If testInert, then c must be a yesNo character which has lccc=0, + // otherwise it could be a noNo. + const uint16_t *mapping=getMapping(norm16); + uint16_t firstUnit=*mapping; + // TRUE if + // c is not deleted, and + // it and its decomposition do not combine forward, and it has a starter, and + // if FCC then trailCC<=1 + return + (firstUnit&MAPPING_LENGTH_MASK)!=0 && + (firstUnit&(MAPPING_PLUS_COMPOSITION_LIST|MAPPING_NO_COMP_BOUNDARY_AFTER))==0 && + (!onlyContiguous || firstUnit<=0x1ff); + } + } +} + +const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p) const { + BackwardUTrie2StringIterator iter(normTrie, start, p); + uint16_t norm16; + do { + norm16=iter.previous16(); + } while(!hasCompBoundaryBefore(iter.codePoint, norm16)); + // We could also test hasCompBoundaryAfter() and return iter.codePointLimit, + // but that's probably not worth the extra cost. 
+ return iter.codePointStart; +} + +const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit) const { + ForwardUTrie2StringIterator iter(normTrie, p, limit); + uint16_t norm16; + do { + norm16=iter.next16(); + } while(!hasCompBoundaryBefore(iter.codePoint, norm16)); + return iter.codePointStart; +} + +class FCDTrieSingleton : public UTrie2Singleton { +public: + FCDTrieSingleton(SimpleSingleton &s, Normalizer2Impl &ni, UErrorCode &ec) : + UTrie2Singleton(s), impl(ni), errorCode(ec) {} + UTrie2 *getInstance(UErrorCode &errorCode) { + return UTrie2Singleton::getInstance(createInstance, this, errorCode); + } + static void *createInstance(const void *context, UErrorCode &errorCode); + UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) { + if(value!=0) { + impl.setFCD16FromNorm16(start, end, (uint16_t)value, newFCDTrie, errorCode); + } + return U_SUCCESS(errorCode); + } + + Normalizer2Impl &impl; + UTrie2 *newFCDTrie; + UErrorCode &errorCode; +}; + +U_CDECL_BEGIN + +// Set the FCD value for a range of same-norm16 charcters. +static UBool U_CALLCONV +enumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) { + return ((FCDTrieSingleton *)context)->rangeHandler(start, end, value); +} + +// Collect (OR together) the FCD values for a range of supplementary characters, +// for their lead surrogate code unit. 
+static UBool U_CALLCONV +enumRangeOrValue(const void *context, UChar32 start, UChar32 end, uint32_t value) { + *((uint32_t *)context)|=value; + return TRUE; +} + +U_CDECL_END + +void *FCDTrieSingleton::createInstance(const void *context, UErrorCode &errorCode) { + FCDTrieSingleton *me=(FCDTrieSingleton *)context; + me->newFCDTrie=utrie2_open(0, 0, &errorCode); + if(U_SUCCESS(errorCode)) { + utrie2_enum(me->impl.getNormTrie(), NULL, enumRangeHandler, me); + for(UChar lead=0xd800; lead<0xdc00; ++lead) { + uint32_t oredValue=utrie2_get32(me->newFCDTrie, lead); + utrie2_enumForLeadSurrogate(me->newFCDTrie, lead, NULL, enumRangeOrValue, &oredValue); + if(oredValue!=0) { + // Set a "bad" value for makeFCD() to break the quick check loop + // and look up the value for the supplementary code point. + // If there is any lccc, then set the worst-case lccc of 1. + // The ORed-together value's tccc is already the worst case. + if(oredValue>0xff) { + oredValue=0x100|(oredValue&0xff); + } + utrie2_set32ForLeadSurrogateCodeUnit(me->newFCDTrie, lead, oredValue, &errorCode); + } + } + utrie2_freeze(me->newFCDTrie, UTRIE2_16_VALUE_BITS, &errorCode); + if(U_SUCCESS(errorCode)) { + return me->newFCDTrie; + } + } + utrie2_close(me->newFCDTrie); + return NULL; +} + +void Normalizer2Impl::setFCD16FromNorm16(UChar32 start, UChar32 end, uint16_t norm16, + UTrie2 *newFCDTrie, UErrorCode &errorCode) const { + // Only loops for 1:1 algorithmic mappings. 
+ for(;;) { + if(norm16>=MIN_NORMAL_MAYBE_YES) { + norm16&=0xff; + norm16|=norm16<<8; + } else if(norm16<=minYesNo || minMaybeYes<=norm16) { + // no decomposition or Hangul syllable, all zeros + break; + } else if(limitNoNo<=norm16) { + int32_t delta=norm16-(minMaybeYes-MAX_DELTA-1); + if(start==end) { + start+=delta; + norm16=getNorm16(start); + } else { + // the same delta leads from different original characters to different mappings + do { + UChar32 c=start+delta; + setFCD16FromNorm16(c, c, getNorm16(c), newFCDTrie, errorCode); + } while(++start<=end); + break; + } + } else { + // c decomposes, get everything from the variable-length extra data + const uint16_t *mapping=getMapping(norm16); + uint16_t firstUnit=*mapping; + if((firstUnit&MAPPING_LENGTH_MASK)==0) { + // A character that is deleted (maps to an empty string) must + // get the worst-case lccc and tccc values because arbitrary + // characters on both sides will become adjacent. + norm16=0x1ff; + } else { + if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { + norm16=mapping[1]&0xff00; // lccc + } else { + norm16=0; + } + norm16|=*mapping>>8; // tccc + } + } + utrie2_setRange32(newFCDTrie, start, end, norm16, TRUE, &errorCode); + break; + } +} + +const UTrie2 *Normalizer2Impl::getFCDTrie(UErrorCode &errorCode) const { + // Logically const: Synchronized instantiation. + Normalizer2Impl *me=const_cast(this); + return FCDTrieSingleton(me->fcdTrieSingleton, *me, errorCode).getInstance(errorCode); +} + +const UChar * +Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit, + ReorderingBuffer *buffer, + UErrorCode &errorCode) const { + if(limit==NULL) { + src=copyLowPrefixFromNulTerminated(src, MIN_CCC_LCCC_CP, buffer, errorCode); + if(U_FAILURE(errorCode)) { + return src; + } + limit=u_strchr(src, 0); + } + + // Note: In this function we use buffer->appendZeroCC() because we track + // the lead and trail combining classes here, rather than leaving it to + // the ReorderingBuffer. 
+ // The exception is the call to decomposeShort() which uses the buffer + // in the normal way. + + const UTrie2 *trie=fcdTrie(); + + // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. + // Similar to the prevBoundary in the compose() implementation. + const UChar *prevBoundary=src; + const UChar *prevSrc; + UChar32 c=0; + int32_t prevFCD16=0; + uint16_t fcd16=0; + + for(;;) { + // count code units with lccc==0 + for(prevSrc=src; src!=limit;) { + if((c=*src)appendZeroCC(prevSrc, src, errorCode)) { + break; + } + if(src==limit) { + break; + } + prevBoundary=src; + // We know that the previous character's lccc==0. + if(prevFCD16<0) { + // Fetching the fcd16 value was deferred for this below-U+0300 code point. + prevFCD16=getFCD16FromSingleLead((UChar)~prevFCD16); + if(prevFCD16>1) { + --prevBoundary; + } + } else { + const UChar *p=src-1; + if(U16_IS_TRAIL(*p) && prevSrc

1) { + prevBoundary=p; + } + } + // The start of the current character (c). + prevSrc=src; + } else if(src==limit) { + break; + } + + src+=U16_LENGTH(c); + // The current character (c) at [prevSrc..src[ has a non-zero lead combining class. + // Check for proper order, and decompose locally if necessary. + if((prevFCD16&0xff)<=(fcd16>>8)) { + // proper order: prev tccc <= current lccc + if((fcd16&0xff)<=1) { + prevBoundary=src; + } + if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) { + break; + } + prevFCD16=fcd16; + continue; + } else if(buffer==NULL) { + return prevBoundary; // quick check "no" + } else { + /* + * Back out the part of the source that we copied or appended + * already but is now going to be decomposed. + * prevSrc is set to after what was copied/appended. + */ + buffer->removeZeroCCSuffix((int32_t)(prevSrc-prevBoundary)); + /* + * Find the part of the source that needs to be decomposed, + * up to the next safe boundary. + */ + src=findNextFCDBoundary(src, limit); + /* + * The source text does not fulfill the conditions for FCD. + * Decompose and reorder a limited piece of the text. 
+ */ + if(!decomposeShort(prevBoundary, src, *buffer, errorCode)) { + break; + } + prevBoundary=src; + prevFCD16=0; + } + } + return src; +} + +void Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit, + UBool doMakeFCD, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const { + if(!buffer.isEmpty()) { + const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit); + if(src!=firstBoundaryInSrc) { + const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(), + buffer.getLimit()); + UnicodeString middle(lastBoundaryInDest, + (int32_t)(buffer.getLimit()-lastBoundaryInDest)); + buffer.removeZeroCCSuffix((int32_t)(buffer.getLimit()-lastBoundaryInDest)); + middle.append(src, (int32_t)(firstBoundaryInSrc-src)); + const UChar *middleStart=middle.getBuffer(); + makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode); + if(U_FAILURE(errorCode)) { + return; + } + src=firstBoundaryInSrc; + } + } + if(doMakeFCD) { + makeFCD(src, limit, &buffer, errorCode); + } else { + buffer.appendZeroCC(src, limit, errorCode); + } +} + +const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const { + BackwardUTrie2StringIterator iter(fcdTrie(), start, p); + uint16_t fcd16; + do { + fcd16=iter.previous16(); + } while(fcd16>0xff); + return iter.codePointStart; +} + +const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const { + ForwardUTrie2StringIterator iter(fcdTrie(), p, limit); + uint16_t fcd16; + do { + fcd16=iter.next16(); + } while(fcd16>0xff); + return iter.codePointStart; +} + +// Normalizer2 data swapping ----------------------------------------------- *** + +U_CAPI int32_t U_EXPORT2 +unorm2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + int32_t headerSize; + + const uint8_t *inBytes; + uint8_t *outBytes; + + const int32_t *inIndexes; + int32_t 
indexes[Normalizer2Impl::IX_MIN_MAYBE_YES+1]; + + int32_t i, offset, nextOffset, size; + + /* udata_swapDataHeader checks the arguments */ + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + pInfo=(const UDataInfo *)((const char *)inData+4); + if(!( + pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ + pInfo->dataFormat[1]==0x72 && + pInfo->dataFormat[2]==0x6d && + pInfo->dataFormat[3]==0x32 && + pInfo->formatVersion[0]==1 + )) { + udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inBytes=(const uint8_t *)inData+headerSize; + outBytes=(uint8_t *)outData+headerSize; + + inIndexes=(const int32_t *)inBytes; + + if(length>=0) { + length-=headerSize; + if(length=0) { + if(lengthswapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode); + offset=nextOffset; + + /* swap the UTrie2 */ + nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]; + utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); + offset=nextOffset; + + /* swap the uint16_t extraData[] */ + nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET+1]; + ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); + offset=nextOffset; + + U_ASSERT(offset==size); + } + + return headerSize+size; +} + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_NORMALIZATION diff --git a/icu4c/source/common/normalizer2impl.h b/icu4c/source/common/normalizer2impl.h new file mode 100644 index 00000000000..cf32e4b959c --- /dev/null +++ b/icu4c/source/common/normalizer2impl.h @@ -0,0 +1,603 @@ +/* +******************************************************************************* 
+* +* Copyright (C) 2009-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: normalizer2impl.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov22 +* created by: Markus W. Scherer +*/ + +#ifndef __NORMALIZER2IMPL_H__ +#define __NORMALIZER2IMPL_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/normalizer2.h" +#include "unicode/udata.h" +#include "unicode/unistr.h" +#include "unicode/unorm.h" +#include "mutex.h" +#include "uset_imp.h" +#include "utrie2.h" + +U_NAMESPACE_BEGIN + +class Hangul { +public: + /* Korean Hangul and Jamo constants */ + enum { + JAMO_L_BASE=0x1100, /* "lead" jamo */ + JAMO_V_BASE=0x1161, /* "vowel" jamo */ + JAMO_T_BASE=0x11a7, /* "trail" jamo */ + + HANGUL_BASE=0xac00, + + JAMO_L_COUNT=19, + JAMO_V_COUNT=21, + JAMO_T_COUNT=28, + + HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT, + HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT + }; + + static inline UBool isHangul(UChar32 c) { + return HANGUL_BASE<=c && c=MIN_NORMAL_MAYBE_YES) { + return (uint8_t)norm16; + } + if(norm16=MIN_NORMAL_MAYBE_YES ? (uint8_t)norm16 : 0; + } + + uint16_t getFCD16(UChar32 c) const { return UTRIE2_GET16(fcdTrie(), c); } + uint16_t getFCD16FromBMP(UChar c) const { return UTRIE2_GET16(fcdTrie(), c); } + uint16_t getFCD16FromSingleLead(UChar c) const { + return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(fcdTrie(), c); + } + uint16_t getFCD16FromSupplementary(UChar32 c) const { + return UTRIE2_GET16_FROM_SUPP(fcdTrie(), c); + } + uint16_t getFCD16FromSurrogatePair(UChar c, UChar c2) const { + return getFCD16FromSupplementary(U16_GET_SUPPLEMENTARY(c, c2)); + } + + void setFCD16FromNorm16(UChar32 start, UChar32 end, uint16_t norm16, + UTrie2 *newFCDTrie, UErrorCode &errorCode) const; + + /** + * Get the decomposition for one code point. 
+ * @param c code point + * @param buffer out-only buffer for algorithmic decompositions + * @param length out-only, takes the length of the decomposition, if any + * @return pointer to the decomposition, or NULL if none + */ + const UChar *getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const; + + enum { + MIN_CCC_LCCC_CP=0x300 + }; + + enum { + MIN_YES_YES_WITH_CC=0xff01, + JAMO_VT=0xff00, + MIN_NORMAL_MAYBE_YES=0xfe00, + JAMO_L=1, + MAX_DELTA=0x40 + }; + + enum { + // Byte offsets from the start of the data, after the generic header. + IX_NORM_TRIE_OFFSET, + IX_EXTRA_DATA_OFFSET, + IX_RESERVED2_OFFSET, + IX_RESERVED3_OFFSET, + IX_RESERVED4_OFFSET, + IX_RESERVED5_OFFSET, + IX_RESERVED6_OFFSET, + IX_TOTAL_SIZE, + + // Code point thresholds for quick check codes. + IX_MIN_DECOMP_NO_CP, + IX_MIN_COMP_NO_MAYBE_CP, + + // Norm16 value thresholds for quick check combinations and types of extra data. + IX_MIN_YES_NO, + IX_MIN_NO_NO, + IX_LIMIT_NO_NO, + IX_MIN_MAYBE_YES, + + IX_RESERVED14, + IX_RESERVED15, + IX_COUNT + }; + + enum { + MAPPING_HAS_CCC_LCCC_WORD=0x80, + MAPPING_PLUS_COMPOSITION_LIST=0x40, + MAPPING_NO_COMP_BOUNDARY_AFTER=0x20, + MAPPING_LENGTH_MASK=0x1f + }; + + enum { + COMP_1_LAST_TUPLE=0x8000, + COMP_1_TRIPLE=1, + COMP_1_TRAIL_LIMIT=0x3400, + COMP_1_TRAIL_MASK=0x7ffe, + COMP_1_TRAIL_SHIFT=9, // 10-1 for the "triple" bit + COMP_2_TRAIL_SHIFT=6, + COMP_2_TRAIL_MASK=0xffc0 + }; + + // higher-level functionality ------------------------------------------ *** + + const UChar *decompose(const UChar *src, const UChar *limit, + ReorderingBuffer *buffer, UErrorCode &errorCode) const; + void decomposeAndAppend(const UChar *src, const UChar *limit, + UBool doDecompose, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const; + UBool compose(const UChar *src, const UChar *limit, + UBool onlyContiguous, + UBool doCompose, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const; + const UChar *composeQuickCheck(const UChar *src, const UChar 
*limit, + UBool onlyContiguous, + UNormalizationCheckResult *pQCResult) const; + void composeAndAppend(const UChar *src, const UChar *limit, + UBool doCompose, + UBool onlyContiguous, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const; + const UChar *makeFCD(const UChar *src, const UChar *limit, + ReorderingBuffer *buffer, UErrorCode &errorCode) const; + void makeFCDAndAppend(const UChar *src, const UChar *limit, + UBool doMakeFCD, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const; + + UBool hasDecompBoundary(UChar32 c, UBool before) const; + UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); } + + UBool hasCompBoundaryBefore(UChar32 c) const { + return c=minMaybeYes; } + static UBool isInert(uint16_t norm16) { return norm16==0; } + // static UBool isJamoL(uint16_t norm16) const { return norm16==1; } + static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; } + UBool isHangul(uint16_t norm16) const { return norm16==minYesNo; } + UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16=MIN_YES_YES_WITH_CC || norm16=limitNoNo; } + + // For use with isCompYes(). + // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC. + // static uint8_t getCCFromYes(uint16_t norm16) { + // return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0; + // } + uint8_t getCCFromNoNo(uint16_t norm16) const { + const uint16_t *mapping=getMapping(norm16); + if(*mapping&MAPPING_HAS_CCC_LCCC_WORD) { + return (uint8_t)mapping[1]; + } else { + return 0; + } + } + // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC() + uint8_t getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const; + + // Requires algorithmic-NoNo. 
+ UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const { + return c+norm16-(minMaybeYes-MAX_DELTA-1); + } + + // Requires minYesNo>7)&1); // +1 if MAPPING_HAS_CCC_LCCC_WORD + } + + const UChar *copyLowPrefixFromNulTerminated(const UChar *src, + UChar32 minNeedDataCP, + ReorderingBuffer *buffer, + UErrorCode &errorCode) const; + UBool decomposeShort(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const; + UBool decompose(UChar32 c, uint16_t norm16, + ReorderingBuffer &buffer, UErrorCode &errorCode) const; + + static int32_t combine(const uint16_t *list, UChar32 trail); + void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex, + UBool onlyContiguous) const; + + UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const; + const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const; + const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const; + + const UTrie2 *fcdTrie() const { return (const UTrie2 *)fcdTrieSingleton.fInstance; } + + const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const; + const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const; + + UDataMemory *memory; + UVersionInfo dataVersion; + + // Code point thresholds for quick check codes. + UChar32 minDecompNoCP; + UChar32 minCompNoMaybeCP; + + // Norm16 value thresholds for quick check combinations and types of extra data. + uint16_t minYesNo; + uint16_t minNoNo; + uint16_t limitNoNo; + uint16_t minMaybeYes; + + UTrie2 *normTrie; + const uint16_t *maybeYesCompositions; + const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters + + SimpleSingleton fcdTrieSingleton; +}; + +/** + * ICU-internal shortcut for quick access to standard Unicode normalization. 
+ */ +class U_COMMON_API Normalizer2Factory { +public: + static const Normalizer2 *getNFCInstance(UErrorCode &errorCode); + static const Normalizer2 *getNFDInstance(UErrorCode &errorCode); + static const Normalizer2 *getFCDInstance(UErrorCode &errorCode); + static const Normalizer2 *getFCCInstance(UErrorCode &errorCode); + static const Normalizer2 *getNFKCInstance(UErrorCode &errorCode); + static const Normalizer2 *getNFKDInstance(UErrorCode &errorCode); + static const Normalizer2 *getNFKC_CFInstance(UErrorCode &errorCode); + static const Normalizer2 *getNoopInstance(UErrorCode &errorCode); + + static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode); + + static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode); + static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode); + static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode); + + static const UTrie2 *getFCDTrie(UErrorCode &errorCode); +private: + Normalizer2Factory(); // No instantiation. +}; + +U_CAPI int32_t U_EXPORT2 +unorm2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Get the NF*_QC property for a code point, for u_getIntPropertyValue(). + * @internal + */ +U_CFUNC UNormalizationCheckResult U_EXPORT2 +unorm_getQuickCheck(UChar32 c, UNormalizationMode mode); + +/** + * Internal API, used by collation code. + * Get access to the internal FCD trie table to be able to perform + * incremental, per-code unit, FCD checks in collation. + * One pointer is sufficient because the trie index values are offset + * by the index size, so that the same pointer is used to access the trie data. + * Code points at fcdHighStart and above have a zero FCD value. + * @internal + */ +U_CAPI const uint16_t * U_EXPORT2 +unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode); + +/** + * Internal API, used by collation code. 
+ * Get the FCD value for a code unit, with + * bits 15..8 lead combining class + * bits 7..0 trail combining class + * + * If c is a lead surrogate and the value is not 0, + * then some of c's associated supplementary code points have a non-zero FCD value. + * + * @internal + */ +static inline uint16_t +unorm_getFCD16(const uint16_t *fcdTrieIndex, UChar c) { + return fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)]; +} + +/** + * Internal API, used by collation code. + * Get the FCD value of the next code point (post-increment), with + * bits 15..8 lead combining class + * bits 7..0 trail combining class + * + * @internal + */ +static inline uint16_t +unorm_nextFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart, + const UChar *&s, const UChar *limit) { + UChar32 c=*s++; + uint16_t fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)]; + if(fcd!=0 && U16_IS_LEAD(c)) { + UChar c2; + if(s!=limit && U16_IS_TRAIL(c2=*s)) { + ++s; + c=U16_GET_SUPPLEMENTARY(c, c2); + if(cclone()), currentIndex(copy.currentIndex), nextIndex(copy.nextIndex), buffer(copy.buffer), bufferPos(copy.bufferPos) { - init(((CharacterIterator *)(copy.text->context))->clone()); + init(); } static const UChar _NUL=0; void -Normalizer::init(CharacterIterator *iter) { +Normalizer::init() { UErrorCode errorCode=U_ZERO_ERROR; - - text=(UCharIterator *)uprv_malloc(sizeof(UCharIterator)); - if(text!=NULL) { - if(unorm_haveData(&errorCode)) { - uiter_setCharacterIterator(text, iter); - } else { - delete iter; - uiter_setCharacterIterator(text, new UCharCharacterIterator(&_NUL, 0)); - } - } else { - delete iter; + fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode); + if(fOptions&UNORM_UNICODE_3_2) { + delete fFilteredNorm2; + fNorm2=fFilteredNorm2= + new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode)); + } + if(U_FAILURE(errorCode)) { + errorCode=U_ZERO_ERROR; + fNorm2=Normalizer2Factory::getNoopInstance(errorCode); } } 
Normalizer::~Normalizer() { - if(text!=NULL) { - delete (CharacterIterator *)text->context; - uprv_free(text); - } + delete fFilteredNorm2; + delete text; } Normalizer* Normalizer::clone() const { - if(this!=0) { - return new Normalizer(*this); - } else { - return 0; - } + return new Normalizer(*this); } /** @@ -101,7 +98,7 @@ Normalizer::clone() const */ int32_t Normalizer::hashCode() const { - return ((CharacterIterator *)(text->context))->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; + return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; } UBool Normalizer::operator==(const Normalizer& that) const @@ -110,7 +107,7 @@ UBool Normalizer::operator==(const Normalizer& that) const this==&that || fUMode==that.fUMode && fOptions==that.fOptions && - *((CharacterIterator *)(text->context))==*((CharacterIterator *)(that.text->context)) && + *text==*that.text && buffer==that.buffer && bufferPos==that.bufferPos && nextIndex==that.nextIndex; @@ -140,29 +137,18 @@ Normalizer::normalize(const UnicodeString& source, // the source and result strings are the same object, use a temporary one dest=&localDest; } - - UChar *buffer=dest->getBuffer(source.length()); - int32_t length=unorm_internalNormalize(buffer, dest->getCapacity(), - source.getBuffer(), source.length(), - mode, options, - &status); - dest->releaseBuffer(U_SUCCESS(status) ? length : 0); - if(status==U_BUFFER_OVERFLOW_ERROR) { - status=U_ZERO_ERROR; - buffer=dest->getBuffer(length); - length=unorm_internalNormalize(buffer, dest->getCapacity(), - source.getBuffer(), source.length(), - mode, options, - &status); - dest->releaseBuffer(U_SUCCESS(status) ? length : 0); + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); + if(U_SUCCESS(status)) { + if(options&UNORM_UNICODE_3_2) { + FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). 
+ normalize(source, *dest, status); + } else { + n2->normalize(source, *dest, status); + } } - - if(dest==&localDest) { + if(dest==&localDest && U_SUCCESS(status)) { result=*dest; } - if(U_FAILURE(status)) { - result.setToBogus(); - } } } @@ -171,45 +157,7 @@ Normalizer::compose(const UnicodeString& source, UBool compat, int32_t options, UnicodeString& result, UErrorCode &status) { - if(source.isBogus() || U_FAILURE(status)) { - result.setToBogus(); - if(U_SUCCESS(status)) { - status=U_ILLEGAL_ARGUMENT_ERROR; - } - } else { - UnicodeString localDest; - UnicodeString *dest; - - if(&source!=&result) { - dest=&result; - } else { - // the source and result strings are the same object, use a temporary one - dest=&localDest; - } - - UChar *buffer=dest->getBuffer(source.length()); - int32_t length=unorm_compose(buffer, dest->getCapacity(), - source.getBuffer(), source.length(), - compat, options, - &status); - dest->releaseBuffer(U_SUCCESS(status) ? length : 0); - if(status==U_BUFFER_OVERFLOW_ERROR) { - status=U_ZERO_ERROR; - buffer=dest->getBuffer(length); - length=unorm_compose(buffer, dest->getCapacity(), - source.getBuffer(), source.length(), - compat, options, - &status); - dest->releaseBuffer(U_SUCCESS(status) ? length : 0); - } - - if(dest==&localDest) { - result=*dest; - } - if(U_FAILURE(status)) { - result.setToBogus(); - } - } + normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status); } void U_EXPORT2 @@ -217,44 +165,40 @@ Normalizer::decompose(const UnicodeString& source, UBool compat, int32_t options, UnicodeString& result, UErrorCode &status) { - if(source.isBogus() || U_FAILURE(status)) { - result.setToBogus(); - if(U_SUCCESS(status)) { - status=U_ILLEGAL_ARGUMENT_ERROR; + normalize(source, compat ? 
UNORM_NFKD : UNORM_NFD, options, result, status); +} + +UNormalizationCheckResult +Normalizer::quickCheck(const UnicodeString& source, + UNormalizationMode mode, int32_t options, + UErrorCode &status) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); + if(U_SUCCESS(status)) { + if(options&UNORM_UNICODE_3_2) { + return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). + quickCheck(source, status); + } else { + return n2->quickCheck(source, status); } } else { - UnicodeString localDest; - UnicodeString *dest; + return UNORM_MAYBE; + } +} - if(&source!=&result) { - dest=&result; +UBool +Normalizer::isNormalized(const UnicodeString& source, + UNormalizationMode mode, int32_t options, + UErrorCode &status) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); + if(U_SUCCESS(status)) { + if(options&UNORM_UNICODE_3_2) { + return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). + isNormalized(source, status); } else { - // the source and result strings are the same object, use a temporary one - dest=&localDest; - } - - UChar *buffer=dest->getBuffer(source.length()); - int32_t length=unorm_decompose(buffer, dest->getCapacity(), - source.getBuffer(), source.length(), - compat, options, - &status); - dest->releaseBuffer(U_SUCCESS(status) ? length : 0); - if(status==U_BUFFER_OVERFLOW_ERROR) { - status=U_ZERO_ERROR; - buffer=dest->getBuffer(length); - length=unorm_decompose(buffer, dest->getCapacity(), - source.getBuffer(), source.length(), - compat, options, - &status); - dest->releaseBuffer(U_SUCCESS(status) ? 
length : 0); - } - - if(dest==&localDest) { - result=*dest; - } - if(U_FAILURE(status)) { - result.setToBogus(); + return n2->isNormalized(source, status); } + } else { + return FALSE; } } @@ -272,37 +216,25 @@ Normalizer::concatenate(UnicodeString &left, UnicodeString &right, UnicodeString localDest; UnicodeString *dest; - if(&left!=&result && &right!=&result) { + if(&right!=&result) { dest=&result; } else { - // the source and result strings are the same object, use a temporary one + // the right and result strings are the same object, use a temporary one dest=&localDest; } - - UChar *buffer=dest->getBuffer(left.length()+right.length()); - int32_t length=unorm_concatenate(left.getBuffer(), left.length(), - right.getBuffer(), right.length(), - buffer, dest->getCapacity(), - mode, options, - &errorCode); - dest->releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - errorCode=U_ZERO_ERROR; - buffer=dest->getBuffer(length); - int32_t length=unorm_concatenate(left.getBuffer(), left.length(), - right.getBuffer(), right.length(), - buffer, dest->getCapacity(), - mode, options, - &errorCode); - dest->releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + *dest=left; + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode); + if(U_SUCCESS(errorCode)) { + if(options&UNORM_UNICODE_3_2) { + FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)). 
+ append(*dest, right, errorCode); + } else { + n2->append(*dest, right, errorCode); + } } - - if(dest==&localDest) { + if(dest==&localDest && U_SUCCESS(errorCode)) { result=*dest; } - if(U_FAILURE(errorCode)) { - result.setToBogus(); - } } return result; } @@ -353,19 +285,20 @@ UChar32 Normalizer::previous() { } void Normalizer::reset() { - currentIndex=nextIndex=text->move(text, 0, UITER_START); + currentIndex=nextIndex=text->setToStart(); clearBuffer(); } void Normalizer::setIndexOnly(int32_t index) { - currentIndex=nextIndex=text->move(text, index, UITER_ZERO); // validates index + text->setIndex(index); // pins index + currentIndex=nextIndex=text->getIndex(); clearBuffer(); } /** - * Return the first character in the normalized text-> This resets - * the Normalizer's position to the beginning of the text-> + * Return the first character in the normalized text. This resets + * the Normalizer's position to the beginning of the text. */ UChar32 Normalizer::first() { reset(); @@ -373,12 +306,12 @@ UChar32 Normalizer::first() { } /** - * Return the last character in the normalized text-> This resets + * Return the last character in the normalized text. This resets * the Normalizer's position to be just before the * the input text corresponding to that normalized character. */ UChar32 Normalizer::last() { - currentIndex=nextIndex=text->move(text, 0, UITER_LIMIT); + currentIndex=nextIndex=text->setToEnd(); clearBuffer(); return previous(); } @@ -406,21 +339,21 @@ int32_t Normalizer::getIndex() const { } /** - * Retrieve the index of the start of the input text-> This is the begin index + * Retrieve the index of the start of the input text. This is the begin index * of the CharacterIterator or the start (i.e. 
0) of the String * over which this Normalizer is iterating */ int32_t Normalizer::startIndex() const { - return text->getIndex(text, UITER_START); + return text->startIndex(); } /** - * Retrieve the index of the end of the input text-> This is the end index + * Retrieve the index of the end of the input text. This is the end index * of the CharacterIterator or the length of the String * over which this Normalizer is iterating */ int32_t Normalizer::endIndex() const { - return text->getIndex(text, UITER_LIMIT); + return text->endIndex(); } //------------------------------------------------------------------------- @@ -431,6 +364,7 @@ void Normalizer::setMode(UNormalizationMode newMode) { fUMode = newMode; + init(); } UNormalizationMode @@ -448,6 +382,7 @@ Normalizer::setOption(int32_t option, } else { fOptions &= (~option); } + init(); } UBool @@ -458,7 +393,7 @@ Normalizer::getOption(int32_t option) const /** * Set the input text over which this Normalizer will iterate. - * The iteration position is set to the beginning of the input text-> + * The iteration position is set to the beginning of the input text. 
*/ void Normalizer::setText(const UnicodeString& newText, @@ -472,8 +407,8 @@ Normalizer::setText(const UnicodeString& newText, status = U_MEMORY_ALLOCATION_ERROR; return; } - delete (CharacterIterator *)(text->context); - text->context = newIter; + delete text; + text = newIter; reset(); } @@ -493,8 +428,8 @@ Normalizer::setText(const CharacterIterator& newText, status = U_MEMORY_ALLOCATION_ERROR; return; } - delete (CharacterIterator *)(text->context); - text->context = newIter; + delete text; + text = newIter; reset(); } @@ -511,8 +446,8 @@ Normalizer::setText(const UChar* newText, status = U_MEMORY_ALLOCATION_ERROR; return; } - delete (CharacterIterator *)(text->context); - text->context = newIter; + delete text; + text = newIter; reset(); } @@ -523,7 +458,7 @@ Normalizer::setText(const UChar* newText, void Normalizer::getText(UnicodeString& result) { - ((CharacterIterator *)(text->context))->getText(result); + text->getText(result); } //------------------------------------------------------------------------- @@ -537,72 +472,48 @@ void Normalizer::clearBuffer() { UBool Normalizer::nextNormalize() { - UChar *p; - int32_t length; - UErrorCode errorCode; - clearBuffer(); currentIndex=nextIndex; - text->move(text, nextIndex, UITER_ZERO); - if(!text->hasNext(text)) { + text->setIndex(nextIndex); + if(!text->hasNext()) { return FALSE; } - - errorCode=U_ZERO_ERROR; - p=buffer.getBuffer(-1); - length=unorm_next(text, p, buffer.getCapacity(), - fUMode, fOptions, - TRUE, 0, - &errorCode); - buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - errorCode=U_ZERO_ERROR; - text->move(text, nextIndex, UITER_ZERO); - p=buffer.getBuffer(length); - length=unorm_next(text, p, buffer.getCapacity(), - fUMode, fOptions, - TRUE, 0, - &errorCode); - buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + // Skip at least one character so we make progress. 
+ UnicodeString segment(text->next32PostInc()); + while(text->hasNext()) { + UChar32 c; + if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) { + text->move32(-1, CharacterIterator::kCurrent); + break; + } + segment.append(c); } - - nextIndex=text->getIndex(text, UITER_CURRENT); + nextIndex=text->getIndex(); + UErrorCode errorCode=U_ZERO_ERROR; + fNorm2->normalize(segment, buffer, errorCode); return U_SUCCESS(errorCode) && !buffer.isEmpty(); } UBool Normalizer::previousNormalize() { - UChar *p; - int32_t length; - UErrorCode errorCode; - clearBuffer(); nextIndex=currentIndex; - text->move(text, currentIndex, UITER_ZERO); - if(!text->hasPrevious(text)) { + text->setIndex(currentIndex); + if(!text->hasPrevious()) { return FALSE; } - - errorCode=U_ZERO_ERROR; - p=buffer.getBuffer(-1); - length=unorm_previous(text, p, buffer.getCapacity(), - fUMode, fOptions, - TRUE, 0, - &errorCode); - buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - errorCode=U_ZERO_ERROR; - text->move(text, currentIndex, UITER_ZERO); - p=buffer.getBuffer(length); - length=unorm_previous(text, p, buffer.getCapacity(), - fUMode, fOptions, - TRUE, 0, - &errorCode); - buffer.releaseBuffer(U_SUCCESS(errorCode) ? 
length : 0); + UnicodeString segment; + while(text->hasPrevious()) { + UChar32 c=text->previous32(); + segment.insert(0, c); + if(fNorm2->hasBoundaryBefore(c)) { + break; + } } - + currentIndex=text->getIndex(); + UErrorCode errorCode=U_ZERO_ERROR; + fNorm2->normalize(segment, buffer, errorCode); bufferPos=buffer.length(); - currentIndex=text->getIndex(text, UITER_CURRENT); return U_SUCCESS(errorCode) && !buffer.isEmpty(); } diff --git a/icu4c/source/common/uchar.c b/icu4c/source/common/uchar.c index cdccd9ad972..a22d4ab52d5 100644 --- a/icu4c/source/common/uchar.c +++ b/icu4c/source/common/uchar.c @@ -1,6 +1,6 @@ /* ******************************************************************************** -* Copyright (C) 1996-2009, International Business Machines +* Copyright (C) 1996-2010, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************** * @@ -28,7 +28,6 @@ #include "ucln_cmn.h" #include "utrie2.h" #include "udataswp.h" -#include "unormimp.h" /* JAMO_L_BASE etc. */ #include "uprops.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) @@ -650,10 +649,6 @@ u_getNumericValue(UChar32 c) { } } -/* ICU 3.4: bidi/shaping properties moved to ubidi_props.c */ - -/* ICU 2.1: u_getCombiningClass() moved to unorm.cpp */ - U_CAPI int32_t U_EXPORT2 u_digit(UChar32 ch, int8_t radix) { int8_t value; diff --git a/icu4c/source/common/ucln_cmn.h b/icu4c/source/common/ucln_cmn.h index b6f069959cd..2ccc6e984fc 100644 --- a/icu4c/source/common/ucln_cmn.h +++ b/icu4c/source/common/ucln_cmn.h @@ -1,7 +1,7 @@ /* ****************************************************************************** * * -* Copyright (C) 2001-2006, International Business Machines * +* Copyright (C) 2001-2010, International Business Machines * * Corporation and others. All Rights Reserved. 
* * * ****************************************************************************** @@ -41,6 +41,7 @@ typedef enum ECleanupCommonType { UCLN_COMMON_LOCALE, UCLN_COMMON_ULOC, UCLN_COMMON_UNORM, + UCLN_COMMON_NORMALIZER2, UCLN_COMMON_USET, UCLN_COMMON_UNAMES, UCLN_COMMON_PNAME, diff --git a/icu4c/source/common/ucol_swp.c b/icu4c/source/common/ucol_swp.cpp similarity index 99% rename from icu4c/source/common/ucol_swp.c rename to icu4c/source/common/ucol_swp.cpp index 57b4b3376fc..ede8dd68a82 100644 --- a/icu4c/source/common/ucol_swp.c +++ b/icu4c/source/common/ucol_swp.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2003-2009, International Business Machines +* Copyright (C) 2003-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* diff --git a/icu4c/source/common/unicode/caniter.h b/icu4c/source/common/unicode/caniter.h index 84a65958d16..0c99a33fd52 100644 --- a/icu4c/source/common/unicode/caniter.h +++ b/icu4c/source/common/unicode/caniter.h @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 1996-2006, International Business Machines Corporation and * + * Copyright (C) 1996-2010, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ @@ -31,6 +31,7 @@ U_NAMESPACE_BEGIN class Hashtable; +class Normalizer2; /** * This class allows one to iterate through all the strings that are canonically equivalent to a given @@ -174,6 +175,8 @@ private: // transient fields UnicodeString buffer; + const Normalizer2 &nfd; + // we have a segment, in NFD. Find all the strings that are canonically equivalent to it. 
UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment) diff --git a/icu4c/source/common/unicode/normalizer2.h b/icu4c/source/common/unicode/normalizer2.h new file mode 100644 index 00000000000..b9932eb53aa --- /dev/null +++ b/icu4c/source/common/unicode/normalizer2.h @@ -0,0 +1,460 @@ +/* +******************************************************************************* +* +* Copyright (C) 2009-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: normalizer2.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov22 +* created by: Markus W. Scherer +*/ + +#ifndef __NORMALIZER2_H__ +#define __NORMALIZER2_H__ + +/** + * \file + * \brief C++ API: New API for Unicode Normalization. + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "unicode/unorm2.h" + +U_NAMESPACE_BEGIN + +/** + * Unicode normalization functionality for standard Unicode normalization or + * for using custom mapping tables. + * All instances of this class are unmodifiable/immutable. + * Instances returned by getInstance() are singletons that must not be deleted by the caller. + * + * Some of the functions in this class identify normalization boundaries. + * At a normalization boundary, the portions of the string + * before it and starting from it do not interact and can be handled independently. + * + * The spanQuickCheckYes() stops at a normalization boundary. + * When the goal is a normalized string, then the text before the boundary + * can be copied, and the remainder can be processed with normalizeSecondAndAppend(). + * + * The isBoundary() function tests whether a character is at a normalization boundary. 
+ * This is used for moving from one normalization boundary to the next + * or preceding boundary, and for performing iterative normalization. + * + * Iterative normalization is useful when only a small portion of a + * longer string needs to be processed. + * In ICU, iterative normalization is used by the NormalizationTransliterator + * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart() + * (to process only the substring for which sort key bytes are computed). + * + * The set of normalization boundaries returned by these functions may not be + * complete: There may be more boundaries that could be returned. + * Different functions may return different boundaries. + * @draft ICU 4.4 + */ +class U_COMMON_API Normalizer2 : public UObject { +public: + /** + * Returns a Normalizer2 instance which uses the specified data file + * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) + * and which composes or decomposes text according to the specified mode. + * Returns an unmodifiable singleton instance. Do not delete it. + * + * Use packageName=NULL for data files that are part of ICU's own data. + * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. + * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. + * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. + * + * @param packageName NULL for ICU built-in data, otherwise application data package name + * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file + * @param mode normalization mode (compose or decompose etc.) + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) 
+ * @return the requested Normalizer2, if successful + * @draft ICU 4.4 + */ + static const Normalizer2 * + getInstance(const char *packageName, + const char *name, + UNormalization2Mode mode, + UErrorCode &errorCode); + + /** + * Returns the normalized form of the source string. + * @param src source string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return normalized src + * @draft ICU 4.4 + */ + UnicodeString + normalize(const UnicodeString &src, UErrorCode &errorCode) const { + UnicodeString result; + normalize(src, result, errorCode); + return result; + } + /** + * Writes the normalized form of the source string to the destination string + * (replacing its contents) and returns the destination string. + * The source and destination strings must be different objects. + * @param src source string + * @param dest destination string; its contents is replaced with normalized src + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @draft ICU 4.4 + */ + virtual UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + UErrorCode &errorCode) const = 0; + /** + * Appends the normalized form of the second string to the first string + * (merging them at the boundary) and returns the first string. + * The result is normalized if the first string was normalized. + * The first and second strings must be different objects. + * @param first string, should be normalized + * @param second string, will be normalized + * @param errorCode Standard ICU error code. 
Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @draft ICU 4.4 + */ + virtual UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const = 0; + /** + * Appends the second string to the first string + * (merging them at the boundary) and returns the first string. + * The result is normalized if both the strings were normalized. + * The first and second strings must be different objects. + * @param first string, should be normalized + * @param second string, should be normalized + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @draft ICU 4.4 + */ + virtual UnicodeString & + append(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const = 0; + + /** + * Tests if the string is normalized. + * Internally, in cases where the quickCheck() method would return "maybe" + * (which is only possible for the two COMPOSE modes) this method + * resolves to "yes" or "no" to provide a definitive result, + * at the cost of doing more work in those cases. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return TRUE if s is normalized + * @draft ICU 4.4 + */ + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; + + /** + * Tests if the string is normalized. 
+ * For the two COMPOSE modes, the result could be "maybe" in cases that + * would take a little more work to resolve definitively. + * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster + * combination of quick check + normalization, to avoid + * re-checking the "yes" prefix. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return UNormalizationCheckResult + * @draft ICU 4.4 + */ + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; + + /** + * Returns the end of the normalized substring of the input string. + * In other words, with end=spanQuickCheckYes(s, ec); + * the substring UnicodeString(s, 0, end) + * will pass the quick check with a "yes" result. + * + * The returned end index is usually one or more characters before the + * "no" or "maybe" character: The end index is at a normalization boundary. + * (See the class documentation for more about normalization boundaries.) + * + * When the goal is a normalized string and most input strings are expected + * to be normalized already, then call this method, + * and if it returns a prefix shorter than the input string, + * copy that prefix and use normalizeSecondAndAppend() for the remainder. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return "yes" span end index + * @draft ICU 4.4 + */ + virtual int32_t + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; + + /** + * Tests if the character has a normalization boundary before it.
+ * If true, then the character does not normalization-interact with + * preceding characters. + * In other words, a string containing this character can be normalized + * by processing portions before this character and starting from this + * character independently. + * This is used for iterative normalization. See the class documentation for details. + * @param c character to test + * @return TRUE if c has a normalization boundary before it + * @draft ICU 4.4 + */ + virtual UBool hasBoundaryBefore(UChar32 c) const = 0; + + /** + * Tests if the character has a normalization boundary after it. + * If true, then the character does not normalization-interact with + * following characters. + * In other words, a string containing this character can be normalized + * by processing portions up to this character and after this + * character independently. + * This is used for iterative normalization. See the class documentation for details. + * @param c character to test + * @return TRUE if c has a normalization boundary after it + * @draft ICU 4.4 + */ + virtual UBool hasBoundaryAfter(UChar32 c) const = 0; + + /** + * Tests if the character is normalization-inert. + * If true, then the character does not change, nor normalization-interact with + * preceding or following characters. + * In other words, a string containing this character can be normalized + * by processing portions before this character and after this + * character independently. + * This is used for iterative normalization. See the class documentation for details. + * @param c character to test + * @return TRUE if c is normalization-inert + * @draft ICU 4.4 + */ + virtual UBool isInert(UChar32 c) const = 0; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * @returns a UClassID for this class. + * @draft ICU 4.4 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * @return a UClassID for the actual class. 
+ * @draft ICU 4.4 + */ + virtual UClassID getDynamicClassID() const = 0; +}; + +/** + * Normalization filtered by a UnicodeSet. + * Normalizes portions of the text contained in the filter set and leaves + * portions not contained in the filter set unchanged. + * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE). + * Not-in-the-filter text is treated as "is normalized" and "quick check yes". + * This class implements all of (and only) the Normalizer2 API. + * An instance of this class is unmodifiable/immutable but is constructed and + * must be destructed by the owner. + * @draft ICU 4.4 + */ +class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { +public: + /** + * Constructs a filtered normalizer wrapping any Normalizer2 instance + * and a filter set. + * Both are aliased and must not be modified or deleted while this object + * is used. + * The filter set should be frozen; otherwise the performance will suffer greatly. + * @param n2 wrapped Normalizer2 instance + * @param filterSet UnicodeSet which determines the characters to be normalized + * @draft ICU 4.4 + */ + FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : + norm2(n2), set(filterSet) {} + + /** + * Writes the normalized form of the source string to the destination string + * (replacing its contents) and returns the destination string. + * The source and destination strings must be different objects. + * @param src source string + * @param dest destination string; its contents is replaced with normalized src + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) 
+ * @return dest + * @draft ICU 4.4 + */ + virtual UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + UErrorCode &errorCode) const; + /** + * Appends the normalized form of the second string to the first string + * (merging them at the boundary) and returns the first string. + * The result is normalized if the first string was normalized. + * The first and second strings must be different objects. + * @param first string, should be normalized + * @param second string, will be normalized + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @draft ICU 4.4 + */ + virtual UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const; + /** + * Appends the second string to the first string + * (merging them at the boundary) and returns the first string. + * The result is normalized if both the strings were normalized. + * The first and second strings must be different objects. + * @param first string, should be normalized + * @param second string, should be normalized + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @draft ICU 4.4 + */ + virtual UnicodeString & + append(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const; + + /** + * Tests if the string is normalized. + * For details see the Normalizer2 base class documentation. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. 
Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return TRUE if s is normalized + * @draft ICU 4.4 + */ + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; + /** + * Tests if the string is normalized. + * For details see the Normalizer2 base class documentation. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return UNormalizationCheckResult + * @draft ICU 4.4 + */ + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; + /** + * Returns the end of the normalized substring of the input string. + * For details see the Normalizer2 base class documentation. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return "yes" span end index + * @draft ICU 4.4 + */ + virtual int32_t + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; + + /** + * Tests if the character has a normalization boundary before it. + * For details see the Normalizer2 base class documentation. + * @param c character to test + * @return TRUE if c has a normalization boundary before it + * @draft ICU 4.4 + */ + virtual UBool hasBoundaryBefore(UChar32 c) const; + + /** + * Tests if the character has a normalization boundary after it. + * For details see the Normalizer2 base class documentation.
+ * @param c character to test + * @return TRUE if c has a normalization boundary after it + * @draft ICU 4.4 + */ + virtual UBool hasBoundaryAfter(UChar32 c) const; + + /** + * Tests if the character is normalization-inert. + * For details see the Normalizer2 base class documentation. + * @param c character to test + * @return TRUE if c is normalization-inert + * @draft ICU 4.4 + */ + virtual UBool isInert(UChar32 c) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * @returns a UClassID for this class. + * @draft ICU 4.4 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * @return a UClassID for the actual class. + * @draft ICU 4.4 + */ + virtual UClassID getDynamicClassID() const; +private: + UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + USetSpanCondition spanCondition, + UErrorCode &errorCode) const; + + UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UBool doNormalize, + UErrorCode &errorCode) const; + + const Normalizer2 &norm2; + const UnicodeSet &set; +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_NORMALIZATION +#endif // __NORMALIZER2_H__ diff --git a/icu4c/source/common/unicode/normlzr.h b/icu4c/source/common/unicode/normlzr.h index 7974f1ac4dd..a6cd44c67b7 100644 --- a/icu4c/source/common/unicode/normlzr.h +++ b/icu4c/source/common/unicode/normlzr.h @@ -1,7 +1,7 @@ /* ******************************************************************** * COPYRIGHT: - * Copyright (c) 1996-2006, International Business Machines Corporation and + * Copyright (c) 1996-2010, International Business Machines Corporation and * others. All Rights Reserved. 
******************************************************************** */ @@ -18,14 +18,11 @@ #if !UCONFIG_NO_NORMALIZATION -#include "unicode/uobject.h" -#include "unicode/unistr.h" #include "unicode/chariter.h" +#include "unicode/normalizer2.h" +#include "unicode/unistr.h" #include "unicode/unorm.h" - - -struct UCharIterator; -typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ +#include "unicode/uobject.h" U_NAMESPACE_BEGIN /** @@ -33,6 +30,10 @@ U_NAMESPACE_BEGIN * * Unicode Standard Annex #15: Unicode Normalization Forms. * + * Note: This API has been replaced by the Normalizer2 class and is only available + * for backward compatibility. This class simply delegates to the Normalizer2 class. + * There is one exception: The new API does not provide a replacement for Normalizer::compare(). + * * The Normalizer class consists of two parts: * - static functions that normalize strings or test if strings are normalized * - a Normalizer object is an iterator that takes any kind of text and @@ -40,13 +41,11 @@ U_NAMESPACE_BEGIN * * The Normalizer class is not suitable for subclassing. * - * The static functions are basically wrappers around the C implementation, - * using UnicodeString instead of UChar*. * For basic information about normalization forms and details about the C API * please see the documentation in unorm.h. * * The iterator API with the Normalizer constructors and the non-static functions - * uses a CharacterIterator as input. It is possible to pass a string which + * use a CharacterIterator as input. It is possible to pass a string which * is then internally wrapped in a CharacterIterator. * The input text is not normalized all at once, but incrementally where needed * (providing efficient random access). 
@@ -287,7 +286,7 @@ public: * @see isNormalized * @stable ICU 2.6 */ - static inline UNormalizationCheckResult + static UNormalizationCheckResult quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status); /** @@ -328,7 +327,7 @@ public: * @see quickCheck * @stable ICU 2.6 */ - static inline UBool + static UBool isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode); /** @@ -726,18 +725,20 @@ private: UBool nextNormalize(); UBool previousNormalize(); - void init(CharacterIterator *iter); + void init(); void clearBuffer(void); //------------------------------------------------------------------------- // Private data //------------------------------------------------------------------------- + FilteredNormalizer2*fFilteredNorm2; // owned if not NULL + const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2 UNormalizationMode fUMode; int32_t fOptions; // The input text and our position in it - UCharIterator *text; + CharacterIterator *text; // The normalization buffer is the result of normalization // of the source in [currentIndex..nextIndex[ . 
@@ -746,7 +747,6 @@ private: // A buffer for holding intermediate results UnicodeString buffer; int32_t bufferPos; - }; //------------------------------------------------------------------------- @@ -761,48 +761,14 @@ inline UNormalizationCheckResult Normalizer::quickCheck(const UnicodeString& source, UNormalizationMode mode, UErrorCode &status) { - if(U_FAILURE(status)) { - return UNORM_MAYBE; - } - - return unorm_quickCheck(source.getBuffer(), source.length(), - mode, &status); -} - -inline UNormalizationCheckResult -Normalizer::quickCheck(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UErrorCode &status) { - if(U_FAILURE(status)) { - return UNORM_MAYBE; - } - - return unorm_quickCheckWithOptions(source.getBuffer(), source.length(), - mode, options, &status); + return quickCheck(source, mode, 0, status); } inline UBool Normalizer::isNormalized(const UnicodeString& source, UNormalizationMode mode, UErrorCode &status) { - if(U_FAILURE(status)) { - return FALSE; - } - - return unorm_isNormalized(source.getBuffer(), source.length(), - mode, &status); -} - -inline UBool -Normalizer::isNormalized(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UErrorCode &status) { - if(U_FAILURE(status)) { - return FALSE; - } - - return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(), - mode, options, &status); + return isNormalized(source, mode, 0, status); } inline int32_t diff --git a/icu4c/source/common/unicode/uchar.h b/icu4c/source/common/unicode/uchar.h index f4c276c7e0d..31fb5013a40 100644 --- a/icu4c/source/common/unicode/uchar.h +++ b/icu4c/source/common/unicode/uchar.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1997-2009, International Business Machines +* Copyright (C) 1997-2010, International Business Machines * Corporation and others. All Rights Reserved. 
********************************************************************** * @@ -321,51 +321,29 @@ typedef enum UProperty { /** Binary property NFD_Inert. ICU-specific property for characters that are inert under NFD, i.e., they do not interact with adjacent characters. - Used for example in normalizing transforms in incremental mode - to find the boundary of safely normalizable text despite possible - text additions. - - There is one such property per normalization form. - These properties are computed as follows - an inert character is: - a) unassigned, or ALL of the following: - b) of combining class 0. - c) not decomposed by this normalization form. - AND if NFC or NFKC, - d) can never compose with a previous character. - e) can never compose with a following character. - f) can never change if another character is added. - Example: a-breve might satisfy all but f, but if you - add an ogonek it changes to a-ogonek + breve - - See also com.ibm.text.UCD.NFSkippable in the ICU4J repository, - and icu/source/common/unormimp.h . + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. @stable ICU 3.0 */ UCHAR_NFD_INERT=37, /** Binary property NFKD_Inert. ICU-specific property for characters that are inert under NFKD, i.e., they do not interact with adjacent characters. - Used for example in normalizing transforms in incremental mode - to find the boundary of safely normalizable text despite possible - text additions. - @see UCHAR_NFD_INERT + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. @stable ICU 3.0 */ UCHAR_NFKD_INERT=38, /** Binary property NFC_Inert. ICU-specific property for characters that are inert under NFC, i.e., they do not interact with adjacent characters. - Used for example in normalizing transforms in incremental mode - to find the boundary of safely normalizable text despite possible - text additions. 
- @see UCHAR_NFD_INERT + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. @stable ICU 3.0 */ UCHAR_NFC_INERT=39, /** Binary property NFKC_Inert. ICU-specific property for characters that are inert under NFKC, i.e., they do not interact with adjacent characters. - Used for example in normalizing transforms in incremental mode - to find the boundary of safely normalizable text despite possible - text additions. - @see UCHAR_NFD_INERT + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. @stable ICU 3.0 */ UCHAR_NFKC_INERT=40, /** Binary Property Segment_Starter. @@ -428,8 +406,10 @@ typedef enum UProperty { UCHAR_CHANGES_WHEN_CASEFOLDED=54, /** Binary property Changes_When_Casemapped. @draft ICU 4.4 */ UCHAR_CHANGES_WHEN_CASEMAPPED=55, + /** Binary property Changes_When_NFKC_Casefolded. @draft ICU 4.4 */ + UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */ - UCHAR_BINARY_LIMIT=56, + UCHAR_BINARY_LIMIT=57, /** Enumerated property Bidi_Class. Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */ diff --git a/icu4c/source/common/unicode/uniset.h b/icu4c/source/common/unicode/uniset.h index 4849fd312ea..5ea23e8011e 100644 --- a/icu4c/source/common/unicode/uniset.h +++ b/icu4c/source/common/unicode/uniset.h @@ -1,6 +1,6 @@ /* *************************************************************************** -* Copyright (C) 1999-2009, International Business Machines Corporation +* Copyright (C) 1999-2010, International Business Machines Corporation * and others. All Rights Reserved. *************************************************************************** * Date Name Description @@ -861,6 +861,20 @@ public: */ int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; + /** + * Returns the end of the substring of the input string according to the USetSpanCondition. 
+ * Same as start+span(s.getBuffer()+start, s.length()-start, spanCondition) + * after pinning start to 0<=start<=s.length(). + * @param s the string + * @param start the start index in the string for the span operation + * @param spanCondition specifies the containment condition + * @return the exclusive end of the substring according to the spanCondition; + * the substring s.tempSubStringBetween(start, end) fulfills the spanCondition + * @draft ICU 4.4 + * @see USetSpanCondition + */ + inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const; + /** * Returns the start of the trailing substring of the input string which * consists only of characters and strings that are contained in this set @@ -880,6 +894,21 @@ public: */ int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; + /** + * Returns the start of the substring of the input string according to the USetSpanCondition. + * Same as spanBack(s.getBuffer(), limit, spanCondition) + * after pinning limit to 0<=limit<=s.length().
+ * @param s the string + * @param limit the exclusive-end index in the string for the span operation + * (use s.length() or INT32_MAX for spanning back from the end of the string) + * @param spanCondition specifies the containment condition + * @return the start of the substring according to the spanCondition; + * the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition + * @draft ICU 4.4 + * @see USetSpanCondition + */ + inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const; + /** * Returns the length of the initial substring of the input string which * consists only of characters and strings that are contained in this set @@ -1619,6 +1648,26 @@ inline const USet *UnicodeSet::toUSet() const { return reinterpret_cast(this); } +inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const { + int32_t sLength=s.length(); + if(start<0) { + start=0; + } else if(start>sLength) { + start=sLength; + } + return start+span(s.getBuffer()+start, sLength-start, spanCondition); +} + +inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const { + int32_t sLength=s.length(); + if(limit<0) { + limit=0; + } else if(limit>sLength) { + limit=sLength; + } + return spanBack(s.getBuffer(), limit, spanCondition); +} + U_NAMESPACE_END #endif diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 953e9ed9a5d..b3b2898c7e5 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1998-2009, International Business Machines +* Copyright (C) 1998-2010, International Business Machines * Corporation and others. All Rights Reserved. 
********************************************************************** * @@ -1566,6 +1566,33 @@ public: #endif + /** + * Create a temporary substring for the specified range. + * Unlike the substring constructor and setTo() functions, + * the object returned here will be a read-only alias (using getBuffer()) + * rather than copying the text. + * As a result, this substring operation is much faster but requires + * that the original string not be modified or deleted during the lifetime + * of the returned substring object. + * @param start offset of the first character visible in the substring + * @param length length of the substring + * @return a read-only alias UnicodeString object for the substring + * @draft ICU 4.4 + */ + UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; + + /** + * Create a temporary substring for the specified range. + * Same as tempSubString(start, length) except that the substring range + * is specified as a (start, limit) pair (with an exclusive limit index) + * rather than a (start, length) pair. + * @param start offset of the first character visible in the substring + * @param limit offset immediately following the last character visible in the substring + * @return a read-only alias UnicodeString object for the substring + * @draft ICU 4.4 + */ + inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; + /** * Convert the UnicodeString to UTF-8 and write the result * to a ByteSink. This is called by toUTF8String(). @@ -2396,6 +2423,16 @@ public: inline UnicodeString& removeBetween(int32_t start, int32_t limit = (int32_t)INT32_MAX); + /** + * Retain only the characters in the range + * [start, limit) from the UnicodeString object. + * Removes characters before start and at and after limit. 
+ * @param start the offset of the first character to retain + * @param limit the offset immediately following the range to retain + * @return a reference to this + * @draft ICU 4.4 + */ + inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); /* Length operations */ @@ -4068,6 +4105,11 @@ UnicodeString::extractBetween(int32_t start, doExtract(start, limit - start, dst, dstStart); } +inline UnicodeString +UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { + return tempSubString(start, limit - start); +} + inline UChar UnicodeString::doCharAt(int32_t offset) const { @@ -4161,7 +4203,13 @@ UnicodeString::getTerminatedBuffer() { } else { UChar *array = getArrayStart(); int32_t len = length(); - if(len < getCapacity()) { + if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) { + /* + * kRefCounted: Do not write the NUL if the buffer is shared. + * That is mostly safe, except when the length of one copy was modified + * without copy-on-write, e.g., via truncate(newLength) or remove(void). + * Then the NUL would be written into the middle of another copy's string. 
+ */ if(!(fFlags&kBufferIsReadonly)) { /* * We must not write to a readonly buffer, but it is known to be @@ -4332,10 +4380,12 @@ inline UnicodeString& UnicodeString::remove() { // remove() of a bogus string makes the string empty and non-bogus - if(isBogus()) { - unBogus(); + // we also un-alias a read-only alias to deal with NUL-termination + // issues with getTerminatedBuffer() + if(fFlags & (kIsBogus|kBufferIsReadonly)) { + setToEmpty(); } else { - setLength(0); + fShortLength = 0; } return *this; } @@ -4356,6 +4406,12 @@ UnicodeString::removeBetween(int32_t start, int32_t limit) { return doReplace(start, limit - start, NULL, 0, 0); } +inline UnicodeString & +UnicodeString::retainBetween(int32_t start, int32_t limit) { + truncate(limit); + return doReplace(0, start, NULL, 0, 0); +} + inline UBool UnicodeString::truncate(int32_t targetLength) { @@ -4365,6 +4421,9 @@ UnicodeString::truncate(int32_t targetLength) return FALSE; } else if((uint32_t)targetLength < (uint32_t)length()) { setLength(targetLength); + if(fFlags&kBufferIsReadonly) { + fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more + } return TRUE; } else { return FALSE; diff --git a/icu4c/source/common/unicode/unorm.h b/icu4c/source/common/unicode/unorm.h index c22b808aa57..fbb7b49b364 100644 --- a/icu4c/source/common/unicode/unorm.h +++ b/icu4c/source/common/unicode/unorm.h @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (c) 1996-2007, International Business Machines Corporation +* Copyright (c) 1996-2010, International Business Machines Corporation * and others. All Rights Reserved. ******************************************************************************* * File unorm.h @@ -20,6 +20,7 @@ #if !UCONFIG_NO_NORMALIZATION #include "unicode/uiter.h" +#include "unicode/unorm2.h" /** * \file @@ -27,6 +28,11 @@ * *

<h2>Unicode normalization API</h2>

* + * Note: This API has been replaced by the unorm2.h API and is only available + * for backward compatibility. The functions here simply delegate to the + * unorm2.h functions, for example unorm2_getInstance() and unorm2_normalize(). + * There is one exception: The new API does not provide a replacement for unorm_compare(). + * * unorm_normalize transforms Unicode text into an equivalent composed or * decomposed form, allowing for easier sorting and searching of text. * unorm_normalize supports the standard normalization forms described in @@ -202,28 +208,7 @@ unorm_normalize(const UChar *source, int32_t sourceLength, UNormalizationMode mode, int32_t options, UChar *result, int32_t resultLength, UErrorCode *status); -#endif -/** - * Result values for unorm_quickCheck(). - * For details see Unicode Technical Report 15. - * @stable ICU 2.0 - */ -typedef enum UNormalizationCheckResult { - /** - * Indicates that string is not in the normalized format - */ - UNORM_NO, - /** - * Indicates that string is in the normalized format - */ - UNORM_YES, - /** - * Indicates that string cannot be determined if it is in the normalized - * format without further thorough checks. - */ - UNORM_MAYBE -} UNormalizationCheckResult; -#if !UCONFIG_NO_NORMALIZATION + /** * Performing quick check on a string, to quickly determine if the string is * in a particular normalization format. diff --git a/icu4c/source/common/unicode/unorm2.h b/icu4c/source/common/unicode/unorm2.h new file mode 100644 index 00000000000..6cb73ea3227 --- /dev/null +++ b/icu4c/source/common/unicode/unorm2.h @@ -0,0 +1,348 @@ +/* +******************************************************************************* +* +* Copyright (C) 2009-2010, International Business Machines +* Corporation and others. All Rights Reserved. 
+* +******************************************************************************* +* file name: unorm2.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009dec15 +* created by: Markus W. Scherer +*/ + +#ifndef __UNORM2_H__ +#define __UNORM2_H__ + +/** + * \file + * \brief C API: New API for Unicode Normalization. + * + * Unicode normalization functionality for standard Unicode normalization or + * for using custom mapping tables. + * All instances of UNormalizer2 are unmodifiable/immutable. + * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller. + * For more details see the Normalizer2 C++ class. + */ + +#include "unicode/utypes.h" +#include "unicode/uset.h" + +/** + * Constants for normalization modes. + * For details about standard Unicode normalization forms + * and about the algorithms which are also used with custom mapping tables + * see http://www.unicode.org/unicode/reports/tr15/ + * @draft ICU 4.4 + */ +typedef enum { + /** + * Decomposition followed by composition. + * Same as standard NFC when using an "nfc" instance. + * Same as standard NFKC when using an "nfkc" instance. + * For details about standard Unicode normalization forms + * see http://www.unicode.org/unicode/reports/tr15/ + * @draft ICU 4.4 + */ + UNORM2_COMPOSE, + /** + * Map, and reorder canonically. + * Same as standard NFD when using an "nfc" instance. + * Same as standard NFKD when using an "nfkc" instance. + * For details about standard Unicode normalization forms + * see http://www.unicode.org/unicode/reports/tr15/ + * @draft ICU 4.4 + */ + UNORM2_DECOMPOSE, + /** + * "Fast C or D" form. + * Further decomposition without reordering + * would yield the same form as DECOMPOSE. + * Text in "Fast C or D" form can be processed efficiently with data tables + * that are "canonically closed", that is, that provide equivalent data for + * equivalent text, without having to be fully normalized. 
+ * Not a standard Unicode normalization form. + * Not a unique form: Different FCD strings can be canonically equivalent. + * For details see http://www.unicode.org/notes/tn5/#FCD + * @draft ICU 4.4 + */ + UNORM2_FCD, + /** + * Compose only contiguously. + * Also known as "FCC" or "Fast C Contiguous". + * The result will often but not always be in NFC. + * The result will conform to FCD which is useful for processing. + * Not a standard Unicode normalization form. + * For details see http://www.unicode.org/notes/tn5/#FCC + * @draft ICU 4.4 + */ + UNORM2_COMPOSE_CONTIGUOUS +} UNormalization2Mode; + +/** + * Result values for normalization quick check functions. + * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms + * @stable ICU 2.0 + */ +typedef enum UNormalizationCheckResult { + /** + * The input string is not in the normalization form. + * @stable ICU 2.0 + */ + UNORM_NO, + /** + * The input string is in the normalization form. + * @stable ICU 2.0 + */ + UNORM_YES, + /** + * The input string may or may not be in the normalization form. + * This value is only returned for composition forms like NFC and FCC, + * when a backward-combining character is found for which the surrounding text + * would have to be analyzed further. + * @stable ICU 2.0 + */ + UNORM_MAYBE +} UNormalizationCheckResult; + +/** + * Opaque C service object type for the new normalization API. + * @draft ICU 4.4 + */ +struct UNormalizer2; +typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @draft ICU 4.4 */ + +#if !UCONFIG_NO_NORMALIZATION + +/** + * Returns a UNormalizer2 instance which uses the specified data file + * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) + * and which composes or decomposes text according to the specified mode. + * Returns an unmodifiable singleton instance. Do not delete it. + * + * Use packageName=NULL for data files that are part of ICU's own data. 
+ * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. + * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. + * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. + * + * @param packageName NULL for ICU built-in data, otherwise application data package name + * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file + * @param mode normalization mode (compose or decompose etc.) + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested UNormalizer2, if successful + * @draft ICU 4.4 + */ +U_DRAFT const UNormalizer2 * U_EXPORT2 +unorm2_getInstance(const char *packageName, + const char *name, + UNormalization2Mode mode, + UErrorCode *pErrorCode); + +/** + * Constructs a filtered normalizer wrapping any UNormalizer2 instance + * and a filter set. + * Both are aliased and must not be modified or deleted while this object + * is used. + * The filter set should be frozen; otherwise the performance will suffer greatly. + * @param norm2 wrapped Normalizer2 instance + * @param filterSet USet which determines the characters to be normalized + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested UNormalizer2, if successful + * @draft ICU 4.4 + */ +U_DRAFT UNormalizer2 * U_EXPORT2 +unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode); + +/** + * Closes a UNormalizer2 instance from unorm2_openFiltered(). + * Do not close instances from unorm2_getInstance()! 
+ * @param norm2 UNormalizer2 instance to be closed + * @draft ICU 4.4 + */ +U_DRAFT void U_EXPORT2 +unorm2_close(UNormalizer2 *norm2); + +/** + * Writes the normalized form of the source string to the destination string + * (replacing its contents) and returns the length of the destination string. + * The source and destination strings must be different buffers. + * @param norm2 UNormalizer2 instance + * @param src source string + * @param length length of the source string, or -1 if NUL-terminated + * @param dest destination string; its contents are replaced with normalized src + * @param capacity number of UChars that can be written to dest + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return length of the destination string + * @draft ICU 4.4 + */ +U_DRAFT int32_t U_EXPORT2 +unorm2_normalize(const UNormalizer2 *norm2, + const UChar *src, int32_t length, + UChar *dest, int32_t capacity, + UErrorCode *pErrorCode); +/** + * Appends the normalized form of the second string to the first string + * (merging them at the boundary) and returns the length of the first string. + * The result is normalized if the first string was normalized. + * The first and second strings must be different buffers. + * @param norm2 UNormalizer2 instance + * @param first string, should be normalized + * @param firstLength length of the first string, or -1 if NUL-terminated + * @param firstCapacity number of UChars that can be written to first + * @param second string, will be normalized + * @param secondLength length of the second string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.)
+ * @return length of the first string after appending + * @draft ICU 4.4 + */ +U_DRAFT int32_t U_EXPORT2 +unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UErrorCode *pErrorCode); +/** + * Appends the second string to the first string + * (merging them at the boundary) and returns the length of the first string. + * The result is normalized if both the strings were normalized. + * The first and second strings must be different buffers. + * @param norm2 UNormalizer2 instance + * @param first string, should be normalized + * @param firstLength length of the first string, or -1 if NUL-terminated + * @param firstCapacity number of UChars that can be written to first + * @param second string, should be normalized + * @param secondLength length of the second string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return length of the first string after appending + * @draft ICU 4.4 + */ +U_DRAFT int32_t U_EXPORT2 +unorm2_append(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UErrorCode *pErrorCode); + +/** + * Tests if the string is normalized. + * Internally, in cases where the quickCheck() method would return "maybe" + * (which is only possible for the two COMPOSE modes) this method + * resolves to "yes" or "no" to provide a definitive result, + * at the cost of doing more work in those cases. + * @param norm2 UNormalizer2 instance + * @param s input string + * @param length length of the string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately.
Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return TRUE if s is normalized + * @draft ICU 4.4 + */ +U_DRAFT UBool U_EXPORT2 +unorm2_isNormalized(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode); + +/** + * Tests if the string is normalized. + * For the two COMPOSE modes, the result could be "maybe" in cases that + * would take a little more work to resolve definitively. + * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster + * combination of quick check + normalization, to avoid + * re-checking the "yes" prefix. + * @param norm2 UNormalizer2 instance + * @param s input string + * @param length length of the string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return UNormalizationCheckResult + * @draft ICU 4.4 + */ +U_DRAFT UNormalizationCheckResult U_EXPORT2 +unorm2_quickCheck(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode); + +/** + * Returns the end of the normalized substring of the input string. + * In other words, with end=spanQuickCheckYes(s, ec); + * the substring UnicodeString(s, 0, end) + * will pass the quick check with a "yes" result. + * + * The returned end index is usually one or more characters before the + * "no" or "maybe" character: The end index is at a normalization boundary. + * (See the class documentation for more about normalization boundaries.) + * + * When the goal is a normalized string and most input strings are expected + * to be normalized already, then call this method, + * and if it returns a prefix shorter than the input string, + * copy that prefix and use normalizeSecondAndAppend() for the remainder.
+ * @param norm2 UNormalizer2 instance + * @param s input string + * @param length length of the string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return end index of the prefix of s that passes the quick check with a "yes" result + * @draft ICU 4.4 + */ +U_DRAFT int32_t U_EXPORT2 +unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode); + +/** + * Tests if the character has a normalization boundary before it. + * For details see the Normalizer2 base class documentation. + * @param norm2 UNormalizer2 instance + * @param c character to test + * @return TRUE if c has a normalization boundary before it + * @draft ICU 4.4 + */ +U_DRAFT UBool U_EXPORT2 +unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); + +/** + * Tests if the character has a normalization boundary after it. + * For details see the Normalizer2 base class documentation. + * @param norm2 UNormalizer2 instance + * @param c character to test + * @return TRUE if c has a normalization boundary after it + * @draft ICU 4.4 + */ +U_DRAFT UBool U_EXPORT2 +unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); + +/** + * Tests if the character is normalization-inert. + * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance + * @param c character to test + * @return TRUE if c is normalization-inert + * @draft ICU 4.4 + */ +U_DRAFT UBool U_EXPORT2 +unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); + +#endif /* !UCONFIG_NO_NORMALIZATION */ +#endif /* __UNORM2_H__ */ diff --git a/icu4c/source/common/uniset_props.cpp b/icu4c/source/common/uniset_props.cpp index 3a09d438a35..3e4779c05b9 100644 --- a/icu4c/source/common/uniset_props.cpp +++ b/icu4c/source/common/uniset_props.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2009, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -33,12 +33,15 @@ #include "uvector.h" #include "uprops.h" #include "propname.h" +#include "normalizer2impl.h" #include "unormimp.h" #include "ucase.h" #include "ubidi_props.h" #include "uinvchar.h" +#include "uprops.h" #include "charstr.h" #include "cstring.h" +#include "mutex.h" #include "umutex.h" #include "uassert.h" #include "hash.h" @@ -91,10 +94,43 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:] */ //static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */ +// Cached sets ------------------------------------------------------------- *** + +U_CDECL_BEGIN +static UBool U_CALLCONV uset_cleanup(); +U_CDECL_END + +// Not a TriStateSingletonWrapper because we think the UnicodeSet constructor +// can only fail with an out-of-memory error +// if we have a correct pattern and the properties data is hardcoded and always available. 
+class UnicodeSetSingleton : public SimpleSingletonWrapper { +public: + UnicodeSetSingleton(SimpleSingleton &s, const char *pattern) : + SimpleSingletonWrapper(s), fPattern(pattern) {} + UnicodeSet *getInstance(UErrorCode &errorCode) { + return SimpleSingletonWrapper::getInstance(createInstance, fPattern, errorCode); + } +private: + /* Singleton factory callback: builds the UnicodeSet for the invariant-character pattern passed as context, freezes it and registers uset_cleanup. On allocation failure sets U_MEMORY_ALLOCATION_ERROR and returns NULL. */ + static void *createInstance(const void *context, UErrorCode &errorCode) { + UnicodeString pattern((const char *)context, -1, US_INV); + UnicodeSet *set=new UnicodeSet(pattern, errorCode); + if(set==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + /* return now: set is dereferenced by freeze() below */ + return NULL; + } + set->freeze(); + ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); + return set; + } + + const char *fPattern; +}; + U_CDECL_BEGIN static UnicodeSet *INCLUSIONS[UPROPS_SRC_COUNT] = { NULL }; // cached getInclusions() +STATIC_SIMPLE_SINGLETON(uni32Singleton); + //---------------------------------------------------------------- // Inclusions list //---------------------------------------------------------------- @@ -128,7 +164,7 @@ static UBool U_CALLCONV uset_cleanup(void) { INCLUSIONS[i] = NULL; } } - + UnicodeSetSingleton(uni32Singleton, NULL).deleteInstance(); return TRUE; } @@ -177,6 +213,27 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) { ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status); unorm_addPropertyStarts(&sa, &status); break; + case UPROPS_SRC_NFC: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status); + if(U_SUCCESS(status)) { + impl->addPropertyStarts(&sa, status); + } + break; + } + case UPROPS_SRC_NFKC: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(status); + if(U_SUCCESS(status)) { + impl->addPropertyStarts(&sa, status); + } + break; + } + case UPROPS_SRC_NFKC_CF: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(status); + if(U_SUCCESS(status)) { + impl->addPropertyStarts(&sa, status); + } + break; + } #endif case UPROPS_SRC_CASE:
ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status); @@ -207,6 +264,13 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) { return INCLUSIONS[src]; } +// Cache some sets for other services -------------------------------------- *** + +U_CFUNC UnicodeSet * +uniset_getUnicode32Instance(UErrorCode &errorCode) { + return UnicodeSetSingleton(uni32Singleton, "[:age=3.2:]").getInstance(errorCode); +} + // helper functions for matching of pattern syntax pieces ------------------ *** // these functions are parallel to the PERL_OPEN etc. strings above diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index e79b9dfb201..d5b2e87f552 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1999-2009, International Business Machines Corporation and * +* Copyright (C) 1999-2010, International Business Machines Corporation and * * others. All Rights Reserved. 
* ****************************************************************************** * @@ -780,6 +780,17 @@ UnicodeString::extract(int32_t start, return u_terminateChars(target, targetCapacity, length, &status); } +UnicodeString +UnicodeString::tempSubString(int32_t start, int32_t len) const { + pinIndices(start, len); + const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer + if(array==NULL) { + array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string + len=-2; // bogus result string + } + return UnicodeString(FALSE, array + start, len); +} + int32_t UnicodeString::toUTF8(int32_t start, int32_t len, char *target, int32_t capacity) const { @@ -1218,6 +1229,28 @@ UnicodeString::doReplace(int32_t start, return *this; } + int32_t oldLength = this->length(); + + // optimize (read-only alias).remove(0, start) and .remove(start, end) + if((fFlags&kBufferIsReadonly) && srcLength == 0) { + if(start == 0) { + // remove prefix by adjusting the array pointer + pinIndex(length); + fUnion.fFields.fArray += length; + fUnion.fFields.fCapacity -= length; + setLength(oldLength - length); + return *this; + } else { + pinIndex(start); + if(length >= (oldLength - start)) { + // remove suffix by reducing the length (like truncate()) + setLength(start); + fUnion.fFields.fCapacity = start; // not NUL-terminated any more + return *this; + } + } + } + if(srcChars == 0) { srcStart = srcLength = 0; } else if(srcLength < 0) { @@ -1225,8 +1258,6 @@ UnicodeString::doReplace(int32_t start, srcLength = u_strlen(srcChars + srcStart); } - int32_t oldLength = this->length(); - // calculate the size of the string after the replace int32_t newSize; @@ -1594,4 +1625,3 @@ static void uprv_UnicodeStringDummy(void) { delete [] (new UnicodeString[2]); } #endif - diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp index 2eb17b4b33d..890c9809cce 100644 --- a/icu4c/source/common/unorm.cpp +++ b/icu4c/source/common/unorm.cpp 
@@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (c) 1996-2009, International Business Machines +* Copyright (c) 1996-2010, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * File unorm.cpp @@ -20,6 +20,7 @@ * instead of just wrappers around normlzr.cpp, * load unorm.dat, support Unicode 3.1 with * supplementary code points, etc. +* 2009-nov..2010-jan Markus Scherer total rewrite, new Normalizer2 API & code */ #include "unicode/utypes.h" @@ -30,52 +31,17 @@ #include "unicode/uchar.h" #include "unicode/ustring.h" #include "unicode/uiter.h" -#include "unicode/uniset.h" -#include "unicode/usetiter.h" #include "unicode/unorm.h" +#include "normalizer2impl.h" #include "ucln_cmn.h" #include "unormimp.h" -#include "ucase.h" +#include "uprops.h" #include "cmemory.h" #include "umutex.h" #include "utrie2.h" #include "unicode/uset.h" -#include "udataswp.h" #include "putilimp.h" -/* - * Status of tailored normalization - * - * This was done initially for investigation on Unicode public review issue 7 - * (http://www.unicode.org/review/). See Jitterbug 2481. - * While the UTC at meeting #94 (2003mar) did not take up the issue, this is - * a permanent feature in ICU 2.6 in support of IDNA which requires true - * Unicode 3.2 normalization. - * (NormalizationCorrections are rolled into IDNA mapping tables.) - * - * Tailored normalization as implemented here allows to "normalize less" - * than full Unicode normalization would. - * Based internally on a UnicodeSet of code points that are - * "excluded from normalization", the normalization functions leave those - * code points alone ("inert"). This means that tailored normalization - * still transforms text into a canonically equivalent form. - * It does not add decompositions to code points that do not have any or - * change decomposition results. 
- * - * Any function that searches for a safe boundary has not been touched, - * which means that these functions will be over-pessimistic when - * exclusions are applied. - * This should not matter because subsequent checks and normalizations - * do apply the exclusions; only a little more of the text may be processed - * than necessary under exclusions. - * - * Normalization exclusions have the following effect on excluded code points c: - * - c is not decomposed - * - c is not a composition target - * - c does not combine forward or backward for composition - * except that this is not implemented for Jamo - * - c is treated as having a combining class of 0 - */ #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) U_NAMESPACE_USE @@ -86,87 +52,6 @@ U_NAMESPACE_USE * The format of that file is described in unormimp.h . */ -/* -------------------------------------------------------------------------- */ - -enum { - _STACK_BUFFER_CAPACITY=100 -}; - -/* - * Constants for the bit fields in the options bit set parameter. - * These need not be public. - * A user only needs to know the currently assigned values. - * The number and positions of reserved bits per field can remain private - * and may change in future implementations. - */ -enum { - _NORM_OPTIONS_NX_MASK=0x1f, - _NORM_OPTIONS_UNICODE_MASK=0x60, - _NORM_OPTIONS_SETS_MASK=0x7f, - - _NORM_OPTIONS_UNICODE_SHIFT=5, - - /* - * The following options are used only in some composition functions. - * They use bits 12 and up to preserve lower bits for the available options - * space in unorm_compare() - - * see documentation for UNORM_COMPARE_NORM_OPTIONS_SHIFT. - */ - - /** Options bit 12, for compatibility vs. canonical decomposition. */ - _NORM_OPTIONS_COMPAT=0x1000, - /** Options bit 13, no discontiguous composition (FCC vs. NFC). 
*/ - _NORM_OPTIONS_COMPOSE_CONTIGUOUS=0x2000 -}; - -U_CDECL_BEGIN -static inline UBool -isHangulWithoutJamoT(UChar c) { - c-=HANGUL_BASE; - return c=_NORM_MIN_HANGUL; -} - -/* - * Given isNorm32HangulOrJamo(), - * is this a Hangul syllable or a Jamo? - */ -/*static inline UBool -isHangulJamoNorm32HangulOrJamoL(uint32_t norm32) { - return norm32<_NORM_MIN_JAMO_V; -}*/ - -/* - * Given norm32 for Jamo V or T, - * is this a Jamo V? - */ -static inline UBool -isJamoVTNorm32JamoV(uint32_t norm32) { - return norm32<_NORM_JAMO_V_TOP; -} -U_CDECL_END - /* load unorm.dat ----------------------------------------------------------- */ #define UNORM_HARDCODE_DATA 1 @@ -205,15 +90,10 @@ static UVersionInfo dataVersion={ 0, 0, 0, 0 }; #endif -/* cache UnicodeSets for each combination of exclusion flags */ -static UnicodeSet *nxCache[_NORM_OPTIONS_SETS_MASK+1]={ NULL }; - U_CDECL_BEGIN static UBool U_CALLCONV unorm_cleanup(void) { - int32_t i; - #if !UNORM_HARDCODE_DATA if(normData!=NULL) { udata_close(normData); @@ -223,13 +103,6 @@ unorm_cleanup(void) { haveNormData=0; #endif - for(i=0; i<(int32_t)LENGTHOF(nxCache); ++i) { - if (nxCache[i]) { - delete nxCache[i]; - nxCache[i] = 0; - } - } - return TRUE; } @@ -397,562 +270,7 @@ unorm_haveData(UErrorCode *pErrorCode) { return _haveData(*pErrorCode); } -U_CAPI const uint16_t * U_EXPORT2 -unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) { - if(_haveData(*pErrorCode)) { - fcdHighStart=fcdTrie.highStart; - return fcdTrie.index; - } else { - return NULL; - } -} - -/* data access primitives --------------------------------------------------- */ - -static inline uint32_t -_getNorm32(UChar c) { - return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(&normTrie, c); -} - -static inline uint32_t -_getNorm32FromSurrogatePair(UChar c, UChar c2) { - UChar32 cp=U16_GET_SUPPLEMENTARY(c, c2); - return UTRIE2_GET32_FROM_SUPP(&normTrie, cp); -} - -/* - * get a norm32 from text with complete code points - * (like from decompositions) - */ 
-static inline uint32_t -_getNorm32(const UChar *p, uint32_t mask) { - UChar c=*p; - uint32_t norm32=_getNorm32(c); - if((norm32&mask) && U16_IS_LEAD(c)) { - /* c is a lead surrogate, get the real norm32 */ - norm32=_getNorm32FromSurrogatePair(c, *(p+1)); - } - return norm32; -} - -static inline uint16_t -_getFCD16(UChar c) { - return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(&fcdTrie, c); -} - -static inline uint16_t -_getFCD16FromSurrogatePair(UChar c, UChar c2) { - UChar32 cp=U16_GET_SUPPLEMENTARY(c, c2); - return UTRIE2_GET16_FROM_SUPP(&fcdTrie, cp); -} - -static inline const uint16_t * -_getExtraData(uint32_t norm32) { - return extraData+(norm32>>_NORM_EXTRA_SHIFT); -} - -/* - * TODO(markus): Revisit if it makes sense for functions like _getNextCC() - * and their call sites, and a fair bit of other code here, to work with UTF-16 code units, - * or whether code simplification would suggest just using UChar32 and maybe UTRIE2_NEXT32(). - */ - -#if 0 -/* - * It is possible to get the FCD data from the main trie if unorm.icu - * was built without the FCD trie, although it is slower. - * This is not implemented because it is hard to test, and because it seems - * unusual to want to use FCD and not build the data file for it. 
- * - * Untested sample code: - */ -static inline uint16_t -_getFCD16FromNormData(UChar32 c) { - uint32_t norm32, fcd; - - norm32=_getNorm32(c); - if((norm32&_NORM_QC_NFD) && isNorm32Regular(norm32)) { - /* get the lead/trail cc from the decomposition data */ - const uint16_t *nfd=_getExtraData(norm32); - if(*nfd&_NORM_DECOMP_FLAG_LENGTH_HAS_CC) { - fcd=nfd[1]; - } - } else { - fcd=norm32&_NORM_CC_MASK; - if(fcd!=0) { - /* use the code point cc value for both lead and trail cc's */ - fcd|=fcd>>_NORM_CC_SHIFT; /* assume that the cc is in bits 15..8 */ - } - } - - return (uint16_t)fcd; -} -#endif - -/* normalization exclusion sets --------------------------------------------- */ - -/* - * Normalization exclusion UnicodeSets are used for tailored normalization; - * see the comment near the beginning of this file. - * - * By specifying one or several sets of code points, - * those code points become inert for normalization. - */ - -static const UnicodeSet * -internalGetNXHangul(UErrorCode &errorCode) { - /* internal function, does not check for incoming U_FAILURE */ - UBool isCached; - - UMTX_CHECK(NULL, (UBool)(nxCache[UNORM_NX_HANGUL]!=NULL), isCached); - - if(!isCached) { - UnicodeSet *set=new UnicodeSet(0xac00, 0xd7a3); - if(set==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - // Compact the set for caching. - set->compact(); - - umtx_lock(NULL); - if(nxCache[UNORM_NX_HANGUL]==NULL) { - nxCache[UNORM_NX_HANGUL]=set; - set=NULL; - ucln_common_registerCleanup(UCLN_COMMON_UNORM, unorm_cleanup); - } - umtx_unlock(NULL); - - delete set; - } - - return nxCache[UNORM_NX_HANGUL]; -} - -/* unorm.cpp 1.116 had and used -static const UnicodeSet * -internalGetNXFromPattern(int32_t options, const char *pattern, UErrorCode &errorCode) { - ... 
-} -*/ - -/* get and set an exclusion set from a serialized UnicodeSet */ -static const UnicodeSet * -internalGetSerializedNX(int32_t options, int32_t nxIndex, UErrorCode &errorCode) { - /* internal function, does not check for incoming U_FAILURE */ - UBool isCached; - - UMTX_CHECK(NULL, (UBool)(nxCache[options]!=NULL), isCached); - - if( !isCached && - canonStartSets!=NULL && - canonStartSets[nxIndex]!=0 && canonStartSets[nxIndex+1]>canonStartSets[nxIndex] - ) { - USerializedSet sset; - UnicodeSet *set; - UChar32 start, end; - int32_t i; - - if( !uset_getSerializedSet( - &sset, - canonStartSets+canonStartSets[nxIndex], - canonStartSets[nxIndex+1]-canonStartSets[nxIndex]) - ) { - errorCode=U_INVALID_FORMAT_ERROR; - return NULL; - } - - /* turn the serialized set into a UnicodeSet */ - set=new UnicodeSet(); - if(set==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - for(i=0; uset_getSerializedRange(&sset, i, &start, &end); ++i) { - set->add(start, end); - } - // Compact the set for caching. 
- set->compact(); - - umtx_lock(NULL); - if(nxCache[options]==NULL) { - nxCache[options]=set; - set=NULL; - ucln_common_registerCleanup(UCLN_COMMON_UNORM, unorm_cleanup); - } - umtx_unlock(NULL); - - delete set; - } - - return nxCache[options]; -} - -static const UnicodeSet * -internalGetNXCJKCompat(UErrorCode &errorCode) { - /* build a set from [[:Ideographic:]&[:NFD_QC=No:]]=[CJK Ideographs]&[has canonical decomposition] */ - return internalGetSerializedNX( - UNORM_NX_CJK_COMPAT, - _NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET, - errorCode); -} - -static const UnicodeSet * -internalGetNXUnicode(uint32_t options, UErrorCode &errorCode) { - /* internal function, does not check for incoming U_FAILURE */ - int32_t nxIndex; - - options&=_NORM_OPTIONS_UNICODE_MASK; - switch(options) { - case 0: - return NULL; - case UNORM_UNICODE_3_2: - /* [:^Age=3.2:] */ - nxIndex=_NORM_SET_INDEX_NX_UNICODE32_OFFSET; - break; - default: - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - /* build a set with all code points that were not designated by the specified Unicode version */ - return internalGetSerializedNX(options, nxIndex, errorCode); -} - -/* Get a decomposition exclusion set. The data must be loaded. 
*/ -static const UnicodeSet * -internalGetNX(int32_t options, UErrorCode &errorCode) { - options&=_NORM_OPTIONS_SETS_MASK; - - UBool isCached; - - UMTX_CHECK(NULL, (UBool)(nxCache[options]!=NULL), isCached); - - if(!isCached) { - /* return basic sets */ - if(options==UNORM_NX_HANGUL) { - return internalGetNXHangul(errorCode); - } - if(options==UNORM_NX_CJK_COMPAT) { - return internalGetNXCJKCompat(errorCode); - } - if((options&_NORM_OPTIONS_UNICODE_MASK)!=0 && (options&_NORM_OPTIONS_NX_MASK)==0) { - return internalGetNXUnicode(options, errorCode); - } - - /* build a set from multiple subsets */ - UnicodeSet *set; - const UnicodeSet *other; - - set=new UnicodeSet(); - if(set==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - if((options&UNORM_NX_HANGUL)!=0 && NULL!=(other=internalGetNXHangul(errorCode))) { - set->addAll(*other); - } - if((options&UNORM_NX_CJK_COMPAT)!=0 && NULL!=(other=internalGetNXCJKCompat(errorCode))) { - set->addAll(*other); - } - if((options&_NORM_OPTIONS_UNICODE_MASK)!=0 && NULL!=(other=internalGetNXUnicode(options, errorCode))) { - set->addAll(*other); - } - - if(U_FAILURE(errorCode)) { - delete set; - return NULL; - } - // Compact the set for caching. 
- set->compact(); - - umtx_lock(NULL); - if(nxCache[options]==NULL) { - nxCache[options]=set; - set=NULL; - ucln_common_registerCleanup(UCLN_COMMON_UNORM, unorm_cleanup); - } - umtx_unlock(NULL); - - delete set; - } - - return nxCache[options]; -} - -static inline const UnicodeSet * -getNX(int32_t options, UErrorCode &errorCode) { - if(U_FAILURE(errorCode) || (options&=_NORM_OPTIONS_SETS_MASK)==0) { - /* incoming failure, or no decomposition exclusions requested */ - return NULL; - } else { - return internalGetNX(options, errorCode); - } -} - -U_CFUNC const UnicodeSet * -unorm_getNX(int32_t options, UErrorCode *pErrorCode) { - return getNX(options, *pErrorCode); -} - -static inline UBool -nx_contains(const UnicodeSet *nx, UChar32 c) { - return nx!=NULL && nx->contains(c); -} - -static inline UBool -nx_contains(const UnicodeSet *nx, UChar c, UChar c2) { - return nx!=NULL && nx->contains(c2==0 ? c : U16_GET_SUPPLEMENTARY(c, c2)); -} - -/* other normalization primitives ------------------------------------------- */ - -/* get the canonical or compatibility decomposition for one character */ -static inline const UChar * -_decompose(uint32_t norm32, uint32_t qcMask, int32_t &length, - uint8_t &cc, uint8_t &trailCC) { - const UChar *p=(const UChar *)_getExtraData(norm32); - length=*p++; - - if((norm32&qcMask&_NORM_QC_NFKD)!=0 && length>=0x100) { - /* use compatibility decomposition, skip canonical data */ - p+=((length>>7)&1)+(length&_NORM_DECOMP_LENGTH_MASK); - length>>=8; - } - - if(length&_NORM_DECOMP_FLAG_LENGTH_HAS_CC) { - /* get the lead and trail cc's */ - UChar bothCCs=*p++; - cc=(uint8_t)(bothCCs>>8); - trailCC=(uint8_t)bothCCs; - } else { - /* lead and trail cc's are both 0 */ - cc=trailCC=0; - } - - length&=_NORM_DECOMP_LENGTH_MASK; - return p; -} - -/* get the canonical decomposition for one character */ -static inline const UChar * -_decompose(uint32_t norm32, int32_t &length, - uint8_t &cc, uint8_t &trailCC) { - const UChar *p=(const UChar 
*)_getExtraData(norm32); - length=*p++; - - if(length&_NORM_DECOMP_FLAG_LENGTH_HAS_CC) { - /* get the lead and trail cc's */ - UChar bothCCs=*p++; - cc=(uint8_t)(bothCCs>>8); - trailCC=(uint8_t)bothCCs; - } else { - /* lead and trail cc's are both 0 */ - cc=trailCC=0; - } - - length&=_NORM_DECOMP_LENGTH_MASK; - return p; -} - -/** - * Get the canonical decomposition for one code point. - * @param c code point - * @param buffer out-only buffer for algorithmic decompositions of Hangul - * @param length out-only, takes the length of the decomposition, if any - * @return pointer to decomposition, or 0 if none - * @internal - */ -U_CFUNC const UChar * -unorm_getCanonicalDecomposition(UChar32 c, UChar buffer[4], int32_t *pLength) { - uint32_t norm32; - - if(c0) { - buffer[2]=(UChar)(JAMO_T_BASE+c2); - *pLength=3; - } else { - *pLength=2; - } - - buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT); - buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT); - return buffer; - } else { - /* normal decomposition */ - uint8_t cc, trailCC; - return _decompose(norm32, *pLength, cc, trailCC); - } - } else { - return 0; - } -} - -/* - * get the combining class of (c, c2)=*p++ - * before: p>_NORM_CC_SHIFT); -} - -/* - * read backwards and get norm32 - * return 0 if the character is >_NORM_CC_SHIFT); -} - -/* - * is this a safe boundary character for NF*D? 
- * (lead cc==0) - */ -static inline UBool -_isNFDSafe(uint32_t norm32, uint32_t ccOrQCMask, uint32_t decompQCMask) { - if((norm32&ccOrQCMask)==0) { - return TRUE; /* cc==0 and no decomposition: this is NF*D safe */ - } - - /* inspect its decomposition - maybe a Hangul but not a surrogate here */ - if(isNorm32Regular(norm32) && (norm32&decompQCMask)!=0) { - int32_t length; - uint8_t cc, trailCC; - - /* decomposes, get everything from the variable-length extra data */ - _decompose(norm32, decompQCMask, length, cc, trailCC); - return cc==0; - } else { - /* no decomposition (or Hangul), test the cc directly */ - return (norm32&_NORM_CC_MASK)==0; - } -} - -/* - * is this (or does its decomposition begin with) a "true starter"? - * (cc==0 and NF*C_YES) - */ -static inline UBool -_isTrueStarter(uint32_t norm32, uint32_t ccOrQCMask, uint32_t decompQCMask) { - if((norm32&ccOrQCMask)==0) { - return TRUE; /* this is a true starter (could be Hangul or Jamo L) */ - } - - /* inspect its decomposition - not a Hangul or a surrogate here */ - if((norm32&decompQCMask)!=0) { - const UChar *p; - int32_t length; - uint8_t cc, trailCC; - - /* decomposes, get everything from the variable-length extra data */ - p=_decompose(norm32, decompQCMask, length, cc, trailCC); - if(cc==0) { - uint32_t qcMask=ccOrQCMask&_NORM_QC_MASK; - - /* does it begin with NFC_YES? 
*/ - if((_getNorm32(p, qcMask)&qcMask)==0) { - /* yes, the decomposition begins with a true starter */ - return TRUE; - } - } - } - return FALSE; -} - -/* uchar.h */ -U_CAPI uint8_t U_EXPORT2 -u_getCombiningClass(UChar32 c) { -#if !UNORM_HARDCODE_DATA - UErrorCode errorCode=U_ZERO_ERROR; - if(_haveData(errorCode)) { -#endif - uint32_t norm32=UTRIE2_GET32(&normTrie, c); - return (uint8_t)(norm32>>_NORM_CC_SHIFT); -#if !UNORM_HARDCODE_DATA - } else { - return 0; - } -#endif -} - -U_CFUNC UBool U_EXPORT2 -unorm_internalIsFullCompositionExclusion(UChar32 c) { -#if UNORM_HARDCODE_DATA - if(auxTrie.index!=NULL) { -#else - UErrorCode errorCode=U_ZERO_ERROR; - if(_haveData(errorCode) && auxTrie.index!=NULL) { -#endif - uint16_t aux=UTRIE2_GET16(&auxTrie, c); - return (UBool)((aux&_NORM_AUX_COMP_EX_MASK)!=0); - } else { - return FALSE; - } -} +/* normalization properties ------------------------------------------------- */ U_CFUNC UBool U_EXPORT2 unorm_isCanonSafeStart(UChar32 c) { @@ -1113,76 +431,6 @@ u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *p } } -/* Is c an NF-skippable code point? See unormimp.h. 
*/ -U_CAPI UBool U_EXPORT2 -unorm_isNFSkippable(UChar32 c, UNormalizationMode mode) { - uint32_t norm32, mask; - uint16_t aux; - -#if !UNORM_HARDCODE_DATA - UErrorCode errorCode=U_ZERO_ERROR; - if(!_haveData(errorCode)) { - return FALSE; - } -#endif - - /* handle trivial cases; set the comparison mask for the normal ones */ - switch(mode) { - case UNORM_NONE: - return TRUE; - case UNORM_NFD: - mask=_NORM_CC_MASK|_NORM_QC_NFD; - break; - case UNORM_NFKD: - mask=_NORM_CC_MASK|_NORM_QC_NFKD; - break; - case UNORM_NFC: - /* case UNORM_FCC: */ - mask=_NORM_CC_MASK|_NORM_COMBINES_ANY|(_NORM_QC_NFC&_NORM_QC_ANY_NO); - break; - case UNORM_NFKC: - mask=_NORM_CC_MASK|_NORM_COMBINES_ANY|(_NORM_QC_NFKC&_NORM_QC_ANY_NO); - break; - case UNORM_FCD: - /* FCD: skippable if lead cc==0 and trail cc<=1 */ - return fcdTrie.index!=NULL && UTRIE2_GET16(&fcdTrie, c)<=1; - default: - return FALSE; - } - - /* check conditions (a)..(e), see unormimp.h */ - norm32=UTRIE2_GET32(&normTrie, c); - if((norm32&mask)!=0) { - return FALSE; /* fails (a)..(e), not skippable */ - } - - if(modeadd(sa->set, c); sa->add(sa->set, c+1); } - sa->add(sa->set, HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */ -} - -U_CFUNC UNormalizationCheckResult U_EXPORT2 -unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { - static const uint32_t qcMask[UNORM_MODE_COUNT]={ - 0, 0, _NORM_QC_NFD, _NORM_QC_NFKD, _NORM_QC_NFC, _NORM_QC_NFKC - }; - - uint32_t norm32; - -#if !UNORM_HARDCODE_DATA - UErrorCode errorCode=U_ZERO_ERROR; - if(!_haveData(errorCode)) { - return UNORM_YES; - } -#endif - - norm32=UTRIE2_GET32(&normTrie, c); - norm32&=qcMask[mode]; - - if(norm32==0) { - return UNORM_YES; - } else if(norm32&_NORM_QC_ANY_NO) { - return UNORM_NO; - } else /* _NORM_QC_ANY_MAYBE */ { - return UNORM_MAYBE; - } -} - -U_CFUNC uint16_t U_EXPORT2 -unorm_getFCD16FromCodePoint(UChar32 c) { -#if !UNORM_HARDCODE_DATA - UErrorCode errorCode; - errorCode=U_ZERO_ERROR; -#endif - - if( -#if 
!UNORM_HARDCODE_DATA - !_haveData(errorCode) || -#endif - fcdTrie.index==NULL - ) { - return 0; - } - return UTRIE2_GET16(&fcdTrie, c); -} - -/* reorder UTF-16 in-place -------------------------------------------------- */ - -/* - * simpler, single-character version of _mergeOrdered() - - * bubble-insert one single code point into the preceding string - * which is already canonically ordered - * (c, c2) may or may not yet have been inserted at [current..p[ - * - * it must be p=current+lengthof(c, c2) i.e. p=current+(c2==0 ? 1 : 2) - * - * before: [start..current[ is already ordered, and - * [current..p[ may or may not hold (c, c2) but - * must be exactly the same length as (c, c2) - * after: [start..p[ is ordered - * - * returns the trailing combining class - */ -static uint8_t -_insertOrdered(const UChar *start, UChar *current, UChar *p, - UChar c, UChar c2, uint8_t cc) { - const UChar *pBack, *pPreBack; - UChar *r; - uint8_t prevCC, trailCC=cc; - - if(start=prevCC */ - pPreBack=pBack=current; - prevCC=_getPrevCC(start, pPreBack); - if(cc=prevCC) { - break; - } - pBack=pPreBack; - } - - /* - * this is where we are right now with all these pointers: - * [start..pPreBack[ 0..? 
code points that we can ignore - * [pPreBack..pBack[ 0..1 code points with prevCC<=cc - * [pBack..current[ 0..n code points with >cc, move up to insert (c, c2) - * [current..p[ 1 code point (c, c2) with cc - */ - - /* move the code units in between up */ - r=p; - do { - *--r=*--current; - } while(pBack!=current); - } - } - - /* insert (c, c2) */ - *current=c; - if(c2!=0) { - *(current+1)=c2; - } - - /* we know the cc of the last code point */ - return trailCC; -} - -/* - * merge two UTF-16 string parts together - * to canonically order (order by combining classes) their concatenation - * - * the two strings may already be adjacent, so that the merging is done in-place - * if the two strings are not adjacent, then the buffer holding the first one - * must be large enough - * the second string may or may not be ordered in itself - * - * before: [start..current[ is already ordered, and - * [next..limit[ may be ordered in itself, but - * is not in relation to [start..current[ - * after: [start..current+(limit-next)[ is ordered - * - * the algorithm is a simple bubble-sort that takes the characters from *next++ - * and inserts them in correct combining class order into the preceding part - * of the string - * - * since this function is called much less often than the single-code point - * _insertOrdered(), it just uses that for easier maintenance - * (see file version from before 2001aug31 for a more optimized version) - * - * returns the trailing combining class - */ -static uint8_t -_mergeOrdered(UChar *start, UChar *current, - const UChar *next, const UChar *limit, UBool isOrdered=TRUE) { - UChar *r; - UChar c, c2; - uint8_t cc, trailCC=0; - UBool adjacent; - - adjacent= current==next; - - if(start!=current || !isOrdered) { - while(next0) || destCapacity==0) - ) { - uint32_t norm32, qcMask; - UChar32 minNoMaybe; - int32_t length; - - /* initialize */ - if(!compat) { - minNoMaybe=(UChar32)indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE]; - qcMask=_NORM_QC_NFD; - } else { - 
minNoMaybe=(UChar32)indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE]; - qcMask=_NORM_QC_NFKD; - } - - if(c0) { - dest[0]=(UChar)c; - } - return -1; - } - - /* data lookup */ - norm32=UTRIE2_GET32(&normTrie, c); - if((norm32&qcMask)==0) { - /* simple case: no decomposition */ - if(c<=0xffff) { - if(destCapacity>0) { - dest[0]=(UChar)c; - } - return -1; - } else { - if(destCapacity>=2) { - dest[0]=UTF16_LEAD(c); - dest[1]=UTF16_TRAIL(c); - } - return -2; - } - } else if(isNorm32HangulOrJamo(norm32)) { - /* Hangul syllable: decompose algorithmically */ - UChar c2; - - c-=HANGUL_BASE; - - c2=(UChar)(c%JAMO_T_COUNT); - c/=JAMO_T_COUNT; - if(c2>0) { - if(destCapacity>=3) { - dest[2]=(UChar)(JAMO_T_BASE+c2); - } - length=3; - } else { - length=2; - } - - if(destCapacity>=2) { - dest[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT); - dest[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT); - } - return length; - } else { - /* c decomposes, get everything from the variable-length extra data */ - const UChar *p, *limit; - uint8_t cc, trailCC; - - p=_decompose(norm32, qcMask, length, cc, trailCC); - if(length<=destCapacity) { - limit=p+length; - do { - *dest++=*p++; - } while(p=0) { - /* string with length */ - limit=src+srcLength; - } else /* srcLength==-1 */ { - /* zero-terminated string */ - limit=NULL; - } - - U_ALIGN_CODE(16); - - for(;;) { - /* count code units below the minimum or with irrelevant data for the quick check */ - prevSrc=src; - if(limit==NULL) { - while((c=*src)0) { - buffer[2]=(UChar)(JAMO_T_BASE+c2); - length=3; - } else { - length=2; - } - - buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT); - buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT); - } - } else { - if(isNorm32Regular(norm32)) { - c2=0; - length=1; - } else { - /* c is a lead surrogate, get the real norm32 */ - if(src!=limit && UTF_IS_SECOND_SURROGATE(c2=*src)) { - ++src; - length=2; - norm32=_getNorm32FromSurrogatePair(c, c2); - } else { - c2=0; - length=1; - norm32=0; - } - } - - /* get the decomposition and the lead and trail 
cc's */ - if(nx_contains(nx, c, c2)) { - /* excluded: norm32==0 */ - cc=trailCC=0; - p=NULL; - } else if((norm32&qcMask)==0) { - /* c does not decompose */ - cc=trailCC=(uint8_t)(norm32>>_NORM_CC_SHIFT); - p=NULL; - } else { - /* c decomposes, get everything from the variable-length extra data */ - p=_decompose(norm32, qcMask, length, cc, trailCC); - if(length==1) { - /* fastpath a single code unit from decomposition */ - c=*p; - c2=0; - p=NULL; - } - } - } - - /* append the decomposition to the destination buffer, assume length>0 */ - if((destIndex+length)<=destCapacity) { - UChar *reorderSplit=dest+destIndex; - if(p==NULL) { - /* fastpath: single code point */ - if(cc!=0 && cc0); - } - } - } else { - /* buffer overflow */ - /* keep incrementing the destIndex for preflighting */ - destIndex+=length; - } - - prevCC=trailCC; - if(prevCC==0) { - reorderStartIndex=destIndex; - } - } - - outTrailCC=prevCC; - return destIndex; -} - -U_CAPI int32_t U_EXPORT2 -unorm_decompose(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UBool compat, int32_t options, - UErrorCode *pErrorCode) { - const UnicodeSet *nx; - int32_t destIndex; - uint8_t trailCC; - - if(!_haveData(*pErrorCode)) { - return 0; - } - - nx=getNX(options, *pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - destIndex=_decompose(dest, destCapacity, - src, srcLength, - compat, nx, - trailCC); - - return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode); -} - -/* make NFC & NFKC ---------------------------------------------------------- */ - -/* get the composition properties of the next character */ -static inline uint32_t -_getNextCombining(UChar *&p, const UChar *limit, - UChar &c, UChar &c2, - uint16_t &combiningIndex, uint8_t &cc, - const UnicodeSet *nx) { - uint32_t norm32, combineFlags; - - /* get properties */ - c=*p++; - norm32=_getNorm32(c); - - /* preset output values for most characters */ - c2=0; - combiningIndex=0; - cc=0; - - 
if((norm32&(_NORM_CC_MASK|_NORM_COMBINES_ANY))==0) { - return 0; - } else { - if(isNorm32Regular(norm32)) { - /* set cc etc. below */ - } else if(isNorm32HangulOrJamo(norm32)) { - /* a compatibility decomposition contained Jamos */ - combiningIndex=(uint16_t)(0xfff0|(norm32>>_NORM_EXTRA_SHIFT)); - return norm32&_NORM_COMBINES_ANY; - } else { - /* c is a lead surrogate, get the real norm32 */ - if(p!=limit && UTF_IS_SECOND_SURROGATE(c2=*p)) { - ++p; - norm32=_getNorm32FromSurrogatePair(c, c2); - } else { - c2=0; - return 0; - } - } - - if(nx_contains(nx, c, c2)) { - return 0; /* excluded: norm32==0 */ - } - - cc=(uint8_t)(norm32>>_NORM_CC_SHIFT); - - combineFlags=norm32&_NORM_COMBINES_ANY; - if(combineFlags!=0) { - combiningIndex=*(_getExtraData(norm32)-1); - } - return combineFlags; - } -} - -/* - * given a composition-result starter (c, c2) - which means its cc==0, - * it combines forward, it has extra data, its norm32!=0, - * it is not a Hangul or Jamo, - * get just its combineFwdIndex - * - * norm32(c) is special if and only if c2!=0 - */ -static inline uint16_t -_getCombiningIndexFromStarter(UChar c, UChar c2) { - uint32_t norm32; - - if(c2==0) { - norm32=_getNorm32(c); - } else { - norm32=_getNorm32FromSurrogatePair(c, c2); - } - return *(_getExtraData(norm32)-1); -} - -/* - * Find the recomposition result for - * a forward-combining character - * (specified with a pointer to its part of the combiningTable[]) - * and a backward-combining character - * (specified with its combineBackIndex). - * - * If these two characters combine, then set (value, value2) - * with the code unit(s) of the composition character. - * - * Return value: - * 0 do not combine - * 1 combine - * >1 combine, and the composition is a forward-combining starter - * - * See unormimp.h for a description of the composition table format. 
- */ -static inline uint16_t -_combine(const uint16_t *table, uint16_t combineBackIndex, - uint16_t &value, uint16_t &value2) { - uint16_t key; - - /* search in the starter's composition table */ - for(;;) { - key=*table++; - if(key>=combineBackIndex) { - break; - } - table+= *table&0x8000 ? 2 : 1; - } - - /* mask off bit 15, the last-entry-in-the-list flag */ - if((key&0x7fff)==combineBackIndex) { - /* found! combine! */ - value=*table; - - /* is the composition a starter that combines forward? */ - key=(uint16_t)((value&0x2000)+1); - - /* get the composition result code point from the variable-length result value */ - if(value&0x8000) { - if(value&0x4000) { - /* surrogate pair composition result */ - value=(uint16_t)((value&0x3ff)|0xd800); - value2=*(table+1); - } else { - /* BMP composition result U+2000..U+ffff */ - value=*(table+1); - value2=0; - } - } else { - /* BMP composition result U+0000..U+1fff */ - value&=0x1fff; - value2=0; - } - - return key; - } else { - /* not found */ - return 0; - } -} - -static inline UBool -_composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UChar *limit, - UBool compat, UChar *dest, const UnicodeSet *nx) { - if(isJamoVTNorm32JamoV(norm32)) { - /* c is a Jamo V, compose with previous Jamo L and following Jamo T */ - prev=(UChar)(prev-JAMO_L_BASE); - if(prev=uchars.getCapacity() && NULL==uchars.resize(2*uchars.getCapacity(), length)) { - return FALSE; - } - uchars[length++]=c; - } - UChar *getAppendBuffer(int32_t minCapacity, - int32_t desiredCapacityHint, - int32_t &resultCapacity) { - int32_t capacity=uchars.getCapacity(); - int32_t restCapacity=capacity-length; - if(minCapacity>restCapacity) { - int32_t newCapacity=capacity+desiredCapacityHint; - int32_t doubleCapacity=2*capacity; - if(newCapacityuchars.getCapacity()) { - int32_t newCapacity=length+2*len; - int32_t doubleCapacity=2*uchars.getCapacity(); - if(newCapacity uchars; - int32_t length; -}; - -/* - * recompose the characters in the buffer - 
* (which is in NFD - decomposed and canonically ordered), - * and return the trailing cc - * - * since for NFKC we may get Jamos in decompositions, we need to - * recompose those too - * - * note that recomposition never lengthens the text: - * any character consists of either one or two code units; - * a composition may contain at most one more code unit than the original starter, - * while the combining mark that is removed has at least one code unit - */ -static uint8_t -_recompose(UCharBuffer &buffer, int32_t options, const UnicodeSet *nx) { - UChar *p; - UChar *limit; - UChar *starter, *pRemove, *q, *r; - uint32_t combineFlags; - UChar c, c2; - uint16_t combineFwdIndex, combineBackIndex; - uint16_t result, value, value2; - uint8_t cc, prevCC; - UBool starterIsSupplementary; - - p=buffer.getAlias(); - limit=buffer.getLimit(); - starter=NULL; /* no starter */ - combineFwdIndex=0; /* will not be used until starter!=NULL - avoid compiler warnings */ - combineBackIndex=0; /* will always be set if combineFlags!=0 - avoid compiler warnings */ - value=value2=0; /* always set by _combine() before used - avoid compiler warnings */ - starterIsSupplementary=FALSE; /* will not be used until starter!=NULL - avoid compiler warnings */ - prevCC=0; - - for(;;) { - combineFlags=_getNextCombining(p, limit, c, c2, combineBackIndex, cc, nx); - if( - // this character combines backward and - (combineFlags&_NORM_COMBINES_BACK) && - // we have seen a starter that combines forward and - starter!=NULL && - // the backward-combining character is not blocked - (prevCC1) { - combineFwdIndex=_getCombiningIndexFromStarter((UChar)value, (UChar)value2); - } else { - starter=NULL; - } - - /* we combined; continue with looking for compositions */ - continue; - } - } - - /* no combination this time */ - prevCC=cc; - if(p==limit) { - return prevCC; - } - - /* if (c, c2) did not combine, then check if it is a starter */ - if(cc==0) { - /* found a new starter; combineFlags==0 if (c, c2) is excluded 
*/ - if(combineFlags&_NORM_COMBINES_FWD) { - /* it may combine with something, prepare for it */ - if(c2==0) { - starterIsSupplementary=FALSE; - starter=p-1; - } else { - starterIsSupplementary=TRUE; - starter=p-2; - } - combineFwdIndex=combineBackIndex; - } else { - /* it will not combine with anything */ - starter=NULL; - } - } else if(options&_NORM_OPTIONS_COMPOSE_CONTIGUOUS) { - /* FCC: no discontiguous compositions; any intervening character blocks */ - starter=NULL; - } - } -} - -/* decompose and recompose [prevStarter..src[ */ -static const UChar * -_composePart(UCharBuffer &buffer, - const UChar *prevStarter, const UChar *src, - uint8_t &prevCC, - int32_t options, const UnicodeSet *nx, - UErrorCode *pErrorCode) { - uint8_t trailCC; - UBool compat; - - compat=(UBool)((options&_NORM_OPTIONS_COMPAT)!=0); - - /* decompose [prevStarter..src[ */ - // TODO: change _decompose() to write to the UCharBuffer - int32_t capacity; - int32_t length=(int32_t)(src-prevStarter); - UChar *p=buffer.getAppendBuffer(length, 2*length, capacity); - if(p==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - length=_decompose(p, capacity, - prevStarter, (int32_t)(src-prevStarter), - compat, nx, - trailCC); - if(length>capacity) { - p=buffer.getAppendBuffer(length, 2*length, capacity); - if(p==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - length=_decompose(p, capacity, - prevStarter, (int32_t)(src-prevStarter), - compat, nx, - trailCC); - } - buffer.releaseAppendBuffer(length); - - /* recompose the decomposition */ - if(length>=2) { - prevCC=_recompose(buffer, options, nx); - } - - /* return with a pointer to the recomposition */ - return buffer; -} - -static int32_t -_compose(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - int32_t options, const UnicodeSet *nx, - UErrorCode *pErrorCode) { - UCharBuffer buffer; - const UChar *limit, *prevSrc, *prevStarter; - uint32_t norm32, ccOrQCMask, qcMask; - int32_t destIndex, 
reorderStartIndex, length; - UChar c, c2, minNoMaybe; - uint8_t cc, prevCC; - - if(options&_NORM_OPTIONS_COMPAT) { - minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE]; - qcMask=_NORM_QC_NFKC; - } else { - minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE]; - qcMask=_NORM_QC_NFC; - } - - /* initialize */ - - /* - * prevStarter points to the last character before the current one - * that is a "true" starter with cc==0 and quick check "yes". - * - * prevStarter will be used instead of looking for a true starter - * while incrementally decomposing [prevStarter..prevSrc[ - * in _composePart(). Having a good prevStarter allows to just decompose - * the entire [prevStarter..prevSrc[. - * - * When _composePart() backs out from prevSrc back to prevStarter, - * then it also backs out destIndex by the same amount. - * Therefore, at all times, the (prevSrc-prevStarter) source units - * must correspond 1:1 to destination units counted with destIndex, - * except for reordering. - * This is true for the qc "yes" characters copied in the fast loop, - * and for pure reordering. - * prevStarter must be set forward to src when this is not true: - * In _composePart() and after composing a Hangul syllable. - * - * This mechanism relies on the assumption that the decomposition of a true starter - * also begins with a true starter. gennorm/store.c checks for this. - */ - prevStarter=src; - - ccOrQCMask=_NORM_CC_MASK|qcMask; - destIndex=reorderStartIndex=0; - prevCC=0; - - /* avoid compiler warnings */ - norm32=0; - c=0; - - if(srcLength>=0) { - /* string with length */ - limit=src+srcLength; - } else /* srcLength==-1 */ { - /* zero-terminated string */ - limit=NULL; - } - - U_ALIGN_CODE(16); - - for(;;) { - /* count code units below the minimum or with irrelevant data for the quick check */ - prevSrc=src; - if(limit==NULL) { - while((c=*src)0 && - _composeHangul( - *(prevSrc-1), c, norm32, src, limit, (UBool)((options&_NORM_OPTIONS_COMPAT)!=0), - destIndex<=destCapacity ? 
dest+(destIndex-1) : 0, - nx) - ) { - prevStarter=src; - continue; - } - - /* the Jamo V/T did not compose into a Hangul syllable, just append to dest */ - c2=0; - length=1; - prevStarter=prevSrc; - } else { - if(isNorm32Regular(norm32)) { - c2=0; - length=1; - } else { - /* c is a lead surrogate, get the real norm32 */ - if(src!=limit && UTF_IS_SECOND_SURROGATE(c2=*src)) { - ++src; - length=2; - norm32=_getNorm32FromSurrogatePair(c, c2); - } else { - /* c is an unpaired lead surrogate, nothing to do */ - c2=0; - length=1; - norm32=0; - } - } - - /* we are looking at the character (c, c2) at [prevSrc..src[ */ - if(nx_contains(nx, c, c2)) { - /* excluded: norm32==0 */ - cc=0; - } else if((norm32&qcMask)==0) { - cc=(uint8_t)(norm32>>_NORM_CC_SHIFT); - } else { - const UChar *p; - uint32_t decompQCMask; - - /* - * find appropriate boundaries around this character, - * decompose the source text from between the boundaries, - * and recompose it - * - * this puts the intermediate text into the side buffer because - * it might be longer than the recomposition end result, - * or the destination buffer may be too short or missing - * - * note that destIndex may be adjusted backwards to account - * for source text that passed the quick check but needed to - * take part in the recomposition - */ - decompQCMask=(qcMask<<2)&0xf; /* decomposition quick check mask */ - - /* - * find the last true starter in [prevStarter..src[ - * it is either the decomposition of the current character (at prevSrc), - * or prevStarter - */ - if(_isTrueStarter(norm32, ccOrQCMask, decompQCMask)) { - prevStarter=prevSrc; - } else { - /* adjust destIndex: back out what had been copied with qc "yes" */ - destIndex-=(int32_t)(prevSrc-prevStarter); - } - - /* find the next true starter in [src..limit[ - modifies src to point to the next starter */ - src=_findNextStarter(src, limit, qcMask, decompQCMask, minNoMaybe); - - /* compose [prevStarter..src[ */ - buffer.setLength(0); - p=_composePart(buffer, /* 
output */ - prevStarter, src, - prevCC, /* output */ - options, nx, - pErrorCode); - - if(p==NULL) { - destIndex=0; /* an error occurred (out of memory) */ - break; - } - - /* append the recomposed buffer contents to the destination buffer */ - length=buffer.getLength(); - if((destIndex+length)<=destCapacity) { - while(length>0) { - dest[destIndex++]=*p++; - --length; - } - } else { - /* buffer overflow */ - /* keep incrementing the destIndex for preflighting */ - destIndex+=length; - } - - /* set the next starter */ - prevStarter=src; - - continue; - } - } - - /* append the single code point (c, c2) to the destination buffer */ - if((destIndex+length)<=destCapacity) { - if(cc!=0 && cc>_NORM_CC_SHIFT); - p=NULL; - } else { - /* c decomposes, get everything from the variable-length extra data */ - p=_decompose(norm32, length, cc, trailCC); - if(length==1) { - /* fastpath a single code unit from decomposition */ - c=*p; - c2=0; - p=NULL; - } - } - - /* append the decomposition to the destination buffer, assume length>0 */ - if((destIndex+length)<=destCapacity) { - UChar *reorderSplit=dest+destIndex; - if(p==NULL) { - /* fastpath: single code point */ - if(cc!=0 && cc0); - } - } - } else { - /* buffer overflow */ - /* keep incrementing the destIndex for preflighting */ - destIndex+=length; - } - - prevCC=trailCC; - if(prevCC==0) { - reorderStartIndex=destIndex; - } - } - - return prevCC; -} - -static int32_t -unorm_makeFCD(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const UnicodeSet *nx, - UErrorCode *pErrorCode) { - const UChar *limit, *prevSrc, *decompStart; - int32_t destIndex, length; - UChar c, c2; - uint16_t fcd16; - int16_t prevCC, cc; - - if(!_haveData(*pErrorCode)) { - return 0; - } - - /* initialize */ - decompStart=src; - destIndex=0; - prevCC=0; - - /* avoid compiler warnings */ - c=0; - fcd16=0; - - if(srcLength>=0) { - /* string with length */ - limit=src+srcLength; - } else /* srcLength==-1 */ { - /* zero-terminated 
string */ - limit=NULL; - } - - U_ALIGN_CODE(16); - - for(;;) { - /* skip a run of code units below the minimum or with irrelevant data for the FCD check */ - prevSrc=src; - if(limit==NULL) { - for(;;) { - c=*src; - if(c<_NORM_MIN_WITH_LEAD_CC) { - if(c==0) { - break; - } - prevCC=(int16_t)-c; - } else if((fcd16=_getFCD16(c))==0) { - prevCC=0; - } else { - break; - } - ++src; - } - } else { - for(;;) { - if(src==limit) { - break; - } else if((c=*src)<_NORM_MIN_WITH_LEAD_CC) { - prevCC=(int16_t)-c; - } else if((fcd16=_getFCD16(c))==0) { - prevCC=0; - } else { - break; - } - ++src; - } - } - - /* - * prevCC has values from the following ranges: - * 0..0xff - the previous trail combining class - * <0 - the negative value of the previous code unit; - * that code unit was <_NORM_MIN_WITH_LEAD_CC and its _getFCD16() - * was deferred so that average text is checked faster - */ - - /* copy these code units all at once */ - if(src!=prevSrc) { - length=(int32_t)(src-prevSrc); - if((destIndex+length)<=destCapacity) { - uprv_memcpy(dest+destIndex, prevSrc, length*U_SIZEOF_UCHAR); - } - destIndex+=length; - prevSrc=src; - - /* prevCC<0 is only possible from the above loop, i.e., only if prevSrc=0 - */ - - /* end of source reached? */ - if(limit==NULL ? 
c==0 : src==limit) { - break; - } - - /* set a pointer to after the last source position where prevCC==0 */ - if(prevCC==0) { - decompStart=prevSrc; - } - - /* c already contains *src and fcd16 is set for it, increment src */ - ++src; - - /* check one above-minimum, relevant code unit */ - if(UTF_IS_FIRST_SURROGATE(c)) { - /* c is a lead surrogate, get the real fcd16 */ - if(src!=limit && UTF_IS_SECOND_SURROGATE(c2=*src)) { - ++src; - fcd16=_getFCD16FromSurrogatePair(c, c2); - } else { - c2=0; - fcd16=0; - } - } else { - c2=0; - } - - /* we are looking at the character (c, c2) at [prevSrc..src[ */ - if(nx_contains(nx, c, c2)) { - fcd16=0; /* excluded: fcd16==0 */ - } - - /* check the combining order, get the lead cc */ - cc=(int16_t)(fcd16>>8); - if(cc==0 || cc>=prevCC) { - /* the order is ok */ - if(cc==0) { - decompStart=prevSrc; - } - prevCC=(int16_t)(fcd16&0xff); - - /* just append (c, c2) */ - length= c2==0 ? 1 : 2; - if((destIndex+length)<=destCapacity) { - dest[destIndex++]=c; - if(c2!=0) { - dest[destIndex++]=c2; - } - } else { - destIndex+=length; - } - } else { - /* - * back out the part of the source that we copied already but - * is now going to be decomposed; - * prevSrc is set to after what was copied - */ - destIndex-=(int32_t)(prevSrc-decompStart); - - /* - * find the part of the source that needs to be decomposed; - * to be safe and simple, decompose to before the next character with lead cc==0 - */ - src=_findSafeFCD(src, limit, fcd16); - - /* - * the source text does not fulfill the conditions for FCD; - * decompose and reorder a limited piece of the text - */ - prevCC=_decomposeFCD(decompStart, src, - dest, destIndex, destCapacity, - nx); - decompStart=src; - } - } - - return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode); + sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */ } /* quick check functions ---------------------------------------------------- */ -static UBool 
-unorm_checkFCD(const UChar *src, int32_t srcLength, const UnicodeSet *nx) { - const UChar *limit; - UChar c, c2; - uint16_t fcd16; - int16_t prevCC, cc; - - /* initialize */ - prevCC=0; - - if(srcLength>=0) { - /* string with length */ - limit=src+srcLength; - } else /* srcLength==-1 */ { - /* zero-terminated string */ - limit=NULL; - } - - U_ALIGN_CODE(16); - - for(;;) { - /* skip a run of code units below the minimum or with irrelevant data for the FCD check */ - if(limit==NULL) { - for(;;) { - c=*src++; - if(c<_NORM_MIN_WITH_LEAD_CC) { - if(c==0) { - return TRUE; - } - /* - * delay _getFCD16(c) for any character <_NORM_MIN_WITH_LEAD_CC - * because chances are good that the next one will have - * a leading cc of 0; - * _getFCD16(-prevCC) is later called when necessary - - * -c fits into int16_t because it is <_NORM_MIN_WITH_LEAD_CC==0x300 - */ - prevCC=(int16_t)-c; - } else if((fcd16=_getFCD16(c))==0) { - prevCC=0; - } else { - break; - } - } - } else { - for(;;) { - if(src==limit) { - return TRUE; - } else if((c=*src++)<_NORM_MIN_WITH_LEAD_CC) { - prevCC=(int16_t)-c; - } else if((fcd16=_getFCD16(c))==0) { - prevCC=0; - } else { - break; - } - } - } - - /* check one above-minimum, relevant code unit */ - if(UTF_IS_FIRST_SURROGATE(c)) { - /* c is a lead surrogate, get the real fcd16 */ - if(src!=limit && UTF_IS_SECOND_SURROGATE(c2=*src)) { - ++src; - fcd16=_getFCD16FromSurrogatePair(c, c2); - } else { - c2=0; - fcd16=0; - } - } else { - c2=0; - } - - if(nx_contains(nx, c, c2)) { - prevCC=0; /* excluded: fcd16==0 */ - continue; - } - - /* - * prevCC has values from the following ranges: - * 0..0xff - the previous trail combining class - * <0 - the negative value of the previous code unit; - * that code unit was <_NORM_MIN_WITH_LEAD_CC and its _getFCD16() - * was deferred so that average text is checked faster - */ - - /* check the combining order */ - cc=(int16_t)(fcd16>>8); - if(cc!=0) { - if(prevCC<0) { - /* the previous character was <_NORM_MIN_WITH_LEAD_CC, we 
need to get its trail cc */ - if(!nx_contains(nx, (UChar32)-prevCC)) { - prevCC=(int16_t)(_getFCD16((UChar)-prevCC)&0xff); - } else { - prevCC=0; /* excluded: fcd16==0 */ - } - } - - if(cc=0) { - /* string with length */ - limit=src+srcLength; - } else /* srcLength==-1 */ { - /* zero-terminated string */ - limit=NULL; - } - - U_ALIGN_CODE(16); - - for(;;) { - /* skip a run of code units below the minimum or with irrelevant data for the quick check */ - if(limit==NULL) { - for(;;) { - c=*src++; - if(c=minNoMaybe && ((norm32=_getNorm32(c))&ccOrQCMask)!=0) { - break; - } - prevCC=0; - } - } - - /* check one above-minimum, relevant code unit */ - if(U16_IS_LEAD(c)) { - /* c is a lead surrogate, get the real norm32 */ - if(src!=limit && U16_IS_TRAIL(c2=*src)) { - ++src; - norm32=_getNorm32FromSurrogatePair(c, c2); - } else { - c2=0; - norm32=0; - } - } else { - c2=0; - } - - if(nx_contains(nx, c, c2)) { - /* excluded: norm32==0 */ - norm32=0; - } - - /* check the combining order */ - cc=(uint8_t)(norm32>>_NORM_CC_SHIFT); - if(cc!=0 && cc0 && srcLength<=destCapacity) { - uprv_memcpy(dest, src, srcLength*U_SIZEOF_UCHAR); - } - destLength=srcLength; - break; - default: - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); -} - -/** - * Internal API for normalizing. - * Does not check for bad input. 
- * @internal - */ -U_CAPI int32_t U_EXPORT2 -unorm_internalNormalize(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode) { - const UnicodeSet *nx; - - if(!_haveData(*pErrorCode)) { - return 0; - } - - nx=getNX(options, *pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* reset options bits that should only be set inside unorm_internalNormalizeWithNX() */ - options&=~(_NORM_OPTIONS_SETS_MASK|_NORM_OPTIONS_COMPAT|_NORM_OPTIONS_COMPOSE_CONTIGUOUS); - - return unorm_internalNormalizeWithNX(dest, destCapacity, - src, srcLength, - mode, options, nx, - pErrorCode); -} - /** Public API for normalizing. */ U_CAPI int32_t U_EXPORT2 unorm_normalize(const UChar *src, int32_t srcLength, UNormalizationMode mode, int32_t options, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { - /* check argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + if(options&UNORM_UNICODE_3_2) { + FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); + return unorm2_normalize((const UNormalizer2 *)&fn2, + src, srcLength, dest, destCapacity, pErrorCode); + } else { + return unorm2_normalize((const UNormalizer2 *)n2, + src, srcLength, dest, destCapacity, pErrorCode); } - - if( destCapacity<0 || (dest==NULL && destCapacity>0) || - src==NULL || srcLength<-1 - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* check for overlapping src and destination */ - if( dest!=NULL && - ((src>=dest && src<(dest+destCapacity)) || - (srcLength>0 && dest>=src && dest<(src+srcLength))) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - return unorm_internalNormalize(dest, destCapacity, - src, srcLength, - mode, options, - pErrorCode); } /* iteration functions ------------------------------------------------------ */ -/* - * These iteration functions 
are the core implementations of the - * Normalizer class iteration API. - * They read from a UCharIterator into their own buffer - * and normalize into the Normalizer iteration buffer. - * Normalizer itself then iterates over its buffer until that needs to be - * filled again. - */ - -/* - * ### TODO: - * Now that UCharIterator.next/previous return (int32_t)-1 not (UChar)0xffff - * if iteration bounds are reached, - * try to not call hasNext/hasPrevious and instead check for >=0. - */ - -/* backward iteration ------------------------------------------------------- */ - -/* - * read backwards and get norm32 - * return 0 if the character is 0) || + src==NULL + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } - /* initialize */ - stackBuffer=buffer; - startIndex=bufferCapacity; /* fill the buffer from the end backwards */ + if(pNeededToNormalize!=NULL) { + *pNeededToNormalize=FALSE; + } + if(!(forward ? src->hasNext(src) : src->hasPrevious(src))) { + return u_terminateUChars(dest, destCapacity, 0, pErrorCode); + } - while(src.hasPrevious(&src)) { - isBoundary=isPrevBoundary(src, minC, mask, c, c2); - - /* always write this character to the front of the buffer */ - /* make sure there is enough space in the buffer */ - if(startIndex < (c2==0 ? 
1 : 2)) { - int32_t bufferLength=bufferCapacity; - - if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferCapacity, bufferLength)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - src.move(&src, 0, UITER_START); - return 0; + UnicodeString buffer; + UChar32 c; + if(forward) { + /* get one character and ignore its properties */ + buffer.append(uiter_next32(src)); + /* get all following characters until we see a boundary */ + while((c=uiter_next32(src))>=0) { + if(n2->hasBoundaryBefore(c)) { + /* back out the latest movement to stop at the boundary */ + src->move(src, -U16_LENGTH(c), UITER_CURRENT); + break; + } else { + buffer.append(c); } - - /* move the current buffer contents up */ - uprv_memmove(buffer+(bufferCapacity-bufferLength), buffer, bufferLength*U_SIZEOF_UCHAR); - startIndex+=bufferCapacity-bufferLength; } - - buffer[--startIndex]=c; - if(c2!=0) { - buffer[--startIndex]=c2; - } - - /* stop if this just-copied character is a boundary */ - if(isBoundary) { - break; + } else { + while((c=uiter_previous32(src))>=0) { + /* always write this character to the front of the buffer */ + buffer.insert(0, c); + /* stop if this just-copied character is a boundary */ + if(n2->hasBoundaryBefore(c)) { + break; + } } } - /* return the length of the buffer contents */ - return bufferCapacity-startIndex; + UnicodeString destString(dest, 0, destCapacity); + if(buffer.length()>0 && doNormalize) { + n2->normalize(buffer, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode); + if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) { + *pNeededToNormalize= destString!=buffer; + } + return destString.length(); + } else { + /* just copy the source characters */ + return buffer.extract(dest, destCapacity, *pErrorCode); + } } U_CAPI int32_t U_EXPORT2 @@ -3550,251 +598,11 @@ unorm_previous(UCharIterator *src, UNormalizationMode mode, int32_t options, UBool doNormalize, UBool *pNeededToNormalize, UErrorCode *pErrorCode) { - UChar stackBuffer[100]; - 
UChar *buffer=NULL; - IsPrevBoundaryFn *isPreviousBoundary=NULL; - uint32_t mask=0; - int32_t startIndex=0, bufferLength=0, bufferCapacity=0, destLength=0; - int32_t c=0, c2=0; - UChar minC=0; - - /* check argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( destCapacity<0 || (dest==NULL && destCapacity>0) || - src==NULL - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(!_haveData(*pErrorCode)) { - return 0; - } - - if(pNeededToNormalize!=NULL) { - *pNeededToNormalize=FALSE; - } - - switch(mode) { - case UNORM_FCD: - if(fcdTrie.index==NULL) { - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - /* fall through to NFD */ - case UNORM_NFD: - isPreviousBoundary=_isPrevNFDSafe; - minC=_NORM_MIN_WITH_LEAD_CC; - mask=_NORM_CC_MASK|_NORM_QC_NFD; - break; - case UNORM_NFKD: - isPreviousBoundary=_isPrevNFDSafe; - minC=_NORM_MIN_WITH_LEAD_CC; - mask=_NORM_CC_MASK|_NORM_QC_NFKD; - break; - case UNORM_NFC: - isPreviousBoundary=_isPrevTrueStarter; - minC=(UChar)indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE]; - mask=_NORM_CC_MASK|_NORM_QC_NFC; - break; - case UNORM_NFKC: - isPreviousBoundary=_isPrevTrueStarter; - minC=(UChar)indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE]; - mask=_NORM_CC_MASK|_NORM_QC_NFKC; - break; - case UNORM_NONE: - destLength=0; - if((c=src->previous(src))>=0) { - destLength=1; - if(UTF_IS_TRAIL(c) && (c2=src->previous(src))>=0) { - if(UTF_IS_LEAD(c2)) { - if(destCapacity>=2) { - dest[1]=(UChar)c; /* trail surrogate */ - destLength=2; - } - c=c2; /* lead surrogate to be written below */ - } else { - src->move(src, 1, UITER_CURRENT); - } - } - - if(destCapacity>0) { - dest[0]=(UChar)c; - } - } - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); - default: - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - buffer=stackBuffer; - bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR); - bufferLength=_findPreviousIterationBoundary(*src, - isPreviousBoundary, minC, mask, - buffer, 
bufferCapacity, - startIndex, - pErrorCode); - if(bufferLength>0) { - if(doNormalize) { - destLength=unorm_internalNormalize(dest, destCapacity, - buffer+startIndex, bufferLength, - mode, options, - pErrorCode); - if(pNeededToNormalize!=0 && U_SUCCESS(*pErrorCode)) { - *pNeededToNormalize= - (UBool)(destLength!=bufferLength || - 0!=uprv_memcmp(dest, buffer+startIndex, destLength*U_SIZEOF_UCHAR)); - } - } else { - /* just copy the source characters */ - if(destCapacity>0) { - uprv_memcpy(dest, buffer+startIndex, uprv_min(bufferLength, destCapacity)*U_SIZEOF_UCHAR); - } - destLength=u_terminateUChars(dest, destCapacity, bufferLength, pErrorCode); - } - } else { - destLength=u_terminateUChars(dest, destCapacity, 0, pErrorCode); - } - - /* cleanup */ - if(buffer!=stackBuffer) { - uprv_free(buffer); - } - - return destLength; -} - -/* forward iteration -------------------------------------------------------- */ - -/* - * read forward and get norm32 - * return 0 if the character is 0) || - src==NULL - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(!_haveData(*pErrorCode)) { - return 0; - } - - if(pNeededToNormalize!=NULL) { - *pNeededToNormalize=FALSE; - } - - switch(mode) { - case UNORM_FCD: - if(fcdTrie.index==NULL) { - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - /* fall through to NFD */ - case UNORM_NFD: - isNextBoundary=_isNextNFDSafe; - minC=_NORM_MIN_WITH_LEAD_CC; - mask=_NORM_CC_MASK|_NORM_QC_NFD; - break; - case UNORM_NFKD: - isNextBoundary=_isNextNFDSafe; - minC=_NORM_MIN_WITH_LEAD_CC; - mask=_NORM_CC_MASK|_NORM_QC_NFKD; - break; - case UNORM_NFC: - isNextBoundary=_isNextTrueStarter; - minC=(UChar)indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE]; - mask=_NORM_CC_MASK|_NORM_QC_NFC; - break; - case UNORM_NFKC: - isNextBoundary=_isNextTrueStarter; - minC=(UChar)indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE]; - mask=_NORM_CC_MASK|_NORM_QC_NFKC; - break; - case UNORM_NONE: - destLength=0; - if((c=src->next(src))>=0) { - destLength=1; - if(UTF_IS_LEAD(c) && 
(c2=src->next(src))>=0) { - if(UTF_IS_TRAIL(c2)) { - if(destCapacity>=2) { - dest[1]=(UChar)c2; /* trail surrogate */ - destLength=2; - } - /* lead surrogate to be written below */ - } else { - src->move(src, -1, UITER_CURRENT); - } - } - - if(destCapacity>0) { - dest[0]=(UChar)c; - } - } - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); - default: - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - buffer=stackBuffer; - bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR); - bufferLength=_findNextIterationBoundary(*src, - isNextBoundary, minC, mask, - buffer, bufferCapacity, - pErrorCode); - if(bufferLength>0) { - if(doNormalize) { - destLength=unorm_internalNormalize(dest, destCapacity, - buffer, bufferLength, - mode, options, - pErrorCode); - if(pNeededToNormalize!=0 && U_SUCCESS(*pErrorCode)) { - *pNeededToNormalize= - (UBool)(destLength!=bufferLength || - 0!=uprv_memcmp(dest, buffer, destLength*U_SIZEOF_UCHAR)); - } - } else { - /* just copy the source characters */ - if(destCapacity>0) { - uprv_memcpy(dest, buffer, uprv_min(bufferLength, destCapacity)*U_SIZEOF_UCHAR); - } - destLength=u_terminateUChars(dest, destCapacity, bufferLength, pErrorCode); - } - } else { - destLength=u_terminateUChars(dest, destCapacity, 0, pErrorCode); - } - - /* cleanup */ - if(buffer!=stackBuffer) { - uprv_free(buffer); - } - - return destLength; + return unorm_iterate(src, TRUE, + dest, destCapacity, + mode, options, + doNormalize, pNeededToNormalize, + pErrorCode); } -/* - * ### TODO: check if NF*D and FCD iteration finds optimal boundaries - * and if not, how hard it would be to improve it. - * For example, see _findSafeFCD(). 
- */ - /* Concatenation of normalized strings -------------------------------------- */ U_CAPI int32_t U_EXPORT2 @@ -3934,18 +626,20 @@ unorm_concatenate(const UChar *left, int32_t leftLength, UChar *dest, int32_t destCapacity, UNormalizationMode mode, int32_t options, UErrorCode *pErrorCode) { - UChar stackBuffer[100]; - UChar *buffer; - int32_t bufferLength, bufferCapacity; - - UCharIterator iter; - int32_t leftBoundary, rightBoundary, destLength; - - /* check argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + const UnicodeSet *uni32; + if(options&UNORM_UNICODE_3_2) { + uni32=uniset_getUnicode32Instance(*pErrorCode); + } else { + uni32=NULL; // unused + } + FilteredNormalizer2 fn2(*n2, *uni32); + if(options&UNORM_UNICODE_3_2) { + n2=&fn2; + } + if(U_FAILURE(*pErrorCode)) { return 0; } - if( destCapacity<0 || (dest==NULL && destCapacity>0) || left==NULL || leftLength<-1 || right==NULL || rightLength<-1 @@ -3964,112 +658,15 @@ unorm_concatenate(const UChar *left, int32_t leftLength, } /* allow left==dest */ - - /* set up intermediate buffer */ - buffer=stackBuffer; - bufferCapacity=(int32_t)(sizeof(stackBuffer)/U_SIZEOF_UCHAR); - - /* - * Input: left[0..leftLength[ + right[0..rightLength[ - * - * Find normalization-safe boundaries leftBoundary and rightBoundary - * and copy the end parts together: - * buffer=left[leftBoundary..leftLength[ + right[0..rightBoundary[ - * - * dest=left[0..leftBoundary[ + - * normalize(buffer) + - * right[rightBoundary..rightLength[ - */ - - /* - * find a normalization boundary at the end of the left string - * and copy the end part into the buffer - */ - uiter_setString(&iter, left, leftLength); - iter.index=leftLength=iter.length; /* end of left string */ - - bufferLength=unorm_previous(&iter, buffer, bufferCapacity, - mode, options, - FALSE, NULL, - pErrorCode); - leftBoundary=iter.index; - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - 
*pErrorCode=U_ZERO_ERROR; - if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferLength, 0)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - /* dont need to cleanup here since - * u_growBufferFromStatic frees buffer if(buffer!=stackBuffer) - */ - return 0; - } - - /* just copy from the left string: we know the boundary already */ - uprv_memcpy(buffer, left+leftBoundary, bufferLength*U_SIZEOF_UCHAR); - } - - /* - * find a normalization boundary at the beginning of the right string - * and concatenate the beginning part to the buffer - */ - uiter_setString(&iter, right, rightLength); - rightLength=iter.length; /* in case it was -1 */ - - rightBoundary=unorm_next(&iter, buffer+bufferLength, bufferCapacity-bufferLength, - mode, options, - FALSE, NULL, - pErrorCode); - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - *pErrorCode=U_ZERO_ERROR; - if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, bufferLength+rightBoundary, 0)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - /* dont need to cleanup here since - * u_growBufferFromStatic frees buffer if(buffer!=stackBuffer) - */ - return 0; - } - - /* just copy from the right string: we know the boundary already */ - uprv_memcpy(buffer+bufferLength, right, rightBoundary*U_SIZEOF_UCHAR); - } - - bufferLength+=rightBoundary; - - /* copy left[0..leftBoundary[ to dest */ - if(left!=dest && leftBoundary>0 && destCapacity>0) { - uprv_memcpy(dest, left, uprv_min(leftBoundary, destCapacity)*U_SIZEOF_UCHAR); - } - destLength=leftBoundary; - - /* concatenate the normalization of the buffer to dest */ - if(destCapacity>destLength) { - destLength+=unorm_internalNormalize(dest+destLength, destCapacity-destLength, - buffer, bufferLength, - mode, options, - pErrorCode); + UnicodeString destString; + if(left==dest) { + destString.setTo(dest, leftLength, destCapacity); } else { - destLength+=unorm_internalNormalize(NULL, 0, - buffer, bufferLength, - mode, options, - pErrorCode); + destString.setTo(dest, 0, 
destCapacity); + destString.append(left, leftLength); } - /* - * only errorCode that is expected is a U_BUFFER_OVERFLOW_ERROR - * so we dont check for the error code here..just let it pass through - */ - /* concatenate right[rightBoundary..rightLength[ to dest */ - right+=rightBoundary; - rightLength-=rightBoundary; - if(rightLength>0 && destCapacity>destLength) { - uprv_memcpy(dest+destLength, right, uprv_min(rightLength, destCapacity-destLength)*U_SIZEOF_UCHAR); - } - destLength+=rightLength; - - /* cleanup */ - if(buffer!=stackBuffer) { - uprv_free(buffer); - } - - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); + return n2->append(destString, UnicodeString(rightLength<0, right, rightLength), *pErrorCode). + extract(dest, destCapacity, *pErrorCode); } #endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/common/unorm_props_data.c b/icu4c/source/common/unorm_props_data.c index 274ab6e1542..a7a7670090f 100644 --- a/icu4c/source/common/unorm_props_data.c +++ b/icu4c/source/common/unorm_props_data.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1999-2009, International Business Machines + * Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. 
* * file name: unorm_props_data.c @@ -14,6 +14,7 @@ static const int32_t indexes[_NORM_INDEX_TOP]={ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; +#if 0 static const uint16_t normTrie_index[2532]={ 0,8,0x10,0x18,0x28,0x30,0x38,0x40,0x48,0x50,0x58,0x60,0x68,0x70,0x77,0x7f, 0x87,0x8f,0x1f,0x27,0x94,0x9c,0xa3,0xab,0xb3,0xbb,0xc3,0xcb,0xd3,0xdb,0xe3,0xeb, @@ -835,6 +836,7 @@ static const UTrie2 normTrie={ 0x2810, NULL, 0, FALSE, FALSE, 0, NULL }; +#endif static const uint16_t extraData[16431]={ 0x1c2,0xff02,0x20,0x3b9,0xff01,0x3c5,0xff01,0x3cd,0xff01,0x3cb,0xff01,0x3c3,0xff01,0x61,0xff01,0xe6, @@ -1866,6 +1868,7 @@ static const uint16_t extraData[16431]={ 0x773,0x776,0x77c,0x782,0x788,0x78e,0x794,0x797,0x79a,0x79d,0x7a0,0x7a3,0x7a6,0x7a9,0x7ac }; +#if 0 static const uint16_t combiningTable[1967]={ 0x7af,0xc0,0x7b0,0xc1,0x7b1,0x20c2,0x7b2,0xc3,0x7b3,0x20c4,0x7b4,0x20c5,0x7b6,0x100,0x7b7,0x2102, 0x7b8,0x104,0x7b9,0x2226,0x7ba,0x1cd,0x7bd,0x200,0x7be,0x202,0x7d6,0x1e00,0x7d7,0x3ea0,0x87dd,0x1ea2, @@ -2416,6 +2419,7 @@ static const UTrie2 fcdTrie={ 0x1968, NULL, 0, FALSE, FALSE, 0, NULL }; +#endif static const uint16_t auxTrie_index[6664]={ 0x278,0x280,0x288,0x290,0x278,0x280,0x2a8,0x2b0,0x2b8,0x2c0,0x2c8,0x2d0,0x278,0x280,0x2d8,0x2e0, diff --git a/icu4c/source/common/unormcmp.cpp b/icu4c/source/common/unormcmp.cpp index 83ce3c2ba79..18b3763514a 100644 --- a/icu4c/source/common/unormcmp.cpp +++ b/icu4c/source/common/unormcmp.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2009, International Business Machines +* Copyright (C) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -22,12 +22,13 @@ #if !UCONFIG_NO_NORMALIZATION -#include "unicode/ustring.h" #include "unicode/unorm.h" -#include "unicode/uniset.h" -#include "unormimp.h" -#include "ucase.h" +#include "unicode/ustring.h" #include "cmemory.h" +#include "normalizer2impl.h" +#include "ucase.h" +#include "uprops.h" +#include "ustr_imp.h" U_NAMESPACE_USE @@ -134,12 +135,19 @@ struct CmpEquivLevel { }; typedef struct CmpEquivLevel CmpEquivLevel; +/** + * Internal option for unorm_cmpEquivFold() for decomposing. + * If not set, just do strcasecmp(). + */ +#define _COMPARE_EQUIV 0x80000 + /* internal function */ static int32_t unorm_cmpEquivFold(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode) { + const Normalizer2Impl *nfcImpl; const UCaseProps *csp; /* current-level start/limit - s1/s2 as current */ @@ -152,7 +160,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, /* stacks of previous-level start/current/limit */ CmpEquivLevel stack1[2], stack2[2]; - /* decomposition buffers for Hangul */ + /* buffers for algorithmic decompositions */ UChar decomp1[4], decomp2[4]; /* case folding buffers, only use current-level start/limit */ @@ -173,19 +181,19 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, */ /* normalization/properties data loaded? 
*/ - if( ((options&_COMPARE_EQUIV)!=0 && !unorm_haveData(pErrorCode)) || - U_FAILURE(*pErrorCode) - ) { - return 0; + if((options&_COMPARE_EQUIV)!=0) { + nfcImpl=Normalizer2Factory::getNFCImpl(*pErrorCode); + } else { + nfcImpl=NULL; } if((options&U_COMPARE_IGNORE_CASE)!=0) { csp=ucase_getSingleton(pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } } else { csp=NULL; } + if(U_FAILURE(*pErrorCode)) { + return 0; + } /* initialize */ start1=s1; @@ -404,7 +412,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, } if( level1<2 && (options&_COMPARE_EQUIV) && - 0!=(p=unorm_getCanonicalDecomposition((UChar32)cp1, decomp1, &length)) + 0!=(p=nfcImpl->getDecomposition((UChar32)cp1, decomp1, length)) ) { /* cp1 decomposes into p[length] */ if(U_IS_SURROGATE(c1)) { @@ -445,7 +453,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, } if( level2<2 && (options&_COMPARE_EQUIV) && - 0!=(p=unorm_getCanonicalDecomposition((UChar32)cp2, decomp2, &length)) + 0!=(p=nfcImpl->getDecomposition((UChar32)cp2, decomp2, length)) ) { /* cp2 decomposes into p[length] */ if(U_IS_SURROGATE(c2)) { @@ -534,14 +542,8 @@ unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode) { - MaybeStackArray fcd1, fcd2; - const UnicodeSet *nx; - UNormalizationMode mode; - int32_t normOptions; - int32_t result; - /* argument checking */ - if(pErrorCode==0 || U_FAILURE(*pErrorCode)) { + if(U_FAILURE(*pErrorCode)) { return 0; } if(s1==0 || length1<-1 || s2==0 || length2<-1) { @@ -549,21 +551,9 @@ unorm_compare(const UChar *s1, int32_t length1, return 0; } - if(!unorm_haveData(pErrorCode)) { - return 0; - } - if(!uprv_haveProperties(pErrorCode)) { - return 0; - } - - normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT); - nx=unorm_getNX(normOptions, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - + UnicodeString fcd1, fcd2; + int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT); 
options|=_COMPARE_EQUIV; - result=0; /* * UAX #21 Case Mappings, as fixed for Unicode version 4 @@ -586,20 +576,30 @@ unorm_compare(const UChar *s1, int32_t length1, * are first decomposed or not, so an FCD check - a check only for * canonical order - is not sufficient. */ - if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) { - mode=UNORM_NFD; - options&=~UNORM_INPUT_IS_FCD; - } else { - mode=UNORM_FCD; - } - - if(!(options&UNORM_INPUT_IS_FCD)) { - int32_t _len1, _len2; - UBool isFCD1, isFCD2; + if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) { + const Normalizer2 *n2; + if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) { + n2=Normalizer2Factory::getNFDInstance(*pErrorCode); + } else { + n2=Normalizer2Factory::getFCDInstance(*pErrorCode); + } // check if s1 and/or s2 fulfill the FCD conditions - isFCD1= UNORM_YES==unorm_internalQuickCheck(s1, length1, mode, TRUE, nx, pErrorCode); - isFCD2= UNORM_YES==unorm_internalQuickCheck(s2, length2, mode, TRUE, nx, pErrorCode); + const UnicodeSet *uni32; + if(normOptions&UNORM_UNICODE_3_2) { + uni32=uniset_getUnicode32Instance(*pErrorCode); + } else { + uni32=NULL; // unused + } + FilteredNormalizer2 fn2(*n2, *uni32); + if(normOptions&UNORM_UNICODE_3_2) { + n2=&fn2; + } + + UnicodeString str1(length1<0, s1, length1); + UnicodeString str2(length2<0, s2, length2); + int32_t spanQCYes1=n2->spanQuickCheckYes(str1, *pErrorCode); + int32_t spanQCYes2=n2->spanQuickCheckYes(str2, *pErrorCode); if(U_FAILURE(*pErrorCode)) { return 0; } @@ -613,59 +613,27 @@ unorm_compare(const UChar *s1, int32_t length1, * Therefore, ICU 2.6 removes that optimization. 
*/ - if(!isFCD1) { - _len1=unorm_internalNormalizeWithNX(fcd1.getAlias(), fcd1.getCapacity(), - s1, length1, - mode, normOptions, nx, - pErrorCode); - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - if(fcd1.resize(_len1)==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return result; - } - - *pErrorCode=U_ZERO_ERROR; - _len1=unorm_internalNormalizeWithNX(fcd1.getAlias(), fcd1.getCapacity(), - s1, length1, - mode, normOptions, nx, - pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return result; - } - } - s1=fcd1.getAlias(); - length1=_len1; + if(spanQCYes1normalizeSecondAndAppend(fcd1, unnormalized, *pErrorCode); + s1=fcd1.getBuffer(); + length1=fcd1.length(); } - - if(!isFCD2) { - _len2=unorm_internalNormalizeWithNX(fcd2.getAlias(), fcd2.getCapacity(), - s2, length2, - mode, normOptions, nx, - pErrorCode); - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - if(fcd2.resize(_len2)==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return result; - } - - *pErrorCode=U_ZERO_ERROR; - _len2=unorm_internalNormalizeWithNX(fcd2.getAlias(), fcd2.getCapacity(), - s2, length2, - mode, normOptions, nx, - pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return result; - } - } - s2=fcd2.getAlias(); - length2=_len2; + if(spanQCYes2normalizeSecondAndAppend(fcd2, unnormalized, *pErrorCode); + s2=fcd2.getBuffer(); + length2=fcd2.length(); } } if(U_SUCCESS(*pErrorCode)) { - result=unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode); + return unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode); + } else { + return 0; } - return result; } #endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/common/unormimp.h b/icu4c/source/common/unormimp.h index 0ad49a77536..7658919d650 100644 --- a/icu4c/source/common/unormimp.h +++ b/icu4c/source/common/unormimp.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2009, International Business Machines +* Copyright (C) 2001-2010, International 
Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -163,25 +163,6 @@ enum { _NORM_DECOMP_LENGTH_MASK=0x7f }; -#endif /* #if !UCONFIG_NO_NORMALIZATION */ - -/* Korean Hangul and Jamo constants */ -enum { - JAMO_L_BASE=0x1100, /* "lead" jamo */ - JAMO_V_BASE=0x1161, /* "vowel" jamo */ - JAMO_T_BASE=0x11a7, /* "trail" jamo */ - - HANGUL_BASE=0xac00, - - JAMO_L_COUNT=19, - JAMO_V_COUNT=21, - JAMO_T_COUNT=28, - - HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT -}; - -#if !UCONFIG_NO_NORMALIZATION - /* Constants for options flags for normalization. @draft ICU 2.6 */ enum { /** Options bit 0, do not decompose Hangul syllables. @draft ICU 2.6 */ @@ -205,199 +186,6 @@ enum { U_CAPI UBool U_EXPORT2 unorm_haveData(UErrorCode *pErrorCode); -/** - * Internal API for normalizing. - * Does not check for bad input. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -unorm_internalNormalize(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode); - -#ifdef XP_CPLUSPLUS - -/** - * Internal API for normalizing. - * Does not check for bad input. - * Requires _haveData() to be true. 
- * @internal - */ -U_CFUNC int32_t -unorm_internalNormalizeWithNX(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UNormalizationMode mode, int32_t options, const U_NAMESPACE_QUALIFIER UnicodeSet *nx, - UErrorCode *pErrorCode); - -#endif - -/** - * internal API, used by normlzr.cpp - * @internal - */ -U_CAPI int32_t U_EXPORT2 -unorm_decompose(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UBool compat, int32_t options, - UErrorCode *pErrorCode); - -/** - * internal API, used by normlzr.cpp - * @internal - */ -U_CAPI int32_t U_EXPORT2 -unorm_compose(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UBool compat, int32_t options, - UErrorCode *pErrorCode); - -#ifdef XP_CPLUSPLUS - -/** - * internal API, used by unormcmp.cpp - * @internal - */ -U_CFUNC UNormalizationCheckResult -unorm_internalQuickCheck(const UChar *src, - int32_t srcLength, - UNormalizationMode mode, - UBool allowMaybe, - const U_NAMESPACE_QUALIFIER UnicodeSet *nx, - UErrorCode *pErrorCode); - -#endif - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ - -/** - * Internal option for unorm_cmpEquivFold() for decomposing. - * If not set, just do strcasecmp(). - * @internal - */ -#define _COMPARE_EQUIV 0x80000 - -#ifndef U_COMPARE_IGNORE_CASE -/* see also unorm.h */ -/** - * Option bit for unorm_compare: - * Perform case-insensitive comparison. - * @draft ICU 2.2 - */ -#define U_COMPARE_IGNORE_CASE 0x10000 -#endif - -/** - * Internal option for unorm_cmpEquivFold() for strncmp style. - * If set, checks for both string length and terminating NUL. - * @internal - */ -#define _STRNCMP_STYLE 0x1000 - -#if !UCONFIG_NO_NORMALIZATION - -/** - * Internal API to get the 16-bit FCD value (lccc + tccc) for c, - * for u_getIntPropertyValue(). - * @internal - */ -U_CFUNC uint16_t U_EXPORT2 -unorm_getFCD16FromCodePoint(UChar32 c); - -#ifdef XP_CPLUSPLUS - -/** - * Internal API, used by collation code. 
- * Get access to the internal FCD trie table to be able to perform - * incremental, per-code unit, FCD checks in collation. - * One pointer is sufficient because the trie index values are offset - * by the index size, so that the same pointer is used to access the trie data. - * Code points at fcdHighStart and above have a zero FCD value. - * @internal - */ -U_CAPI const uint16_t * U_EXPORT2 -unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode); - -/** - * Internal API, used by collation code. - * Get the FCD value for a code unit, with - * bits 15..8 lead combining class - * bits 7..0 trail combining class - * - * If c is a lead surrogate and the value is not 0, - * then some of c's associated supplementary code points have a non-zero FCD value. - * - * @internal - */ -static inline uint16_t -unorm_getFCD16(const uint16_t *fcdTrieIndex, UChar c) { - return fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)]; -} - -/** - * Internal API, used by collation code. - * Get the FCD value of the next code point (post-increment), with - * bits 15..8 lead combining class - * bits 7..0 trail combining class - * - * @internal - */ -static inline uint16_t -unorm_nextFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart, - const UChar *&s, const UChar *limit) { - UChar32 c=*s++; - uint16_t fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)]; - if(fcd!=0 && U16_IS_LEAD(c)) { - UChar c2; - if(s!=limit && U16_IS_TRAIL(c2=*s)) { - ++s; - c=U16_GET_SUPPLEMENTARY(c, c2); - if(c-skippable code point? See unormimp.h. - * @internal - */ -U_CAPI UBool U_EXPORT2 -unorm_isNFSkippable(UChar32 c, UNormalizationMode mode); - #ifdef XP_CPLUSPLUS /** @@ -484,13 +236,6 @@ unorm_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode); -/** - * Get the NF*_QC property for a code point, for u_getIntPropertyValue(). 
- * @internal - */ -U_CFUNC UNormalizationCheckResult U_EXPORT2 -unorm_getQuickCheck(UChar32 c, UNormalizationMode mode); - /** * Description of the format of unorm.icu version 2.3. * diff --git a/icu4c/source/common/uprops.c b/icu4c/source/common/uprops.cpp similarity index 86% rename from icu4c/source/common/uprops.c rename to icu4c/source/common/uprops.cpp index a64013fbea0..13a4264fd5d 100644 --- a/icu4c/source/common/uprops.c +++ b/icu4c/source/common/uprops.cpp @@ -1,11 +1,11 @@ /* ******************************************************************************* * -* Copyright (C) 2002-2009, International Business Machines +* Copyright (C) 2002-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* -* file name: uprops.h +* file name: uprops.cpp * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -26,6 +26,7 @@ #include "unicode/uscript.h" #include "unicode/ustring.h" #include "cstring.h" +#include "normalizer2impl.h" #include "ucln_cmn.h" #include "umutex.h" #include "unormimp.h" @@ -106,7 +107,7 @@ static const struct { { 1, U_MASK(UPROPS_DEPRECATED) }, { 1, U_MASK(UPROPS_DIACRITIC) }, { 1, U_MASK(UPROPS_EXTENDER) }, - { UPROPS_SRC_NORM, 0 }, /* UCHAR_FULL_COMPOSITION_EXCLUSION */ + { UPROPS_SRC_NFC, 0 }, /* UCHAR_FULL_COMPOSITION_EXCLUSION */ { 1, U_MASK(UPROPS_GRAPHEME_BASE) }, { 1, U_MASK(UPROPS_GRAPHEME_EXTEND) }, { 1, U_MASK(UPROPS_GRAPHEME_LINK) }, @@ -134,10 +135,10 @@ static const struct { { UPROPS_SRC_CASE, 0 }, /* UCHAR_CASE_SENSITIVE */ { 1, U_MASK(UPROPS_S_TERM) }, { 1, U_MASK(UPROPS_VARIATION_SELECTOR) }, - { UPROPS_SRC_NORM, 0 }, /* UCHAR_NFD_INERT */ - { UPROPS_SRC_NORM, 0 }, /* UCHAR_NFKD_INERT */ - { UPROPS_SRC_NORM, 0 }, /* UCHAR_NFC_INERT */ - { UPROPS_SRC_NORM, 0 }, /* UCHAR_NFKC_INERT */ + { UPROPS_SRC_NFC, 0 }, /* UCHAR_NFD_INERT */ + { UPROPS_SRC_NFKC, 0 }, /* UCHAR_NFKD_INERT */ + { UPROPS_SRC_NFC, 0 }, /* 
UCHAR_NFC_INERT */ + { UPROPS_SRC_NFKC, 0 }, /* UCHAR_NFKC_INERT */ { UPROPS_SRC_NORM, 0 }, /* UCHAR_SEGMENT_STARTER */ { 1, U_MASK(UPROPS_PATTERN_SYNTAX) }, { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE) }, @@ -152,7 +153,8 @@ static const struct { { UPROPS_SRC_CASE, 0 }, /* UCHAR_CHANGES_WHEN_UPPERCASED */ { UPROPS_SRC_CASE, 0 }, /* UCHAR_CHANGES_WHEN_TITLECASED */ { UPROPS_SRC_CASE_AND_NORM, 0 }, /* UCHAR_CHANGES_WHEN_CASEFOLDED */ - { UPROPS_SRC_CASE, 0 } /* UCHAR_CHANGES_WHEN_CASEMAPPED */ + { UPROPS_SRC_CASE, 0 }, /* UCHAR_CHANGES_WHEN_CASEMAPPED */ + { UPROPS_SRC_NFKC_CF, 0 } /* UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED */ }; U_CAPI UBool U_EXPORT2 @@ -173,18 +175,56 @@ u_hasBinaryProperty(UChar32 c, UProperty which) { #if !UCONFIG_NO_NORMALIZATION /* normalization properties from unorm.icu */ switch(which) { - case UCHAR_FULL_COMPOSITION_EXCLUSION: - return unorm_internalIsFullCompositionExclusion(c); - case UCHAR_NFD_INERT: - case UCHAR_NFKD_INERT: - case UCHAR_NFC_INERT: - case UCHAR_NFKC_INERT: - return unorm_isNFSkippable(c, (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD)); case UCHAR_SEGMENT_STARTER: return unorm_isCanonSafeStart(c); default: break; } +#endif + } else if(column==UPROPS_SRC_NFC || column==UPROPS_SRC_NFKC) { +#if !UCONFIG_NO_NORMALIZATION + UErrorCode errorCode=U_ZERO_ERROR; + switch(which) { + case UCHAR_FULL_COMPOSITION_EXCLUSION: { + // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. 
+ const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + return impl->isCompNo(impl->getNorm16(c)); + } + break; + } + default: { + // UCHAR_NF..._INERT properties + const Normalizer2 *norm2=Normalizer2Factory::getInstance( + (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); + if(U_SUCCESS(errorCode)) { + return norm2->isInert(c); + } + break; + } + } +#endif + } else if(column==UPROPS_SRC_NFKC_CF) { + // currently only for UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED +#if !UCONFIG_NO_NORMALIZATION + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode); + if(U_SUCCESS(errorCode)) { + UnicodeString src(c); + UnicodeString dest; + { + // The ReorderingBuffer must be in a block because its destructor + // needs to release dest's buffer before we look at its contents. + ReorderingBuffer buffer(*kcf, dest); + // Small destCapacity for NFKC_CF(c). + if(U_SUCCESS(errorCode) && buffer.init(5, errorCode)) { + const UChar *srcArray=src.getBuffer(); + kcf->compose(srcArray, srcArray+src.length(), FALSE, + TRUE, buffer, errorCode); + } + } + return U_SUCCESS(errorCode) && dest!=src; + } #endif } else if(column==UPROPS_SRC_BIDI) { /* bidi/shaping properties */ @@ -225,14 +265,16 @@ u_hasBinaryProperty(UChar32 c, UProperty which) { } else if(column==UPROPS_SRC_CASE_AND_NORM) { #if !UCONFIG_NO_NORMALIZATION UChar nfdBuffer[4]; - const UChar *nfd=NULL; + const UChar *nfd; int32_t nfdLength; - UErrorCode errorCode = U_ZERO_ERROR; + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_FAILURE(errorCode)) { + return FALSE; + } switch(which) { case UCHAR_CHANGES_WHEN_CASEFOLDED: - if(unorm_haveData(&errorCode)) { - nfd=unorm_getCanonicalDecomposition(c, nfdBuffer, &nfdLength); - } + nfd=nfcImpl->getDecomposition(c, nfdBuffer, nfdLength); if(nfd!=NULL) { /* c has a decomposition */ if(nfdLength==1) { @@ -274,6 
+316,32 @@ u_hasBinaryProperty(UChar32 c, UProperty which) { return FALSE; } +#if !UCONFIG_NO_NORMALIZATION + +U_CAPI uint8_t U_EXPORT2 +u_getCombiningClass(UChar32 c) { + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + return impl->getCC(impl->getNorm16(c)); + } else { + return 0; + } +} + +static uint16_t +getFCD16(UChar32 c) { + UErrorCode errorCode=U_ZERO_ERROR; + const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode); + if(U_SUCCESS(errorCode)) { + return UTRIE2_GET16(trie, c); + } else { + return 0; + } +} + +#endif + /* * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. @@ -311,11 +379,9 @@ u_getIntPropertyValue(UChar32 c, UProperty which) { return (int32_t)u_charDirection(c); case UCHAR_BLOCK: return (int32_t)ublock_getCode(c); - case UCHAR_CANONICAL_COMBINING_CLASS: #if !UCONFIG_NO_NORMALIZATION + case UCHAR_CANONICAL_COMBINING_CLASS: return u_getCombiningClass(c); -#else - return 0; #endif case UCHAR_DECOMPOSITION_TYPE: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK); @@ -352,9 +418,9 @@ u_getIntPropertyValue(UChar32 c, UProperty which) { case UCHAR_NFKC_QUICK_CHECK: return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); case UCHAR_LEAD_CANONICAL_COMBINING_CLASS: - return unorm_getFCD16FromCodePoint(c)>>8; + return getFCD16(c)>>8; case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS: - return unorm_getFCD16FromCodePoint(c)&0xff; + return getFCD16(c)&0xff; #endif case UCHAR_GRAPHEME_CLUSTER_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; @@ -462,12 +528,13 @@ uprops_getSource(UProperty which) { case UCHAR_CANONICAL_COMBINING_CLASS: case UCHAR_NFD_QUICK_CHECK: - case UCHAR_NFKD_QUICK_CHECK: case UCHAR_NFC_QUICK_CHECK: - case UCHAR_NFKC_QUICK_CHECK: case 
UCHAR_LEAD_CANONICAL_COMBINING_CLASS: case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS: - return UPROPS_SRC_NORM; + return UPROPS_SRC_NFC; + case UCHAR_NFKD_QUICK_CHECK: + case UCHAR_NFKC_QUICK_CHECK: + return UPROPS_SRC_NFKC; case UCHAR_BIDI_CLASS: case UCHAR_JOINING_GROUP: diff --git a/icu4c/source/common/uprops.h b/icu4c/source/common/uprops.h index d2ac7667e28..2ce8d288187 100644 --- a/icu4c/source/common/uprops.h +++ b/icu4c/source/common/uprops.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2002-2009, International Business Machines +* Copyright (C) 2002-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -332,6 +332,12 @@ enum UPropertySource { UPROPS_SRC_CHAR_AND_PROPSVEC, /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */ UPROPS_SRC_CASE_AND_NORM, + /** From normalizer2impl.cpp/nfc.nrm */ + UPROPS_SRC_NFC, + /** From normalizer2impl.cpp/nfkc.nrm */ + UPROPS_SRC_NFKC, + /** From normalizer2impl.cpp/nfkc_cf.nrm */ + UPROPS_SRC_NFKC_CF, /** One more than the highest UPropertySource (UPROPS_SRC_) constant. 
*/ UPROPS_SRC_COUNT }; @@ -390,4 +396,18 @@ uchar_swapNames(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode); +#ifdef XP_CPLUSPLUS + +U_NAMESPACE_BEGIN + +class UnicodeSet; + +// implemented in uniset_props.cpp +U_CFUNC UnicodeSet * +uniset_getUnicode32Instance(UErrorCode &errorCode); + +U_NAMESPACE_END + +#endif + #endif diff --git a/icu4c/source/common/ustr_imp.h b/icu4c/source/common/ustr_imp.h index 121f04aca1c..02a70909402 100644 --- a/icu4c/source/common/ustr_imp.h +++ b/icu4c/source/common/ustr_imp.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1999-2009, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ustr_imp.h @@ -25,6 +25,23 @@ typedef struct UBreakIterator UBreakIterator; #endif +#ifndef U_COMPARE_IGNORE_CASE +/* see also unorm.h */ +/** + * Option bit for unorm_compare: + * Perform case-insensitive comparison. + * @draft ICU 2.2 + */ +#define U_COMPARE_IGNORE_CASE 0x10000 +#endif + +/** + * Internal option for unorm_cmpEquivFold() for strncmp style. + * If set, checks for both string length and terminating NUL. + * @internal + */ +#define _STRNCMP_STYLE 0x1000 + /** * Compare two strings in code point order or code unit order. * Works in strcmp style (both lengths -1), diff --git a/icu4c/source/common/ustrcase.c b/icu4c/source/common/ustrcase.c index 06441ff266e..c7c02ea3908 100644 --- a/icu4c/source/common/ustrcase.c +++ b/icu4c/source/common/ustrcase.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2009, International Business Machines +* Copyright (C) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -25,7 +25,6 @@ #include "unicode/ubrk.h" #include "cmemory.h" #include "ucase.h" -#include "unormimp.h" #include "ustr_imp.h" /* string casing ------------------------------------------------------------ */ diff --git a/icu4c/source/common/utrie2.c b/icu4c/source/common/utrie2.cpp similarity index 94% rename from icu4c/source/common/utrie2.c rename to icu4c/source/common/utrie2.cpp index c45a9a397fd..cfbc9ecb8ba 100644 --- a/icu4c/source/common/utrie2.c +++ b/icu4c/source/common/utrie2.cpp @@ -1,11 +1,11 @@ /* ****************************************************************************** * -* Copyright (C) 2001-2009, International Business Machines +* Copyright (C) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** -* file name: utrie2.c +* file name: utrie2.cpp * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -423,7 +423,7 @@ utrie2_swap(const UDataSwapper *ds, trie.indexLength=ds->readUInt16(inTrie->indexLength); trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); - valueBits=trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK; + valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); dataLength=(int32_t)trie.shiftedDataLength<=codePointStart) { + codePoint=U_SENTINEL; + return 0; + } + uint16_t result; + UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); + return result; +} + +uint16_t ForwardUTrie2StringIterator::next16() { + codePointStart=codePointLimit; + if(codePointLimit==limit) { + codePoint=U_SENTINEL; + return 0; + } + uint16_t result; + UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); + return result; +} + +UTrie2 *UTrie2Singleton::getInstance(InstantiatorFn *instantiator, const void *context, + UErrorCode &errorCode) { + void *duplicate; + UTrie2 *instance=(UTrie2 
*)singleton.getInstance(instantiator, context, duplicate, errorCode); + utrie2_close((UTrie2 *)duplicate); + return instance; +} + +U_NAMESPACE_END diff --git a/icu4c/source/common/utrie2.h b/icu4c/source/common/utrie2.h index 8c7d9c93cb0..17606437daf 100644 --- a/icu4c/source/common/utrie2.h +++ b/icu4c/source/common/utrie2.h @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2001-2009, International Business Machines +* Copyright (C) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -605,8 +605,70 @@ utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie, */ #define UTRIE2_GET32_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), data32, c) +U_CDECL_END + +/* C++ convenience wrappers ------------------------------------------------- */ + +#ifdef XP_CPLUSPLUS + +#include "mutex.h" + +U_NAMESPACE_BEGIN + +// Use the Forward/Backward subclasses below. +class UTrie2StringIterator : public UMemory { +public: + UTrie2StringIterator(const UTrie2 *t, const UChar *p) : + trie(t), codePointStart(p), codePointLimit(p), codePoint(U_SENTINEL) {} + + const UTrie2 *trie; + const UChar *codePointStart, *codePointLimit; + UChar32 codePoint; +}; + +class BackwardUTrie2StringIterator : public UTrie2StringIterator { +public: + BackwardUTrie2StringIterator(const UTrie2 *t, const UChar *s, const UChar *p) : + UTrie2StringIterator(t, p), start(s) {} + + uint16_t previous16(); + + const UChar *start; +}; + +class ForwardUTrie2StringIterator : public UTrie2StringIterator { +public: + // Iteration limit l can be NULL. + // In that case, the caller must detect c==0 and stop. 
+ ForwardUTrie2StringIterator(const UTrie2 *t, const UChar *p, const UChar *l) : + UTrie2StringIterator(t, p), limit(l) {} + + uint16_t next16(); + + const UChar *limit; +}; + +class UTrie2Singleton { +public: + UTrie2Singleton(SimpleSingleton &s) : singleton(s) {} + void deleteInstance() { + utrie2_close((UTrie2 *)singleton.fInstance); + singleton.reset(); + } + UTrie2 *getInstance(InstantiatorFn *instantiator, const void *context, + UErrorCode &errorCode); +private: + SimpleSingleton &singleton; +}; + +U_NAMESPACE_END + +#endif + /* Internal definitions ----------------------------------------------------- */ +U_CDECL_BEGIN + /** Build-time trie structure. */ struct UNewTrie2; typedef struct UNewTrie2 UNewTrie2; diff --git a/icu4c/source/configure b/icu4c/source/configure index 6d5e49d2170..5baa118faa6 100755 --- a/icu4c/source/configure +++ b/icu4c/source/configure @@ -2,7 +2,7 @@ # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.63. # -# Copyright (c) 1999-2009, International Business Machines Corporation and others. All Rights Reserved. +# Copyright (c) 1999-2010, International Business Machines Corporation and others. All Rights Reserved. # # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, # 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. 
@@ -10583,7 +10583,7 @@ then fi # output the Makefiles -ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layout/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genuca/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/genctd/Makefile tools/gennames/Makefile tools/gentest/Makefile tools/gennorm/Makefile tools/genprops/Makefile tools/gencase/Makefile tools/genbidi/Makefile tools/genpname/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/normperf/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile common/unicode/platform.h" +ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layout/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile 
tools/genuca/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/genctd/Makefile tools/gennames/Makefile tools/gentest/Makefile tools/gennorm/Makefile tools/gennorm2/Makefile tools/genprops/Makefile tools/gencase/Makefile tools/genbidi/Makefile tools/genpname/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/normperf/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile common/unicode/platform.h" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure @@ -11210,6 +11210,7 @@ do "tools/gennames/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gennames/Makefile" ;; "tools/gentest/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gentest/Makefile" ;; "tools/gennorm/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gennorm/Makefile" ;; + "tools/gennorm2/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gennorm2/Makefile" ;; "tools/genprops/Makefile") CONFIG_FILES="$CONFIG_FILES tools/genprops/Makefile" ;; "tools/gencase/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gencase/Makefile" ;; "tools/genbidi/Makefile") CONFIG_FILES="$CONFIG_FILES tools/genbidi/Makefile" ;; diff --git a/icu4c/source/configure.in b/icu4c/source/configure.in index 39ca6fe7d62..f520cfb48f5 100644 --- a/icu4c/source/configure.in +++ b/icu4c/source/configure.in @@ -1,5 +1,5 @@ # 
-*-autoconf-*- -AC_COPYRIGHT([ Copyright (c) 1999-2009, International Business Machines Corporation and others. All Rights Reserved. ]) +AC_COPYRIGHT([ Copyright (c) 1999-2010, International Business Machines Corporation and others. All Rights Reserved. ]) # configure.in for ICU # Stephen F. Booth, heavily modified by Yves and others @@ -1223,6 +1223,7 @@ AC_CONFIG_FILES([icudefs.mk \ tools/gennames/Makefile \ tools/gentest/Makefile \ tools/gennorm/Makefile \ + tools/gennorm2/Makefile \ tools/genprops/Makefile \ tools/gencase/Makefile \ tools/genbidi/Makefile \ diff --git a/icu4c/source/data/Makefile.in b/icu4c/source/data/Makefile.in index 0b63a47b12f..cb32e42c70a 100644 --- a/icu4c/source/data/Makefile.in +++ b/icu4c/source/data/Makefile.in @@ -1,5 +1,5 @@ ## Makefile.in for ICU data -## Copyright (c) 1999-2009, International Business Machines Corporation and +## Copyright (c) 1999-2010, International Business Machines Corporation and ## others. All Rights Reserved. ## Source directory information @@ -223,7 +223,7 @@ package390: $(OUTTMPDIR)/icudata390.lst $(PKGDATA_LIST) ./icupkg.inc packagedata # 2005-may-05 Removed Unicode properties files (unorm.icu, uprops.icu, ucase.icu, ubidi.icu) # from data build. See Jitterbug 4497. 
(makedata.mak revision 1.117) # -DAT_FILES_SHORT=pnames.icu unames.icu cnvalias.icu coll/ucadata.icu coll/invuca.icu +DAT_FILES_SHORT=pnames.icu unames.icu cnvalias.icu coll/ucadata.icu coll/invuca.icu nfc.nrm nfkc.nrm nfkc_cf.nrm DAT_FILES=$(DAT_FILES_SHORT:%=$(BUILDDIR)/%) ## BRK files @@ -488,14 +488,17 @@ $(BUILDDIR)/pnames.icu: $(UNICODEDATADIR)/PropertyAliases.txt $(UNICODEDATADIR)/ $(INVOKE) $(TOOLBINDIR)/genpname -d $(BUILDDIR) # unorm.icu -$(BUILDDIR)/unorm.icu: $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/DerivedNormalizationProps.txt $(UNICODEDATADIR)/BidiMirroring.txt $(TOOLBINDIR)/gennorm$(TOOLEXEEXT) $(BUILDDIR)/$(ICUDT)pnames.icu $(BUILDDIR)/$(ICUDT)uprops.icu $(BUILDDIR)/$(ICUDT)ucase.icu - $(INVOKE) $(TOOLBINDIR)/gennorm -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(BUILDDIR) -u $(UNICODE_VERSION) +# ICU 4.4: $(BUILDDIR)/unorm.icu is now prebuilt, see below. +$(OUTTMPDIR)/unorm_props_data.c: $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/DerivedNormalizationProps.txt $(UNICODEDATADIR)/BidiMirroring.txt $(TOOLBINDIR)/gennorm$(TOOLEXEEXT) $(BUILDDIR)/$(ICUDT)pnames.icu $(BUILDDIR)/$(ICUDT)uprops.icu $(BUILDDIR)/$(ICUDT)ucase.icu $(INVOKE) $(TOOLBINDIR)/gennorm --csource -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(OUTTMPDIR) -u $(UNICODE_VERSION) +# unorm.icu used to be built like this: +# $(INVOKE) $(TOOLBINDIR)/gennorm -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(BUILDDIR) -u $(UNICODE_VERSION) + # ucadata.icu # used to depend on $(BUILDDIR)/$(ICUDT)unorm.icu $(BUILDDIR)/$(ICUDT)ucase.icu # see Jitterbug 4497 -$(COLBLDDIR)/ucadata.icu $(COLBLDDIR)/invuca.icu: $(UNICODEDATADIR)/FractionalUCA.txt $(TOOLBINDIR)/genuca$(TOOLEXEEXT) +$(COLBLDDIR)/ucadata.icu $(COLBLDDIR)/invuca.icu: $(UNICODEDATADIR)/FractionalUCA.txt $(TOOLBINDIR)/genuca$(TOOLEXEEXT) $(BUILDDIR)/$(ICUDT)nfc.nrm $(INVOKE) $(TOOLBINDIR)/genuca -s $(UNICODEDATADIR) -d $(COLBLDDIR) -i $(BUILDDIR) # unames.icu @@ -506,6 +509,13 @@ $(BUILDDIR)/unames.icu: $(UNICODEDATADIR)/UnicodeData.txt 
$(UNICODEDATADIR)/Name $(BUILDDIR)/cnvalias.icu: $(UCMSRCDIR)/convrtrs.txt $(TOOLBINDIR)/gencnval$(TOOLEXEEXT) $(INVOKE) $(TOOLBINDIR)/gencnval -d $(BUILDDIR) $(UCMSRCDIR)/convrtrs.txt +# Targets for prebuilt Unicode data +$(BUILDDIR)/unorm.icu: $(SRCDATADIR)/in/unorm.icu + $(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $< $@ + +$(BUILDDIR)/%.nrm: $(SRCDATADIR)/in/%.nrm + $(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $< $@ + #################################################### SPP # SPP FILES @@ -751,7 +761,7 @@ $(INDEX_RES_FILE): $(INDEX_FILE) $(TOOLBINDIR)/genrb$(TOOLEXEEXT) # They are not built by default but need to be built for ICU4J data and for getting the .c source files # when updating the Unicode data. # Changed in Makefile.in revision 1.147. See Jitterbug 4497. -uni-core-data: build-dir $(UNI_CORE_TARGET_DATA) +uni-core-data: build-dir $(UNI_CORE_TARGET_DATA) $(OUTTMPDIR)/unorm_props_data.c @echo Unicode .icu files built to $(BUILDDIR) @echo Unicode .c source files built to $(OUTTMPDIR) diff --git a/icu4c/source/data/in/nfc.nrm b/icu4c/source/data/in/nfc.nrm new file mode 100644 index 00000000000..5dd6333cb21 Binary files /dev/null and b/icu4c/source/data/in/nfc.nrm differ diff --git a/icu4c/source/data/in/nfkc.nrm b/icu4c/source/data/in/nfkc.nrm new file mode 100644 index 00000000000..eed8b38bcf8 Binary files /dev/null and b/icu4c/source/data/in/nfkc.nrm differ diff --git a/icu4c/source/data/in/nfkc_cf.nrm b/icu4c/source/data/in/nfkc_cf.nrm new file mode 100644 index 00000000000..43c21494e27 Binary files /dev/null and b/icu4c/source/data/in/nfkc_cf.nrm differ diff --git a/icu4c/source/data/in/unorm.icu b/icu4c/source/data/in/unorm.icu new file mode 100644 index 00000000000..5aa149e6666 Binary files /dev/null and b/icu4c/source/data/in/unorm.icu differ diff --git a/icu4c/source/data/makedata.mak b/icu4c/source/data/makedata.mak index e9c633312bf..b810499720b 100644 --- a/icu4c/source/data/makedata.mak +++ b/icu4c/source/data/makedata.mak @@ -1,5 
+1,5 @@ #********************************************************************** -#* Copyright (C) 1999-2009, International Business Machines Corporation +#* Copyright (C) 1999-2010, International Business Machines Corporation #* and others. All Rights Reserved. #********************************************************************** # nmake file for creating data files on win32 @@ -28,7 +28,7 @@ ICU_LIB_TARGET=$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll !MESSAGE ICU data make path is $(ICUMAKE) # Suffixes for data files -.SUFFIXES : .ucm .cnv .dll .dat .res .txt .c +.SUFFIXES : .nrm .icu .ucm .cnv .dll .dat .res .txt .c ICUOUT=$(ICUMAKE)\out @@ -474,8 +474,8 @@ ALL : GODATA "$(ICU_LIB_TARGET)" "$(TESTDATAOUT)\testdata.dat" # when updating the Unicode data. # Changed in makedata.mak revision 1.117. See Jitterbug 4497. # Command line: -# C:\svn\icuproj\icu\trunk\source\data>nmake -f makedata.mak ICUMAKE=C:\svn\icuproj\icu\trunk\source\data\ CFG=Debug uni-core-data -uni-core-data: GODATA "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\unorm.icu" +# C:\svn\icuproj\icu\trunk\source\data>nmake -f makedata.mak ICUMAKE=C:\svn\icuproj\icu\trunk\source\data\ CFG=x86\Debug uni-core-data +uni-core-data: GODATA "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\unorm.icu" "$(ICUTMP)\unorm_props_data.c" @echo Unicode .icu files built to "$(ICUBLD_PKG)" @echo Unicode .c source files built to "$(ICUTMP)" @@ -553,7 +553,7 @@ testdata.jar: GODATA "$(ICUOUT)\icu4j\testdata.jar" copy "$(ICUTMP)\$(ICUPKG).dat" "$(ICUOUT)\$(U_ICUDATA_NAME)$(U_ICUDATA_ENDIAN_SUFFIX).dat" -@erase "$(ICUTMP)\$(ICUPKG).dat" !ELSE -"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" "$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" $(CURR_RES_FILES) $(LANG_RES_FILES) $(REGION_RES_FILES) $(ZONE_RES_FILES) 
$(BRK_FILES) $(BRK_CTD_FILES) $(BRK_RES_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu" +"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\nfc.nrm" "$(ICUBLD_PKG)\nfkc.nrm" "$(ICUBLD_PKG)\nfkc_cf.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" "$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" $(CURR_RES_FILES) $(LANG_RES_FILES) $(REGION_RES_FILES) $(ZONE_RES_FILES) $(BRK_FILES) $(BRK_CTD_FILES) $(BRK_RES_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu" @echo Building icu data cd "$(ICUBLD_PKG)" "$(ICUPBIN)\pkgdata" $(COMMON_ICUDATA_ARGUMENTS) <<"$(ICUTMP)\icudata.lst" @@ -563,6 +563,9 @@ confusables.cfu $(ICUCOL)\ucadata.icu $(ICUCOL)\invuca.icu cnvalias.icu +nfc.nrm +nfkc.nrm +nfkc_cf.nrm $(CNV_FILES:.cnv =.cnv ) $(ALL_RES:.res =.res @@ -627,6 +630,7 @@ CLEAN : GODATA -@erase "*.exp" -@erase "*.icu" -@erase "*.lib" + -@erase "*.nrm" -@erase "*.res" -@erase "*.spp" -@erase "*.txt" @@ -878,9 +882,10 @@ res_index:table(nofallback) { @"$(ICUTOOLS)\gencase\$(CFG)\gencase" --csource -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUTMP)" # Targets for unorm.icu -"$(ICUBLD_PKG)\unorm.icu": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\gennorm\$(CFG)\gennorm.exe" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" +# ICU 4.4: "$(ICUBLD_PKG)\unorm.icu" is now prebuilt, see below. 
+"$(ICUTMP)\unorm_props_data.c": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\gennorm\$(CFG)\gennorm.exe" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" @echo Creating data file for Unicode Normalization - @"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUBLD_PKG)" + @rem @"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUBLD_PKG)" @"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" --csource -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUTMP)" # Targets for converters @@ -891,10 +896,23 @@ res_index:table(nofallback) { # Targets for ucadata.icu & invuca.icu # used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\unorm.icu" # see Jitterbug 4497 -"$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu": "$(ICUUNIDATA)\FractionalUCA.txt" "$(ICUTOOLS)\genuca\$(CFG)\genuca.exe" +"$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu": "$(ICUUNIDATA)\FractionalUCA.txt" "$(ICUTOOLS)\genuca\$(CFG)\genuca.exe" "$(ICUBLD_PKG)\nfc.nrm" @echo Creating UCA data files @"$(ICUTOOLS)\genuca\$(CFG)\genuca" -d "$(ICUBLD_PKG)\$(ICUCOL)" -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" +# Targets for prebuilt Unicode data +"$(ICUBLD_PKG)\unorm.icu": $(ICUSRCDATA_RELATIVE_PATH)\in\unorm.icu + "$(ICUPBIN)\icupkg" -tl $? $@ + +"$(ICUBLD_PKG)\nfc.nrm": $(ICUSRCDATA_RELATIVE_PATH)\in\nfc.nrm + "$(ICUPBIN)\icupkg" -tl $? $@ + +"$(ICUBLD_PKG)\nfkc.nrm": $(ICUSRCDATA_RELATIVE_PATH)\in\nfkc.nrm + "$(ICUPBIN)\icupkg" -tl $? $@ + +"$(ICUBLD_PKG)\nfkc_cf.nrm": $(ICUSRCDATA_RELATIVE_PATH)\in\nfkc_cf.nrm + "$(ICUPBIN)\icupkg" -tl $? $@ + # Stringprep .spp file generation. 
{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUSPREP)}.txt.spp: @echo Creating $@ @@ -924,6 +942,6 @@ $(MISC_SOURCE) $(RB_FILES) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FI # This used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\unorm.icu" # This data is now hard coded as a part of the library. # See Jitterbug 4497 for details. -$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" +$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\nfc.nrm" !ENDIF diff --git a/icu4c/source/data/unidata/norm2/nfc.txt b/icu4c/source/data/unidata/norm2/nfc.txt new file mode 100644 index 00000000000..99c398586d6 --- /dev/null +++ b/icu4c/source/data/unidata/norm2/nfc.txt @@ -0,0 +1,2319 @@ +# Copyright (C) 1999-2010, International Business Machines +# Corporation and others. All Rights Reserved. +# +# file name: nfc.txt +# +# machine-generated on: 2009-11-30 +# + +# Canonical_Combining_Class (ccc) values +0300..0314:230 +0315:232 +0316..0319:220 +031A:232 +031B:216 +031C..0320:220 +0321..0322:202 +0323..0326:220 +0327..0328:202 +0329..0333:220 +0334..0338:1 +0339..033C:220 +033D..0344:230 +0345:240 +0346:230 +0347..0349:220 +034A..034C:230 +034D..034E:220 +0350..0352:230 +0353..0356:220 +0357:230 +0358:232 +0359..035A:220 +035B:230 +035C:233 +035D..035E:234 +035F:233 +0360..0361:234 +0362:233 +0363..036F:230 +0483..0487:230 +0591:220 +0592..0595:230 +0596:220 +0597..0599:230 +059A:222 +059B:220 +059C..05A1:230 +05A2..05A7:220 +05A8..05A9:230 +05AA:220 +05AB..05AC:230 +05AD:222 +05AE:228 +05AF:230 +05B0:10 +05B1:11 +05B2:12 +05B3:13 +05B4:14 +05B5:15 +05B6:16 +05B7:17 +05B8:18 +05B9..05BA:19 +05BB:20 +05BC:21 +05BD:22 +05BF:23 +05C1:24 +05C2:25 +05C4:230 +05C5:220 +05C7:18 +0610..0617:230 +0618:30 +0619:31 +061A:32 +064B:27 +064C:28 +064D:29 +064E:30 +064F:31 +0650:32 +0651:33 +0652:34 +0653..0654:230 +0655..0656:220 +0657..065B:230 +065C:220 +065D..065E:230 +0670:35 
+06D6..06DC:230 +06DF..06E2:230 +06E3:220 +06E4:230 +06E7..06E8:230 +06EA:220 +06EB..06EC:230 +06ED:220 +0711:36 +0730:230 +0731:220 +0732..0733:230 +0734:220 +0735..0736:230 +0737..0739:220 +073A:230 +073B..073C:220 +073D:230 +073E:220 +073F..0741:230 +0742:220 +0743:230 +0744:220 +0745:230 +0746:220 +0747:230 +0748:220 +0749..074A:230 +07EB..07F1:230 +07F2:220 +07F3:230 +0816..0819:230 +081B..0823:230 +0825..0827:230 +0829..082D:230 +093C:7 +094D:9 +0951:230 +0952:220 +0953..0954:230 +09BC:7 +09CD:9 +0A3C:7 +0A4D:9 +0ABC:7 +0ACD:9 +0B3C:7 +0B4D:9 +0BCD:9 +0C4D:9 +0C55:84 +0C56:91 +0CBC:7 +0CCD:9 +0D4D:9 +0DCA:9 +0E38..0E39:103 +0E3A:9 +0E48..0E4B:107 +0EB8..0EB9:118 +0EC8..0ECB:122 +0F18..0F19:220 +0F35:220 +0F37:220 +0F39:216 +0F71:129 +0F72:130 +0F74:132 +0F7A..0F7D:130 +0F80:130 +0F82..0F83:230 +0F84:9 +0F86..0F87:230 +0FC6:220 +1037:7 +1039..103A:9 +108D:220 +135F:230 +1714:9 +1734:9 +17D2:9 +17DD:230 +18A9:228 +1939:222 +193A:230 +193B:220 +1A17:230 +1A18:220 +1A60:9 +1A75..1A7C:230 +1A7F:220 +1B34:7 +1B44:9 +1B6B:230 +1B6C:220 +1B6D..1B73:230 +1BAA:9 +1C37:7 +1CD0..1CD2:230 +1CD4:1 +1CD5..1CD9:220 +1CDA..1CDB:230 +1CDC..1CDF:220 +1CE0:230 +1CE2..1CE8:1 +1CED:220 +1DC0..1DC1:230 +1DC2:220 +1DC3..1DC9:230 +1DCA:220 +1DCB..1DCC:230 +1DCD:234 +1DCE:214 +1DCF:220 +1DD0:202 +1DD1..1DE6:230 +1DFD:220 +1DFE:230 +1DFF:220 +20D0..20D1:230 +20D2..20D3:1 +20D4..20D7:230 +20D8..20DA:1 +20DB..20DC:230 +20E1:230 +20E5..20E6:1 +20E7:230 +20E8:220 +20E9:230 +20EA..20EB:1 +20EC..20EF:220 +20F0:230 +2CEF..2CF1:230 +2DE0..2DFF:230 +302A:218 +302B:228 +302C:232 +302D:222 +302E..302F:224 +3099..309A:8 +A66F:230 +A67C..A67D:230 +A6F0..A6F1:230 +A806:9 +A8C4:9 +A8E0..A8F1:230 +A92B..A92D:220 +A953:9 +A9B3:7 +A9C0:9 +AAB0:230 +AAB2..AAB3:230 +AAB4:220 +AAB7..AAB8:230 +AABE..AABF:230 +AAC1:230 +ABED:9 +FB1E:26 +FE20..FE26:230 +101FD:220 +10A0D:220 +10A0F:230 +10A38:230 +10A39:1 +10A3A:220 +10A3F:9 +110B9:9 +110BA:7 +1D165..1D166:216 +1D167..1D169:1 +1D16D:226 +1D16E..1D172:216 
+1D17B..1D182:220 +1D185..1D189:230 +1D18A..1D18B:220 +1D1AA..1D1AD:230 +1D242..1D244:230 + +# Canonical decomposition mappings +00C0=0041 0300 +00C1=0041 0301 +00C2=0041 0302 +00C3=0041 0303 +00C4=0041 0308 +00C5=0041 030A +00C7=0043 0327 +00C8=0045 0300 +00C9=0045 0301 +00CA=0045 0302 +00CB=0045 0308 +00CC=0049 0300 +00CD=0049 0301 +00CE=0049 0302 +00CF=0049 0308 +00D1=004E 0303 +00D2=004F 0300 +00D3=004F 0301 +00D4=004F 0302 +00D5=004F 0303 +00D6=004F 0308 +00D9=0055 0300 +00DA=0055 0301 +00DB=0055 0302 +00DC=0055 0308 +00DD=0059 0301 +00E0=0061 0300 +00E1=0061 0301 +00E2=0061 0302 +00E3=0061 0303 +00E4=0061 0308 +00E5=0061 030A +00E7=0063 0327 +00E8=0065 0300 +00E9=0065 0301 +00EA=0065 0302 +00EB=0065 0308 +00EC=0069 0300 +00ED=0069 0301 +00EE=0069 0302 +00EF=0069 0308 +00F1=006E 0303 +00F2=006F 0300 +00F3=006F 0301 +00F4=006F 0302 +00F5=006F 0303 +00F6=006F 0308 +00F9=0075 0300 +00FA=0075 0301 +00FB=0075 0302 +00FC=0075 0308 +00FD=0079 0301 +00FF=0079 0308 +0100=0041 0304 +0101=0061 0304 +0102=0041 0306 +0103=0061 0306 +0104=0041 0328 +0105=0061 0328 +0106=0043 0301 +0107=0063 0301 +0108=0043 0302 +0109=0063 0302 +010A=0043 0307 +010B=0063 0307 +010C=0043 030C +010D=0063 030C +010E=0044 030C +010F=0064 030C +0112=0045 0304 +0113=0065 0304 +0114=0045 0306 +0115=0065 0306 +0116=0045 0307 +0117=0065 0307 +0118=0045 0328 +0119=0065 0328 +011A=0045 030C +011B=0065 030C +011C=0047 0302 +011D=0067 0302 +011E=0047 0306 +011F=0067 0306 +0120=0047 0307 +0121=0067 0307 +0122=0047 0327 +0123=0067 0327 +0124=0048 0302 +0125=0068 0302 +0128=0049 0303 +0129=0069 0303 +012A=0049 0304 +012B=0069 0304 +012C=0049 0306 +012D=0069 0306 +012E=0049 0328 +012F=0069 0328 +0130=0049 0307 +0134=004A 0302 +0135=006A 0302 +0136=004B 0327 +0137=006B 0327 +0139=004C 0301 +013A=006C 0301 +013B=004C 0327 +013C=006C 0327 +013D=004C 030C +013E=006C 030C +0143=004E 0301 +0144=006E 0301 +0145=004E 0327 +0146=006E 0327 +0147=004E 030C +0148=006E 030C +014C=004F 0304 +014D=006F 0304 +014E=004F 0306 
+014F=006F 0306 +0150=004F 030B +0151=006F 030B +0154=0052 0301 +0155=0072 0301 +0156=0052 0327 +0157=0072 0327 +0158=0052 030C +0159=0072 030C +015A=0053 0301 +015B=0073 0301 +015C=0053 0302 +015D=0073 0302 +015E=0053 0327 +015F=0073 0327 +0160=0053 030C +0161=0073 030C +0162=0054 0327 +0163=0074 0327 +0164=0054 030C +0165=0074 030C +0168=0055 0303 +0169=0075 0303 +016A=0055 0304 +016B=0075 0304 +016C=0055 0306 +016D=0075 0306 +016E=0055 030A +016F=0075 030A +0170=0055 030B +0171=0075 030B +0172=0055 0328 +0173=0075 0328 +0174=0057 0302 +0175=0077 0302 +0176=0059 0302 +0177=0079 0302 +0178=0059 0308 +0179=005A 0301 +017A=007A 0301 +017B=005A 0307 +017C=007A 0307 +017D=005A 030C +017E=007A 030C +01A0=004F 031B +01A1=006F 031B +01AF=0055 031B +01B0=0075 031B +01CD=0041 030C +01CE=0061 030C +01CF=0049 030C +01D0=0069 030C +01D1=004F 030C +01D2=006F 030C +01D3=0055 030C +01D4=0075 030C +01D5=00DC 0304 +01D6=00FC 0304 +01D7=00DC 0301 +01D8=00FC 0301 +01D9=00DC 030C +01DA=00FC 030C +01DB=00DC 0300 +01DC=00FC 0300 +01DE=00C4 0304 +01DF=00E4 0304 +01E0=0226 0304 +01E1=0227 0304 +01E2=00C6 0304 +01E3=00E6 0304 +01E6=0047 030C +01E7=0067 030C +01E8=004B 030C +01E9=006B 030C +01EA=004F 0328 +01EB=006F 0328 +01EC=01EA 0304 +01ED=01EB 0304 +01EE=01B7 030C +01EF=0292 030C +01F0=006A 030C +01F4=0047 0301 +01F5=0067 0301 +01F8=004E 0300 +01F9=006E 0300 +01FA=00C5 0301 +01FB=00E5 0301 +01FC=00C6 0301 +01FD=00E6 0301 +01FE=00D8 0301 +01FF=00F8 0301 +0200=0041 030F +0201=0061 030F +0202=0041 0311 +0203=0061 0311 +0204=0045 030F +0205=0065 030F +0206=0045 0311 +0207=0065 0311 +0208=0049 030F +0209=0069 030F +020A=0049 0311 +020B=0069 0311 +020C=004F 030F +020D=006F 030F +020E=004F 0311 +020F=006F 0311 +0210=0052 030F +0211=0072 030F +0212=0052 0311 +0213=0072 0311 +0214=0055 030F +0215=0075 030F +0216=0055 0311 +0217=0075 0311 +0218=0053 0326 +0219=0073 0326 +021A=0054 0326 +021B=0074 0326 +021E=0048 030C +021F=0068 030C +0226=0041 0307 +0227=0061 0307 +0228=0045 0327 +0229=0065 0327 
+022A=00D6 0304 +022B=00F6 0304 +022C=00D5 0304 +022D=00F5 0304 +022E=004F 0307 +022F=006F 0307 +0230=022E 0304 +0231=022F 0304 +0232=0059 0304 +0233=0079 0304 +0340>0300 +0341>0301 +0343>0313 +0344>0308 0301 +0374>02B9 +037E>003B +0385=00A8 0301 +0386=0391 0301 +0387>00B7 +0388=0395 0301 +0389=0397 0301 +038A=0399 0301 +038C=039F 0301 +038E=03A5 0301 +038F=03A9 0301 +0390=03CA 0301 +03AA=0399 0308 +03AB=03A5 0308 +03AC=03B1 0301 +03AD=03B5 0301 +03AE=03B7 0301 +03AF=03B9 0301 +03B0=03CB 0301 +03CA=03B9 0308 +03CB=03C5 0308 +03CC=03BF 0301 +03CD=03C5 0301 +03CE=03C9 0301 +03D3=03D2 0301 +03D4=03D2 0308 +0400=0415 0300 +0401=0415 0308 +0403=0413 0301 +0407=0406 0308 +040C=041A 0301 +040D=0418 0300 +040E=0423 0306 +0419=0418 0306 +0439=0438 0306 +0450=0435 0300 +0451=0435 0308 +0453=0433 0301 +0457=0456 0308 +045C=043A 0301 +045D=0438 0300 +045E=0443 0306 +0476=0474 030F +0477=0475 030F +04C1=0416 0306 +04C2=0436 0306 +04D0=0410 0306 +04D1=0430 0306 +04D2=0410 0308 +04D3=0430 0308 +04D6=0415 0306 +04D7=0435 0306 +04DA=04D8 0308 +04DB=04D9 0308 +04DC=0416 0308 +04DD=0436 0308 +04DE=0417 0308 +04DF=0437 0308 +04E2=0418 0304 +04E3=0438 0304 +04E4=0418 0308 +04E5=0438 0308 +04E6=041E 0308 +04E7=043E 0308 +04EA=04E8 0308 +04EB=04E9 0308 +04EC=042D 0308 +04ED=044D 0308 +04EE=0423 0304 +04EF=0443 0304 +04F0=0423 0308 +04F1=0443 0308 +04F2=0423 030B +04F3=0443 030B +04F4=0427 0308 +04F5=0447 0308 +04F8=042B 0308 +04F9=044B 0308 +0622=0627 0653 +0623=0627 0654 +0624=0648 0654 +0625=0627 0655 +0626=064A 0654 +06C0=06D5 0654 +06C2=06C1 0654 +06D3=06D2 0654 +0929=0928 093C +0931=0930 093C +0934=0933 093C +0958>0915 093C +0959>0916 093C +095A>0917 093C +095B>091C 093C +095C>0921 093C +095D>0922 093C +095E>092B 093C +095F>092F 093C +09CB=09C7 09BE +09CC=09C7 09D7 +09DC>09A1 09BC +09DD>09A2 09BC +09DF>09AF 09BC +0A33>0A32 0A3C +0A36>0A38 0A3C +0A59>0A16 0A3C +0A5A>0A17 0A3C +0A5B>0A1C 0A3C +0A5E>0A2B 0A3C +0B48=0B47 0B56 +0B4B=0B47 0B3E +0B4C=0B47 0B57 +0B5C>0B21 0B3C +0B5D>0B22 
0B3C +0B94=0B92 0BD7 +0BCA=0BC6 0BBE +0BCB=0BC7 0BBE +0BCC=0BC6 0BD7 +0C48=0C46 0C56 +0CC0=0CBF 0CD5 +0CC7=0CC6 0CD5 +0CC8=0CC6 0CD6 +0CCA=0CC6 0CC2 +0CCB=0CCA 0CD5 +0D4A=0D46 0D3E +0D4B=0D47 0D3E +0D4C=0D46 0D57 +0DDA=0DD9 0DCA +0DDC=0DD9 0DCF +0DDD=0DDC 0DCA +0DDE=0DD9 0DDF +0F43>0F42 0FB7 +0F4D>0F4C 0FB7 +0F52>0F51 0FB7 +0F57>0F56 0FB7 +0F5C>0F5B 0FB7 +0F69>0F40 0FB5 +0F73>0F71 0F72 +0F75>0F71 0F74 +0F76>0FB2 0F80 +0F78>0FB3 0F80 +0F81>0F71 0F80 +0F93>0F92 0FB7 +0F9D>0F9C 0FB7 +0FA2>0FA1 0FB7 +0FA7>0FA6 0FB7 +0FAC>0FAB 0FB7 +0FB9>0F90 0FB5 +1026=1025 102E +1B06=1B05 1B35 +1B08=1B07 1B35 +1B0A=1B09 1B35 +1B0C=1B0B 1B35 +1B0E=1B0D 1B35 +1B12=1B11 1B35 +1B3B=1B3A 1B35 +1B3D=1B3C 1B35 +1B40=1B3E 1B35 +1B41=1B3F 1B35 +1B43=1B42 1B35 +1E00=0041 0325 +1E01=0061 0325 +1E02=0042 0307 +1E03=0062 0307 +1E04=0042 0323 +1E05=0062 0323 +1E06=0042 0331 +1E07=0062 0331 +1E08=00C7 0301 +1E09=00E7 0301 +1E0A=0044 0307 +1E0B=0064 0307 +1E0C=0044 0323 +1E0D=0064 0323 +1E0E=0044 0331 +1E0F=0064 0331 +1E10=0044 0327 +1E11=0064 0327 +1E12=0044 032D +1E13=0064 032D +1E14=0112 0300 +1E15=0113 0300 +1E16=0112 0301 +1E17=0113 0301 +1E18=0045 032D +1E19=0065 032D +1E1A=0045 0330 +1E1B=0065 0330 +1E1C=0228 0306 +1E1D=0229 0306 +1E1E=0046 0307 +1E1F=0066 0307 +1E20=0047 0304 +1E21=0067 0304 +1E22=0048 0307 +1E23=0068 0307 +1E24=0048 0323 +1E25=0068 0323 +1E26=0048 0308 +1E27=0068 0308 +1E28=0048 0327 +1E29=0068 0327 +1E2A=0048 032E +1E2B=0068 032E +1E2C=0049 0330 +1E2D=0069 0330 +1E2E=00CF 0301 +1E2F=00EF 0301 +1E30=004B 0301 +1E31=006B 0301 +1E32=004B 0323 +1E33=006B 0323 +1E34=004B 0331 +1E35=006B 0331 +1E36=004C 0323 +1E37=006C 0323 +1E38=1E36 0304 +1E39=1E37 0304 +1E3A=004C 0331 +1E3B=006C 0331 +1E3C=004C 032D +1E3D=006C 032D +1E3E=004D 0301 +1E3F=006D 0301 +1E40=004D 0307 +1E41=006D 0307 +1E42=004D 0323 +1E43=006D 0323 +1E44=004E 0307 +1E45=006E 0307 +1E46=004E 0323 +1E47=006E 0323 +1E48=004E 0331 +1E49=006E 0331 +1E4A=004E 032D +1E4B=006E 032D +1E4C=00D5 0301 +1E4D=00F5 0301 +1E4E=00D5 
0308 +1E4F=00F5 0308 +1E50=014C 0300 +1E51=014D 0300 +1E52=014C 0301 +1E53=014D 0301 +1E54=0050 0301 +1E55=0070 0301 +1E56=0050 0307 +1E57=0070 0307 +1E58=0052 0307 +1E59=0072 0307 +1E5A=0052 0323 +1E5B=0072 0323 +1E5C=1E5A 0304 +1E5D=1E5B 0304 +1E5E=0052 0331 +1E5F=0072 0331 +1E60=0053 0307 +1E61=0073 0307 +1E62=0053 0323 +1E63=0073 0323 +1E64=015A 0307 +1E65=015B 0307 +1E66=0160 0307 +1E67=0161 0307 +1E68=1E62 0307 +1E69=1E63 0307 +1E6A=0054 0307 +1E6B=0074 0307 +1E6C=0054 0323 +1E6D=0074 0323 +1E6E=0054 0331 +1E6F=0074 0331 +1E70=0054 032D +1E71=0074 032D +1E72=0055 0324 +1E73=0075 0324 +1E74=0055 0330 +1E75=0075 0330 +1E76=0055 032D +1E77=0075 032D +1E78=0168 0301 +1E79=0169 0301 +1E7A=016A 0308 +1E7B=016B 0308 +1E7C=0056 0303 +1E7D=0076 0303 +1E7E=0056 0323 +1E7F=0076 0323 +1E80=0057 0300 +1E81=0077 0300 +1E82=0057 0301 +1E83=0077 0301 +1E84=0057 0308 +1E85=0077 0308 +1E86=0057 0307 +1E87=0077 0307 +1E88=0057 0323 +1E89=0077 0323 +1E8A=0058 0307 +1E8B=0078 0307 +1E8C=0058 0308 +1E8D=0078 0308 +1E8E=0059 0307 +1E8F=0079 0307 +1E90=005A 0302 +1E91=007A 0302 +1E92=005A 0323 +1E93=007A 0323 +1E94=005A 0331 +1E95=007A 0331 +1E96=0068 0331 +1E97=0074 0308 +1E98=0077 030A +1E99=0079 030A +1E9B=017F 0307 +1EA0=0041 0323 +1EA1=0061 0323 +1EA2=0041 0309 +1EA3=0061 0309 +1EA4=00C2 0301 +1EA5=00E2 0301 +1EA6=00C2 0300 +1EA7=00E2 0300 +1EA8=00C2 0309 +1EA9=00E2 0309 +1EAA=00C2 0303 +1EAB=00E2 0303 +1EAC=1EA0 0302 +1EAD=1EA1 0302 +1EAE=0102 0301 +1EAF=0103 0301 +1EB0=0102 0300 +1EB1=0103 0300 +1EB2=0102 0309 +1EB3=0103 0309 +1EB4=0102 0303 +1EB5=0103 0303 +1EB6=1EA0 0306 +1EB7=1EA1 0306 +1EB8=0045 0323 +1EB9=0065 0323 +1EBA=0045 0309 +1EBB=0065 0309 +1EBC=0045 0303 +1EBD=0065 0303 +1EBE=00CA 0301 +1EBF=00EA 0301 +1EC0=00CA 0300 +1EC1=00EA 0300 +1EC2=00CA 0309 +1EC3=00EA 0309 +1EC4=00CA 0303 +1EC5=00EA 0303 +1EC6=1EB8 0302 +1EC7=1EB9 0302 +1EC8=0049 0309 +1EC9=0069 0309 +1ECA=0049 0323 +1ECB=0069 0323 +1ECC=004F 0323 +1ECD=006F 0323 +1ECE=004F 0309 +1ECF=006F 0309 +1ED0=00D4 
0301 +1ED1=00F4 0301 +1ED2=00D4 0300 +1ED3=00F4 0300 +1ED4=00D4 0309 +1ED5=00F4 0309 +1ED6=00D4 0303 +1ED7=00F4 0303 +1ED8=1ECC 0302 +1ED9=1ECD 0302 +1EDA=01A0 0301 +1EDB=01A1 0301 +1EDC=01A0 0300 +1EDD=01A1 0300 +1EDE=01A0 0309 +1EDF=01A1 0309 +1EE0=01A0 0303 +1EE1=01A1 0303 +1EE2=01A0 0323 +1EE3=01A1 0323 +1EE4=0055 0323 +1EE5=0075 0323 +1EE6=0055 0309 +1EE7=0075 0309 +1EE8=01AF 0301 +1EE9=01B0 0301 +1EEA=01AF 0300 +1EEB=01B0 0300 +1EEC=01AF 0309 +1EED=01B0 0309 +1EEE=01AF 0303 +1EEF=01B0 0303 +1EF0=01AF 0323 +1EF1=01B0 0323 +1EF2=0059 0300 +1EF3=0079 0300 +1EF4=0059 0323 +1EF5=0079 0323 +1EF6=0059 0309 +1EF7=0079 0309 +1EF8=0059 0303 +1EF9=0079 0303 +1F00=03B1 0313 +1F01=03B1 0314 +1F02=1F00 0300 +1F03=1F01 0300 +1F04=1F00 0301 +1F05=1F01 0301 +1F06=1F00 0342 +1F07=1F01 0342 +1F08=0391 0313 +1F09=0391 0314 +1F0A=1F08 0300 +1F0B=1F09 0300 +1F0C=1F08 0301 +1F0D=1F09 0301 +1F0E=1F08 0342 +1F0F=1F09 0342 +1F10=03B5 0313 +1F11=03B5 0314 +1F12=1F10 0300 +1F13=1F11 0300 +1F14=1F10 0301 +1F15=1F11 0301 +1F18=0395 0313 +1F19=0395 0314 +1F1A=1F18 0300 +1F1B=1F19 0300 +1F1C=1F18 0301 +1F1D=1F19 0301 +1F20=03B7 0313 +1F21=03B7 0314 +1F22=1F20 0300 +1F23=1F21 0300 +1F24=1F20 0301 +1F25=1F21 0301 +1F26=1F20 0342 +1F27=1F21 0342 +1F28=0397 0313 +1F29=0397 0314 +1F2A=1F28 0300 +1F2B=1F29 0300 +1F2C=1F28 0301 +1F2D=1F29 0301 +1F2E=1F28 0342 +1F2F=1F29 0342 +1F30=03B9 0313 +1F31=03B9 0314 +1F32=1F30 0300 +1F33=1F31 0300 +1F34=1F30 0301 +1F35=1F31 0301 +1F36=1F30 0342 +1F37=1F31 0342 +1F38=0399 0313 +1F39=0399 0314 +1F3A=1F38 0300 +1F3B=1F39 0300 +1F3C=1F38 0301 +1F3D=1F39 0301 +1F3E=1F38 0342 +1F3F=1F39 0342 +1F40=03BF 0313 +1F41=03BF 0314 +1F42=1F40 0300 +1F43=1F41 0300 +1F44=1F40 0301 +1F45=1F41 0301 +1F48=039F 0313 +1F49=039F 0314 +1F4A=1F48 0300 +1F4B=1F49 0300 +1F4C=1F48 0301 +1F4D=1F49 0301 +1F50=03C5 0313 +1F51=03C5 0314 +1F52=1F50 0300 +1F53=1F51 0300 +1F54=1F50 0301 +1F55=1F51 0301 +1F56=1F50 0342 +1F57=1F51 0342 +1F59=03A5 0314 +1F5B=1F59 0300 +1F5D=1F59 0301 +1F5F=1F59 
0342 +1F60=03C9 0313 +1F61=03C9 0314 +1F62=1F60 0300 +1F63=1F61 0300 +1F64=1F60 0301 +1F65=1F61 0301 +1F66=1F60 0342 +1F67=1F61 0342 +1F68=03A9 0313 +1F69=03A9 0314 +1F6A=1F68 0300 +1F6B=1F69 0300 +1F6C=1F68 0301 +1F6D=1F69 0301 +1F6E=1F68 0342 +1F6F=1F69 0342 +1F70=03B1 0300 +1F71>03AC +1F72=03B5 0300 +1F73>03AD +1F74=03B7 0300 +1F75>03AE +1F76=03B9 0300 +1F77>03AF +1F78=03BF 0300 +1F79>03CC +1F7A=03C5 0300 +1F7B>03CD +1F7C=03C9 0300 +1F7D>03CE +1F80=1F00 0345 +1F81=1F01 0345 +1F82=1F02 0345 +1F83=1F03 0345 +1F84=1F04 0345 +1F85=1F05 0345 +1F86=1F06 0345 +1F87=1F07 0345 +1F88=1F08 0345 +1F89=1F09 0345 +1F8A=1F0A 0345 +1F8B=1F0B 0345 +1F8C=1F0C 0345 +1F8D=1F0D 0345 +1F8E=1F0E 0345 +1F8F=1F0F 0345 +1F90=1F20 0345 +1F91=1F21 0345 +1F92=1F22 0345 +1F93=1F23 0345 +1F94=1F24 0345 +1F95=1F25 0345 +1F96=1F26 0345 +1F97=1F27 0345 +1F98=1F28 0345 +1F99=1F29 0345 +1F9A=1F2A 0345 +1F9B=1F2B 0345 +1F9C=1F2C 0345 +1F9D=1F2D 0345 +1F9E=1F2E 0345 +1F9F=1F2F 0345 +1FA0=1F60 0345 +1FA1=1F61 0345 +1FA2=1F62 0345 +1FA3=1F63 0345 +1FA4=1F64 0345 +1FA5=1F65 0345 +1FA6=1F66 0345 +1FA7=1F67 0345 +1FA8=1F68 0345 +1FA9=1F69 0345 +1FAA=1F6A 0345 +1FAB=1F6B 0345 +1FAC=1F6C 0345 +1FAD=1F6D 0345 +1FAE=1F6E 0345 +1FAF=1F6F 0345 +1FB0=03B1 0306 +1FB1=03B1 0304 +1FB2=1F70 0345 +1FB3=03B1 0345 +1FB4=03AC 0345 +1FB6=03B1 0342 +1FB7=1FB6 0345 +1FB8=0391 0306 +1FB9=0391 0304 +1FBA=0391 0300 +1FBB>0386 +1FBC=0391 0345 +1FBE>03B9 +1FC1=00A8 0342 +1FC2=1F74 0345 +1FC3=03B7 0345 +1FC4=03AE 0345 +1FC6=03B7 0342 +1FC7=1FC6 0345 +1FC8=0395 0300 +1FC9>0388 +1FCA=0397 0300 +1FCB>0389 +1FCC=0397 0345 +1FCD=1FBF 0300 +1FCE=1FBF 0301 +1FCF=1FBF 0342 +1FD0=03B9 0306 +1FD1=03B9 0304 +1FD2=03CA 0300 +1FD3>0390 +1FD6=03B9 0342 +1FD7=03CA 0342 +1FD8=0399 0306 +1FD9=0399 0304 +1FDA=0399 0300 +1FDB>038A +1FDD=1FFE 0300 +1FDE=1FFE 0301 +1FDF=1FFE 0342 +1FE0=03C5 0306 +1FE1=03C5 0304 +1FE2=03CB 0300 +1FE3>03B0 +1FE4=03C1 0313 +1FE5=03C1 0314 +1FE6=03C5 0342 +1FE7=03CB 0342 +1FE8=03A5 0306 +1FE9=03A5 0304 +1FEA=03A5 0300 
+1FEB>038E +1FEC=03A1 0314 +1FED=00A8 0300 +1FEE>0385 +1FEF>0060 +1FF2=1F7C 0345 +1FF3=03C9 0345 +1FF4=03CE 0345 +1FF6=03C9 0342 +1FF7=1FF6 0345 +1FF8=039F 0300 +1FF9>038C +1FFA=03A9 0300 +1FFB>038F +1FFC=03A9 0345 +1FFD>00B4 +2000>2002 +2001>2003 +2126>03A9 +212A>004B +212B>00C5 +219A=2190 0338 +219B=2192 0338 +21AE=2194 0338 +21CD=21D0 0338 +21CE=21D4 0338 +21CF=21D2 0338 +2204=2203 0338 +2209=2208 0338 +220C=220B 0338 +2224=2223 0338 +2226=2225 0338 +2241=223C 0338 +2244=2243 0338 +2247=2245 0338 +2249=2248 0338 +2260=003D 0338 +2262=2261 0338 +226D=224D 0338 +226E=003C 0338 +226F=003E 0338 +2270=2264 0338 +2271=2265 0338 +2274=2272 0338 +2275=2273 0338 +2278=2276 0338 +2279=2277 0338 +2280=227A 0338 +2281=227B 0338 +2284=2282 0338 +2285=2283 0338 +2288=2286 0338 +2289=2287 0338 +22AC=22A2 0338 +22AD=22A8 0338 +22AE=22A9 0338 +22AF=22AB 0338 +22E0=227C 0338 +22E1=227D 0338 +22E2=2291 0338 +22E3=2292 0338 +22EA=22B2 0338 +22EB=22B3 0338 +22EC=22B4 0338 +22ED=22B5 0338 +2329>3008 +232A>3009 +2ADC>2ADD 0338 +304C=304B 3099 +304E=304D 3099 +3050=304F 3099 +3052=3051 3099 +3054=3053 3099 +3056=3055 3099 +3058=3057 3099 +305A=3059 3099 +305C=305B 3099 +305E=305D 3099 +3060=305F 3099 +3062=3061 3099 +3065=3064 3099 +3067=3066 3099 +3069=3068 3099 +3070=306F 3099 +3071=306F 309A +3073=3072 3099 +3074=3072 309A +3076=3075 3099 +3077=3075 309A +3079=3078 3099 +307A=3078 309A +307C=307B 3099 +307D=307B 309A +3094=3046 3099 +309E=309D 3099 +30AC=30AB 3099 +30AE=30AD 3099 +30B0=30AF 3099 +30B2=30B1 3099 +30B4=30B3 3099 +30B6=30B5 3099 +30B8=30B7 3099 +30BA=30B9 3099 +30BC=30BB 3099 +30BE=30BD 3099 +30C0=30BF 3099 +30C2=30C1 3099 +30C5=30C4 3099 +30C7=30C6 3099 +30C9=30C8 3099 +30D0=30CF 3099 +30D1=30CF 309A +30D3=30D2 3099 +30D4=30D2 309A +30D6=30D5 3099 +30D7=30D5 309A +30D9=30D8 3099 +30DA=30D8 309A +30DC=30DB 3099 +30DD=30DB 309A +30F4=30A6 3099 +30F7=30EF 3099 +30F8=30F0 3099 +30F9=30F1 3099 +30FA=30F2 3099 +30FE=30FD 3099 +F900>8C48 +F901>66F4 +F902>8ECA +F903>8CC8 
+F904>6ED1 +F905>4E32 +F906>53E5 +F907>9F9C +F908>9F9C +F909>5951 +F90A>91D1 +F90B>5587 +F90C>5948 +F90D>61F6 +F90E>7669 +F90F>7F85 +F910>863F +F911>87BA +F912>88F8 +F913>908F +F914>6A02 +F915>6D1B +F916>70D9 +F917>73DE +F918>843D +F919>916A +F91A>99F1 +F91B>4E82 +F91C>5375 +F91D>6B04 +F91E>721B +F91F>862D +F920>9E1E +F921>5D50 +F922>6FEB +F923>85CD +F924>8964 +F925>62C9 +F926>81D8 +F927>881F +F928>5ECA +F929>6717 +F92A>6D6A +F92B>72FC +F92C>90CE +F92D>4F86 +F92E>51B7 +F92F>52DE +F930>64C4 +F931>6AD3 +F932>7210 +F933>76E7 +F934>8001 +F935>8606 +F936>865C +F937>8DEF +F938>9732 +F939>9B6F +F93A>9DFA +F93B>788C +F93C>797F +F93D>7DA0 +F93E>83C9 +F93F>9304 +F940>9E7F +F941>8AD6 +F942>58DF +F943>5F04 +F944>7C60 +F945>807E +F946>7262 +F947>78CA +F948>8CC2 +F949>96F7 +F94A>58D8 +F94B>5C62 +F94C>6A13 +F94D>6DDA +F94E>6F0F +F94F>7D2F +F950>7E37 +F951>964B +F952>52D2 +F953>808B +F954>51DC +F955>51CC +F956>7A1C +F957>7DBE +F958>83F1 +F959>9675 +F95A>8B80 +F95B>62CF +F95C>6A02 +F95D>8AFE +F95E>4E39 +F95F>5BE7 +F960>6012 +F961>7387 +F962>7570 +F963>5317 +F964>78FB +F965>4FBF +F966>5FA9 +F967>4E0D +F968>6CCC +F969>6578 +F96A>7D22 +F96B>53C3 +F96C>585E +F96D>7701 +F96E>8449 +F96F>8AAA +F970>6BBA +F971>8FB0 +F972>6C88 +F973>62FE +F974>82E5 +F975>63A0 +F976>7565 +F977>4EAE +F978>5169 +F979>51C9 +F97A>6881 +F97B>7CE7 +F97C>826F +F97D>8AD2 +F97E>91CF +F97F>52F5 +F980>5442 +F981>5973 +F982>5EEC +F983>65C5 +F984>6FFE +F985>792A +F986>95AD +F987>9A6A +F988>9E97 +F989>9ECE +F98A>529B +F98B>66C6 +F98C>6B77 +F98D>8F62 +F98E>5E74 +F98F>6190 +F990>6200 +F991>649A +F992>6F23 +F993>7149 +F994>7489 +F995>79CA +F996>7DF4 +F997>806F +F998>8F26 +F999>84EE +F99A>9023 +F99B>934A +F99C>5217 +F99D>52A3 +F99E>54BD +F99F>70C8 +F9A0>88C2 +F9A1>8AAA +F9A2>5EC9 +F9A3>5FF5 +F9A4>637B +F9A5>6BAE +F9A6>7C3E +F9A7>7375 +F9A8>4EE4 +F9A9>56F9 +F9AA>5BE7 +F9AB>5DBA +F9AC>601C +F9AD>73B2 +F9AE>7469 +F9AF>7F9A +F9B0>8046 +F9B1>9234 +F9B2>96F6 +F9B3>9748 +F9B4>9818 +F9B5>4F8B +F9B6>79AE +F9B7>91B4 +F9B8>96B8 
+F9B9>60E1 +F9BA>4E86 +F9BB>50DA +F9BC>5BEE +F9BD>5C3F +F9BE>6599 +F9BF>6A02 +F9C0>71CE +F9C1>7642 +F9C2>84FC +F9C3>907C +F9C4>9F8D +F9C5>6688 +F9C6>962E +F9C7>5289 +F9C8>677B +F9C9>67F3 +F9CA>6D41 +F9CB>6E9C +F9CC>7409 +F9CD>7559 +F9CE>786B +F9CF>7D10 +F9D0>985E +F9D1>516D +F9D2>622E +F9D3>9678 +F9D4>502B +F9D5>5D19 +F9D6>6DEA +F9D7>8F2A +F9D8>5F8B +F9D9>6144 +F9DA>6817 +F9DB>7387 +F9DC>9686 +F9DD>5229 +F9DE>540F +F9DF>5C65 +F9E0>6613 +F9E1>674E +F9E2>68A8 +F9E3>6CE5 +F9E4>7406 +F9E5>75E2 +F9E6>7F79 +F9E7>88CF +F9E8>88E1 +F9E9>91CC +F9EA>96E2 +F9EB>533F +F9EC>6EBA +F9ED>541D +F9EE>71D0 +F9EF>7498 +F9F0>85FA +F9F1>96A3 +F9F2>9C57 +F9F3>9E9F +F9F4>6797 +F9F5>6DCB +F9F6>81E8 +F9F7>7ACB +F9F8>7B20 +F9F9>7C92 +F9FA>72C0 +F9FB>7099 +F9FC>8B58 +F9FD>4EC0 +F9FE>8336 +F9FF>523A +FA00>5207 +FA01>5EA6 +FA02>62D3 +FA03>7CD6 +FA04>5B85 +FA05>6D1E +FA06>66B4 +FA07>8F3B +FA08>884C +FA09>964D +FA0A>898B +FA0B>5ED3 +FA0C>5140 +FA0D>55C0 +FA10>585A +FA12>6674 +FA15>51DE +FA16>732A +FA17>76CA +FA18>793C +FA19>795E +FA1A>7965 +FA1B>798F +FA1C>9756 +FA1D>7CBE +FA1E>7FBD +FA20>8612 +FA22>8AF8 +FA25>9038 +FA26>90FD +FA2A>98EF +FA2B>98FC +FA2C>9928 +FA2D>9DB4 +FA30>4FAE +FA31>50E7 +FA32>514D +FA33>52C9 +FA34>52E4 +FA35>5351 +FA36>559D +FA37>5606 +FA38>5668 +FA39>5840 +FA3A>58A8 +FA3B>5C64 +FA3C>5C6E +FA3D>6094 +FA3E>6168 +FA3F>618E +FA40>61F2 +FA41>654F +FA42>65E2 +FA43>6691 +FA44>6885 +FA45>6D77 +FA46>6E1A +FA47>6F22 +FA48>716E +FA49>722B +FA4A>7422 +FA4B>7891 +FA4C>793E +FA4D>7949 +FA4E>7948 +FA4F>7950 +FA50>7956 +FA51>795D +FA52>798D +FA53>798E +FA54>7A40 +FA55>7A81 +FA56>7BC0 +FA57>7DF4 +FA58>7E09 +FA59>7E41 +FA5A>7F72 +FA5B>8005 +FA5C>81ED +FA5D>8279 +FA5E>8279 +FA5F>8457 +FA60>8910 +FA61>8996 +FA62>8B01 +FA63>8B39 +FA64>8CD3 +FA65>8D08 +FA66>8FB6 +FA67>9038 +FA68>96E3 +FA69>97FF +FA6A>983B +FA6B>6075 +FA6C>242EE +FA6D>8218 +FA70>4E26 +FA71>51B5 +FA72>5168 +FA73>4F80 +FA74>5145 +FA75>5180 +FA76>52C7 +FA77>52FA +FA78>559D +FA79>5555 +FA7A>5599 +FA7B>55E2 +FA7C>585A +FA7D>58B3 
+FA7E>5944 +FA7F>5954 +FA80>5A62 +FA81>5B28 +FA82>5ED2 +FA83>5ED9 +FA84>5F69 +FA85>5FAD +FA86>60D8 +FA87>614E +FA88>6108 +FA89>618E +FA8A>6160 +FA8B>61F2 +FA8C>6234 +FA8D>63C4 +FA8E>641C +FA8F>6452 +FA90>6556 +FA91>6674 +FA92>6717 +FA93>671B +FA94>6756 +FA95>6B79 +FA96>6BBA +FA97>6D41 +FA98>6EDB +FA99>6ECB +FA9A>6F22 +FA9B>701E +FA9C>716E +FA9D>77A7 +FA9E>7235 +FA9F>72AF +FAA0>732A +FAA1>7471 +FAA2>7506 +FAA3>753B +FAA4>761D +FAA5>761F +FAA6>76CA +FAA7>76DB +FAA8>76F4 +FAA9>774A +FAAA>7740 +FAAB>78CC +FAAC>7AB1 +FAAD>7BC0 +FAAE>7C7B +FAAF>7D5B +FAB0>7DF4 +FAB1>7F3E +FAB2>8005 +FAB3>8352 +FAB4>83EF +FAB5>8779 +FAB6>8941 +FAB7>8986 +FAB8>8996 +FAB9>8ABF +FABA>8AF8 +FABB>8ACB +FABC>8B01 +FABD>8AFE +FABE>8AED +FABF>8B39 +FAC0>8B8A +FAC1>8D08 +FAC2>8F38 +FAC3>9072 +FAC4>9199 +FAC5>9276 +FAC6>967C +FAC7>96E3 +FAC8>9756 +FAC9>97DB +FACA>97FF +FACB>980B +FACC>983B +FACD>9B12 +FACE>9F9C +FACF>2284A +FAD0>22844 +FAD1>233D5 +FAD2>3B9D +FAD3>4018 +FAD4>4039 +FAD5>25249 +FAD6>25CD0 +FAD7>27ED3 +FAD8>9F43 +FAD9>9F8E +FB1D>05D9 05B4 +FB1F>05F2 05B7 +FB2A>05E9 05C1 +FB2B>05E9 05C2 +FB2C>FB49 05C1 +FB2D>FB49 05C2 +FB2E>05D0 05B7 +FB2F>05D0 05B8 +FB30>05D0 05BC +FB31>05D1 05BC +FB32>05D2 05BC +FB33>05D3 05BC +FB34>05D4 05BC +FB35>05D5 05BC +FB36>05D6 05BC +FB38>05D8 05BC +FB39>05D9 05BC +FB3A>05DA 05BC +FB3B>05DB 05BC +FB3C>05DC 05BC +FB3E>05DE 05BC +FB40>05E0 05BC +FB41>05E1 05BC +FB43>05E3 05BC +FB44>05E4 05BC +FB46>05E6 05BC +FB47>05E7 05BC +FB48>05E8 05BC +FB49>05E9 05BC +FB4A>05EA 05BC +FB4B>05D5 05B9 +FB4C>05D1 05BF +FB4D>05DB 05BF +FB4E>05E4 05BF +1109A=11099 110BA +1109C=1109B 110BA +110AB=110A5 110BA +1D15E>1D157 1D165 +1D15F>1D158 1D165 +1D160>1D15F 1D16E +1D161>1D15F 1D16F +1D162>1D15F 1D170 +1D163>1D15F 1D171 +1D164>1D15F 1D172 +1D1BB>1D1B9 1D165 +1D1BC>1D1BA 1D165 +1D1BD>1D1BB 1D16E +1D1BE>1D1BC 1D16E +1D1BF>1D1BB 1D16F +1D1C0>1D1BC 1D16F +2F800>4E3D +2F801>4E38 +2F802>4E41 +2F803>20122 +2F804>4F60 +2F805>4FAE +2F806>4FBB +2F807>5002 +2F808>507A +2F809>5099 +2F80A>50E7 
+2F80B>50CF +2F80C>349E +2F80D>2063A +2F80E>514D +2F80F>5154 +2F810>5164 +2F811>5177 +2F812>2051C +2F813>34B9 +2F814>5167 +2F815>518D +2F816>2054B +2F817>5197 +2F818>51A4 +2F819>4ECC +2F81A>51AC +2F81B>51B5 +2F81C>291DF +2F81D>51F5 +2F81E>5203 +2F81F>34DF +2F820>523B +2F821>5246 +2F822>5272 +2F823>5277 +2F824>3515 +2F825>52C7 +2F826>52C9 +2F827>52E4 +2F828>52FA +2F829>5305 +2F82A>5306 +2F82B>5317 +2F82C>5349 +2F82D>5351 +2F82E>535A +2F82F>5373 +2F830>537D +2F831>537F +2F832>537F +2F833>537F +2F834>20A2C +2F835>7070 +2F836>53CA +2F837>53DF +2F838>20B63 +2F839>53EB +2F83A>53F1 +2F83B>5406 +2F83C>549E +2F83D>5438 +2F83E>5448 +2F83F>5468 +2F840>54A2 +2F841>54F6 +2F842>5510 +2F843>5553 +2F844>5563 +2F845>5584 +2F846>5584 +2F847>5599 +2F848>55AB +2F849>55B3 +2F84A>55C2 +2F84B>5716 +2F84C>5606 +2F84D>5717 +2F84E>5651 +2F84F>5674 +2F850>5207 +2F851>58EE +2F852>57CE +2F853>57F4 +2F854>580D +2F855>578B +2F856>5832 +2F857>5831 +2F858>58AC +2F859>214E4 +2F85A>58F2 +2F85B>58F7 +2F85C>5906 +2F85D>591A +2F85E>5922 +2F85F>5962 +2F860>216A8 +2F861>216EA +2F862>59EC +2F863>5A1B +2F864>5A27 +2F865>59D8 +2F866>5A66 +2F867>36EE +2F868>36FC +2F869>5B08 +2F86A>5B3E +2F86B>5B3E +2F86C>219C8 +2F86D>5BC3 +2F86E>5BD8 +2F86F>5BE7 +2F870>5BF3 +2F871>21B18 +2F872>5BFF +2F873>5C06 +2F874>5F53 +2F875>5C22 +2F876>3781 +2F877>5C60 +2F878>5C6E +2F879>5CC0 +2F87A>5C8D +2F87B>21DE4 +2F87C>5D43 +2F87D>21DE6 +2F87E>5D6E +2F87F>5D6B +2F880>5D7C +2F881>5DE1 +2F882>5DE2 +2F883>382F +2F884>5DFD +2F885>5E28 +2F886>5E3D +2F887>5E69 +2F888>3862 +2F889>22183 +2F88A>387C +2F88B>5EB0 +2F88C>5EB3 +2F88D>5EB6 +2F88E>5ECA +2F88F>2A392 +2F890>5EFE +2F891>22331 +2F892>22331 +2F893>8201 +2F894>5F22 +2F895>5F22 +2F896>38C7 +2F897>232B8 +2F898>261DA +2F899>5F62 +2F89A>5F6B +2F89B>38E3 +2F89C>5F9A +2F89D>5FCD +2F89E>5FD7 +2F89F>5FF9 +2F8A0>6081 +2F8A1>393A +2F8A2>391C +2F8A3>6094 +2F8A4>226D4 +2F8A5>60C7 +2F8A6>6148 +2F8A7>614C +2F8A8>614E +2F8A9>614C +2F8AA>617A +2F8AB>618E +2F8AC>61B2 +2F8AD>61A4 +2F8AE>61AF +2F8AF>61DE 
+2F8B0>61F2 +2F8B1>61F6 +2F8B2>6210 +2F8B3>621B +2F8B4>625D +2F8B5>62B1 +2F8B6>62D4 +2F8B7>6350 +2F8B8>22B0C +2F8B9>633D +2F8BA>62FC +2F8BB>6368 +2F8BC>6383 +2F8BD>63E4 +2F8BE>22BF1 +2F8BF>6422 +2F8C0>63C5 +2F8C1>63A9 +2F8C2>3A2E +2F8C3>6469 +2F8C4>647E +2F8C5>649D +2F8C6>6477 +2F8C7>3A6C +2F8C8>654F +2F8C9>656C +2F8CA>2300A +2F8CB>65E3 +2F8CC>66F8 +2F8CD>6649 +2F8CE>3B19 +2F8CF>6691 +2F8D0>3B08 +2F8D1>3AE4 +2F8D2>5192 +2F8D3>5195 +2F8D4>6700 +2F8D5>669C +2F8D6>80AD +2F8D7>43D9 +2F8D8>6717 +2F8D9>671B +2F8DA>6721 +2F8DB>675E +2F8DC>6753 +2F8DD>233C3 +2F8DE>3B49 +2F8DF>67FA +2F8E0>6785 +2F8E1>6852 +2F8E2>6885 +2F8E3>2346D +2F8E4>688E +2F8E5>681F +2F8E6>6914 +2F8E7>3B9D +2F8E8>6942 +2F8E9>69A3 +2F8EA>69EA +2F8EB>6AA8 +2F8EC>236A3 +2F8ED>6ADB +2F8EE>3C18 +2F8EF>6B21 +2F8F0>238A7 +2F8F1>6B54 +2F8F2>3C4E +2F8F3>6B72 +2F8F4>6B9F +2F8F5>6BBA +2F8F6>6BBB +2F8F7>23A8D +2F8F8>21D0B +2F8F9>23AFA +2F8FA>6C4E +2F8FB>23CBC +2F8FC>6CBF +2F8FD>6CCD +2F8FE>6C67 +2F8FF>6D16 +2F900>6D3E +2F901>6D77 +2F902>6D41 +2F903>6D69 +2F904>6D78 +2F905>6D85 +2F906>23D1E +2F907>6D34 +2F908>6E2F +2F909>6E6E +2F90A>3D33 +2F90B>6ECB +2F90C>6EC7 +2F90D>23ED1 +2F90E>6DF9 +2F90F>6F6E +2F910>23F5E +2F911>23F8E +2F912>6FC6 +2F913>7039 +2F914>701E +2F915>701B +2F916>3D96 +2F917>704A +2F918>707D +2F919>7077 +2F91A>70AD +2F91B>20525 +2F91C>7145 +2F91D>24263 +2F91E>719C +2F91F>243AB +2F920>7228 +2F921>7235 +2F922>7250 +2F923>24608 +2F924>7280 +2F925>7295 +2F926>24735 +2F927>24814 +2F928>737A +2F929>738B +2F92A>3EAC +2F92B>73A5 +2F92C>3EB8 +2F92D>3EB8 +2F92E>7447 +2F92F>745C +2F930>7471 +2F931>7485 +2F932>74CA +2F933>3F1B +2F934>7524 +2F935>24C36 +2F936>753E +2F937>24C92 +2F938>7570 +2F939>2219F +2F93A>7610 +2F93B>24FA1 +2F93C>24FB8 +2F93D>25044 +2F93E>3FFC +2F93F>4008 +2F940>76F4 +2F941>250F3 +2F942>250F2 +2F943>25119 +2F944>25133 +2F945>771E +2F946>771F +2F947>771F +2F948>774A +2F949>4039 +2F94A>778B +2F94B>4046 +2F94C>4096 +2F94D>2541D +2F94E>784E +2F94F>788C +2F950>78CC +2F951>40E3 +2F952>25626 
+2F953>7956 +2F954>2569A +2F955>256C5 +2F956>798F +2F957>79EB +2F958>412F +2F959>7A40 +2F95A>7A4A +2F95B>7A4F +2F95C>2597C +2F95D>25AA7 +2F95E>25AA7 +2F95F>7AEE +2F960>4202 +2F961>25BAB +2F962>7BC6 +2F963>7BC9 +2F964>4227 +2F965>25C80 +2F966>7CD2 +2F967>42A0 +2F968>7CE8 +2F969>7CE3 +2F96A>7D00 +2F96B>25F86 +2F96C>7D63 +2F96D>4301 +2F96E>7DC7 +2F96F>7E02 +2F970>7E45 +2F971>4334 +2F972>26228 +2F973>26247 +2F974>4359 +2F975>262D9 +2F976>7F7A +2F977>2633E +2F978>7F95 +2F979>7FFA +2F97A>8005 +2F97B>264DA +2F97C>26523 +2F97D>8060 +2F97E>265A8 +2F97F>8070 +2F980>2335F +2F981>43D5 +2F982>80B2 +2F983>8103 +2F984>440B +2F985>813E +2F986>5AB5 +2F987>267A7 +2F988>267B5 +2F989>23393 +2F98A>2339C +2F98B>8201 +2F98C>8204 +2F98D>8F9E +2F98E>446B +2F98F>8291 +2F990>828B +2F991>829D +2F992>52B3 +2F993>82B1 +2F994>82B3 +2F995>82BD +2F996>82E6 +2F997>26B3C +2F998>82E5 +2F999>831D +2F99A>8363 +2F99B>83AD +2F99C>8323 +2F99D>83BD +2F99E>83E7 +2F99F>8457 +2F9A0>8353 +2F9A1>83CA +2F9A2>83CC +2F9A3>83DC +2F9A4>26C36 +2F9A5>26D6B +2F9A6>26CD5 +2F9A7>452B +2F9A8>84F1 +2F9A9>84F3 +2F9AA>8516 +2F9AB>273CA +2F9AC>8564 +2F9AD>26F2C +2F9AE>455D +2F9AF>4561 +2F9B0>26FB1 +2F9B1>270D2 +2F9B2>456B +2F9B3>8650 +2F9B4>865C +2F9B5>8667 +2F9B6>8669 +2F9B7>86A9 +2F9B8>8688 +2F9B9>870E +2F9BA>86E2 +2F9BB>8779 +2F9BC>8728 +2F9BD>876B +2F9BE>8786 +2F9BF>45D7 +2F9C0>87E1 +2F9C1>8801 +2F9C2>45F9 +2F9C3>8860 +2F9C4>8863 +2F9C5>27667 +2F9C6>88D7 +2F9C7>88DE +2F9C8>4635 +2F9C9>88FA +2F9CA>34BB +2F9CB>278AE +2F9CC>27966 +2F9CD>46BE +2F9CE>46C7 +2F9CF>8AA0 +2F9D0>8AED +2F9D1>8B8A +2F9D2>8C55 +2F9D3>27CA8 +2F9D4>8CAB +2F9D5>8CC1 +2F9D6>8D1B +2F9D7>8D77 +2F9D8>27F2F +2F9D9>20804 +2F9DA>8DCB +2F9DB>8DBC +2F9DC>8DF0 +2F9DD>208DE +2F9DE>8ED4 +2F9DF>8F38 +2F9E0>285D2 +2F9E1>285ED +2F9E2>9094 +2F9E3>90F1 +2F9E4>9111 +2F9E5>2872E +2F9E6>911B +2F9E7>9238 +2F9E8>92D7 +2F9E9>92D8 +2F9EA>927C +2F9EB>93F9 +2F9EC>9415 +2F9ED>28BFA +2F9EE>958B +2F9EF>4995 +2F9F0>95B7 +2F9F1>28D77 +2F9F2>49E6 +2F9F3>96C3 +2F9F4>5DB2 +2F9F5>9723 
+2F9F6>29145 +2F9F7>2921A +2F9F8>4A6E +2F9F9>4A76 +2F9FA>97E0 +2F9FB>2940A +2F9FC>4AB2 +2F9FD>29496 +2F9FE>980B +2F9FF>980B +2FA00>9829 +2FA01>295B6 +2FA02>98E2 +2FA03>4B33 +2FA04>9929 +2FA05>99A7 +2FA06>99C2 +2FA07>99FE +2FA08>4BCE +2FA09>29B30 +2FA0A>9B12 +2FA0B>9C40 +2FA0C>9CFD +2FA0D>4CCE +2FA0E>4CED +2FA0F>9D67 +2FA10>2A0CE +2FA11>4CF8 +2FA12>2A105 +2FA13>2A20E +2FA14>2A291 +2FA15>9EBB +2FA16>4D56 +2FA17>9EF9 +2FA18>9EFE +2FA19>9F05 +2FA1A>9F0F +2FA1B>9F16 +2FA1C>9F3B +2FA1D>2A600 diff --git a/icu4c/source/data/unidata/norm2/nfkc.txt b/icu4c/source/data/unidata/norm2/nfkc.txt new file mode 100644 index 00000000000..08aaf353f9b --- /dev/null +++ b/icu4c/source/data/unidata/norm2/nfkc.txt @@ -0,0 +1,5786 @@ +# Copyright (C) 1999-2010, International Business Machines +# Corporation and others. All Rights Reserved. +# +# file name: nfkc.txt +# +# machine-generated on: 2009-11-30 +# + +# Canonical_Combining_Class (ccc) values +0300..0314:230 +0315:232 +0316..0319:220 +031A:232 +031B:216 +031C..0320:220 +0321..0322:202 +0323..0326:220 +0327..0328:202 +0329..0333:220 +0334..0338:1 +0339..033C:220 +033D..0344:230 +0345:240 +0346:230 +0347..0349:220 +034A..034C:230 +034D..034E:220 +0350..0352:230 +0353..0356:220 +0357:230 +0358:232 +0359..035A:220 +035B:230 +035C:233 +035D..035E:234 +035F:233 +0360..0361:234 +0362:233 +0363..036F:230 +0483..0487:230 +0591:220 +0592..0595:230 +0596:220 +0597..0599:230 +059A:222 +059B:220 +059C..05A1:230 +05A2..05A7:220 +05A8..05A9:230 +05AA:220 +05AB..05AC:230 +05AD:222 +05AE:228 +05AF:230 +05B0:10 +05B1:11 +05B2:12 +05B3:13 +05B4:14 +05B5:15 +05B6:16 +05B7:17 +05B8:18 +05B9..05BA:19 +05BB:20 +05BC:21 +05BD:22 +05BF:23 +05C1:24 +05C2:25 +05C4:230 +05C5:220 +05C7:18 +0610..0617:230 +0618:30 +0619:31 +061A:32 +064B:27 +064C:28 +064D:29 +064E:30 +064F:31 +0650:32 +0651:33 +0652:34 +0653..0654:230 +0655..0656:220 +0657..065B:230 +065C:220 +065D..065E:230 +0670:35 +06D6..06DC:230 +06DF..06E2:230 +06E3:220 +06E4:230 +06E7..06E8:230 +06EA:220 
+06EB..06EC:230 +06ED:220 +0711:36 +0730:230 +0731:220 +0732..0733:230 +0734:220 +0735..0736:230 +0737..0739:220 +073A:230 +073B..073C:220 +073D:230 +073E:220 +073F..0741:230 +0742:220 +0743:230 +0744:220 +0745:230 +0746:220 +0747:230 +0748:220 +0749..074A:230 +07EB..07F1:230 +07F2:220 +07F3:230 +0816..0819:230 +081B..0823:230 +0825..0827:230 +0829..082D:230 +093C:7 +094D:9 +0951:230 +0952:220 +0953..0954:230 +09BC:7 +09CD:9 +0A3C:7 +0A4D:9 +0ABC:7 +0ACD:9 +0B3C:7 +0B4D:9 +0BCD:9 +0C4D:9 +0C55:84 +0C56:91 +0CBC:7 +0CCD:9 +0D4D:9 +0DCA:9 +0E38..0E39:103 +0E3A:9 +0E48..0E4B:107 +0EB8..0EB9:118 +0EC8..0ECB:122 +0F18..0F19:220 +0F35:220 +0F37:220 +0F39:216 +0F71:129 +0F72:130 +0F74:132 +0F7A..0F7D:130 +0F80:130 +0F82..0F83:230 +0F84:9 +0F86..0F87:230 +0FC6:220 +1037:7 +1039..103A:9 +108D:220 +135F:230 +1714:9 +1734:9 +17D2:9 +17DD:230 +18A9:228 +1939:222 +193A:230 +193B:220 +1A17:230 +1A18:220 +1A60:9 +1A75..1A7C:230 +1A7F:220 +1B34:7 +1B44:9 +1B6B:230 +1B6C:220 +1B6D..1B73:230 +1BAA:9 +1C37:7 +1CD0..1CD2:230 +1CD4:1 +1CD5..1CD9:220 +1CDA..1CDB:230 +1CDC..1CDF:220 +1CE0:230 +1CE2..1CE8:1 +1CED:220 +1DC0..1DC1:230 +1DC2:220 +1DC3..1DC9:230 +1DCA:220 +1DCB..1DCC:230 +1DCD:234 +1DCE:214 +1DCF:220 +1DD0:202 +1DD1..1DE6:230 +1DFD:220 +1DFE:230 +1DFF:220 +20D0..20D1:230 +20D2..20D3:1 +20D4..20D7:230 +20D8..20DA:1 +20DB..20DC:230 +20E1:230 +20E5..20E6:1 +20E7:230 +20E8:220 +20E9:230 +20EA..20EB:1 +20EC..20EF:220 +20F0:230 +2CEF..2CF1:230 +2DE0..2DFF:230 +302A:218 +302B:228 +302C:232 +302D:222 +302E..302F:224 +3099..309A:8 +A66F:230 +A67C..A67D:230 +A6F0..A6F1:230 +A806:9 +A8C4:9 +A8E0..A8F1:230 +A92B..A92D:220 +A953:9 +A9B3:7 +A9C0:9 +AAB0:230 +AAB2..AAB3:230 +AAB4:220 +AAB7..AAB8:230 +AABE..AABF:230 +AAC1:230 +ABED:9 +FB1E:26 +FE20..FE26:230 +101FD:220 +10A0D:220 +10A0F:230 +10A38:230 +10A39:1 +10A3A:220 +10A3F:9 +110B9:9 +110BA:7 +1D165..1D166:216 +1D167..1D169:1 +1D16D:226 +1D16E..1D172:216 +1D17B..1D182:220 +1D185..1D189:230 +1D18A..1D18B:220 +1D1AA..1D1AD:230 
+1D242..1D244:230 + +# Canonical and compatibility decomposition mappings +00A0>0020 +00A8>0020 0308 +00AA>0061 +00AF>0020 0304 +00B2>0032 +00B3>0033 +00B4>0020 0301 +00B5>03BC +00B8>0020 0327 +00B9>0031 +00BA>006F +00BC>0031 2044 0034 +00BD>0031 2044 0032 +00BE>0033 2044 0034 +00C0=0041 0300 +00C1=0041 0301 +00C2=0041 0302 +00C3=0041 0303 +00C4=0041 0308 +00C5=0041 030A +00C7=0043 0327 +00C8=0045 0300 +00C9=0045 0301 +00CA=0045 0302 +00CB=0045 0308 +00CC=0049 0300 +00CD=0049 0301 +00CE=0049 0302 +00CF=0049 0308 +00D1=004E 0303 +00D2=004F 0300 +00D3=004F 0301 +00D4=004F 0302 +00D5=004F 0303 +00D6=004F 0308 +00D9=0055 0300 +00DA=0055 0301 +00DB=0055 0302 +00DC=0055 0308 +00DD=0059 0301 +00E0=0061 0300 +00E1=0061 0301 +00E2=0061 0302 +00E3=0061 0303 +00E4=0061 0308 +00E5=0061 030A +00E7=0063 0327 +00E8=0065 0300 +00E9=0065 0301 +00EA=0065 0302 +00EB=0065 0308 +00EC=0069 0300 +00ED=0069 0301 +00EE=0069 0302 +00EF=0069 0308 +00F1=006E 0303 +00F2=006F 0300 +00F3=006F 0301 +00F4=006F 0302 +00F5=006F 0303 +00F6=006F 0308 +00F9=0075 0300 +00FA=0075 0301 +00FB=0075 0302 +00FC=0075 0308 +00FD=0079 0301 +00FF=0079 0308 +0100=0041 0304 +0101=0061 0304 +0102=0041 0306 +0103=0061 0306 +0104=0041 0328 +0105=0061 0328 +0106=0043 0301 +0107=0063 0301 +0108=0043 0302 +0109=0063 0302 +010A=0043 0307 +010B=0063 0307 +010C=0043 030C +010D=0063 030C +010E=0044 030C +010F=0064 030C +0112=0045 0304 +0113=0065 0304 +0114=0045 0306 +0115=0065 0306 +0116=0045 0307 +0117=0065 0307 +0118=0045 0328 +0119=0065 0328 +011A=0045 030C +011B=0065 030C +011C=0047 0302 +011D=0067 0302 +011E=0047 0306 +011F=0067 0306 +0120=0047 0307 +0121=0067 0307 +0122=0047 0327 +0123=0067 0327 +0124=0048 0302 +0125=0068 0302 +0128=0049 0303 +0129=0069 0303 +012A=0049 0304 +012B=0069 0304 +012C=0049 0306 +012D=0069 0306 +012E=0049 0328 +012F=0069 0328 +0130=0049 0307 +0132>0049 004A +0133>0069 006A +0134=004A 0302 +0135=006A 0302 +0136=004B 0327 +0137=006B 0327 +0139=004C 0301 +013A=006C 0301 +013B=004C 0327 
+013C=006C 0327 +013D=004C 030C +013E=006C 030C +013F>004C 00B7 +0140>006C 00B7 +0143=004E 0301 +0144=006E 0301 +0145=004E 0327 +0146=006E 0327 +0147=004E 030C +0148=006E 030C +0149>02BC 006E +014C=004F 0304 +014D=006F 0304 +014E=004F 0306 +014F=006F 0306 +0150=004F 030B +0151=006F 030B +0154=0052 0301 +0155=0072 0301 +0156=0052 0327 +0157=0072 0327 +0158=0052 030C +0159=0072 030C +015A=0053 0301 +015B=0073 0301 +015C=0053 0302 +015D=0073 0302 +015E=0053 0327 +015F=0073 0327 +0160=0053 030C +0161=0073 030C +0162=0054 0327 +0163=0074 0327 +0164=0054 030C +0165=0074 030C +0168=0055 0303 +0169=0075 0303 +016A=0055 0304 +016B=0075 0304 +016C=0055 0306 +016D=0075 0306 +016E=0055 030A +016F=0075 030A +0170=0055 030B +0171=0075 030B +0172=0055 0328 +0173=0075 0328 +0174=0057 0302 +0175=0077 0302 +0176=0059 0302 +0177=0079 0302 +0178=0059 0308 +0179=005A 0301 +017A=007A 0301 +017B=005A 0307 +017C=007A 0307 +017D=005A 030C +017E=007A 030C +017F>0073 +01A0=004F 031B +01A1=006F 031B +01AF=0055 031B +01B0=0075 031B +01C4>0044 017D +01C5>0044 017E +01C6>0064 017E +01C7>004C 004A +01C8>004C 006A +01C9>006C 006A +01CA>004E 004A +01CB>004E 006A +01CC>006E 006A +01CD=0041 030C +01CE=0061 030C +01CF=0049 030C +01D0=0069 030C +01D1=004F 030C +01D2=006F 030C +01D3=0055 030C +01D4=0075 030C +01D5=00DC 0304 +01D6=00FC 0304 +01D7=00DC 0301 +01D8=00FC 0301 +01D9=00DC 030C +01DA=00FC 030C +01DB=00DC 0300 +01DC=00FC 0300 +01DE=00C4 0304 +01DF=00E4 0304 +01E0=0226 0304 +01E1=0227 0304 +01E2=00C6 0304 +01E3=00E6 0304 +01E6=0047 030C +01E7=0067 030C +01E8=004B 030C +01E9=006B 030C +01EA=004F 0328 +01EB=006F 0328 +01EC=01EA 0304 +01ED=01EB 0304 +01EE=01B7 030C +01EF=0292 030C +01F0=006A 030C +01F1>0044 005A +01F2>0044 007A +01F3>0064 007A +01F4=0047 0301 +01F5=0067 0301 +01F8=004E 0300 +01F9=006E 0300 +01FA=00C5 0301 +01FB=00E5 0301 +01FC=00C6 0301 +01FD=00E6 0301 +01FE=00D8 0301 +01FF=00F8 0301 +0200=0041 030F +0201=0061 030F +0202=0041 0311 +0203=0061 0311 +0204=0045 030F +0205=0065 030F 
+0206=0045 0311 +0207=0065 0311 +0208=0049 030F +0209=0069 030F +020A=0049 0311 +020B=0069 0311 +020C=004F 030F +020D=006F 030F +020E=004F 0311 +020F=006F 0311 +0210=0052 030F +0211=0072 030F +0212=0052 0311 +0213=0072 0311 +0214=0055 030F +0215=0075 030F +0216=0055 0311 +0217=0075 0311 +0218=0053 0326 +0219=0073 0326 +021A=0054 0326 +021B=0074 0326 +021E=0048 030C +021F=0068 030C +0226=0041 0307 +0227=0061 0307 +0228=0045 0327 +0229=0065 0327 +022A=00D6 0304 +022B=00F6 0304 +022C=00D5 0304 +022D=00F5 0304 +022E=004F 0307 +022F=006F 0307 +0230=022E 0304 +0231=022F 0304 +0232=0059 0304 +0233=0079 0304 +02B0>0068 +02B1>0266 +02B2>006A +02B3>0072 +02B4>0279 +02B5>027B +02B6>0281 +02B7>0077 +02B8>0079 +02D8>0020 0306 +02D9>0020 0307 +02DA>0020 030A +02DB>0020 0328 +02DC>0020 0303 +02DD>0020 030B +02E0>0263 +02E1>006C +02E2>0073 +02E3>0078 +02E4>0295 +0340>0300 +0341>0301 +0343>0313 +0344>0308 0301 +0374>02B9 +037A>0020 0345 +037E>003B +0384>0020 0301 +0385>00A8 0301 +0386=0391 0301 +0387>00B7 +0388=0395 0301 +0389=0397 0301 +038A=0399 0301 +038C=039F 0301 +038E=03A5 0301 +038F=03A9 0301 +0390=03CA 0301 +03AA=0399 0308 +03AB=03A5 0308 +03AC=03B1 0301 +03AD=03B5 0301 +03AE=03B7 0301 +03AF=03B9 0301 +03B0=03CB 0301 +03CA=03B9 0308 +03CB=03C5 0308 +03CC=03BF 0301 +03CD=03C5 0301 +03CE=03C9 0301 +03D0>03B2 +03D1>03B8 +03D2>03A5 +03D3>03D2 0301 +03D4>03D2 0308 +03D5>03C6 +03D6>03C0 +03F0>03BA +03F1>03C1 +03F2>03C2 +03F4>0398 +03F5>03B5 +03F9>03A3 +0400=0415 0300 +0401=0415 0308 +0403=0413 0301 +0407=0406 0308 +040C=041A 0301 +040D=0418 0300 +040E=0423 0306 +0419=0418 0306 +0439=0438 0306 +0450=0435 0300 +0451=0435 0308 +0453=0433 0301 +0457=0456 0308 +045C=043A 0301 +045D=0438 0300 +045E=0443 0306 +0476=0474 030F +0477=0475 030F +04C1=0416 0306 +04C2=0436 0306 +04D0=0410 0306 +04D1=0430 0306 +04D2=0410 0308 +04D3=0430 0308 +04D6=0415 0306 +04D7=0435 0306 +04DA=04D8 0308 +04DB=04D9 0308 +04DC=0416 0308 +04DD=0436 0308 +04DE=0417 0308 +04DF=0437 0308 +04E2=0418 0304 +04E3=0438 
0304 +04E4=0418 0308 +04E5=0438 0308 +04E6=041E 0308 +04E7=043E 0308 +04EA=04E8 0308 +04EB=04E9 0308 +04EC=042D 0308 +04ED=044D 0308 +04EE=0423 0304 +04EF=0443 0304 +04F0=0423 0308 +04F1=0443 0308 +04F2=0423 030B +04F3=0443 030B +04F4=0427 0308 +04F5=0447 0308 +04F8=042B 0308 +04F9=044B 0308 +0587>0565 0582 +0622=0627 0653 +0623=0627 0654 +0624=0648 0654 +0625=0627 0655 +0626=064A 0654 +0675>0627 0674 +0676>0648 0674 +0677>06C7 0674 +0678>064A 0674 +06C0=06D5 0654 +06C2=06C1 0654 +06D3=06D2 0654 +0929=0928 093C +0931=0930 093C +0934=0933 093C +0958>0915 093C +0959>0916 093C +095A>0917 093C +095B>091C 093C +095C>0921 093C +095D>0922 093C +095E>092B 093C +095F>092F 093C +09CB=09C7 09BE +09CC=09C7 09D7 +09DC>09A1 09BC +09DD>09A2 09BC +09DF>09AF 09BC +0A33>0A32 0A3C +0A36>0A38 0A3C +0A59>0A16 0A3C +0A5A>0A17 0A3C +0A5B>0A1C 0A3C +0A5E>0A2B 0A3C +0B48=0B47 0B56 +0B4B=0B47 0B3E +0B4C=0B47 0B57 +0B5C>0B21 0B3C +0B5D>0B22 0B3C +0B94=0B92 0BD7 +0BCA=0BC6 0BBE +0BCB=0BC7 0BBE +0BCC=0BC6 0BD7 +0C48=0C46 0C56 +0CC0=0CBF 0CD5 +0CC7=0CC6 0CD5 +0CC8=0CC6 0CD6 +0CCA=0CC6 0CC2 +0CCB=0CCA 0CD5 +0D4A=0D46 0D3E +0D4B=0D47 0D3E +0D4C=0D46 0D57 +0DDA=0DD9 0DCA +0DDC=0DD9 0DCF +0DDD=0DDC 0DCA +0DDE=0DD9 0DDF +0E33>0E4D 0E32 +0EB3>0ECD 0EB2 +0EDC>0EAB 0E99 +0EDD>0EAB 0EA1 +0F0C>0F0B +0F43>0F42 0FB7 +0F4D>0F4C 0FB7 +0F52>0F51 0FB7 +0F57>0F56 0FB7 +0F5C>0F5B 0FB7 +0F69>0F40 0FB5 +0F73>0F71 0F72 +0F75>0F71 0F74 +0F76>0FB2 0F80 +0F77>0FB2 0F81 +0F78>0FB3 0F80 +0F79>0FB3 0F81 +0F81>0F71 0F80 +0F93>0F92 0FB7 +0F9D>0F9C 0FB7 +0FA2>0FA1 0FB7 +0FA7>0FA6 0FB7 +0FAC>0FAB 0FB7 +0FB9>0F90 0FB5 +1026=1025 102E +10FC>10DC +1B06=1B05 1B35 +1B08=1B07 1B35 +1B0A=1B09 1B35 +1B0C=1B0B 1B35 +1B0E=1B0D 1B35 +1B12=1B11 1B35 +1B3B=1B3A 1B35 +1B3D=1B3C 1B35 +1B40=1B3E 1B35 +1B41=1B3F 1B35 +1B43=1B42 1B35 +1D2C>0041 +1D2D>00C6 +1D2E>0042 +1D30>0044 +1D31>0045 +1D32>018E +1D33>0047 +1D34>0048 +1D35>0049 +1D36>004A +1D37>004B +1D38>004C +1D39>004D +1D3A>004E +1D3C>004F +1D3D>0222 +1D3E>0050 +1D3F>0052 +1D40>0054 
+1D41>0055 +1D42>0057 +1D43>0061 +1D44>0250 +1D45>0251 +1D46>1D02 +1D47>0062 +1D48>0064 +1D49>0065 +1D4A>0259 +1D4B>025B +1D4C>025C +1D4D>0067 +1D4F>006B +1D50>006D +1D51>014B +1D52>006F +1D53>0254 +1D54>1D16 +1D55>1D17 +1D56>0070 +1D57>0074 +1D58>0075 +1D59>1D1D +1D5A>026F +1D5B>0076 +1D5C>1D25 +1D5D>03B2 +1D5E>03B3 +1D5F>03B4 +1D60>03C6 +1D61>03C7 +1D62>0069 +1D63>0072 +1D64>0075 +1D65>0076 +1D66>03B2 +1D67>03B3 +1D68>03C1 +1D69>03C6 +1D6A>03C7 +1D78>043D +1D9B>0252 +1D9C>0063 +1D9D>0255 +1D9E>00F0 +1D9F>025C +1DA0>0066 +1DA1>025F +1DA2>0261 +1DA3>0265 +1DA4>0268 +1DA5>0269 +1DA6>026A +1DA7>1D7B +1DA8>029D +1DA9>026D +1DAA>1D85 +1DAB>029F +1DAC>0271 +1DAD>0270 +1DAE>0272 +1DAF>0273 +1DB0>0274 +1DB1>0275 +1DB2>0278 +1DB3>0282 +1DB4>0283 +1DB5>01AB +1DB6>0289 +1DB7>028A +1DB8>1D1C +1DB9>028B +1DBA>028C +1DBB>007A +1DBC>0290 +1DBD>0291 +1DBE>0292 +1DBF>03B8 +1E00=0041 0325 +1E01=0061 0325 +1E02=0042 0307 +1E03=0062 0307 +1E04=0042 0323 +1E05=0062 0323 +1E06=0042 0331 +1E07=0062 0331 +1E08=00C7 0301 +1E09=00E7 0301 +1E0A=0044 0307 +1E0B=0064 0307 +1E0C=0044 0323 +1E0D=0064 0323 +1E0E=0044 0331 +1E0F=0064 0331 +1E10=0044 0327 +1E11=0064 0327 +1E12=0044 032D +1E13=0064 032D +1E14=0112 0300 +1E15=0113 0300 +1E16=0112 0301 +1E17=0113 0301 +1E18=0045 032D +1E19=0065 032D +1E1A=0045 0330 +1E1B=0065 0330 +1E1C=0228 0306 +1E1D=0229 0306 +1E1E=0046 0307 +1E1F=0066 0307 +1E20=0047 0304 +1E21=0067 0304 +1E22=0048 0307 +1E23=0068 0307 +1E24=0048 0323 +1E25=0068 0323 +1E26=0048 0308 +1E27=0068 0308 +1E28=0048 0327 +1E29=0068 0327 +1E2A=0048 032E +1E2B=0068 032E +1E2C=0049 0330 +1E2D=0069 0330 +1E2E=00CF 0301 +1E2F=00EF 0301 +1E30=004B 0301 +1E31=006B 0301 +1E32=004B 0323 +1E33=006B 0323 +1E34=004B 0331 +1E35=006B 0331 +1E36=004C 0323 +1E37=006C 0323 +1E38=1E36 0304 +1E39=1E37 0304 +1E3A=004C 0331 +1E3B=006C 0331 +1E3C=004C 032D +1E3D=006C 032D +1E3E=004D 0301 +1E3F=006D 0301 +1E40=004D 0307 +1E41=006D 0307 +1E42=004D 0323 +1E43=006D 0323 +1E44=004E 0307 +1E45=006E 0307 +1E46=004E 
0323 +1E47=006E 0323 +1E48=004E 0331 +1E49=006E 0331 +1E4A=004E 032D +1E4B=006E 032D +1E4C=00D5 0301 +1E4D=00F5 0301 +1E4E=00D5 0308 +1E4F=00F5 0308 +1E50=014C 0300 +1E51=014D 0300 +1E52=014C 0301 +1E53=014D 0301 +1E54=0050 0301 +1E55=0070 0301 +1E56=0050 0307 +1E57=0070 0307 +1E58=0052 0307 +1E59=0072 0307 +1E5A=0052 0323 +1E5B=0072 0323 +1E5C=1E5A 0304 +1E5D=1E5B 0304 +1E5E=0052 0331 +1E5F=0072 0331 +1E60=0053 0307 +1E61=0073 0307 +1E62=0053 0323 +1E63=0073 0323 +1E64=015A 0307 +1E65=015B 0307 +1E66=0160 0307 +1E67=0161 0307 +1E68=1E62 0307 +1E69=1E63 0307 +1E6A=0054 0307 +1E6B=0074 0307 +1E6C=0054 0323 +1E6D=0074 0323 +1E6E=0054 0331 +1E6F=0074 0331 +1E70=0054 032D +1E71=0074 032D +1E72=0055 0324 +1E73=0075 0324 +1E74=0055 0330 +1E75=0075 0330 +1E76=0055 032D +1E77=0075 032D +1E78=0168 0301 +1E79=0169 0301 +1E7A=016A 0308 +1E7B=016B 0308 +1E7C=0056 0303 +1E7D=0076 0303 +1E7E=0056 0323 +1E7F=0076 0323 +1E80=0057 0300 +1E81=0077 0300 +1E82=0057 0301 +1E83=0077 0301 +1E84=0057 0308 +1E85=0077 0308 +1E86=0057 0307 +1E87=0077 0307 +1E88=0057 0323 +1E89=0077 0323 +1E8A=0058 0307 +1E8B=0078 0307 +1E8C=0058 0308 +1E8D=0078 0308 +1E8E=0059 0307 +1E8F=0079 0307 +1E90=005A 0302 +1E91=007A 0302 +1E92=005A 0323 +1E93=007A 0323 +1E94=005A 0331 +1E95=007A 0331 +1E96=0068 0331 +1E97=0074 0308 +1E98=0077 030A +1E99=0079 030A +1E9A>0061 02BE +1E9B>017F 0307 +1EA0=0041 0323 +1EA1=0061 0323 +1EA2=0041 0309 +1EA3=0061 0309 +1EA4=00C2 0301 +1EA5=00E2 0301 +1EA6=00C2 0300 +1EA7=00E2 0300 +1EA8=00C2 0309 +1EA9=00E2 0309 +1EAA=00C2 0303 +1EAB=00E2 0303 +1EAC=1EA0 0302 +1EAD=1EA1 0302 +1EAE=0102 0301 +1EAF=0103 0301 +1EB0=0102 0300 +1EB1=0103 0300 +1EB2=0102 0309 +1EB3=0103 0309 +1EB4=0102 0303 +1EB5=0103 0303 +1EB6=1EA0 0306 +1EB7=1EA1 0306 +1EB8=0045 0323 +1EB9=0065 0323 +1EBA=0045 0309 +1EBB=0065 0309 +1EBC=0045 0303 +1EBD=0065 0303 +1EBE=00CA 0301 +1EBF=00EA 0301 +1EC0=00CA 0300 +1EC1=00EA 0300 +1EC2=00CA 0309 +1EC3=00EA 0309 +1EC4=00CA 0303 +1EC5=00EA 0303 +1EC6=1EB8 0302 +1EC7=1EB9 
0302 +1EC8=0049 0309 +1EC9=0069 0309 +1ECA=0049 0323 +1ECB=0069 0323 +1ECC=004F 0323 +1ECD=006F 0323 +1ECE=004F 0309 +1ECF=006F 0309 +1ED0=00D4 0301 +1ED1=00F4 0301 +1ED2=00D4 0300 +1ED3=00F4 0300 +1ED4=00D4 0309 +1ED5=00F4 0309 +1ED6=00D4 0303 +1ED7=00F4 0303 +1ED8=1ECC 0302 +1ED9=1ECD 0302 +1EDA=01A0 0301 +1EDB=01A1 0301 +1EDC=01A0 0300 +1EDD=01A1 0300 +1EDE=01A0 0309 +1EDF=01A1 0309 +1EE0=01A0 0303 +1EE1=01A1 0303 +1EE2=01A0 0323 +1EE3=01A1 0323 +1EE4=0055 0323 +1EE5=0075 0323 +1EE6=0055 0309 +1EE7=0075 0309 +1EE8=01AF 0301 +1EE9=01B0 0301 +1EEA=01AF 0300 +1EEB=01B0 0300 +1EEC=01AF 0309 +1EED=01B0 0309 +1EEE=01AF 0303 +1EEF=01B0 0303 +1EF0=01AF 0323 +1EF1=01B0 0323 +1EF2=0059 0300 +1EF3=0079 0300 +1EF4=0059 0323 +1EF5=0079 0323 +1EF6=0059 0309 +1EF7=0079 0309 +1EF8=0059 0303 +1EF9=0079 0303 +1F00=03B1 0313 +1F01=03B1 0314 +1F02=1F00 0300 +1F03=1F01 0300 +1F04=1F00 0301 +1F05=1F01 0301 +1F06=1F00 0342 +1F07=1F01 0342 +1F08=0391 0313 +1F09=0391 0314 +1F0A=1F08 0300 +1F0B=1F09 0300 +1F0C=1F08 0301 +1F0D=1F09 0301 +1F0E=1F08 0342 +1F0F=1F09 0342 +1F10=03B5 0313 +1F11=03B5 0314 +1F12=1F10 0300 +1F13=1F11 0300 +1F14=1F10 0301 +1F15=1F11 0301 +1F18=0395 0313 +1F19=0395 0314 +1F1A=1F18 0300 +1F1B=1F19 0300 +1F1C=1F18 0301 +1F1D=1F19 0301 +1F20=03B7 0313 +1F21=03B7 0314 +1F22=1F20 0300 +1F23=1F21 0300 +1F24=1F20 0301 +1F25=1F21 0301 +1F26=1F20 0342 +1F27=1F21 0342 +1F28=0397 0313 +1F29=0397 0314 +1F2A=1F28 0300 +1F2B=1F29 0300 +1F2C=1F28 0301 +1F2D=1F29 0301 +1F2E=1F28 0342 +1F2F=1F29 0342 +1F30=03B9 0313 +1F31=03B9 0314 +1F32=1F30 0300 +1F33=1F31 0300 +1F34=1F30 0301 +1F35=1F31 0301 +1F36=1F30 0342 +1F37=1F31 0342 +1F38=0399 0313 +1F39=0399 0314 +1F3A=1F38 0300 +1F3B=1F39 0300 +1F3C=1F38 0301 +1F3D=1F39 0301 +1F3E=1F38 0342 +1F3F=1F39 0342 +1F40=03BF 0313 +1F41=03BF 0314 +1F42=1F40 0300 +1F43=1F41 0300 +1F44=1F40 0301 +1F45=1F41 0301 +1F48=039F 0313 +1F49=039F 0314 +1F4A=1F48 0300 +1F4B=1F49 0300 +1F4C=1F48 0301 +1F4D=1F49 0301 +1F50=03C5 0313 +1F51=03C5 0314 +1F52=1F50 
0300 +1F53=1F51 0300 +1F54=1F50 0301 +1F55=1F51 0301 +1F56=1F50 0342 +1F57=1F51 0342 +1F59=03A5 0314 +1F5B=1F59 0300 +1F5D=1F59 0301 +1F5F=1F59 0342 +1F60=03C9 0313 +1F61=03C9 0314 +1F62=1F60 0300 +1F63=1F61 0300 +1F64=1F60 0301 +1F65=1F61 0301 +1F66=1F60 0342 +1F67=1F61 0342 +1F68=03A9 0313 +1F69=03A9 0314 +1F6A=1F68 0300 +1F6B=1F69 0300 +1F6C=1F68 0301 +1F6D=1F69 0301 +1F6E=1F68 0342 +1F6F=1F69 0342 +1F70=03B1 0300 +1F71>03AC +1F72=03B5 0300 +1F73>03AD +1F74=03B7 0300 +1F75>03AE +1F76=03B9 0300 +1F77>03AF +1F78=03BF 0300 +1F79>03CC +1F7A=03C5 0300 +1F7B>03CD +1F7C=03C9 0300 +1F7D>03CE +1F80=1F00 0345 +1F81=1F01 0345 +1F82=1F02 0345 +1F83=1F03 0345 +1F84=1F04 0345 +1F85=1F05 0345 +1F86=1F06 0345 +1F87=1F07 0345 +1F88=1F08 0345 +1F89=1F09 0345 +1F8A=1F0A 0345 +1F8B=1F0B 0345 +1F8C=1F0C 0345 +1F8D=1F0D 0345 +1F8E=1F0E 0345 +1F8F=1F0F 0345 +1F90=1F20 0345 +1F91=1F21 0345 +1F92=1F22 0345 +1F93=1F23 0345 +1F94=1F24 0345 +1F95=1F25 0345 +1F96=1F26 0345 +1F97=1F27 0345 +1F98=1F28 0345 +1F99=1F29 0345 +1F9A=1F2A 0345 +1F9B=1F2B 0345 +1F9C=1F2C 0345 +1F9D=1F2D 0345 +1F9E=1F2E 0345 +1F9F=1F2F 0345 +1FA0=1F60 0345 +1FA1=1F61 0345 +1FA2=1F62 0345 +1FA3=1F63 0345 +1FA4=1F64 0345 +1FA5=1F65 0345 +1FA6=1F66 0345 +1FA7=1F67 0345 +1FA8=1F68 0345 +1FA9=1F69 0345 +1FAA=1F6A 0345 +1FAB=1F6B 0345 +1FAC=1F6C 0345 +1FAD=1F6D 0345 +1FAE=1F6E 0345 +1FAF=1F6F 0345 +1FB0=03B1 0306 +1FB1=03B1 0304 +1FB2=1F70 0345 +1FB3=03B1 0345 +1FB4=03AC 0345 +1FB6=03B1 0342 +1FB7=1FB6 0345 +1FB8=0391 0306 +1FB9=0391 0304 +1FBA=0391 0300 +1FBB>0386 +1FBC=0391 0345 +1FBD>0020 0313 +1FBE>03B9 +1FBF>0020 0313 +1FC0>0020 0342 +1FC1>00A8 0342 +1FC2=1F74 0345 +1FC3=03B7 0345 +1FC4=03AE 0345 +1FC6=03B7 0342 +1FC7=1FC6 0345 +1FC8=0395 0300 +1FC9>0388 +1FCA=0397 0300 +1FCB>0389 +1FCC=0397 0345 +1FCD>1FBF 0300 +1FCE>1FBF 0301 +1FCF>1FBF 0342 +1FD0=03B9 0306 +1FD1=03B9 0304 +1FD2=03CA 0300 +1FD3>0390 +1FD6=03B9 0342 +1FD7=03CA 0342 +1FD8=0399 0306 +1FD9=0399 0304 +1FDA=0399 0300 +1FDB>038A +1FDD>1FFE 0300 +1FDE>1FFE 
0301 +1FDF>1FFE 0342 +1FE0=03C5 0306 +1FE1=03C5 0304 +1FE2=03CB 0300 +1FE3>03B0 +1FE4=03C1 0313 +1FE5=03C1 0314 +1FE6=03C5 0342 +1FE7=03CB 0342 +1FE8=03A5 0306 +1FE9=03A5 0304 +1FEA=03A5 0300 +1FEB>038E +1FEC=03A1 0314 +1FED>00A8 0300 +1FEE>0385 +1FEF>0060 +1FF2=1F7C 0345 +1FF3=03C9 0345 +1FF4=03CE 0345 +1FF6=03C9 0342 +1FF7=1FF6 0345 +1FF8=039F 0300 +1FF9>038C +1FFA=03A9 0300 +1FFB>038F +1FFC=03A9 0345 +1FFD>00B4 +1FFE>0020 0314 +2000>2002 +2001>2003 +2002>0020 +2003>0020 +2004>0020 +2005>0020 +2006>0020 +2007>0020 +2008>0020 +2009>0020 +200A>0020 +2011>2010 +2017>0020 0333 +2024>002E +2025>002E 002E +2026>002E 002E 002E +202F>0020 +2033>2032 2032 +2034>2032 2032 2032 +2036>2035 2035 +2037>2035 2035 2035 +203C>0021 0021 +203E>0020 0305 +2047>003F 003F +2048>003F 0021 +2049>0021 003F +2057>2032 2032 2032 2032 +205F>0020 +2070>0030 +2071>0069 +2074>0034 +2075>0035 +2076>0036 +2077>0037 +2078>0038 +2079>0039 +207A>002B +207B>2212 +207C>003D +207D>0028 +207E>0029 +207F>006E +2080>0030 +2081>0031 +2082>0032 +2083>0033 +2084>0034 +2085>0035 +2086>0036 +2087>0037 +2088>0038 +2089>0039 +208A>002B +208B>2212 +208C>003D +208D>0028 +208E>0029 +2090>0061 +2091>0065 +2092>006F +2093>0078 +2094>0259 +20A8>0052 0073 +2100>0061 002F 0063 +2101>0061 002F 0073 +2102>0043 +2103>00B0 0043 +2105>0063 002F 006F +2106>0063 002F 0075 +2107>0190 +2109>00B0 0046 +210A>0067 +210B>0048 +210C>0048 +210D>0048 +210E>0068 +210F>0127 +2110>0049 +2111>0049 +2112>004C +2113>006C +2115>004E +2116>004E 006F +2119>0050 +211A>0051 +211B>0052 +211C>0052 +211D>0052 +2120>0053 004D +2121>0054 0045 004C +2122>0054 004D +2124>005A +2126>03A9 +2128>005A +212A>004B +212B>00C5 +212C>0042 +212D>0043 +212F>0065 +2130>0045 +2131>0046 +2133>004D +2134>006F +2135>05D0 +2136>05D1 +2137>05D2 +2138>05D3 +2139>0069 +213B>0046 0041 0058 +213C>03C0 +213D>03B3 +213E>0393 +213F>03A0 +2140>2211 +2145>0044 +2146>0064 +2147>0065 +2148>0069 +2149>006A +2150>0031 2044 0037 +2151>0031 2044 0039 +2152>0031 2044 0031 0030 
+2153>0031 2044 0033 +2154>0032 2044 0033 +2155>0031 2044 0035 +2156>0032 2044 0035 +2157>0033 2044 0035 +2158>0034 2044 0035 +2159>0031 2044 0036 +215A>0035 2044 0036 +215B>0031 2044 0038 +215C>0033 2044 0038 +215D>0035 2044 0038 +215E>0037 2044 0038 +215F>0031 2044 +2160>0049 +2161>0049 0049 +2162>0049 0049 0049 +2163>0049 0056 +2164>0056 +2165>0056 0049 +2166>0056 0049 0049 +2167>0056 0049 0049 0049 +2168>0049 0058 +2169>0058 +216A>0058 0049 +216B>0058 0049 0049 +216C>004C +216D>0043 +216E>0044 +216F>004D +2170>0069 +2171>0069 0069 +2172>0069 0069 0069 +2173>0069 0076 +2174>0076 +2175>0076 0069 +2176>0076 0069 0069 +2177>0076 0069 0069 0069 +2178>0069 0078 +2179>0078 +217A>0078 0069 +217B>0078 0069 0069 +217C>006C +217D>0063 +217E>0064 +217F>006D +2189>0030 2044 0033 +219A=2190 0338 +219B=2192 0338 +21AE=2194 0338 +21CD=21D0 0338 +21CE=21D4 0338 +21CF=21D2 0338 +2204=2203 0338 +2209=2208 0338 +220C=220B 0338 +2224=2223 0338 +2226=2225 0338 +222C>222B 222B +222D>222B 222B 222B +222F>222E 222E +2230>222E 222E 222E +2241=223C 0338 +2244=2243 0338 +2247=2245 0338 +2249=2248 0338 +2260=003D 0338 +2262=2261 0338 +226D=224D 0338 +226E=003C 0338 +226F=003E 0338 +2270=2264 0338 +2271=2265 0338 +2274=2272 0338 +2275=2273 0338 +2278=2276 0338 +2279=2277 0338 +2280=227A 0338 +2281=227B 0338 +2284=2282 0338 +2285=2283 0338 +2288=2286 0338 +2289=2287 0338 +22AC=22A2 0338 +22AD=22A8 0338 +22AE=22A9 0338 +22AF=22AB 0338 +22E0=227C 0338 +22E1=227D 0338 +22E2=2291 0338 +22E3=2292 0338 +22EA=22B2 0338 +22EB=22B3 0338 +22EC=22B4 0338 +22ED=22B5 0338 +2329>3008 +232A>3009 +2460>0031 +2461>0032 +2462>0033 +2463>0034 +2464>0035 +2465>0036 +2466>0037 +2467>0038 +2468>0039 +2469>0031 0030 +246A>0031 0031 +246B>0031 0032 +246C>0031 0033 +246D>0031 0034 +246E>0031 0035 +246F>0031 0036 +2470>0031 0037 +2471>0031 0038 +2472>0031 0039 +2473>0032 0030 +2474>0028 0031 0029 +2475>0028 0032 0029 +2476>0028 0033 0029 +2477>0028 0034 0029 +2478>0028 0035 0029 +2479>0028 0036 0029 +247A>0028 0037 
0029 +247B>0028 0038 0029 +247C>0028 0039 0029 +247D>0028 0031 0030 0029 +247E>0028 0031 0031 0029 +247F>0028 0031 0032 0029 +2480>0028 0031 0033 0029 +2481>0028 0031 0034 0029 +2482>0028 0031 0035 0029 +2483>0028 0031 0036 0029 +2484>0028 0031 0037 0029 +2485>0028 0031 0038 0029 +2486>0028 0031 0039 0029 +2487>0028 0032 0030 0029 +2488>0031 002E +2489>0032 002E +248A>0033 002E +248B>0034 002E +248C>0035 002E +248D>0036 002E +248E>0037 002E +248F>0038 002E +2490>0039 002E +2491>0031 0030 002E +2492>0031 0031 002E +2493>0031 0032 002E +2494>0031 0033 002E +2495>0031 0034 002E +2496>0031 0035 002E +2497>0031 0036 002E +2498>0031 0037 002E +2499>0031 0038 002E +249A>0031 0039 002E +249B>0032 0030 002E +249C>0028 0061 0029 +249D>0028 0062 0029 +249E>0028 0063 0029 +249F>0028 0064 0029 +24A0>0028 0065 0029 +24A1>0028 0066 0029 +24A2>0028 0067 0029 +24A3>0028 0068 0029 +24A4>0028 0069 0029 +24A5>0028 006A 0029 +24A6>0028 006B 0029 +24A7>0028 006C 0029 +24A8>0028 006D 0029 +24A9>0028 006E 0029 +24AA>0028 006F 0029 +24AB>0028 0070 0029 +24AC>0028 0071 0029 +24AD>0028 0072 0029 +24AE>0028 0073 0029 +24AF>0028 0074 0029 +24B0>0028 0075 0029 +24B1>0028 0076 0029 +24B2>0028 0077 0029 +24B3>0028 0078 0029 +24B4>0028 0079 0029 +24B5>0028 007A 0029 +24B6>0041 +24B7>0042 +24B8>0043 +24B9>0044 +24BA>0045 +24BB>0046 +24BC>0047 +24BD>0048 +24BE>0049 +24BF>004A +24C0>004B +24C1>004C +24C2>004D +24C3>004E +24C4>004F +24C5>0050 +24C6>0051 +24C7>0052 +24C8>0053 +24C9>0054 +24CA>0055 +24CB>0056 +24CC>0057 +24CD>0058 +24CE>0059 +24CF>005A +24D0>0061 +24D1>0062 +24D2>0063 +24D3>0064 +24D4>0065 +24D5>0066 +24D6>0067 +24D7>0068 +24D8>0069 +24D9>006A +24DA>006B +24DB>006C +24DC>006D +24DD>006E +24DE>006F +24DF>0070 +24E0>0071 +24E1>0072 +24E2>0073 +24E3>0074 +24E4>0075 +24E5>0076 +24E6>0077 +24E7>0078 +24E8>0079 +24E9>007A +24EA>0030 +2A0C>222B 222B 222B 222B +2A74>003A 003A 003D +2A75>003D 003D +2A76>003D 003D 003D +2ADC>2ADD 0338 +2C7C>006A +2C7D>0056 +2D6F>2D61 +2E9F>6BCD +2EF3>9F9F 
+2F00>4E00 +2F01>4E28 +2F02>4E36 +2F03>4E3F +2F04>4E59 +2F05>4E85 +2F06>4E8C +2F07>4EA0 +2F08>4EBA +2F09>513F +2F0A>5165 +2F0B>516B +2F0C>5182 +2F0D>5196 +2F0E>51AB +2F0F>51E0 +2F10>51F5 +2F11>5200 +2F12>529B +2F13>52F9 +2F14>5315 +2F15>531A +2F16>5338 +2F17>5341 +2F18>535C +2F19>5369 +2F1A>5382 +2F1B>53B6 +2F1C>53C8 +2F1D>53E3 +2F1E>56D7 +2F1F>571F +2F20>58EB +2F21>5902 +2F22>590A +2F23>5915 +2F24>5927 +2F25>5973 +2F26>5B50 +2F27>5B80 +2F28>5BF8 +2F29>5C0F +2F2A>5C22 +2F2B>5C38 +2F2C>5C6E +2F2D>5C71 +2F2E>5DDB +2F2F>5DE5 +2F30>5DF1 +2F31>5DFE +2F32>5E72 +2F33>5E7A +2F34>5E7F +2F35>5EF4 +2F36>5EFE +2F37>5F0B +2F38>5F13 +2F39>5F50 +2F3A>5F61 +2F3B>5F73 +2F3C>5FC3 +2F3D>6208 +2F3E>6236 +2F3F>624B +2F40>652F +2F41>6534 +2F42>6587 +2F43>6597 +2F44>65A4 +2F45>65B9 +2F46>65E0 +2F47>65E5 +2F48>66F0 +2F49>6708 +2F4A>6728 +2F4B>6B20 +2F4C>6B62 +2F4D>6B79 +2F4E>6BB3 +2F4F>6BCB +2F50>6BD4 +2F51>6BDB +2F52>6C0F +2F53>6C14 +2F54>6C34 +2F55>706B +2F56>722A +2F57>7236 +2F58>723B +2F59>723F +2F5A>7247 +2F5B>7259 +2F5C>725B +2F5D>72AC +2F5E>7384 +2F5F>7389 +2F60>74DC +2F61>74E6 +2F62>7518 +2F63>751F +2F64>7528 +2F65>7530 +2F66>758B +2F67>7592 +2F68>7676 +2F69>767D +2F6A>76AE +2F6B>76BF +2F6C>76EE +2F6D>77DB +2F6E>77E2 +2F6F>77F3 +2F70>793A +2F71>79B8 +2F72>79BE +2F73>7A74 +2F74>7ACB +2F75>7AF9 +2F76>7C73 +2F77>7CF8 +2F78>7F36 +2F79>7F51 +2F7A>7F8A +2F7B>7FBD +2F7C>8001 +2F7D>800C +2F7E>8012 +2F7F>8033 +2F80>807F +2F81>8089 +2F82>81E3 +2F83>81EA +2F84>81F3 +2F85>81FC +2F86>820C +2F87>821B +2F88>821F +2F89>826E +2F8A>8272 +2F8B>8278 +2F8C>864D +2F8D>866B +2F8E>8840 +2F8F>884C +2F90>8863 +2F91>897E +2F92>898B +2F93>89D2 +2F94>8A00 +2F95>8C37 +2F96>8C46 +2F97>8C55 +2F98>8C78 +2F99>8C9D +2F9A>8D64 +2F9B>8D70 +2F9C>8DB3 +2F9D>8EAB +2F9E>8ECA +2F9F>8F9B +2FA0>8FB0 +2FA1>8FB5 +2FA2>9091 +2FA3>9149 +2FA4>91C6 +2FA5>91CC +2FA6>91D1 +2FA7>9577 +2FA8>9580 +2FA9>961C +2FAA>96B6 +2FAB>96B9 +2FAC>96E8 +2FAD>9751 +2FAE>975E +2FAF>9762 +2FB0>9769 +2FB1>97CB +2FB2>97ED +2FB3>97F3 +2FB4>9801 
+2FB5>98A8 +2FB6>98DB +2FB7>98DF +2FB8>9996 +2FB9>9999 +2FBA>99AC +2FBB>9AA8 +2FBC>9AD8 +2FBD>9ADF +2FBE>9B25 +2FBF>9B2F +2FC0>9B32 +2FC1>9B3C +2FC2>9B5A +2FC3>9CE5 +2FC4>9E75 +2FC5>9E7F +2FC6>9EA5 +2FC7>9EBB +2FC8>9EC3 +2FC9>9ECD +2FCA>9ED1 +2FCB>9EF9 +2FCC>9EFD +2FCD>9F0E +2FCE>9F13 +2FCF>9F20 +2FD0>9F3B +2FD1>9F4A +2FD2>9F52 +2FD3>9F8D +2FD4>9F9C +2FD5>9FA0 +3000>0020 +3036>3012 +3038>5341 +3039>5344 +303A>5345 +304C=304B 3099 +304E=304D 3099 +3050=304F 3099 +3052=3051 3099 +3054=3053 3099 +3056=3055 3099 +3058=3057 3099 +305A=3059 3099 +305C=305B 3099 +305E=305D 3099 +3060=305F 3099 +3062=3061 3099 +3065=3064 3099 +3067=3066 3099 +3069=3068 3099 +3070=306F 3099 +3071=306F 309A +3073=3072 3099 +3074=3072 309A +3076=3075 3099 +3077=3075 309A +3079=3078 3099 +307A=3078 309A +307C=307B 3099 +307D=307B 309A +3094=3046 3099 +309B>0020 3099 +309C>0020 309A +309E=309D 3099 +309F>3088 308A +30AC=30AB 3099 +30AE=30AD 3099 +30B0=30AF 3099 +30B2=30B1 3099 +30B4=30B3 3099 +30B6=30B5 3099 +30B8=30B7 3099 +30BA=30B9 3099 +30BC=30BB 3099 +30BE=30BD 3099 +30C0=30BF 3099 +30C2=30C1 3099 +30C5=30C4 3099 +30C7=30C6 3099 +30C9=30C8 3099 +30D0=30CF 3099 +30D1=30CF 309A +30D3=30D2 3099 +30D4=30D2 309A +30D6=30D5 3099 +30D7=30D5 309A +30D9=30D8 3099 +30DA=30D8 309A +30DC=30DB 3099 +30DD=30DB 309A +30F4=30A6 3099 +30F7=30EF 3099 +30F8=30F0 3099 +30F9=30F1 3099 +30FA=30F2 3099 +30FE=30FD 3099 +30FF>30B3 30C8 +3131>1100 +3132>1101 +3133>11AA +3134>1102 +3135>11AC +3136>11AD +3137>1103 +3138>1104 +3139>1105 +313A>11B0 +313B>11B1 +313C>11B2 +313D>11B3 +313E>11B4 +313F>11B5 +3140>111A +3141>1106 +3142>1107 +3143>1108 +3144>1121 +3145>1109 +3146>110A +3147>110B +3148>110C +3149>110D +314A>110E +314B>110F +314C>1110 +314D>1111 +314E>1112 +314F>1161 +3150>1162 +3151>1163 +3152>1164 +3153>1165 +3154>1166 +3155>1167 +3156>1168 +3157>1169 +3158>116A +3159>116B +315A>116C +315B>116D +315C>116E +315D>116F +315E>1170 +315F>1171 +3160>1172 +3161>1173 +3162>1174 +3163>1175 +3164>1160 +3165>1114 
+3166>1115 +3167>11C7 +3168>11C8 +3169>11CC +316A>11CE +316B>11D3 +316C>11D7 +316D>11D9 +316E>111C +316F>11DD +3170>11DF +3171>111D +3172>111E +3173>1120 +3174>1122 +3175>1123 +3176>1127 +3177>1129 +3178>112B +3179>112C +317A>112D +317B>112E +317C>112F +317D>1132 +317E>1136 +317F>1140 +3180>1147 +3181>114C +3182>11F1 +3183>11F2 +3184>1157 +3185>1158 +3186>1159 +3187>1184 +3188>1185 +3189>1188 +318A>1191 +318B>1192 +318C>1194 +318D>119E +318E>11A1 +3192>4E00 +3193>4E8C +3194>4E09 +3195>56DB +3196>4E0A +3197>4E2D +3198>4E0B +3199>7532 +319A>4E59 +319B>4E19 +319C>4E01 +319D>5929 +319E>5730 +319F>4EBA +3200>0028 1100 0029 +3201>0028 1102 0029 +3202>0028 1103 0029 +3203>0028 1105 0029 +3204>0028 1106 0029 +3205>0028 1107 0029 +3206>0028 1109 0029 +3207>0028 110B 0029 +3208>0028 110C 0029 +3209>0028 110E 0029 +320A>0028 110F 0029 +320B>0028 1110 0029 +320C>0028 1111 0029 +320D>0028 1112 0029 +320E>0028 1100 1161 0029 +320F>0028 1102 1161 0029 +3210>0028 1103 1161 0029 +3211>0028 1105 1161 0029 +3212>0028 1106 1161 0029 +3213>0028 1107 1161 0029 +3214>0028 1109 1161 0029 +3215>0028 110B 1161 0029 +3216>0028 110C 1161 0029 +3217>0028 110E 1161 0029 +3218>0028 110F 1161 0029 +3219>0028 1110 1161 0029 +321A>0028 1111 1161 0029 +321B>0028 1112 1161 0029 +321C>0028 110C 116E 0029 +321D>0028 110B 1169 110C 1165 11AB 0029 +321E>0028 110B 1169 1112 116E 0029 +3220>0028 4E00 0029 +3221>0028 4E8C 0029 +3222>0028 4E09 0029 +3223>0028 56DB 0029 +3224>0028 4E94 0029 +3225>0028 516D 0029 +3226>0028 4E03 0029 +3227>0028 516B 0029 +3228>0028 4E5D 0029 +3229>0028 5341 0029 +322A>0028 6708 0029 +322B>0028 706B 0029 +322C>0028 6C34 0029 +322D>0028 6728 0029 +322E>0028 91D1 0029 +322F>0028 571F 0029 +3230>0028 65E5 0029 +3231>0028 682A 0029 +3232>0028 6709 0029 +3233>0028 793E 0029 +3234>0028 540D 0029 +3235>0028 7279 0029 +3236>0028 8CA1 0029 +3237>0028 795D 0029 +3238>0028 52B4 0029 +3239>0028 4EE3 0029 +323A>0028 547C 0029 +323B>0028 5B66 0029 +323C>0028 76E3 0029 +323D>0028 4F01 0029 
+323E>0028 8CC7 0029 +323F>0028 5354 0029 +3240>0028 796D 0029 +3241>0028 4F11 0029 +3242>0028 81EA 0029 +3243>0028 81F3 0029 +3244>554F +3245>5E7C +3246>6587 +3247>7B8F +3250>0050 0054 0045 +3251>0032 0031 +3252>0032 0032 +3253>0032 0033 +3254>0032 0034 +3255>0032 0035 +3256>0032 0036 +3257>0032 0037 +3258>0032 0038 +3259>0032 0039 +325A>0033 0030 +325B>0033 0031 +325C>0033 0032 +325D>0033 0033 +325E>0033 0034 +325F>0033 0035 +3260>1100 +3261>1102 +3262>1103 +3263>1105 +3264>1106 +3265>1107 +3266>1109 +3267>110B +3268>110C +3269>110E +326A>110F +326B>1110 +326C>1111 +326D>1112 +326E>1100 1161 +326F>1102 1161 +3270>1103 1161 +3271>1105 1161 +3272>1106 1161 +3273>1107 1161 +3274>1109 1161 +3275>110B 1161 +3276>110C 1161 +3277>110E 1161 +3278>110F 1161 +3279>1110 1161 +327A>1111 1161 +327B>1112 1161 +327C>110E 1161 11B7 1100 1169 +327D>110C 116E 110B 1174 +327E>110B 116E +3280>4E00 +3281>4E8C +3282>4E09 +3283>56DB +3284>4E94 +3285>516D +3286>4E03 +3287>516B +3288>4E5D +3289>5341 +328A>6708 +328B>706B +328C>6C34 +328D>6728 +328E>91D1 +328F>571F +3290>65E5 +3291>682A +3292>6709 +3293>793E +3294>540D +3295>7279 +3296>8CA1 +3297>795D +3298>52B4 +3299>79D8 +329A>7537 +329B>5973 +329C>9069 +329D>512A +329E>5370 +329F>6CE8 +32A0>9805 +32A1>4F11 +32A2>5199 +32A3>6B63 +32A4>4E0A +32A5>4E2D +32A6>4E0B +32A7>5DE6 +32A8>53F3 +32A9>533B +32AA>5B97 +32AB>5B66 +32AC>76E3 +32AD>4F01 +32AE>8CC7 +32AF>5354 +32B0>591C +32B1>0033 0036 +32B2>0033 0037 +32B3>0033 0038 +32B4>0033 0039 +32B5>0034 0030 +32B6>0034 0031 +32B7>0034 0032 +32B8>0034 0033 +32B9>0034 0034 +32BA>0034 0035 +32BB>0034 0036 +32BC>0034 0037 +32BD>0034 0038 +32BE>0034 0039 +32BF>0035 0030 +32C0>0031 6708 +32C1>0032 6708 +32C2>0033 6708 +32C3>0034 6708 +32C4>0035 6708 +32C5>0036 6708 +32C6>0037 6708 +32C7>0038 6708 +32C8>0039 6708 +32C9>0031 0030 6708 +32CA>0031 0031 6708 +32CB>0031 0032 6708 +32CC>0048 0067 +32CD>0065 0072 0067 +32CE>0065 0056 +32CF>004C 0054 0044 +32D0>30A2 +32D1>30A4 +32D2>30A6 +32D3>30A8 +32D4>30AA 
+32D5>30AB +32D6>30AD +32D7>30AF +32D8>30B1 +32D9>30B3 +32DA>30B5 +32DB>30B7 +32DC>30B9 +32DD>30BB +32DE>30BD +32DF>30BF +32E0>30C1 +32E1>30C4 +32E2>30C6 +32E3>30C8 +32E4>30CA +32E5>30CB +32E6>30CC +32E7>30CD +32E8>30CE +32E9>30CF +32EA>30D2 +32EB>30D5 +32EC>30D8 +32ED>30DB +32EE>30DE +32EF>30DF +32F0>30E0 +32F1>30E1 +32F2>30E2 +32F3>30E4 +32F4>30E6 +32F5>30E8 +32F6>30E9 +32F7>30EA +32F8>30EB +32F9>30EC +32FA>30ED +32FB>30EF +32FC>30F0 +32FD>30F1 +32FE>30F2 +3300>30A2 30D1 30FC 30C8 +3301>30A2 30EB 30D5 30A1 +3302>30A2 30F3 30DA 30A2 +3303>30A2 30FC 30EB +3304>30A4 30CB 30F3 30B0 +3305>30A4 30F3 30C1 +3306>30A6 30A9 30F3 +3307>30A8 30B9 30AF 30FC 30C9 +3308>30A8 30FC 30AB 30FC +3309>30AA 30F3 30B9 +330A>30AA 30FC 30E0 +330B>30AB 30A4 30EA +330C>30AB 30E9 30C3 30C8 +330D>30AB 30ED 30EA 30FC +330E>30AC 30ED 30F3 +330F>30AC 30F3 30DE +3310>30AE 30AC +3311>30AE 30CB 30FC +3312>30AD 30E5 30EA 30FC +3313>30AE 30EB 30C0 30FC +3314>30AD 30ED +3315>30AD 30ED 30B0 30E9 30E0 +3316>30AD 30ED 30E1 30FC 30C8 30EB +3317>30AD 30ED 30EF 30C3 30C8 +3318>30B0 30E9 30E0 +3319>30B0 30E9 30E0 30C8 30F3 +331A>30AF 30EB 30BC 30A4 30ED +331B>30AF 30ED 30FC 30CD +331C>30B1 30FC 30B9 +331D>30B3 30EB 30CA +331E>30B3 30FC 30DD +331F>30B5 30A4 30AF 30EB +3320>30B5 30F3 30C1 30FC 30E0 +3321>30B7 30EA 30F3 30B0 +3322>30BB 30F3 30C1 +3323>30BB 30F3 30C8 +3324>30C0 30FC 30B9 +3325>30C7 30B7 +3326>30C9 30EB +3327>30C8 30F3 +3328>30CA 30CE +3329>30CE 30C3 30C8 +332A>30CF 30A4 30C4 +332B>30D1 30FC 30BB 30F3 30C8 +332C>30D1 30FC 30C4 +332D>30D0 30FC 30EC 30EB +332E>30D4 30A2 30B9 30C8 30EB +332F>30D4 30AF 30EB +3330>30D4 30B3 +3331>30D3 30EB +3332>30D5 30A1 30E9 30C3 30C9 +3333>30D5 30A3 30FC 30C8 +3334>30D6 30C3 30B7 30A7 30EB +3335>30D5 30E9 30F3 +3336>30D8 30AF 30BF 30FC 30EB +3337>30DA 30BD +3338>30DA 30CB 30D2 +3339>30D8 30EB 30C4 +333A>30DA 30F3 30B9 +333B>30DA 30FC 30B8 +333C>30D9 30FC 30BF +333D>30DD 30A4 30F3 30C8 +333E>30DC 30EB 30C8 +333F>30DB 30F3 +3340>30DD 30F3 30C9 +3341>30DB 30FC 30EB 
+3342>30DB 30FC 30F3 +3343>30DE 30A4 30AF 30ED +3344>30DE 30A4 30EB +3345>30DE 30C3 30CF +3346>30DE 30EB 30AF +3347>30DE 30F3 30B7 30E7 30F3 +3348>30DF 30AF 30ED 30F3 +3349>30DF 30EA +334A>30DF 30EA 30D0 30FC 30EB +334B>30E1 30AC +334C>30E1 30AC 30C8 30F3 +334D>30E1 30FC 30C8 30EB +334E>30E4 30FC 30C9 +334F>30E4 30FC 30EB +3350>30E6 30A2 30F3 +3351>30EA 30C3 30C8 30EB +3352>30EA 30E9 +3353>30EB 30D4 30FC +3354>30EB 30FC 30D6 30EB +3355>30EC 30E0 +3356>30EC 30F3 30C8 30B2 30F3 +3357>30EF 30C3 30C8 +3358>0030 70B9 +3359>0031 70B9 +335A>0032 70B9 +335B>0033 70B9 +335C>0034 70B9 +335D>0035 70B9 +335E>0036 70B9 +335F>0037 70B9 +3360>0038 70B9 +3361>0039 70B9 +3362>0031 0030 70B9 +3363>0031 0031 70B9 +3364>0031 0032 70B9 +3365>0031 0033 70B9 +3366>0031 0034 70B9 +3367>0031 0035 70B9 +3368>0031 0036 70B9 +3369>0031 0037 70B9 +336A>0031 0038 70B9 +336B>0031 0039 70B9 +336C>0032 0030 70B9 +336D>0032 0031 70B9 +336E>0032 0032 70B9 +336F>0032 0033 70B9 +3370>0032 0034 70B9 +3371>0068 0050 0061 +3372>0064 0061 +3373>0041 0055 +3374>0062 0061 0072 +3375>006F 0056 +3376>0070 0063 +3377>0064 006D +3378>0064 006D 00B2 +3379>0064 006D 00B3 +337A>0049 0055 +337B>5E73 6210 +337C>662D 548C +337D>5927 6B63 +337E>660E 6CBB +337F>682A 5F0F 4F1A 793E +3380>0070 0041 +3381>006E 0041 +3382>03BC 0041 +3383>006D 0041 +3384>006B 0041 +3385>004B 0042 +3386>004D 0042 +3387>0047 0042 +3388>0063 0061 006C +3389>006B 0063 0061 006C +338A>0070 0046 +338B>006E 0046 +338C>03BC 0046 +338D>03BC 0067 +338E>006D 0067 +338F>006B 0067 +3390>0048 007A +3391>006B 0048 007A +3392>004D 0048 007A +3393>0047 0048 007A +3394>0054 0048 007A +3395>03BC 2113 +3396>006D 2113 +3397>0064 2113 +3398>006B 2113 +3399>0066 006D +339A>006E 006D +339B>03BC 006D +339C>006D 006D +339D>0063 006D +339E>006B 006D +339F>006D 006D 00B2 +33A0>0063 006D 00B2 +33A1>006D 00B2 +33A2>006B 006D 00B2 +33A3>006D 006D 00B3 +33A4>0063 006D 00B3 +33A5>006D 00B3 +33A6>006B 006D 00B3 +33A7>006D 2215 0073 +33A8>006D 2215 0073 00B2 +33A9>0050 0061 
+33AA>006B 0050 0061 +33AB>004D 0050 0061 +33AC>0047 0050 0061 +33AD>0072 0061 0064 +33AE>0072 0061 0064 2215 0073 +33AF>0072 0061 0064 2215 0073 00B2 +33B0>0070 0073 +33B1>006E 0073 +33B2>03BC 0073 +33B3>006D 0073 +33B4>0070 0056 +33B5>006E 0056 +33B6>03BC 0056 +33B7>006D 0056 +33B8>006B 0056 +33B9>004D 0056 +33BA>0070 0057 +33BB>006E 0057 +33BC>03BC 0057 +33BD>006D 0057 +33BE>006B 0057 +33BF>004D 0057 +33C0>006B 03A9 +33C1>004D 03A9 +33C2>0061 002E 006D 002E +33C3>0042 0071 +33C4>0063 0063 +33C5>0063 0064 +33C6>0043 2215 006B 0067 +33C7>0043 006F 002E +33C8>0064 0042 +33C9>0047 0079 +33CA>0068 0061 +33CB>0048 0050 +33CC>0069 006E +33CD>004B 004B +33CE>004B 004D +33CF>006B 0074 +33D0>006C 006D +33D1>006C 006E +33D2>006C 006F 0067 +33D3>006C 0078 +33D4>006D 0062 +33D5>006D 0069 006C +33D6>006D 006F 006C +33D7>0050 0048 +33D8>0070 002E 006D 002E +33D9>0050 0050 004D +33DA>0050 0052 +33DB>0073 0072 +33DC>0053 0076 +33DD>0057 0062 +33DE>0056 2215 006D +33DF>0041 2215 006D +33E0>0031 65E5 +33E1>0032 65E5 +33E2>0033 65E5 +33E3>0034 65E5 +33E4>0035 65E5 +33E5>0036 65E5 +33E6>0037 65E5 +33E7>0038 65E5 +33E8>0039 65E5 +33E9>0031 0030 65E5 +33EA>0031 0031 65E5 +33EB>0031 0032 65E5 +33EC>0031 0033 65E5 +33ED>0031 0034 65E5 +33EE>0031 0035 65E5 +33EF>0031 0036 65E5 +33F0>0031 0037 65E5 +33F1>0031 0038 65E5 +33F2>0031 0039 65E5 +33F3>0032 0030 65E5 +33F4>0032 0031 65E5 +33F5>0032 0032 65E5 +33F6>0032 0033 65E5 +33F7>0032 0034 65E5 +33F8>0032 0035 65E5 +33F9>0032 0036 65E5 +33FA>0032 0037 65E5 +33FB>0032 0038 65E5 +33FC>0032 0039 65E5 +33FD>0033 0030 65E5 +33FE>0033 0031 65E5 +33FF>0067 0061 006C +A770>A76F +F900>8C48 +F901>66F4 +F902>8ECA +F903>8CC8 +F904>6ED1 +F905>4E32 +F906>53E5 +F907>9F9C +F908>9F9C +F909>5951 +F90A>91D1 +F90B>5587 +F90C>5948 +F90D>61F6 +F90E>7669 +F90F>7F85 +F910>863F +F911>87BA +F912>88F8 +F913>908F +F914>6A02 +F915>6D1B +F916>70D9 +F917>73DE +F918>843D +F919>916A +F91A>99F1 +F91B>4E82 +F91C>5375 +F91D>6B04 +F91E>721B +F91F>862D +F920>9E1E +F921>5D50 
+F922>6FEB +F923>85CD +F924>8964 +F925>62C9 +F926>81D8 +F927>881F +F928>5ECA +F929>6717 +F92A>6D6A +F92B>72FC +F92C>90CE +F92D>4F86 +F92E>51B7 +F92F>52DE +F930>64C4 +F931>6AD3 +F932>7210 +F933>76E7 +F934>8001 +F935>8606 +F936>865C +F937>8DEF +F938>9732 +F939>9B6F +F93A>9DFA +F93B>788C +F93C>797F +F93D>7DA0 +F93E>83C9 +F93F>9304 +F940>9E7F +F941>8AD6 +F942>58DF +F943>5F04 +F944>7C60 +F945>807E +F946>7262 +F947>78CA +F948>8CC2 +F949>96F7 +F94A>58D8 +F94B>5C62 +F94C>6A13 +F94D>6DDA +F94E>6F0F +F94F>7D2F +F950>7E37 +F951>964B +F952>52D2 +F953>808B +F954>51DC +F955>51CC +F956>7A1C +F957>7DBE +F958>83F1 +F959>9675 +F95A>8B80 +F95B>62CF +F95C>6A02 +F95D>8AFE +F95E>4E39 +F95F>5BE7 +F960>6012 +F961>7387 +F962>7570 +F963>5317 +F964>78FB +F965>4FBF +F966>5FA9 +F967>4E0D +F968>6CCC +F969>6578 +F96A>7D22 +F96B>53C3 +F96C>585E +F96D>7701 +F96E>8449 +F96F>8AAA +F970>6BBA +F971>8FB0 +F972>6C88 +F973>62FE +F974>82E5 +F975>63A0 +F976>7565 +F977>4EAE +F978>5169 +F979>51C9 +F97A>6881 +F97B>7CE7 +F97C>826F +F97D>8AD2 +F97E>91CF +F97F>52F5 +F980>5442 +F981>5973 +F982>5EEC +F983>65C5 +F984>6FFE +F985>792A +F986>95AD +F987>9A6A +F988>9E97 +F989>9ECE +F98A>529B +F98B>66C6 +F98C>6B77 +F98D>8F62 +F98E>5E74 +F98F>6190 +F990>6200 +F991>649A +F992>6F23 +F993>7149 +F994>7489 +F995>79CA +F996>7DF4 +F997>806F +F998>8F26 +F999>84EE +F99A>9023 +F99B>934A +F99C>5217 +F99D>52A3 +F99E>54BD +F99F>70C8 +F9A0>88C2 +F9A1>8AAA +F9A2>5EC9 +F9A3>5FF5 +F9A4>637B +F9A5>6BAE +F9A6>7C3E +F9A7>7375 +F9A8>4EE4 +F9A9>56F9 +F9AA>5BE7 +F9AB>5DBA +F9AC>601C +F9AD>73B2 +F9AE>7469 +F9AF>7F9A +F9B0>8046 +F9B1>9234 +F9B2>96F6 +F9B3>9748 +F9B4>9818 +F9B5>4F8B +F9B6>79AE +F9B7>91B4 +F9B8>96B8 +F9B9>60E1 +F9BA>4E86 +F9BB>50DA +F9BC>5BEE +F9BD>5C3F +F9BE>6599 +F9BF>6A02 +F9C0>71CE +F9C1>7642 +F9C2>84FC +F9C3>907C +F9C4>9F8D +F9C5>6688 +F9C6>962E +F9C7>5289 +F9C8>677B +F9C9>67F3 +F9CA>6D41 +F9CB>6E9C +F9CC>7409 +F9CD>7559 +F9CE>786B +F9CF>7D10 +F9D0>985E +F9D1>516D +F9D2>622E +F9D3>9678 +F9D4>502B +F9D5>5D19 +F9D6>6DEA 
+F9D7>8F2A +F9D8>5F8B +F9D9>6144 +F9DA>6817 +F9DB>7387 +F9DC>9686 +F9DD>5229 +F9DE>540F +F9DF>5C65 +F9E0>6613 +F9E1>674E +F9E2>68A8 +F9E3>6CE5 +F9E4>7406 +F9E5>75E2 +F9E6>7F79 +F9E7>88CF +F9E8>88E1 +F9E9>91CC +F9EA>96E2 +F9EB>533F +F9EC>6EBA +F9ED>541D +F9EE>71D0 +F9EF>7498 +F9F0>85FA +F9F1>96A3 +F9F2>9C57 +F9F3>9E9F +F9F4>6797 +F9F5>6DCB +F9F6>81E8 +F9F7>7ACB +F9F8>7B20 +F9F9>7C92 +F9FA>72C0 +F9FB>7099 +F9FC>8B58 +F9FD>4EC0 +F9FE>8336 +F9FF>523A +FA00>5207 +FA01>5EA6 +FA02>62D3 +FA03>7CD6 +FA04>5B85 +FA05>6D1E +FA06>66B4 +FA07>8F3B +FA08>884C +FA09>964D +FA0A>898B +FA0B>5ED3 +FA0C>5140 +FA0D>55C0 +FA10>585A +FA12>6674 +FA15>51DE +FA16>732A +FA17>76CA +FA18>793C +FA19>795E +FA1A>7965 +FA1B>798F +FA1C>9756 +FA1D>7CBE +FA1E>7FBD +FA20>8612 +FA22>8AF8 +FA25>9038 +FA26>90FD +FA2A>98EF +FA2B>98FC +FA2C>9928 +FA2D>9DB4 +FA30>4FAE +FA31>50E7 +FA32>514D +FA33>52C9 +FA34>52E4 +FA35>5351 +FA36>559D +FA37>5606 +FA38>5668 +FA39>5840 +FA3A>58A8 +FA3B>5C64 +FA3C>5C6E +FA3D>6094 +FA3E>6168 +FA3F>618E +FA40>61F2 +FA41>654F +FA42>65E2 +FA43>6691 +FA44>6885 +FA45>6D77 +FA46>6E1A +FA47>6F22 +FA48>716E +FA49>722B +FA4A>7422 +FA4B>7891 +FA4C>793E +FA4D>7949 +FA4E>7948 +FA4F>7950 +FA50>7956 +FA51>795D +FA52>798D +FA53>798E +FA54>7A40 +FA55>7A81 +FA56>7BC0 +FA57>7DF4 +FA58>7E09 +FA59>7E41 +FA5A>7F72 +FA5B>8005 +FA5C>81ED +FA5D>8279 +FA5E>8279 +FA5F>8457 +FA60>8910 +FA61>8996 +FA62>8B01 +FA63>8B39 +FA64>8CD3 +FA65>8D08 +FA66>8FB6 +FA67>9038 +FA68>96E3 +FA69>97FF +FA6A>983B +FA6B>6075 +FA6C>242EE +FA6D>8218 +FA70>4E26 +FA71>51B5 +FA72>5168 +FA73>4F80 +FA74>5145 +FA75>5180 +FA76>52C7 +FA77>52FA +FA78>559D +FA79>5555 +FA7A>5599 +FA7B>55E2 +FA7C>585A +FA7D>58B3 +FA7E>5944 +FA7F>5954 +FA80>5A62 +FA81>5B28 +FA82>5ED2 +FA83>5ED9 +FA84>5F69 +FA85>5FAD +FA86>60D8 +FA87>614E +FA88>6108 +FA89>618E +FA8A>6160 +FA8B>61F2 +FA8C>6234 +FA8D>63C4 +FA8E>641C +FA8F>6452 +FA90>6556 +FA91>6674 +FA92>6717 +FA93>671B +FA94>6756 +FA95>6B79 +FA96>6BBA +FA97>6D41 +FA98>6EDB +FA99>6ECB +FA9A>6F22 +FA9B>701E 
+FA9C>716E +FA9D>77A7 +FA9E>7235 +FA9F>72AF +FAA0>732A +FAA1>7471 +FAA2>7506 +FAA3>753B +FAA4>761D +FAA5>761F +FAA6>76CA +FAA7>76DB +FAA8>76F4 +FAA9>774A +FAAA>7740 +FAAB>78CC +FAAC>7AB1 +FAAD>7BC0 +FAAE>7C7B +FAAF>7D5B +FAB0>7DF4 +FAB1>7F3E +FAB2>8005 +FAB3>8352 +FAB4>83EF +FAB5>8779 +FAB6>8941 +FAB7>8986 +FAB8>8996 +FAB9>8ABF +FABA>8AF8 +FABB>8ACB +FABC>8B01 +FABD>8AFE +FABE>8AED +FABF>8B39 +FAC0>8B8A +FAC1>8D08 +FAC2>8F38 +FAC3>9072 +FAC4>9199 +FAC5>9276 +FAC6>967C +FAC7>96E3 +FAC8>9756 +FAC9>97DB +FACA>97FF +FACB>980B +FACC>983B +FACD>9B12 +FACE>9F9C +FACF>2284A +FAD0>22844 +FAD1>233D5 +FAD2>3B9D +FAD3>4018 +FAD4>4039 +FAD5>25249 +FAD6>25CD0 +FAD7>27ED3 +FAD8>9F43 +FAD9>9F8E +FB00>0066 0066 +FB01>0066 0069 +FB02>0066 006C +FB03>0066 0066 0069 +FB04>0066 0066 006C +FB05>017F 0074 +FB06>0073 0074 +FB13>0574 0576 +FB14>0574 0565 +FB15>0574 056B +FB16>057E 0576 +FB17>0574 056D +FB1D>05D9 05B4 +FB1F>05F2 05B7 +FB20>05E2 +FB21>05D0 +FB22>05D3 +FB23>05D4 +FB24>05DB +FB25>05DC +FB26>05DD +FB27>05E8 +FB28>05EA +FB29>002B +FB2A>05E9 05C1 +FB2B>05E9 05C2 +FB2C>FB49 05C1 +FB2D>FB49 05C2 +FB2E>05D0 05B7 +FB2F>05D0 05B8 +FB30>05D0 05BC +FB31>05D1 05BC +FB32>05D2 05BC +FB33>05D3 05BC +FB34>05D4 05BC +FB35>05D5 05BC +FB36>05D6 05BC +FB38>05D8 05BC +FB39>05D9 05BC +FB3A>05DA 05BC +FB3B>05DB 05BC +FB3C>05DC 05BC +FB3E>05DE 05BC +FB40>05E0 05BC +FB41>05E1 05BC +FB43>05E3 05BC +FB44>05E4 05BC +FB46>05E6 05BC +FB47>05E7 05BC +FB48>05E8 05BC +FB49>05E9 05BC +FB4A>05EA 05BC +FB4B>05D5 05B9 +FB4C>05D1 05BF +FB4D>05DB 05BF +FB4E>05E4 05BF +FB4F>05D0 05DC +FB50>0671 +FB51>0671 +FB52>067B +FB53>067B +FB54>067B +FB55>067B +FB56>067E +FB57>067E +FB58>067E +FB59>067E +FB5A>0680 +FB5B>0680 +FB5C>0680 +FB5D>0680 +FB5E>067A +FB5F>067A +FB60>067A +FB61>067A +FB62>067F +FB63>067F +FB64>067F +FB65>067F +FB66>0679 +FB67>0679 +FB68>0679 +FB69>0679 +FB6A>06A4 +FB6B>06A4 +FB6C>06A4 +FB6D>06A4 +FB6E>06A6 +FB6F>06A6 +FB70>06A6 +FB71>06A6 +FB72>0684 +FB73>0684 +FB74>0684 +FB75>0684 +FB76>0683 +FB77>0683 
+FB78>0683 +FB79>0683 +FB7A>0686 +FB7B>0686 +FB7C>0686 +FB7D>0686 +FB7E>0687 +FB7F>0687 +FB80>0687 +FB81>0687 +FB82>068D +FB83>068D +FB84>068C +FB85>068C +FB86>068E +FB87>068E +FB88>0688 +FB89>0688 +FB8A>0698 +FB8B>0698 +FB8C>0691 +FB8D>0691 +FB8E>06A9 +FB8F>06A9 +FB90>06A9 +FB91>06A9 +FB92>06AF +FB93>06AF +FB94>06AF +FB95>06AF +FB96>06B3 +FB97>06B3 +FB98>06B3 +FB99>06B3 +FB9A>06B1 +FB9B>06B1 +FB9C>06B1 +FB9D>06B1 +FB9E>06BA +FB9F>06BA +FBA0>06BB +FBA1>06BB +FBA2>06BB +FBA3>06BB +FBA4>06C0 +FBA5>06C0 +FBA6>06C1 +FBA7>06C1 +FBA8>06C1 +FBA9>06C1 +FBAA>06BE +FBAB>06BE +FBAC>06BE +FBAD>06BE +FBAE>06D2 +FBAF>06D2 +FBB0>06D3 +FBB1>06D3 +FBD3>06AD +FBD4>06AD +FBD5>06AD +FBD6>06AD +FBD7>06C7 +FBD8>06C7 +FBD9>06C6 +FBDA>06C6 +FBDB>06C8 +FBDC>06C8 +FBDD>0677 +FBDE>06CB +FBDF>06CB +FBE0>06C5 +FBE1>06C5 +FBE2>06C9 +FBE3>06C9 +FBE4>06D0 +FBE5>06D0 +FBE6>06D0 +FBE7>06D0 +FBE8>0649 +FBE9>0649 +FBEA>0626 0627 +FBEB>0626 0627 +FBEC>0626 06D5 +FBED>0626 06D5 +FBEE>0626 0648 +FBEF>0626 0648 +FBF0>0626 06C7 +FBF1>0626 06C7 +FBF2>0626 06C6 +FBF3>0626 06C6 +FBF4>0626 06C8 +FBF5>0626 06C8 +FBF6>0626 06D0 +FBF7>0626 06D0 +FBF8>0626 06D0 +FBF9>0626 0649 +FBFA>0626 0649 +FBFB>0626 0649 +FBFC>06CC +FBFD>06CC +FBFE>06CC +FBFF>06CC +FC00>0626 062C +FC01>0626 062D +FC02>0626 0645 +FC03>0626 0649 +FC04>0626 064A +FC05>0628 062C +FC06>0628 062D +FC07>0628 062E +FC08>0628 0645 +FC09>0628 0649 +FC0A>0628 064A +FC0B>062A 062C +FC0C>062A 062D +FC0D>062A 062E +FC0E>062A 0645 +FC0F>062A 0649 +FC10>062A 064A +FC11>062B 062C +FC12>062B 0645 +FC13>062B 0649 +FC14>062B 064A +FC15>062C 062D +FC16>062C 0645 +FC17>062D 062C +FC18>062D 0645 +FC19>062E 062C +FC1A>062E 062D +FC1B>062E 0645 +FC1C>0633 062C +FC1D>0633 062D +FC1E>0633 062E +FC1F>0633 0645 +FC20>0635 062D +FC21>0635 0645 +FC22>0636 062C +FC23>0636 062D +FC24>0636 062E +FC25>0636 0645 +FC26>0637 062D +FC27>0637 0645 +FC28>0638 0645 +FC29>0639 062C +FC2A>0639 0645 +FC2B>063A 062C +FC2C>063A 0645 +FC2D>0641 062C +FC2E>0641 062D +FC2F>0641 062E 
+FC30>0641 0645 +FC31>0641 0649 +FC32>0641 064A +FC33>0642 062D +FC34>0642 0645 +FC35>0642 0649 +FC36>0642 064A +FC37>0643 0627 +FC38>0643 062C +FC39>0643 062D +FC3A>0643 062E +FC3B>0643 0644 +FC3C>0643 0645 +FC3D>0643 0649 +FC3E>0643 064A +FC3F>0644 062C +FC40>0644 062D +FC41>0644 062E +FC42>0644 0645 +FC43>0644 0649 +FC44>0644 064A +FC45>0645 062C +FC46>0645 062D +FC47>0645 062E +FC48>0645 0645 +FC49>0645 0649 +FC4A>0645 064A +FC4B>0646 062C +FC4C>0646 062D +FC4D>0646 062E +FC4E>0646 0645 +FC4F>0646 0649 +FC50>0646 064A +FC51>0647 062C +FC52>0647 0645 +FC53>0647 0649 +FC54>0647 064A +FC55>064A 062C +FC56>064A 062D +FC57>064A 062E +FC58>064A 0645 +FC59>064A 0649 +FC5A>064A 064A +FC5B>0630 0670 +FC5C>0631 0670 +FC5D>0649 0670 +FC5E>0020 064C 0651 +FC5F>0020 064D 0651 +FC60>0020 064E 0651 +FC61>0020 064F 0651 +FC62>0020 0650 0651 +FC63>0020 0651 0670 +FC64>0626 0631 +FC65>0626 0632 +FC66>0626 0645 +FC67>0626 0646 +FC68>0626 0649 +FC69>0626 064A +FC6A>0628 0631 +FC6B>0628 0632 +FC6C>0628 0645 +FC6D>0628 0646 +FC6E>0628 0649 +FC6F>0628 064A +FC70>062A 0631 +FC71>062A 0632 +FC72>062A 0645 +FC73>062A 0646 +FC74>062A 0649 +FC75>062A 064A +FC76>062B 0631 +FC77>062B 0632 +FC78>062B 0645 +FC79>062B 0646 +FC7A>062B 0649 +FC7B>062B 064A +FC7C>0641 0649 +FC7D>0641 064A +FC7E>0642 0649 +FC7F>0642 064A +FC80>0643 0627 +FC81>0643 0644 +FC82>0643 0645 +FC83>0643 0649 +FC84>0643 064A +FC85>0644 0645 +FC86>0644 0649 +FC87>0644 064A +FC88>0645 0627 +FC89>0645 0645 +FC8A>0646 0631 +FC8B>0646 0632 +FC8C>0646 0645 +FC8D>0646 0646 +FC8E>0646 0649 +FC8F>0646 064A +FC90>0649 0670 +FC91>064A 0631 +FC92>064A 0632 +FC93>064A 0645 +FC94>064A 0646 +FC95>064A 0649 +FC96>064A 064A +FC97>0626 062C +FC98>0626 062D +FC99>0626 062E +FC9A>0626 0645 +FC9B>0626 0647 +FC9C>0628 062C +FC9D>0628 062D +FC9E>0628 062E +FC9F>0628 0645 +FCA0>0628 0647 +FCA1>062A 062C +FCA2>062A 062D +FCA3>062A 062E +FCA4>062A 0645 +FCA5>062A 0647 +FCA6>062B 0645 +FCA7>062C 062D +FCA8>062C 0645 +FCA9>062D 062C +FCAA>062D 0645 
+FCAB>062E 062C +FCAC>062E 0645 +FCAD>0633 062C +FCAE>0633 062D +FCAF>0633 062E +FCB0>0633 0645 +FCB1>0635 062D +FCB2>0635 062E +FCB3>0635 0645 +FCB4>0636 062C +FCB5>0636 062D +FCB6>0636 062E +FCB7>0636 0645 +FCB8>0637 062D +FCB9>0638 0645 +FCBA>0639 062C +FCBB>0639 0645 +FCBC>063A 062C +FCBD>063A 0645 +FCBE>0641 062C +FCBF>0641 062D +FCC0>0641 062E +FCC1>0641 0645 +FCC2>0642 062D +FCC3>0642 0645 +FCC4>0643 062C +FCC5>0643 062D +FCC6>0643 062E +FCC7>0643 0644 +FCC8>0643 0645 +FCC9>0644 062C +FCCA>0644 062D +FCCB>0644 062E +FCCC>0644 0645 +FCCD>0644 0647 +FCCE>0645 062C +FCCF>0645 062D +FCD0>0645 062E +FCD1>0645 0645 +FCD2>0646 062C +FCD3>0646 062D +FCD4>0646 062E +FCD5>0646 0645 +FCD6>0646 0647 +FCD7>0647 062C +FCD8>0647 0645 +FCD9>0647 0670 +FCDA>064A 062C +FCDB>064A 062D +FCDC>064A 062E +FCDD>064A 0645 +FCDE>064A 0647 +FCDF>0626 0645 +FCE0>0626 0647 +FCE1>0628 0645 +FCE2>0628 0647 +FCE3>062A 0645 +FCE4>062A 0647 +FCE5>062B 0645 +FCE6>062B 0647 +FCE7>0633 0645 +FCE8>0633 0647 +FCE9>0634 0645 +FCEA>0634 0647 +FCEB>0643 0644 +FCEC>0643 0645 +FCED>0644 0645 +FCEE>0646 0645 +FCEF>0646 0647 +FCF0>064A 0645 +FCF1>064A 0647 +FCF2>0640 064E 0651 +FCF3>0640 064F 0651 +FCF4>0640 0650 0651 +FCF5>0637 0649 +FCF6>0637 064A +FCF7>0639 0649 +FCF8>0639 064A +FCF9>063A 0649 +FCFA>063A 064A +FCFB>0633 0649 +FCFC>0633 064A +FCFD>0634 0649 +FCFE>0634 064A +FCFF>062D 0649 +FD00>062D 064A +FD01>062C 0649 +FD02>062C 064A +FD03>062E 0649 +FD04>062E 064A +FD05>0635 0649 +FD06>0635 064A +FD07>0636 0649 +FD08>0636 064A +FD09>0634 062C +FD0A>0634 062D +FD0B>0634 062E +FD0C>0634 0645 +FD0D>0634 0631 +FD0E>0633 0631 +FD0F>0635 0631 +FD10>0636 0631 +FD11>0637 0649 +FD12>0637 064A +FD13>0639 0649 +FD14>0639 064A +FD15>063A 0649 +FD16>063A 064A +FD17>0633 0649 +FD18>0633 064A +FD19>0634 0649 +FD1A>0634 064A +FD1B>062D 0649 +FD1C>062D 064A +FD1D>062C 0649 +FD1E>062C 064A +FD1F>062E 0649 +FD20>062E 064A +FD21>0635 0649 +FD22>0635 064A +FD23>0636 0649 +FD24>0636 064A +FD25>0634 062C +FD26>0634 062D 
+FD27>0634 062E +FD28>0634 0645 +FD29>0634 0631 +FD2A>0633 0631 +FD2B>0635 0631 +FD2C>0636 0631 +FD2D>0634 062C +FD2E>0634 062D +FD2F>0634 062E +FD30>0634 0645 +FD31>0633 0647 +FD32>0634 0647 +FD33>0637 0645 +FD34>0633 062C +FD35>0633 062D +FD36>0633 062E +FD37>0634 062C +FD38>0634 062D +FD39>0634 062E +FD3A>0637 0645 +FD3B>0638 0645 +FD3C>0627 064B +FD3D>0627 064B +FD50>062A 062C 0645 +FD51>062A 062D 062C +FD52>062A 062D 062C +FD53>062A 062D 0645 +FD54>062A 062E 0645 +FD55>062A 0645 062C +FD56>062A 0645 062D +FD57>062A 0645 062E +FD58>062C 0645 062D +FD59>062C 0645 062D +FD5A>062D 0645 064A +FD5B>062D 0645 0649 +FD5C>0633 062D 062C +FD5D>0633 062C 062D +FD5E>0633 062C 0649 +FD5F>0633 0645 062D +FD60>0633 0645 062D +FD61>0633 0645 062C +FD62>0633 0645 0645 +FD63>0633 0645 0645 +FD64>0635 062D 062D +FD65>0635 062D 062D +FD66>0635 0645 0645 +FD67>0634 062D 0645 +FD68>0634 062D 0645 +FD69>0634 062C 064A +FD6A>0634 0645 062E +FD6B>0634 0645 062E +FD6C>0634 0645 0645 +FD6D>0634 0645 0645 +FD6E>0636 062D 0649 +FD6F>0636 062E 0645 +FD70>0636 062E 0645 +FD71>0637 0645 062D +FD72>0637 0645 062D +FD73>0637 0645 0645 +FD74>0637 0645 064A +FD75>0639 062C 0645 +FD76>0639 0645 0645 +FD77>0639 0645 0645 +FD78>0639 0645 0649 +FD79>063A 0645 0645 +FD7A>063A 0645 064A +FD7B>063A 0645 0649 +FD7C>0641 062E 0645 +FD7D>0641 062E 0645 +FD7E>0642 0645 062D +FD7F>0642 0645 0645 +FD80>0644 062D 0645 +FD81>0644 062D 064A +FD82>0644 062D 0649 +FD83>0644 062C 062C +FD84>0644 062C 062C +FD85>0644 062E 0645 +FD86>0644 062E 0645 +FD87>0644 0645 062D +FD88>0644 0645 062D +FD89>0645 062D 062C +FD8A>0645 062D 0645 +FD8B>0645 062D 064A +FD8C>0645 062C 062D +FD8D>0645 062C 0645 +FD8E>0645 062E 062C +FD8F>0645 062E 0645 +FD92>0645 062C 062E +FD93>0647 0645 062C +FD94>0647 0645 0645 +FD95>0646 062D 0645 +FD96>0646 062D 0649 +FD97>0646 062C 0645 +FD98>0646 062C 0645 +FD99>0646 062C 0649 +FD9A>0646 0645 064A +FD9B>0646 0645 0649 +FD9C>064A 0645 0645 +FD9D>064A 0645 0645 +FD9E>0628 062E 064A +FD9F>062A 
062C 064A +FDA0>062A 062C 0649 +FDA1>062A 062E 064A +FDA2>062A 062E 0649 +FDA3>062A 0645 064A +FDA4>062A 0645 0649 +FDA5>062C 0645 064A +FDA6>062C 062D 0649 +FDA7>062C 0645 0649 +FDA8>0633 062E 0649 +FDA9>0635 062D 064A +FDAA>0634 062D 064A +FDAB>0636 062D 064A +FDAC>0644 062C 064A +FDAD>0644 0645 064A +FDAE>064A 062D 064A +FDAF>064A 062C 064A +FDB0>064A 0645 064A +FDB1>0645 0645 064A +FDB2>0642 0645 064A +FDB3>0646 062D 064A +FDB4>0642 0645 062D +FDB5>0644 062D 0645 +FDB6>0639 0645 064A +FDB7>0643 0645 064A +FDB8>0646 062C 062D +FDB9>0645 062E 064A +FDBA>0644 062C 0645 +FDBB>0643 0645 0645 +FDBC>0644 062C 0645 +FDBD>0646 062C 062D +FDBE>062C 062D 064A +FDBF>062D 062C 064A +FDC0>0645 062C 064A +FDC1>0641 0645 064A +FDC2>0628 062D 064A +FDC3>0643 0645 0645 +FDC4>0639 062C 0645 +FDC5>0635 0645 0645 +FDC6>0633 062E 064A +FDC7>0646 062C 064A +FDF0>0635 0644 06D2 +FDF1>0642 0644 06D2 +FDF2>0627 0644 0644 0647 +FDF3>0627 0643 0628 0631 +FDF4>0645 062D 0645 062F +FDF5>0635 0644 0639 0645 +FDF6>0631 0633 0648 0644 +FDF7>0639 0644 064A 0647 +FDF8>0648 0633 0644 0645 +FDF9>0635 0644 0649 +FDFA>0635 0644 0649 0020 0627 0644 0644 0647 0020 0639 0644 064A 0647 0020 0648 0633 0644 0645 +FDFB>062C 0644 0020 062C 0644 0627 0644 0647 +FDFC>0631 06CC 0627 0644 +FE10>002C +FE11>3001 +FE12>3002 +FE13>003A +FE14>003B +FE15>0021 +FE16>003F +FE17>3016 +FE18>3017 +FE19>2026 +FE30>2025 +FE31>2014 +FE32>2013 +FE33>005F +FE34>005F +FE35>0028 +FE36>0029 +FE37>007B +FE38>007D +FE39>3014 +FE3A>3015 +FE3B>3010 +FE3C>3011 +FE3D>300A +FE3E>300B +FE3F>3008 +FE40>3009 +FE41>300C +FE42>300D +FE43>300E +FE44>300F +FE47>005B +FE48>005D +FE49>203E +FE4A>203E +FE4B>203E +FE4C>203E +FE4D>005F +FE4E>005F +FE4F>005F +FE50>002C +FE51>3001 +FE52>002E +FE54>003B +FE55>003A +FE56>003F +FE57>0021 +FE58>2014 +FE59>0028 +FE5A>0029 +FE5B>007B +FE5C>007D +FE5D>3014 +FE5E>3015 +FE5F>0023 +FE60>0026 +FE61>002A +FE62>002B +FE63>002D +FE64>003C +FE65>003E +FE66>003D +FE68>005C +FE69>0024 +FE6A>0025 +FE6B>0040 +FE70>0020 
064B +FE71>0640 064B +FE72>0020 064C +FE74>0020 064D +FE76>0020 064E +FE77>0640 064E +FE78>0020 064F +FE79>0640 064F +FE7A>0020 0650 +FE7B>0640 0650 +FE7C>0020 0651 +FE7D>0640 0651 +FE7E>0020 0652 +FE7F>0640 0652 +FE80>0621 +FE81>0622 +FE82>0622 +FE83>0623 +FE84>0623 +FE85>0624 +FE86>0624 +FE87>0625 +FE88>0625 +FE89>0626 +FE8A>0626 +FE8B>0626 +FE8C>0626 +FE8D>0627 +FE8E>0627 +FE8F>0628 +FE90>0628 +FE91>0628 +FE92>0628 +FE93>0629 +FE94>0629 +FE95>062A +FE96>062A +FE97>062A +FE98>062A +FE99>062B +FE9A>062B +FE9B>062B +FE9C>062B +FE9D>062C +FE9E>062C +FE9F>062C +FEA0>062C +FEA1>062D +FEA2>062D +FEA3>062D +FEA4>062D +FEA5>062E +FEA6>062E +FEA7>062E +FEA8>062E +FEA9>062F +FEAA>062F +FEAB>0630 +FEAC>0630 +FEAD>0631 +FEAE>0631 +FEAF>0632 +FEB0>0632 +FEB1>0633 +FEB2>0633 +FEB3>0633 +FEB4>0633 +FEB5>0634 +FEB6>0634 +FEB7>0634 +FEB8>0634 +FEB9>0635 +FEBA>0635 +FEBB>0635 +FEBC>0635 +FEBD>0636 +FEBE>0636 +FEBF>0636 +FEC0>0636 +FEC1>0637 +FEC2>0637 +FEC3>0637 +FEC4>0637 +FEC5>0638 +FEC6>0638 +FEC7>0638 +FEC8>0638 +FEC9>0639 +FECA>0639 +FECB>0639 +FECC>0639 +FECD>063A +FECE>063A +FECF>063A +FED0>063A +FED1>0641 +FED2>0641 +FED3>0641 +FED4>0641 +FED5>0642 +FED6>0642 +FED7>0642 +FED8>0642 +FED9>0643 +FEDA>0643 +FEDB>0643 +FEDC>0643 +FEDD>0644 +FEDE>0644 +FEDF>0644 +FEE0>0644 +FEE1>0645 +FEE2>0645 +FEE3>0645 +FEE4>0645 +FEE5>0646 +FEE6>0646 +FEE7>0646 +FEE8>0646 +FEE9>0647 +FEEA>0647 +FEEB>0647 +FEEC>0647 +FEED>0648 +FEEE>0648 +FEEF>0649 +FEF0>0649 +FEF1>064A +FEF2>064A +FEF3>064A +FEF4>064A +FEF5>0644 0622 +FEF6>0644 0622 +FEF7>0644 0623 +FEF8>0644 0623 +FEF9>0644 0625 +FEFA>0644 0625 +FEFB>0644 0627 +FEFC>0644 0627 +FF01>0021 +FF02>0022 +FF03>0023 +FF04>0024 +FF05>0025 +FF06>0026 +FF07>0027 +FF08>0028 +FF09>0029 +FF0A>002A +FF0B>002B +FF0C>002C +FF0D>002D +FF0E>002E +FF0F>002F +FF10>0030 +FF11>0031 +FF12>0032 +FF13>0033 +FF14>0034 +FF15>0035 +FF16>0036 +FF17>0037 +FF18>0038 +FF19>0039 +FF1A>003A +FF1B>003B +FF1C>003C +FF1D>003D +FF1E>003E +FF1F>003F +FF20>0040 +FF21>0041 
+FF22>0042 +FF23>0043 +FF24>0044 +FF25>0045 +FF26>0046 +FF27>0047 +FF28>0048 +FF29>0049 +FF2A>004A +FF2B>004B +FF2C>004C +FF2D>004D +FF2E>004E +FF2F>004F +FF30>0050 +FF31>0051 +FF32>0052 +FF33>0053 +FF34>0054 +FF35>0055 +FF36>0056 +FF37>0057 +FF38>0058 +FF39>0059 +FF3A>005A +FF3B>005B +FF3C>005C +FF3D>005D +FF3E>005E +FF3F>005F +FF40>0060 +FF41>0061 +FF42>0062 +FF43>0063 +FF44>0064 +FF45>0065 +FF46>0066 +FF47>0067 +FF48>0068 +FF49>0069 +FF4A>006A +FF4B>006B +FF4C>006C +FF4D>006D +FF4E>006E +FF4F>006F +FF50>0070 +FF51>0071 +FF52>0072 +FF53>0073 +FF54>0074 +FF55>0075 +FF56>0076 +FF57>0077 +FF58>0078 +FF59>0079 +FF5A>007A +FF5B>007B +FF5C>007C +FF5D>007D +FF5E>007E +FF5F>2985 +FF60>2986 +FF61>3002 +FF62>300C +FF63>300D +FF64>3001 +FF65>30FB +FF66>30F2 +FF67>30A1 +FF68>30A3 +FF69>30A5 +FF6A>30A7 +FF6B>30A9 +FF6C>30E3 +FF6D>30E5 +FF6E>30E7 +FF6F>30C3 +FF70>30FC +FF71>30A2 +FF72>30A4 +FF73>30A6 +FF74>30A8 +FF75>30AA +FF76>30AB +FF77>30AD +FF78>30AF +FF79>30B1 +FF7A>30B3 +FF7B>30B5 +FF7C>30B7 +FF7D>30B9 +FF7E>30BB +FF7F>30BD +FF80>30BF +FF81>30C1 +FF82>30C4 +FF83>30C6 +FF84>30C8 +FF85>30CA +FF86>30CB +FF87>30CC +FF88>30CD +FF89>30CE +FF8A>30CF +FF8B>30D2 +FF8C>30D5 +FF8D>30D8 +FF8E>30DB +FF8F>30DE +FF90>30DF +FF91>30E0 +FF92>30E1 +FF93>30E2 +FF94>30E4 +FF95>30E6 +FF96>30E8 +FF97>30E9 +FF98>30EA +FF99>30EB +FF9A>30EC +FF9B>30ED +FF9C>30EF +FF9D>30F3 +FF9E>3099 +FF9F>309A +FFA0>3164 +FFA1>3131 +FFA2>3132 +FFA3>3133 +FFA4>3134 +FFA5>3135 +FFA6>3136 +FFA7>3137 +FFA8>3138 +FFA9>3139 +FFAA>313A +FFAB>313B +FFAC>313C +FFAD>313D +FFAE>313E +FFAF>313F +FFB0>3140 +FFB1>3141 +FFB2>3142 +FFB3>3143 +FFB4>3144 +FFB5>3145 +FFB6>3146 +FFB7>3147 +FFB8>3148 +FFB9>3149 +FFBA>314A +FFBB>314B +FFBC>314C +FFBD>314D +FFBE>314E +FFC2>314F +FFC3>3150 +FFC4>3151 +FFC5>3152 +FFC6>3153 +FFC7>3154 +FFCA>3155 +FFCB>3156 +FFCC>3157 +FFCD>3158 +FFCE>3159 +FFCF>315A +FFD2>315B +FFD3>315C +FFD4>315D +FFD5>315E +FFD6>315F +FFD7>3160 +FFDA>3161 +FFDB>3162 +FFDC>3163 +FFE0>00A2 +FFE1>00A3 +FFE2>00AC 
+FFE3>00AF +FFE4>00A6 +FFE5>00A5 +FFE6>20A9 +FFE8>2502 +FFE9>2190 +FFEA>2191 +FFEB>2192 +FFEC>2193 +FFED>25A0 +FFEE>25CB +1109A=11099 110BA +1109C=1109B 110BA +110AB=110A5 110BA +1D15E>1D157 1D165 +1D15F>1D158 1D165 +1D160>1D15F 1D16E +1D161>1D15F 1D16F +1D162>1D15F 1D170 +1D163>1D15F 1D171 +1D164>1D15F 1D172 +1D1BB>1D1B9 1D165 +1D1BC>1D1BA 1D165 +1D1BD>1D1BB 1D16E +1D1BE>1D1BC 1D16E +1D1BF>1D1BB 1D16F +1D1C0>1D1BC 1D16F +1D400>0041 +1D401>0042 +1D402>0043 +1D403>0044 +1D404>0045 +1D405>0046 +1D406>0047 +1D407>0048 +1D408>0049 +1D409>004A +1D40A>004B +1D40B>004C +1D40C>004D +1D40D>004E +1D40E>004F +1D40F>0050 +1D410>0051 +1D411>0052 +1D412>0053 +1D413>0054 +1D414>0055 +1D415>0056 +1D416>0057 +1D417>0058 +1D418>0059 +1D419>005A +1D41A>0061 +1D41B>0062 +1D41C>0063 +1D41D>0064 +1D41E>0065 +1D41F>0066 +1D420>0067 +1D421>0068 +1D422>0069 +1D423>006A +1D424>006B +1D425>006C +1D426>006D +1D427>006E +1D428>006F +1D429>0070 +1D42A>0071 +1D42B>0072 +1D42C>0073 +1D42D>0074 +1D42E>0075 +1D42F>0076 +1D430>0077 +1D431>0078 +1D432>0079 +1D433>007A +1D434>0041 +1D435>0042 +1D436>0043 +1D437>0044 +1D438>0045 +1D439>0046 +1D43A>0047 +1D43B>0048 +1D43C>0049 +1D43D>004A +1D43E>004B +1D43F>004C +1D440>004D +1D441>004E +1D442>004F +1D443>0050 +1D444>0051 +1D445>0052 +1D446>0053 +1D447>0054 +1D448>0055 +1D449>0056 +1D44A>0057 +1D44B>0058 +1D44C>0059 +1D44D>005A +1D44E>0061 +1D44F>0062 +1D450>0063 +1D451>0064 +1D452>0065 +1D453>0066 +1D454>0067 +1D456>0069 +1D457>006A +1D458>006B +1D459>006C +1D45A>006D +1D45B>006E +1D45C>006F +1D45D>0070 +1D45E>0071 +1D45F>0072 +1D460>0073 +1D461>0074 +1D462>0075 +1D463>0076 +1D464>0077 +1D465>0078 +1D466>0079 +1D467>007A +1D468>0041 +1D469>0042 +1D46A>0043 +1D46B>0044 +1D46C>0045 +1D46D>0046 +1D46E>0047 +1D46F>0048 +1D470>0049 +1D471>004A +1D472>004B +1D473>004C +1D474>004D +1D475>004E +1D476>004F +1D477>0050 +1D478>0051 +1D479>0052 +1D47A>0053 +1D47B>0054 +1D47C>0055 +1D47D>0056 +1D47E>0057 +1D47F>0058 +1D480>0059 +1D481>005A +1D482>0061 +1D483>0062 
+1D484>0063 +1D485>0064 +1D486>0065 +1D487>0066 +1D488>0067 +1D489>0068 +1D48A>0069 +1D48B>006A +1D48C>006B +1D48D>006C +1D48E>006D +1D48F>006E +1D490>006F +1D491>0070 +1D492>0071 +1D493>0072 +1D494>0073 +1D495>0074 +1D496>0075 +1D497>0076 +1D498>0077 +1D499>0078 +1D49A>0079 +1D49B>007A +1D49C>0041 +1D49E>0043 +1D49F>0044 +1D4A2>0047 +1D4A5>004A +1D4A6>004B +1D4A9>004E +1D4AA>004F +1D4AB>0050 +1D4AC>0051 +1D4AE>0053 +1D4AF>0054 +1D4B0>0055 +1D4B1>0056 +1D4B2>0057 +1D4B3>0058 +1D4B4>0059 +1D4B5>005A +1D4B6>0061 +1D4B7>0062 +1D4B8>0063 +1D4B9>0064 +1D4BB>0066 +1D4BD>0068 +1D4BE>0069 +1D4BF>006A +1D4C0>006B +1D4C1>006C +1D4C2>006D +1D4C3>006E +1D4C5>0070 +1D4C6>0071 +1D4C7>0072 +1D4C8>0073 +1D4C9>0074 +1D4CA>0075 +1D4CB>0076 +1D4CC>0077 +1D4CD>0078 +1D4CE>0079 +1D4CF>007A +1D4D0>0041 +1D4D1>0042 +1D4D2>0043 +1D4D3>0044 +1D4D4>0045 +1D4D5>0046 +1D4D6>0047 +1D4D7>0048 +1D4D8>0049 +1D4D9>004A +1D4DA>004B +1D4DB>004C +1D4DC>004D +1D4DD>004E +1D4DE>004F +1D4DF>0050 +1D4E0>0051 +1D4E1>0052 +1D4E2>0053 +1D4E3>0054 +1D4E4>0055 +1D4E5>0056 +1D4E6>0057 +1D4E7>0058 +1D4E8>0059 +1D4E9>005A +1D4EA>0061 +1D4EB>0062 +1D4EC>0063 +1D4ED>0064 +1D4EE>0065 +1D4EF>0066 +1D4F0>0067 +1D4F1>0068 +1D4F2>0069 +1D4F3>006A +1D4F4>006B +1D4F5>006C +1D4F6>006D +1D4F7>006E +1D4F8>006F +1D4F9>0070 +1D4FA>0071 +1D4FB>0072 +1D4FC>0073 +1D4FD>0074 +1D4FE>0075 +1D4FF>0076 +1D500>0077 +1D501>0078 +1D502>0079 +1D503>007A +1D504>0041 +1D505>0042 +1D507>0044 +1D508>0045 +1D509>0046 +1D50A>0047 +1D50D>004A +1D50E>004B +1D50F>004C +1D510>004D +1D511>004E +1D512>004F +1D513>0050 +1D514>0051 +1D516>0053 +1D517>0054 +1D518>0055 +1D519>0056 +1D51A>0057 +1D51B>0058 +1D51C>0059 +1D51E>0061 +1D51F>0062 +1D520>0063 +1D521>0064 +1D522>0065 +1D523>0066 +1D524>0067 +1D525>0068 +1D526>0069 +1D527>006A +1D528>006B +1D529>006C +1D52A>006D +1D52B>006E +1D52C>006F +1D52D>0070 +1D52E>0071 +1D52F>0072 +1D530>0073 +1D531>0074 +1D532>0075 +1D533>0076 +1D534>0077 +1D535>0078 +1D536>0079 +1D537>007A +1D538>0041 +1D539>0042 
+1D53B>0044 +1D53C>0045 +1D53D>0046 +1D53E>0047 +1D540>0049 +1D541>004A +1D542>004B +1D543>004C +1D544>004D +1D546>004F +1D54A>0053 +1D54B>0054 +1D54C>0055 +1D54D>0056 +1D54E>0057 +1D54F>0058 +1D550>0059 +1D552>0061 +1D553>0062 +1D554>0063 +1D555>0064 +1D556>0065 +1D557>0066 +1D558>0067 +1D559>0068 +1D55A>0069 +1D55B>006A +1D55C>006B +1D55D>006C +1D55E>006D +1D55F>006E +1D560>006F +1D561>0070 +1D562>0071 +1D563>0072 +1D564>0073 +1D565>0074 +1D566>0075 +1D567>0076 +1D568>0077 +1D569>0078 +1D56A>0079 +1D56B>007A +1D56C>0041 +1D56D>0042 +1D56E>0043 +1D56F>0044 +1D570>0045 +1D571>0046 +1D572>0047 +1D573>0048 +1D574>0049 +1D575>004A +1D576>004B +1D577>004C +1D578>004D +1D579>004E +1D57A>004F +1D57B>0050 +1D57C>0051 +1D57D>0052 +1D57E>0053 +1D57F>0054 +1D580>0055 +1D581>0056 +1D582>0057 +1D583>0058 +1D584>0059 +1D585>005A +1D586>0061 +1D587>0062 +1D588>0063 +1D589>0064 +1D58A>0065 +1D58B>0066 +1D58C>0067 +1D58D>0068 +1D58E>0069 +1D58F>006A +1D590>006B +1D591>006C +1D592>006D +1D593>006E +1D594>006F +1D595>0070 +1D596>0071 +1D597>0072 +1D598>0073 +1D599>0074 +1D59A>0075 +1D59B>0076 +1D59C>0077 +1D59D>0078 +1D59E>0079 +1D59F>007A +1D5A0>0041 +1D5A1>0042 +1D5A2>0043 +1D5A3>0044 +1D5A4>0045 +1D5A5>0046 +1D5A6>0047 +1D5A7>0048 +1D5A8>0049 +1D5A9>004A +1D5AA>004B +1D5AB>004C +1D5AC>004D +1D5AD>004E +1D5AE>004F +1D5AF>0050 +1D5B0>0051 +1D5B1>0052 +1D5B2>0053 +1D5B3>0054 +1D5B4>0055 +1D5B5>0056 +1D5B6>0057 +1D5B7>0058 +1D5B8>0059 +1D5B9>005A +1D5BA>0061 +1D5BB>0062 +1D5BC>0063 +1D5BD>0064 +1D5BE>0065 +1D5BF>0066 +1D5C0>0067 +1D5C1>0068 +1D5C2>0069 +1D5C3>006A +1D5C4>006B +1D5C5>006C +1D5C6>006D +1D5C7>006E +1D5C8>006F +1D5C9>0070 +1D5CA>0071 +1D5CB>0072 +1D5CC>0073 +1D5CD>0074 +1D5CE>0075 +1D5CF>0076 +1D5D0>0077 +1D5D1>0078 +1D5D2>0079 +1D5D3>007A +1D5D4>0041 +1D5D5>0042 +1D5D6>0043 +1D5D7>0044 +1D5D8>0045 +1D5D9>0046 +1D5DA>0047 +1D5DB>0048 +1D5DC>0049 +1D5DD>004A +1D5DE>004B +1D5DF>004C +1D5E0>004D +1D5E1>004E +1D5E2>004F +1D5E3>0050 +1D5E4>0051 +1D5E5>0052 +1D5E6>0053 
+1D5E7>0054 +1D5E8>0055 +1D5E9>0056 +1D5EA>0057 +1D5EB>0058 +1D5EC>0059 +1D5ED>005A +1D5EE>0061 +1D5EF>0062 +1D5F0>0063 +1D5F1>0064 +1D5F2>0065 +1D5F3>0066 +1D5F4>0067 +1D5F5>0068 +1D5F6>0069 +1D5F7>006A +1D5F8>006B +1D5F9>006C +1D5FA>006D +1D5FB>006E +1D5FC>006F +1D5FD>0070 +1D5FE>0071 +1D5FF>0072 +1D600>0073 +1D601>0074 +1D602>0075 +1D603>0076 +1D604>0077 +1D605>0078 +1D606>0079 +1D607>007A +1D608>0041 +1D609>0042 +1D60A>0043 +1D60B>0044 +1D60C>0045 +1D60D>0046 +1D60E>0047 +1D60F>0048 +1D610>0049 +1D611>004A +1D612>004B +1D613>004C +1D614>004D +1D615>004E +1D616>004F +1D617>0050 +1D618>0051 +1D619>0052 +1D61A>0053 +1D61B>0054 +1D61C>0055 +1D61D>0056 +1D61E>0057 +1D61F>0058 +1D620>0059 +1D621>005A +1D622>0061 +1D623>0062 +1D624>0063 +1D625>0064 +1D626>0065 +1D627>0066 +1D628>0067 +1D629>0068 +1D62A>0069 +1D62B>006A +1D62C>006B +1D62D>006C +1D62E>006D +1D62F>006E +1D630>006F +1D631>0070 +1D632>0071 +1D633>0072 +1D634>0073 +1D635>0074 +1D636>0075 +1D637>0076 +1D638>0077 +1D639>0078 +1D63A>0079 +1D63B>007A +1D63C>0041 +1D63D>0042 +1D63E>0043 +1D63F>0044 +1D640>0045 +1D641>0046 +1D642>0047 +1D643>0048 +1D644>0049 +1D645>004A +1D646>004B +1D647>004C +1D648>004D +1D649>004E +1D64A>004F +1D64B>0050 +1D64C>0051 +1D64D>0052 +1D64E>0053 +1D64F>0054 +1D650>0055 +1D651>0056 +1D652>0057 +1D653>0058 +1D654>0059 +1D655>005A +1D656>0061 +1D657>0062 +1D658>0063 +1D659>0064 +1D65A>0065 +1D65B>0066 +1D65C>0067 +1D65D>0068 +1D65E>0069 +1D65F>006A +1D660>006B +1D661>006C +1D662>006D +1D663>006E +1D664>006F +1D665>0070 +1D666>0071 +1D667>0072 +1D668>0073 +1D669>0074 +1D66A>0075 +1D66B>0076 +1D66C>0077 +1D66D>0078 +1D66E>0079 +1D66F>007A +1D670>0041 +1D671>0042 +1D672>0043 +1D673>0044 +1D674>0045 +1D675>0046 +1D676>0047 +1D677>0048 +1D678>0049 +1D679>004A +1D67A>004B +1D67B>004C +1D67C>004D +1D67D>004E +1D67E>004F +1D67F>0050 +1D680>0051 +1D681>0052 +1D682>0053 +1D683>0054 +1D684>0055 +1D685>0056 +1D686>0057 +1D687>0058 +1D688>0059 +1D689>005A +1D68A>0061 +1D68B>0062 +1D68C>0063 
+1D68D>0064 +1D68E>0065 +1D68F>0066 +1D690>0067 +1D691>0068 +1D692>0069 +1D693>006A +1D694>006B +1D695>006C +1D696>006D +1D697>006E +1D698>006F +1D699>0070 +1D69A>0071 +1D69B>0072 +1D69C>0073 +1D69D>0074 +1D69E>0075 +1D69F>0076 +1D6A0>0077 +1D6A1>0078 +1D6A2>0079 +1D6A3>007A +1D6A4>0131 +1D6A5>0237 +1D6A8>0391 +1D6A9>0392 +1D6AA>0393 +1D6AB>0394 +1D6AC>0395 +1D6AD>0396 +1D6AE>0397 +1D6AF>0398 +1D6B0>0399 +1D6B1>039A +1D6B2>039B +1D6B3>039C +1D6B4>039D +1D6B5>039E +1D6B6>039F +1D6B7>03A0 +1D6B8>03A1 +1D6B9>03F4 +1D6BA>03A3 +1D6BB>03A4 +1D6BC>03A5 +1D6BD>03A6 +1D6BE>03A7 +1D6BF>03A8 +1D6C0>03A9 +1D6C1>2207 +1D6C2>03B1 +1D6C3>03B2 +1D6C4>03B3 +1D6C5>03B4 +1D6C6>03B5 +1D6C7>03B6 +1D6C8>03B7 +1D6C9>03B8 +1D6CA>03B9 +1D6CB>03BA +1D6CC>03BB +1D6CD>03BC +1D6CE>03BD +1D6CF>03BE +1D6D0>03BF +1D6D1>03C0 +1D6D2>03C1 +1D6D3>03C2 +1D6D4>03C3 +1D6D5>03C4 +1D6D6>03C5 +1D6D7>03C6 +1D6D8>03C7 +1D6D9>03C8 +1D6DA>03C9 +1D6DB>2202 +1D6DC>03F5 +1D6DD>03D1 +1D6DE>03F0 +1D6DF>03D5 +1D6E0>03F1 +1D6E1>03D6 +1D6E2>0391 +1D6E3>0392 +1D6E4>0393 +1D6E5>0394 +1D6E6>0395 +1D6E7>0396 +1D6E8>0397 +1D6E9>0398 +1D6EA>0399 +1D6EB>039A +1D6EC>039B +1D6ED>039C +1D6EE>039D +1D6EF>039E +1D6F0>039F +1D6F1>03A0 +1D6F2>03A1 +1D6F3>03F4 +1D6F4>03A3 +1D6F5>03A4 +1D6F6>03A5 +1D6F7>03A6 +1D6F8>03A7 +1D6F9>03A8 +1D6FA>03A9 +1D6FB>2207 +1D6FC>03B1 +1D6FD>03B2 +1D6FE>03B3 +1D6FF>03B4 +1D700>03B5 +1D701>03B6 +1D702>03B7 +1D703>03B8 +1D704>03B9 +1D705>03BA +1D706>03BB +1D707>03BC +1D708>03BD +1D709>03BE +1D70A>03BF +1D70B>03C0 +1D70C>03C1 +1D70D>03C2 +1D70E>03C3 +1D70F>03C4 +1D710>03C5 +1D711>03C6 +1D712>03C7 +1D713>03C8 +1D714>03C9 +1D715>2202 +1D716>03F5 +1D717>03D1 +1D718>03F0 +1D719>03D5 +1D71A>03F1 +1D71B>03D6 +1D71C>0391 +1D71D>0392 +1D71E>0393 +1D71F>0394 +1D720>0395 +1D721>0396 +1D722>0397 +1D723>0398 +1D724>0399 +1D725>039A +1D726>039B +1D727>039C +1D728>039D +1D729>039E +1D72A>039F +1D72B>03A0 +1D72C>03A1 +1D72D>03F4 +1D72E>03A3 +1D72F>03A4 +1D730>03A5 +1D731>03A6 +1D732>03A7 +1D733>03A8 +1D734>03A9 
+1D735>2207 +1D736>03B1 +1D737>03B2 +1D738>03B3 +1D739>03B4 +1D73A>03B5 +1D73B>03B6 +1D73C>03B7 +1D73D>03B8 +1D73E>03B9 +1D73F>03BA +1D740>03BB +1D741>03BC +1D742>03BD +1D743>03BE +1D744>03BF +1D745>03C0 +1D746>03C1 +1D747>03C2 +1D748>03C3 +1D749>03C4 +1D74A>03C5 +1D74B>03C6 +1D74C>03C7 +1D74D>03C8 +1D74E>03C9 +1D74F>2202 +1D750>03F5 +1D751>03D1 +1D752>03F0 +1D753>03D5 +1D754>03F1 +1D755>03D6 +1D756>0391 +1D757>0392 +1D758>0393 +1D759>0394 +1D75A>0395 +1D75B>0396 +1D75C>0397 +1D75D>0398 +1D75E>0399 +1D75F>039A +1D760>039B +1D761>039C +1D762>039D +1D763>039E +1D764>039F +1D765>03A0 +1D766>03A1 +1D767>03F4 +1D768>03A3 +1D769>03A4 +1D76A>03A5 +1D76B>03A6 +1D76C>03A7 +1D76D>03A8 +1D76E>03A9 +1D76F>2207 +1D770>03B1 +1D771>03B2 +1D772>03B3 +1D773>03B4 +1D774>03B5 +1D775>03B6 +1D776>03B7 +1D777>03B8 +1D778>03B9 +1D779>03BA +1D77A>03BB +1D77B>03BC +1D77C>03BD +1D77D>03BE +1D77E>03BF +1D77F>03C0 +1D780>03C1 +1D781>03C2 +1D782>03C3 +1D783>03C4 +1D784>03C5 +1D785>03C6 +1D786>03C7 +1D787>03C8 +1D788>03C9 +1D789>2202 +1D78A>03F5 +1D78B>03D1 +1D78C>03F0 +1D78D>03D5 +1D78E>03F1 +1D78F>03D6 +1D790>0391 +1D791>0392 +1D792>0393 +1D793>0394 +1D794>0395 +1D795>0396 +1D796>0397 +1D797>0398 +1D798>0399 +1D799>039A +1D79A>039B +1D79B>039C +1D79C>039D +1D79D>039E +1D79E>039F +1D79F>03A0 +1D7A0>03A1 +1D7A1>03F4 +1D7A2>03A3 +1D7A3>03A4 +1D7A4>03A5 +1D7A5>03A6 +1D7A6>03A7 +1D7A7>03A8 +1D7A8>03A9 +1D7A9>2207 +1D7AA>03B1 +1D7AB>03B2 +1D7AC>03B3 +1D7AD>03B4 +1D7AE>03B5 +1D7AF>03B6 +1D7B0>03B7 +1D7B1>03B8 +1D7B2>03B9 +1D7B3>03BA +1D7B4>03BB +1D7B5>03BC +1D7B6>03BD +1D7B7>03BE +1D7B8>03BF +1D7B9>03C0 +1D7BA>03C1 +1D7BB>03C2 +1D7BC>03C3 +1D7BD>03C4 +1D7BE>03C5 +1D7BF>03C6 +1D7C0>03C7 +1D7C1>03C8 +1D7C2>03C9 +1D7C3>2202 +1D7C4>03F5 +1D7C5>03D1 +1D7C6>03F0 +1D7C7>03D5 +1D7C8>03F1 +1D7C9>03D6 +1D7CA>03DC +1D7CB>03DD +1D7CE>0030 +1D7CF>0031 +1D7D0>0032 +1D7D1>0033 +1D7D2>0034 +1D7D3>0035 +1D7D4>0036 +1D7D5>0037 +1D7D6>0038 +1D7D7>0039 +1D7D8>0030 +1D7D9>0031 +1D7DA>0032 +1D7DB>0033 +1D7DC>0034 
+1D7DD>0035 +1D7DE>0036 +1D7DF>0037 +1D7E0>0038 +1D7E1>0039 +1D7E2>0030 +1D7E3>0031 +1D7E4>0032 +1D7E5>0033 +1D7E6>0034 +1D7E7>0035 +1D7E8>0036 +1D7E9>0037 +1D7EA>0038 +1D7EB>0039 +1D7EC>0030 +1D7ED>0031 +1D7EE>0032 +1D7EF>0033 +1D7F0>0034 +1D7F1>0035 +1D7F2>0036 +1D7F3>0037 +1D7F4>0038 +1D7F5>0039 +1D7F6>0030 +1D7F7>0031 +1D7F8>0032 +1D7F9>0033 +1D7FA>0034 +1D7FB>0035 +1D7FC>0036 +1D7FD>0037 +1D7FE>0038 +1D7FF>0039 +1F100>0030 002E +1F101>0030 002C +1F102>0031 002C +1F103>0032 002C +1F104>0033 002C +1F105>0034 002C +1F106>0035 002C +1F107>0036 002C +1F108>0037 002C +1F109>0038 002C +1F10A>0039 002C +1F110>0028 0041 0029 +1F111>0028 0042 0029 +1F112>0028 0043 0029 +1F113>0028 0044 0029 +1F114>0028 0045 0029 +1F115>0028 0046 0029 +1F116>0028 0047 0029 +1F117>0028 0048 0029 +1F118>0028 0049 0029 +1F119>0028 004A 0029 +1F11A>0028 004B 0029 +1F11B>0028 004C 0029 +1F11C>0028 004D 0029 +1F11D>0028 004E 0029 +1F11E>0028 004F 0029 +1F11F>0028 0050 0029 +1F120>0028 0051 0029 +1F121>0028 0052 0029 +1F122>0028 0053 0029 +1F123>0028 0054 0029 +1F124>0028 0055 0029 +1F125>0028 0056 0029 +1F126>0028 0057 0029 +1F127>0028 0058 0029 +1F128>0028 0059 0029 +1F129>0028 005A 0029 +1F12A>3014 0053 3015 +1F12B>0043 +1F12C>0052 +1F12D>0043 0044 +1F12E>0057 005A +1F131>0042 +1F13D>004E +1F13F>0050 +1F142>0053 +1F146>0057 +1F14A>0048 0056 +1F14B>004D 0056 +1F14C>0053 0044 +1F14D>0053 0053 +1F14E>0050 0050 0056 +1F190>0044 004A +1F200>307B 304B +1F210>624B +1F211>5B57 +1F212>53CC +1F213>30C7 +1F214>4E8C +1F215>591A +1F216>89E3 +1F217>5929 +1F218>4EA4 +1F219>6620 +1F21A>7121 +1F21B>6599 +1F21C>524D +1F21D>5F8C +1F21E>518D +1F21F>65B0 +1F220>521D +1F221>7D42 +1F222>751F +1F223>8CA9 +1F224>58F0 +1F225>5439 +1F226>6F14 +1F227>6295 +1F228>6355 +1F229>4E00 +1F22A>4E09 +1F22B>904A +1F22C>5DE6 +1F22D>4E2D +1F22E>53F3 +1F22F>6307 +1F230>8D70 +1F231>6253 +1F240>3014 672C 3015 +1F241>3014 4E09 3015 +1F242>3014 4E8C 3015 +1F243>3014 5B89 3015 +1F244>3014 70B9 3015 +1F245>3014 6253 3015 +1F246>3014 76D7 
3015 +1F247>3014 52DD 3015 +1F248>3014 6557 3015 +2F800>4E3D +2F801>4E38 +2F802>4E41 +2F803>20122 +2F804>4F60 +2F805>4FAE +2F806>4FBB +2F807>5002 +2F808>507A +2F809>5099 +2F80A>50E7 +2F80B>50CF +2F80C>349E +2F80D>2063A +2F80E>514D +2F80F>5154 +2F810>5164 +2F811>5177 +2F812>2051C +2F813>34B9 +2F814>5167 +2F815>518D +2F816>2054B +2F817>5197 +2F818>51A4 +2F819>4ECC +2F81A>51AC +2F81B>51B5 +2F81C>291DF +2F81D>51F5 +2F81E>5203 +2F81F>34DF +2F820>523B +2F821>5246 +2F822>5272 +2F823>5277 +2F824>3515 +2F825>52C7 +2F826>52C9 +2F827>52E4 +2F828>52FA +2F829>5305 +2F82A>5306 +2F82B>5317 +2F82C>5349 +2F82D>5351 +2F82E>535A +2F82F>5373 +2F830>537D +2F831>537F +2F832>537F +2F833>537F +2F834>20A2C +2F835>7070 +2F836>53CA +2F837>53DF +2F838>20B63 +2F839>53EB +2F83A>53F1 +2F83B>5406 +2F83C>549E +2F83D>5438 +2F83E>5448 +2F83F>5468 +2F840>54A2 +2F841>54F6 +2F842>5510 +2F843>5553 +2F844>5563 +2F845>5584 +2F846>5584 +2F847>5599 +2F848>55AB +2F849>55B3 +2F84A>55C2 +2F84B>5716 +2F84C>5606 +2F84D>5717 +2F84E>5651 +2F84F>5674 +2F850>5207 +2F851>58EE +2F852>57CE +2F853>57F4 +2F854>580D +2F855>578B +2F856>5832 +2F857>5831 +2F858>58AC +2F859>214E4 +2F85A>58F2 +2F85B>58F7 +2F85C>5906 +2F85D>591A +2F85E>5922 +2F85F>5962 +2F860>216A8 +2F861>216EA +2F862>59EC +2F863>5A1B +2F864>5A27 +2F865>59D8 +2F866>5A66 +2F867>36EE +2F868>36FC +2F869>5B08 +2F86A>5B3E +2F86B>5B3E +2F86C>219C8 +2F86D>5BC3 +2F86E>5BD8 +2F86F>5BE7 +2F870>5BF3 +2F871>21B18 +2F872>5BFF +2F873>5C06 +2F874>5F53 +2F875>5C22 +2F876>3781 +2F877>5C60 +2F878>5C6E +2F879>5CC0 +2F87A>5C8D +2F87B>21DE4 +2F87C>5D43 +2F87D>21DE6 +2F87E>5D6E +2F87F>5D6B +2F880>5D7C +2F881>5DE1 +2F882>5DE2 +2F883>382F +2F884>5DFD +2F885>5E28 +2F886>5E3D +2F887>5E69 +2F888>3862 +2F889>22183 +2F88A>387C +2F88B>5EB0 +2F88C>5EB3 +2F88D>5EB6 +2F88E>5ECA +2F88F>2A392 +2F890>5EFE +2F891>22331 +2F892>22331 +2F893>8201 +2F894>5F22 +2F895>5F22 +2F896>38C7 +2F897>232B8 +2F898>261DA +2F899>5F62 +2F89A>5F6B +2F89B>38E3 +2F89C>5F9A +2F89D>5FCD +2F89E>5FD7 +2F89F>5FF9 
+2F8A0>6081 +2F8A1>393A +2F8A2>391C +2F8A3>6094 +2F8A4>226D4 +2F8A5>60C7 +2F8A6>6148 +2F8A7>614C +2F8A8>614E +2F8A9>614C +2F8AA>617A +2F8AB>618E +2F8AC>61B2 +2F8AD>61A4 +2F8AE>61AF +2F8AF>61DE +2F8B0>61F2 +2F8B1>61F6 +2F8B2>6210 +2F8B3>621B +2F8B4>625D +2F8B5>62B1 +2F8B6>62D4 +2F8B7>6350 +2F8B8>22B0C +2F8B9>633D +2F8BA>62FC +2F8BB>6368 +2F8BC>6383 +2F8BD>63E4 +2F8BE>22BF1 +2F8BF>6422 +2F8C0>63C5 +2F8C1>63A9 +2F8C2>3A2E +2F8C3>6469 +2F8C4>647E +2F8C5>649D +2F8C6>6477 +2F8C7>3A6C +2F8C8>654F +2F8C9>656C +2F8CA>2300A +2F8CB>65E3 +2F8CC>66F8 +2F8CD>6649 +2F8CE>3B19 +2F8CF>6691 +2F8D0>3B08 +2F8D1>3AE4 +2F8D2>5192 +2F8D3>5195 +2F8D4>6700 +2F8D5>669C +2F8D6>80AD +2F8D7>43D9 +2F8D8>6717 +2F8D9>671B +2F8DA>6721 +2F8DB>675E +2F8DC>6753 +2F8DD>233C3 +2F8DE>3B49 +2F8DF>67FA +2F8E0>6785 +2F8E1>6852 +2F8E2>6885 +2F8E3>2346D +2F8E4>688E +2F8E5>681F +2F8E6>6914 +2F8E7>3B9D +2F8E8>6942 +2F8E9>69A3 +2F8EA>69EA +2F8EB>6AA8 +2F8EC>236A3 +2F8ED>6ADB +2F8EE>3C18 +2F8EF>6B21 +2F8F0>238A7 +2F8F1>6B54 +2F8F2>3C4E +2F8F3>6B72 +2F8F4>6B9F +2F8F5>6BBA +2F8F6>6BBB +2F8F7>23A8D +2F8F8>21D0B +2F8F9>23AFA +2F8FA>6C4E +2F8FB>23CBC +2F8FC>6CBF +2F8FD>6CCD +2F8FE>6C67 +2F8FF>6D16 +2F900>6D3E +2F901>6D77 +2F902>6D41 +2F903>6D69 +2F904>6D78 +2F905>6D85 +2F906>23D1E +2F907>6D34 +2F908>6E2F +2F909>6E6E +2F90A>3D33 +2F90B>6ECB +2F90C>6EC7 +2F90D>23ED1 +2F90E>6DF9 +2F90F>6F6E +2F910>23F5E +2F911>23F8E +2F912>6FC6 +2F913>7039 +2F914>701E +2F915>701B +2F916>3D96 +2F917>704A +2F918>707D +2F919>7077 +2F91A>70AD +2F91B>20525 +2F91C>7145 +2F91D>24263 +2F91E>719C +2F91F>243AB +2F920>7228 +2F921>7235 +2F922>7250 +2F923>24608 +2F924>7280 +2F925>7295 +2F926>24735 +2F927>24814 +2F928>737A +2F929>738B +2F92A>3EAC +2F92B>73A5 +2F92C>3EB8 +2F92D>3EB8 +2F92E>7447 +2F92F>745C +2F930>7471 +2F931>7485 +2F932>74CA +2F933>3F1B +2F934>7524 +2F935>24C36 +2F936>753E +2F937>24C92 +2F938>7570 +2F939>2219F +2F93A>7610 +2F93B>24FA1 +2F93C>24FB8 +2F93D>25044 +2F93E>3FFC +2F93F>4008 +2F940>76F4 +2F941>250F3 +2F942>250F2 +2F943>25119 
+2F944>25133 +2F945>771E +2F946>771F +2F947>771F +2F948>774A +2F949>4039 +2F94A>778B +2F94B>4046 +2F94C>4096 +2F94D>2541D +2F94E>784E +2F94F>788C +2F950>78CC +2F951>40E3 +2F952>25626 +2F953>7956 +2F954>2569A +2F955>256C5 +2F956>798F +2F957>79EB +2F958>412F +2F959>7A40 +2F95A>7A4A +2F95B>7A4F +2F95C>2597C +2F95D>25AA7 +2F95E>25AA7 +2F95F>7AEE +2F960>4202 +2F961>25BAB +2F962>7BC6 +2F963>7BC9 +2F964>4227 +2F965>25C80 +2F966>7CD2 +2F967>42A0 +2F968>7CE8 +2F969>7CE3 +2F96A>7D00 +2F96B>25F86 +2F96C>7D63 +2F96D>4301 +2F96E>7DC7 +2F96F>7E02 +2F970>7E45 +2F971>4334 +2F972>26228 +2F973>26247 +2F974>4359 +2F975>262D9 +2F976>7F7A +2F977>2633E +2F978>7F95 +2F979>7FFA +2F97A>8005 +2F97B>264DA +2F97C>26523 +2F97D>8060 +2F97E>265A8 +2F97F>8070 +2F980>2335F +2F981>43D5 +2F982>80B2 +2F983>8103 +2F984>440B +2F985>813E +2F986>5AB5 +2F987>267A7 +2F988>267B5 +2F989>23393 +2F98A>2339C +2F98B>8201 +2F98C>8204 +2F98D>8F9E +2F98E>446B +2F98F>8291 +2F990>828B +2F991>829D +2F992>52B3 +2F993>82B1 +2F994>82B3 +2F995>82BD +2F996>82E6 +2F997>26B3C +2F998>82E5 +2F999>831D +2F99A>8363 +2F99B>83AD +2F99C>8323 +2F99D>83BD +2F99E>83E7 +2F99F>8457 +2F9A0>8353 +2F9A1>83CA +2F9A2>83CC +2F9A3>83DC +2F9A4>26C36 +2F9A5>26D6B +2F9A6>26CD5 +2F9A7>452B +2F9A8>84F1 +2F9A9>84F3 +2F9AA>8516 +2F9AB>273CA +2F9AC>8564 +2F9AD>26F2C +2F9AE>455D +2F9AF>4561 +2F9B0>26FB1 +2F9B1>270D2 +2F9B2>456B +2F9B3>8650 +2F9B4>865C +2F9B5>8667 +2F9B6>8669 +2F9B7>86A9 +2F9B8>8688 +2F9B9>870E +2F9BA>86E2 +2F9BB>8779 +2F9BC>8728 +2F9BD>876B +2F9BE>8786 +2F9BF>45D7 +2F9C0>87E1 +2F9C1>8801 +2F9C2>45F9 +2F9C3>8860 +2F9C4>8863 +2F9C5>27667 +2F9C6>88D7 +2F9C7>88DE +2F9C8>4635 +2F9C9>88FA +2F9CA>34BB +2F9CB>278AE +2F9CC>27966 +2F9CD>46BE +2F9CE>46C7 +2F9CF>8AA0 +2F9D0>8AED +2F9D1>8B8A +2F9D2>8C55 +2F9D3>27CA8 +2F9D4>8CAB +2F9D5>8CC1 +2F9D6>8D1B +2F9D7>8D77 +2F9D8>27F2F +2F9D9>20804 +2F9DA>8DCB +2F9DB>8DBC +2F9DC>8DF0 +2F9DD>208DE +2F9DE>8ED4 +2F9DF>8F38 +2F9E0>285D2 +2F9E1>285ED +2F9E2>9094 +2F9E3>90F1 +2F9E4>9111 +2F9E5>2872E +2F9E6>911B 
+2F9E7>9238 +2F9E8>92D7 +2F9E9>92D8 +2F9EA>927C +2F9EB>93F9 +2F9EC>9415 +2F9ED>28BFA +2F9EE>958B +2F9EF>4995 +2F9F0>95B7 +2F9F1>28D77 +2F9F2>49E6 +2F9F3>96C3 +2F9F4>5DB2 +2F9F5>9723 +2F9F6>29145 +2F9F7>2921A +2F9F8>4A6E +2F9F9>4A76 +2F9FA>97E0 +2F9FB>2940A +2F9FC>4AB2 +2F9FD>29496 +2F9FE>980B +2F9FF>980B +2FA00>9829 +2FA01>295B6 +2FA02>98E2 +2FA03>4B33 +2FA04>9929 +2FA05>99A7 +2FA06>99C2 +2FA07>99FE +2FA08>4BCE +2FA09>29B30 +2FA0A>9B12 +2FA0B>9C40 +2FA0C>9CFD +2FA0D>4CCE +2FA0E>4CED +2FA0F>9D67 +2FA10>2A0CE +2FA11>4CF8 +2FA12>2A105 +2FA13>2A20E +2FA14>2A291 +2FA15>9EBB +2FA16>4D56 +2FA17>9EF9 +2FA18>9EFE +2FA19>9F05 +2FA1A>9F0F +2FA1B>9F16 +2FA1C>9F3B +2FA1D>2A600 diff --git a/icu4c/source/data/unidata/norm2/nfkc_cf.txt b/icu4c/source/data/unidata/norm2/nfkc_cf.txt new file mode 100644 index 00000000000..becabbbf34b --- /dev/null +++ b/icu4c/source/data/unidata/norm2/nfkc_cf.txt @@ -0,0 +1,5376 @@ +# Extracted from: +# DerivedNormalizationProps-5.2.0.txt +# Date: 2009-08-26, 18:18:50 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2009 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ +# This file has been reformatted into syntax for the +# gennorm2 Normalizer2 data generator tool. +# Only the NFKC_CF mappings are retained and reformatted. +# Reformatting via regular expression: s/ *; NFKC_CF; */>/ +# Use this file as the second gennorm2 input file after nfkc.txt. +# ================================================ + +# Derived Property: NFKC_Casefold (NFKC_CF) +# This property removes certain variations from characters: case, compatibility, and default-ignorables. +# It is used for loose matching and certain types of identifiers. +# It is constructed by applying NFKC, CaseFolding, and removal of Default_Ignorable_Code_Points. 
+# The process of applying these transformations is repeated until a stable result is produced. +# WARNING: Application to STRINGS must apply NFC after mapping each character, because characters may interact. +# For more information, see [http://www.unicode.org/reports/tr44/] +# Omitted code points are unchanged by this mapping. +# @missing: 0000..10FFFF> + +# All code points not explicitly listed for NFKC_Casefold +# have the value . + +0041>0061 +0042>0062 +0043>0063 +0044>0064 +0045>0065 +0046>0066 +0047>0067 +0048>0068 +0049>0069 +004A>006A +004B>006B +004C>006C +004D>006D +004E>006E +004F>006F +0050>0070 +0051>0071 +0052>0072 +0053>0073 +0054>0074 +0055>0075 +0056>0076 +0057>0077 +0058>0078 +0059>0079 +005A>007A +00A0>0020 +00A8>0020 0308 +00AA>0061 +00AD> +00AF>0020 0304 +00B2>0032 +00B3>0033 +00B4>0020 0301 +00B5>03BC +00B8>0020 0327 +00B9>0031 +00BA>006F +00BC>0031 2044 0034 +00BD>0031 2044 0032 +00BE>0033 2044 0034 +00C0>00E0 +00C1>00E1 +00C2>00E2 +00C3>00E3 +00C4>00E4 +00C5>00E5 +00C6>00E6 +00C7>00E7 +00C8>00E8 +00C9>00E9 +00CA>00EA +00CB>00EB +00CC>00EC +00CD>00ED +00CE>00EE +00CF>00EF +00D0>00F0 +00D1>00F1 +00D2>00F2 +00D3>00F3 +00D4>00F4 +00D5>00F5 +00D6>00F6 +00D8>00F8 +00D9>00F9 +00DA>00FA +00DB>00FB +00DC>00FC +00DD>00FD +00DE>00FE +00DF>0073 0073 +0100>0101 +0102>0103 +0104>0105 +0106>0107 +0108>0109 +010A>010B +010C>010D +010E>010F +0110>0111 +0112>0113 +0114>0115 +0116>0117 +0118>0119 +011A>011B +011C>011D +011E>011F +0120>0121 +0122>0123 +0124>0125 +0126>0127 +0128>0129 +012A>012B +012C>012D +012E>012F +0130>0069 0307 +0132..0133>0069 006A +0134>0135 +0136>0137 +0139>013A +013B>013C +013D>013E +013F..0140>006C 00B7 +0141>0142 +0143>0144 +0145>0146 +0147>0148 +0149>02BC 006E +014A>014B +014C>014D +014E>014F +0150>0151 +0152>0153 +0154>0155 +0156>0157 +0158>0159 +015A>015B +015C>015D +015E>015F +0160>0161 +0162>0163 +0164>0165 +0166>0167 +0168>0169 +016A>016B +016C>016D +016E>016F +0170>0171 +0172>0173 +0174>0175 +0176>0177 +0178>00FF +0179>017A 
+017B>017C +017D>017E +017F>0073 +0181>0253 +0182>0183 +0184>0185 +0186>0254 +0187>0188 +0189>0256 +018A>0257 +018B>018C +018E>01DD +018F>0259 +0190>025B +0191>0192 +0193>0260 +0194>0263 +0196>0269 +0197>0268 +0198>0199 +019C>026F +019D>0272 +019F>0275 +01A0>01A1 +01A2>01A3 +01A4>01A5 +01A6>0280 +01A7>01A8 +01A9>0283 +01AC>01AD +01AE>0288 +01AF>01B0 +01B1>028A +01B2>028B +01B3>01B4 +01B5>01B6 +01B7>0292 +01B8>01B9 +01BC>01BD +01C4..01C6>0064 017E +01C7..01C9>006C 006A +01CA..01CC>006E 006A +01CD>01CE +01CF>01D0 +01D1>01D2 +01D3>01D4 +01D5>01D6 +01D7>01D8 +01D9>01DA +01DB>01DC +01DE>01DF +01E0>01E1 +01E2>01E3 +01E4>01E5 +01E6>01E7 +01E8>01E9 +01EA>01EB +01EC>01ED +01EE>01EF +01F1..01F3>0064 007A +01F4>01F5 +01F6>0195 +01F7>01BF +01F8>01F9 +01FA>01FB +01FC>01FD +01FE>01FF +0200>0201 +0202>0203 +0204>0205 +0206>0207 +0208>0209 +020A>020B +020C>020D +020E>020F +0210>0211 +0212>0213 +0214>0215 +0216>0217 +0218>0219 +021A>021B +021C>021D +021E>021F +0220>019E +0222>0223 +0224>0225 +0226>0227 +0228>0229 +022A>022B +022C>022D +022E>022F +0230>0231 +0232>0233 +023A>2C65 +023B>023C +023D>019A +023E>2C66 +0241>0242 +0243>0180 +0244>0289 +0245>028C +0246>0247 +0248>0249 +024A>024B +024C>024D +024E>024F +02B0>0068 +02B1>0266 +02B2>006A +02B3>0072 +02B4>0279 +02B5>027B +02B6>0281 +02B7>0077 +02B8>0079 +02D8>0020 0306 +02D9>0020 0307 +02DA>0020 030A +02DB>0020 0328 +02DC>0020 0303 +02DD>0020 030B +02E0>0263 +02E1>006C +02E2>0073 +02E3>0078 +02E4>0295 +0340>0300 +0341>0301 +0343>0313 +0344>0308 0301 +0345>03B9 +034F> +0370>0371 +0372>0373 +0374>02B9 +0376>0377 +037A>0020 03B9 +037E>003B +0384>0020 0301 +0385>0020 0308 0301 +0386>03AC +0387>00B7 +0388>03AD +0389>03AE +038A>03AF +038C>03CC +038E>03CD +038F>03CE +0391>03B1 +0392>03B2 +0393>03B3 +0394>03B4 +0395>03B5 +0396>03B6 +0397>03B7 +0398>03B8 +0399>03B9 +039A>03BA +039B>03BB +039C>03BC +039D>03BD +039E>03BE +039F>03BF +03A0>03C0 +03A1>03C1 +03A3>03C3 +03A4>03C4 +03A5>03C5 +03A6>03C6 +03A7>03C7 +03A8>03C8 +03A9>03C9 +03AA>03CA 
+03AB>03CB +03C2>03C3 +03CF>03D7 +03D0>03B2 +03D1>03B8 +03D2>03C5 +03D3>03CD +03D4>03CB +03D5>03C6 +03D6>03C0 +03D8>03D9 +03DA>03DB +03DC>03DD +03DE>03DF +03E0>03E1 +03E2>03E3 +03E4>03E5 +03E6>03E7 +03E8>03E9 +03EA>03EB +03EC>03ED +03EE>03EF +03F0>03BA +03F1>03C1 +03F2>03C3 +03F4>03B8 +03F5>03B5 +03F7>03F8 +03F9>03C3 +03FA>03FB +03FD>037B +03FE>037C +03FF>037D +0400>0450 +0401>0451 +0402>0452 +0403>0453 +0404>0454 +0405>0455 +0406>0456 +0407>0457 +0408>0458 +0409>0459 +040A>045A +040B>045B +040C>045C +040D>045D +040E>045E +040F>045F +0410>0430 +0411>0431 +0412>0432 +0413>0433 +0414>0434 +0415>0435 +0416>0436 +0417>0437 +0418>0438 +0419>0439 +041A>043A +041B>043B +041C>043C +041D>043D +041E>043E +041F>043F +0420>0440 +0421>0441 +0422>0442 +0423>0443 +0424>0444 +0425>0445 +0426>0446 +0427>0447 +0428>0448 +0429>0449 +042A>044A +042B>044B +042C>044C +042D>044D +042E>044E +042F>044F +0460>0461 +0462>0463 +0464>0465 +0466>0467 +0468>0469 +046A>046B +046C>046D +046E>046F +0470>0471 +0472>0473 +0474>0475 +0476>0477 +0478>0479 +047A>047B +047C>047D +047E>047F +0480>0481 +048A>048B +048C>048D +048E>048F +0490>0491 +0492>0493 +0494>0495 +0496>0497 +0498>0499 +049A>049B +049C>049D +049E>049F +04A0>04A1 +04A2>04A3 +04A4>04A5 +04A6>04A7 +04A8>04A9 +04AA>04AB +04AC>04AD +04AE>04AF +04B0>04B1 +04B2>04B3 +04B4>04B5 +04B6>04B7 +04B8>04B9 +04BA>04BB +04BC>04BD +04BE>04BF +04C0>04CF +04C1>04C2 +04C3>04C4 +04C5>04C6 +04C7>04C8 +04C9>04CA +04CB>04CC +04CD>04CE +04D0>04D1 +04D2>04D3 +04D4>04D5 +04D6>04D7 +04D8>04D9 +04DA>04DB +04DC>04DD +04DE>04DF +04E0>04E1 +04E2>04E3 +04E4>04E5 +04E6>04E7 +04E8>04E9 +04EA>04EB +04EC>04ED +04EE>04EF +04F0>04F1 +04F2>04F3 +04F4>04F5 +04F6>04F7 +04F8>04F9 +04FA>04FB +04FC>04FD +04FE>04FF +0500>0501 +0502>0503 +0504>0505 +0506>0507 +0508>0509 +050A>050B +050C>050D +050E>050F +0510>0511 +0512>0513 +0514>0515 +0516>0517 +0518>0519 +051A>051B +051C>051D +051E>051F +0520>0521 +0522>0523 +0524>0525 +0531>0561 +0532>0562 +0533>0563 +0534>0564 +0535>0565 
+0536>0566 +0537>0567 +0538>0568 +0539>0569 +053A>056A +053B>056B +053C>056C +053D>056D +053E>056E +053F>056F +0540>0570 +0541>0571 +0542>0572 +0543>0573 +0544>0574 +0545>0575 +0546>0576 +0547>0577 +0548>0578 +0549>0579 +054A>057A +054B>057B +054C>057C +054D>057D +054E>057E +054F>057F +0550>0580 +0551>0581 +0552>0582 +0553>0583 +0554>0584 +0555>0585 +0556>0586 +0587>0565 0582 +0675>0627 0674 +0676>0648 0674 +0677>06C7 0674 +0678>064A 0674 +0958>0915 093C +0959>0916 093C +095A>0917 093C +095B>091C 093C +095C>0921 093C +095D>0922 093C +095E>092B 093C +095F>092F 093C +09DC>09A1 09BC +09DD>09A2 09BC +09DF>09AF 09BC +0A33>0A32 0A3C +0A36>0A38 0A3C +0A59>0A16 0A3C +0A5A>0A17 0A3C +0A5B>0A1C 0A3C +0A5E>0A2B 0A3C +0B5C>0B21 0B3C +0B5D>0B22 0B3C +0E33>0E4D 0E32 +0EB3>0ECD 0EB2 +0EDC>0EAB 0E99 +0EDD>0EAB 0EA1 +0F0C>0F0B +0F43>0F42 0FB7 +0F4D>0F4C 0FB7 +0F52>0F51 0FB7 +0F57>0F56 0FB7 +0F5C>0F5B 0FB7 +0F69>0F40 0FB5 +0F73>0F71 0F72 +0F75>0F71 0F74 +0F76>0FB2 0F80 +0F77>0FB2 0F71 0F80 +0F78>0FB3 0F80 +0F79>0FB3 0F71 0F80 +0F81>0F71 0F80 +0F93>0F92 0FB7 +0F9D>0F9C 0FB7 +0FA2>0FA1 0FB7 +0FA7>0FA6 0FB7 +0FAC>0FAB 0FB7 +0FB9>0F90 0FB5 +10A0>2D00 +10A1>2D01 +10A2>2D02 +10A3>2D03 +10A4>2D04 +10A5>2D05 +10A6>2D06 +10A7>2D07 +10A8>2D08 +10A9>2D09 +10AA>2D0A +10AB>2D0B +10AC>2D0C +10AD>2D0D +10AE>2D0E +10AF>2D0F +10B0>2D10 +10B1>2D11 +10B2>2D12 +10B3>2D13 +10B4>2D14 +10B5>2D15 +10B6>2D16 +10B7>2D17 +10B8>2D18 +10B9>2D19 +10BA>2D1A +10BB>2D1B +10BC>2D1C +10BD>2D1D +10BE>2D1E +10BF>2D1F +10C0>2D20 +10C1>2D21 +10C2>2D22 +10C3>2D23 +10C4>2D24 +10C5>2D25 +10FC>10DC +115F..1160> +17B4..17B5> +180B..180D> +1D2C>0061 +1D2D>00E6 +1D2E>0062 +1D30>0064 +1D31>0065 +1D32>01DD +1D33>0067 +1D34>0068 +1D35>0069 +1D36>006A +1D37>006B +1D38>006C +1D39>006D +1D3A>006E +1D3C>006F +1D3D>0223 +1D3E>0070 +1D3F>0072 +1D40>0074 +1D41>0075 +1D42>0077 +1D43>0061 +1D44>0250 +1D45>0251 +1D46>1D02 +1D47>0062 +1D48>0064 +1D49>0065 +1D4A>0259 +1D4B>025B +1D4C>025C +1D4D>0067 +1D4F>006B +1D50>006D +1D51>014B +1D52>006F 
+1D53>0254 +1D54>1D16 +1D55>1D17 +1D56>0070 +1D57>0074 +1D58>0075 +1D59>1D1D +1D5A>026F +1D5B>0076 +1D5C>1D25 +1D5D>03B2 +1D5E>03B3 +1D5F>03B4 +1D60>03C6 +1D61>03C7 +1D62>0069 +1D63>0072 +1D64>0075 +1D65>0076 +1D66>03B2 +1D67>03B3 +1D68>03C1 +1D69>03C6 +1D6A>03C7 +1D78>043D +1D9B>0252 +1D9C>0063 +1D9D>0255 +1D9E>00F0 +1D9F>025C +1DA0>0066 +1DA1>025F +1DA2>0261 +1DA3>0265 +1DA4>0268 +1DA5>0269 +1DA6>026A +1DA7>1D7B +1DA8>029D +1DA9>026D +1DAA>1D85 +1DAB>029F +1DAC>0271 +1DAD>0270 +1DAE>0272 +1DAF>0273 +1DB0>0274 +1DB1>0275 +1DB2>0278 +1DB3>0282 +1DB4>0283 +1DB5>01AB +1DB6>0289 +1DB7>028A +1DB8>1D1C +1DB9>028B +1DBA>028C +1DBB>007A +1DBC>0290 +1DBD>0291 +1DBE>0292 +1DBF>03B8 +1E00>1E01 +1E02>1E03 +1E04>1E05 +1E06>1E07 +1E08>1E09 +1E0A>1E0B +1E0C>1E0D +1E0E>1E0F +1E10>1E11 +1E12>1E13 +1E14>1E15 +1E16>1E17 +1E18>1E19 +1E1A>1E1B +1E1C>1E1D +1E1E>1E1F +1E20>1E21 +1E22>1E23 +1E24>1E25 +1E26>1E27 +1E28>1E29 +1E2A>1E2B +1E2C>1E2D +1E2E>1E2F +1E30>1E31 +1E32>1E33 +1E34>1E35 +1E36>1E37 +1E38>1E39 +1E3A>1E3B +1E3C>1E3D +1E3E>1E3F +1E40>1E41 +1E42>1E43 +1E44>1E45 +1E46>1E47 +1E48>1E49 +1E4A>1E4B +1E4C>1E4D +1E4E>1E4F +1E50>1E51 +1E52>1E53 +1E54>1E55 +1E56>1E57 +1E58>1E59 +1E5A>1E5B +1E5C>1E5D +1E5E>1E5F +1E60>1E61 +1E62>1E63 +1E64>1E65 +1E66>1E67 +1E68>1E69 +1E6A>1E6B +1E6C>1E6D +1E6E>1E6F +1E70>1E71 +1E72>1E73 +1E74>1E75 +1E76>1E77 +1E78>1E79 +1E7A>1E7B +1E7C>1E7D +1E7E>1E7F +1E80>1E81 +1E82>1E83 +1E84>1E85 +1E86>1E87 +1E88>1E89 +1E8A>1E8B +1E8C>1E8D +1E8E>1E8F +1E90>1E91 +1E92>1E93 +1E94>1E95 +1E9A>0061 02BE +1E9B>1E61 +1E9E>0073 0073 +1EA0>1EA1 +1EA2>1EA3 +1EA4>1EA5 +1EA6>1EA7 +1EA8>1EA9 +1EAA>1EAB +1EAC>1EAD +1EAE>1EAF +1EB0>1EB1 +1EB2>1EB3 +1EB4>1EB5 +1EB6>1EB7 +1EB8>1EB9 +1EBA>1EBB +1EBC>1EBD +1EBE>1EBF +1EC0>1EC1 +1EC2>1EC3 +1EC4>1EC5 +1EC6>1EC7 +1EC8>1EC9 +1ECA>1ECB +1ECC>1ECD +1ECE>1ECF +1ED0>1ED1 +1ED2>1ED3 +1ED4>1ED5 +1ED6>1ED7 +1ED8>1ED9 +1EDA>1EDB +1EDC>1EDD +1EDE>1EDF +1EE0>1EE1 +1EE2>1EE3 +1EE4>1EE5 +1EE6>1EE7 +1EE8>1EE9 +1EEA>1EEB +1EEC>1EED +1EEE>1EEF 
+1EF0>1EF1 +1EF2>1EF3 +1EF4>1EF5 +1EF6>1EF7 +1EF8>1EF9 +1EFA>1EFB +1EFC>1EFD +1EFE>1EFF +1F08>1F00 +1F09>1F01 +1F0A>1F02 +1F0B>1F03 +1F0C>1F04 +1F0D>1F05 +1F0E>1F06 +1F0F>1F07 +1F18>1F10 +1F19>1F11 +1F1A>1F12 +1F1B>1F13 +1F1C>1F14 +1F1D>1F15 +1F28>1F20 +1F29>1F21 +1F2A>1F22 +1F2B>1F23 +1F2C>1F24 +1F2D>1F25 +1F2E>1F26 +1F2F>1F27 +1F38>1F30 +1F39>1F31 +1F3A>1F32 +1F3B>1F33 +1F3C>1F34 +1F3D>1F35 +1F3E>1F36 +1F3F>1F37 +1F48>1F40 +1F49>1F41 +1F4A>1F42 +1F4B>1F43 +1F4C>1F44 +1F4D>1F45 +1F59>1F51 +1F5B>1F53 +1F5D>1F55 +1F5F>1F57 +1F68>1F60 +1F69>1F61 +1F6A>1F62 +1F6B>1F63 +1F6C>1F64 +1F6D>1F65 +1F6E>1F66 +1F6F>1F67 +1F71>03AC +1F73>03AD +1F75>03AE +1F77>03AF +1F79>03CC +1F7B>03CD +1F7D>03CE +1F80>1F00 03B9 +1F81>1F01 03B9 +1F82>1F02 03B9 +1F83>1F03 03B9 +1F84>1F04 03B9 +1F85>1F05 03B9 +1F86>1F06 03B9 +1F87>1F07 03B9 +1F88>1F00 03B9 +1F89>1F01 03B9 +1F8A>1F02 03B9 +1F8B>1F03 03B9 +1F8C>1F04 03B9 +1F8D>1F05 03B9 +1F8E>1F06 03B9 +1F8F>1F07 03B9 +1F90>1F20 03B9 +1F91>1F21 03B9 +1F92>1F22 03B9 +1F93>1F23 03B9 +1F94>1F24 03B9 +1F95>1F25 03B9 +1F96>1F26 03B9 +1F97>1F27 03B9 +1F98>1F20 03B9 +1F99>1F21 03B9 +1F9A>1F22 03B9 +1F9B>1F23 03B9 +1F9C>1F24 03B9 +1F9D>1F25 03B9 +1F9E>1F26 03B9 +1F9F>1F27 03B9 +1FA0>1F60 03B9 +1FA1>1F61 03B9 +1FA2>1F62 03B9 +1FA3>1F63 03B9 +1FA4>1F64 03B9 +1FA5>1F65 03B9 +1FA6>1F66 03B9 +1FA7>1F67 03B9 +1FA8>1F60 03B9 +1FA9>1F61 03B9 +1FAA>1F62 03B9 +1FAB>1F63 03B9 +1FAC>1F64 03B9 +1FAD>1F65 03B9 +1FAE>1F66 03B9 +1FAF>1F67 03B9 +1FB2>1F70 03B9 +1FB3>03B1 03B9 +1FB4>03AC 03B9 +1FB7>1FB6 03B9 +1FB8>1FB0 +1FB9>1FB1 +1FBA>1F70 +1FBB>03AC +1FBC>03B1 03B9 +1FBD>0020 0313 +1FBE>03B9 +1FBF>0020 0313 +1FC0>0020 0342 +1FC1>0020 0308 0342 +1FC2>1F74 03B9 +1FC3>03B7 03B9 +1FC4>03AE 03B9 +1FC7>1FC6 03B9 +1FC8>1F72 +1FC9>03AD +1FCA>1F74 +1FCB>03AE +1FCC>03B7 03B9 +1FCD>0020 0313 0300 +1FCE>0020 0313 0301 +1FCF>0020 0313 0342 +1FD3>0390 +1FD8>1FD0 +1FD9>1FD1 +1FDA>1F76 +1FDB>03AF +1FDD>0020 0314 0300 +1FDE>0020 0314 0301 +1FDF>0020 0314 0342 +1FE3>03B0 +1FE8>1FE0 
+1FE9>1FE1 +1FEA>1F7A +1FEB>03CD +1FEC>1FE5 +1FED>0020 0308 0300 +1FEE>0020 0308 0301 +1FEF>0060 +1FF2>1F7C 03B9 +1FF3>03C9 03B9 +1FF4>03CE 03B9 +1FF7>1FF6 03B9 +1FF8>1F78 +1FF9>03CC +1FFA>1F7C +1FFB>03CE +1FFC>03C9 03B9 +1FFD>0020 0301 +1FFE>0020 0314 +2000..200A>0020 +200B..200F> +2011>2010 +2017>0020 0333 +2024>002E +2025>002E 002E +2026>002E 002E 002E +202A..202E> +202F>0020 +2033>2032 2032 +2034>2032 2032 2032 +2036>2035 2035 +2037>2035 2035 2035 +203C>0021 0021 +203E>0020 0305 +2047>003F 003F +2048>003F 0021 +2049>0021 003F +2057>2032 2032 2032 2032 +205F>0020 +2060..2064> +2065..2069> +206A..206F> +2070>0030 +2071>0069 +2074>0034 +2075>0035 +2076>0036 +2077>0037 +2078>0038 +2079>0039 +207A>002B +207B>2212 +207C>003D +207D>0028 +207E>0029 +207F>006E +2080>0030 +2081>0031 +2082>0032 +2083>0033 +2084>0034 +2085>0035 +2086>0036 +2087>0037 +2088>0038 +2089>0039 +208A>002B +208B>2212 +208C>003D +208D>0028 +208E>0029 +2090>0061 +2091>0065 +2092>006F +2093>0078 +2094>0259 +20A8>0072 0073 +2100>0061 002F 0063 +2101>0061 002F 0073 +2102>0063 +2103>00B0 0063 +2105>0063 002F 006F +2106>0063 002F 0075 +2107>025B +2109>00B0 0066 +210A>0067 +210B..210E>0068 +210F>0127 +2110..2111>0069 +2112..2113>006C +2115>006E +2116>006E 006F +2119>0070 +211A>0071 +211B..211D>0072 +2120>0073 006D +2121>0074 0065 006C +2122>0074 006D +2124>007A +2126>03C9 +2128>007A +212A>006B +212B>00E5 +212C>0062 +212D>0063 +212F..2130>0065 +2131>0066 +2132>214E +2133>006D +2134>006F +2135>05D0 +2136>05D1 +2137>05D2 +2138>05D3 +2139>0069 +213B>0066 0061 0078 +213C>03C0 +213D..213E>03B3 +213F>03C0 +2140>2211 +2145..2146>0064 +2147>0065 +2148>0069 +2149>006A +2150>0031 2044 0037 +2151>0031 2044 0039 +2152>0031 2044 0031 0030 +2153>0031 2044 0033 +2154>0032 2044 0033 +2155>0031 2044 0035 +2156>0032 2044 0035 +2157>0033 2044 0035 +2158>0034 2044 0035 +2159>0031 2044 0036 +215A>0035 2044 0036 +215B>0031 2044 0038 +215C>0033 2044 0038 +215D>0035 2044 0038 +215E>0037 2044 0038 +215F>0031 2044 +2160>0069 
+2161>0069 0069 +2162>0069 0069 0069 +2163>0069 0076 +2164>0076 +2165>0076 0069 +2166>0076 0069 0069 +2167>0076 0069 0069 0069 +2168>0069 0078 +2169>0078 +216A>0078 0069 +216B>0078 0069 0069 +216C>006C +216D>0063 +216E>0064 +216F>006D +2170>0069 +2171>0069 0069 +2172>0069 0069 0069 +2173>0069 0076 +2174>0076 +2175>0076 0069 +2176>0076 0069 0069 +2177>0076 0069 0069 0069 +2178>0069 0078 +2179>0078 +217A>0078 0069 +217B>0078 0069 0069 +217C>006C +217D>0063 +217E>0064 +217F>006D +2183>2184 +2189>0030 2044 0033 +222C>222B 222B +222D>222B 222B 222B +222F>222E 222E +2230>222E 222E 222E +2329>3008 +232A>3009 +2460>0031 +2461>0032 +2462>0033 +2463>0034 +2464>0035 +2465>0036 +2466>0037 +2467>0038 +2468>0039 +2469>0031 0030 +246A>0031 0031 +246B>0031 0032 +246C>0031 0033 +246D>0031 0034 +246E>0031 0035 +246F>0031 0036 +2470>0031 0037 +2471>0031 0038 +2472>0031 0039 +2473>0032 0030 +2474>0028 0031 0029 +2475>0028 0032 0029 +2476>0028 0033 0029 +2477>0028 0034 0029 +2478>0028 0035 0029 +2479>0028 0036 0029 +247A>0028 0037 0029 +247B>0028 0038 0029 +247C>0028 0039 0029 +247D>0028 0031 0030 0029 +247E>0028 0031 0031 0029 +247F>0028 0031 0032 0029 +2480>0028 0031 0033 0029 +2481>0028 0031 0034 0029 +2482>0028 0031 0035 0029 +2483>0028 0031 0036 0029 +2484>0028 0031 0037 0029 +2485>0028 0031 0038 0029 +2486>0028 0031 0039 0029 +2487>0028 0032 0030 0029 +2488>0031 002E +2489>0032 002E +248A>0033 002E +248B>0034 002E +248C>0035 002E +248D>0036 002E +248E>0037 002E +248F>0038 002E +2490>0039 002E +2491>0031 0030 002E +2492>0031 0031 002E +2493>0031 0032 002E +2494>0031 0033 002E +2495>0031 0034 002E +2496>0031 0035 002E +2497>0031 0036 002E +2498>0031 0037 002E +2499>0031 0038 002E +249A>0031 0039 002E +249B>0032 0030 002E +249C>0028 0061 0029 +249D>0028 0062 0029 +249E>0028 0063 0029 +249F>0028 0064 0029 +24A0>0028 0065 0029 +24A1>0028 0066 0029 +24A2>0028 0067 0029 +24A3>0028 0068 0029 +24A4>0028 0069 0029 +24A5>0028 006A 0029 +24A6>0028 006B 0029 +24A7>0028 006C 0029 +24A8>0028 
006D 0029 +24A9>0028 006E 0029 +24AA>0028 006F 0029 +24AB>0028 0070 0029 +24AC>0028 0071 0029 +24AD>0028 0072 0029 +24AE>0028 0073 0029 +24AF>0028 0074 0029 +24B0>0028 0075 0029 +24B1>0028 0076 0029 +24B2>0028 0077 0029 +24B3>0028 0078 0029 +24B4>0028 0079 0029 +24B5>0028 007A 0029 +24B6>0061 +24B7>0062 +24B8>0063 +24B9>0064 +24BA>0065 +24BB>0066 +24BC>0067 +24BD>0068 +24BE>0069 +24BF>006A +24C0>006B +24C1>006C +24C2>006D +24C3>006E +24C4>006F +24C5>0070 +24C6>0071 +24C7>0072 +24C8>0073 +24C9>0074 +24CA>0075 +24CB>0076 +24CC>0077 +24CD>0078 +24CE>0079 +24CF>007A +24D0>0061 +24D1>0062 +24D2>0063 +24D3>0064 +24D4>0065 +24D5>0066 +24D6>0067 +24D7>0068 +24D8>0069 +24D9>006A +24DA>006B +24DB>006C +24DC>006D +24DD>006E +24DE>006F +24DF>0070 +24E0>0071 +24E1>0072 +24E2>0073 +24E3>0074 +24E4>0075 +24E5>0076 +24E6>0077 +24E7>0078 +24E8>0079 +24E9>007A +24EA>0030 +2A0C>222B 222B 222B 222B +2A74>003A 003A 003D +2A75>003D 003D +2A76>003D 003D 003D +2ADC>2ADD 0338 +2C00>2C30 +2C01>2C31 +2C02>2C32 +2C03>2C33 +2C04>2C34 +2C05>2C35 +2C06>2C36 +2C07>2C37 +2C08>2C38 +2C09>2C39 +2C0A>2C3A +2C0B>2C3B +2C0C>2C3C +2C0D>2C3D +2C0E>2C3E +2C0F>2C3F +2C10>2C40 +2C11>2C41 +2C12>2C42 +2C13>2C43 +2C14>2C44 +2C15>2C45 +2C16>2C46 +2C17>2C47 +2C18>2C48 +2C19>2C49 +2C1A>2C4A +2C1B>2C4B +2C1C>2C4C +2C1D>2C4D +2C1E>2C4E +2C1F>2C4F +2C20>2C50 +2C21>2C51 +2C22>2C52 +2C23>2C53 +2C24>2C54 +2C25>2C55 +2C26>2C56 +2C27>2C57 +2C28>2C58 +2C29>2C59 +2C2A>2C5A +2C2B>2C5B +2C2C>2C5C +2C2D>2C5D +2C2E>2C5E +2C60>2C61 +2C62>026B +2C63>1D7D +2C64>027D +2C67>2C68 +2C69>2C6A +2C6B>2C6C +2C6D>0251 +2C6E>0271 +2C6F>0250 +2C70>0252 +2C72>2C73 +2C75>2C76 +2C7C>006A +2C7D>0076 +2C7E>023F +2C7F>0240 +2C80>2C81 +2C82>2C83 +2C84>2C85 +2C86>2C87 +2C88>2C89 +2C8A>2C8B +2C8C>2C8D +2C8E>2C8F +2C90>2C91 +2C92>2C93 +2C94>2C95 +2C96>2C97 +2C98>2C99 +2C9A>2C9B +2C9C>2C9D +2C9E>2C9F +2CA0>2CA1 +2CA2>2CA3 +2CA4>2CA5 +2CA6>2CA7 +2CA8>2CA9 +2CAA>2CAB +2CAC>2CAD +2CAE>2CAF +2CB0>2CB1 +2CB2>2CB3 +2CB4>2CB5 +2CB6>2CB7 +2CB8>2CB9 +2CBA>2CBB 
+2CBC>2CBD +2CBE>2CBF +2CC0>2CC1 +2CC2>2CC3 +2CC4>2CC5 +2CC6>2CC7 +2CC8>2CC9 +2CCA>2CCB +2CCC>2CCD +2CCE>2CCF +2CD0>2CD1 +2CD2>2CD3 +2CD4>2CD5 +2CD6>2CD7 +2CD8>2CD9 +2CDA>2CDB +2CDC>2CDD +2CDE>2CDF +2CE0>2CE1 +2CE2>2CE3 +2CEB>2CEC +2CED>2CEE +2D6F>2D61 +2E9F>6BCD +2EF3>9F9F +2F00>4E00 +2F01>4E28 +2F02>4E36 +2F03>4E3F +2F04>4E59 +2F05>4E85 +2F06>4E8C +2F07>4EA0 +2F08>4EBA +2F09>513F +2F0A>5165 +2F0B>516B +2F0C>5182 +2F0D>5196 +2F0E>51AB +2F0F>51E0 +2F10>51F5 +2F11>5200 +2F12>529B +2F13>52F9 +2F14>5315 +2F15>531A +2F16>5338 +2F17>5341 +2F18>535C +2F19>5369 +2F1A>5382 +2F1B>53B6 +2F1C>53C8 +2F1D>53E3 +2F1E>56D7 +2F1F>571F +2F20>58EB +2F21>5902 +2F22>590A +2F23>5915 +2F24>5927 +2F25>5973 +2F26>5B50 +2F27>5B80 +2F28>5BF8 +2F29>5C0F +2F2A>5C22 +2F2B>5C38 +2F2C>5C6E +2F2D>5C71 +2F2E>5DDB +2F2F>5DE5 +2F30>5DF1 +2F31>5DFE +2F32>5E72 +2F33>5E7A +2F34>5E7F +2F35>5EF4 +2F36>5EFE +2F37>5F0B +2F38>5F13 +2F39>5F50 +2F3A>5F61 +2F3B>5F73 +2F3C>5FC3 +2F3D>6208 +2F3E>6236 +2F3F>624B +2F40>652F +2F41>6534 +2F42>6587 +2F43>6597 +2F44>65A4 +2F45>65B9 +2F46>65E0 +2F47>65E5 +2F48>66F0 +2F49>6708 +2F4A>6728 +2F4B>6B20 +2F4C>6B62 +2F4D>6B79 +2F4E>6BB3 +2F4F>6BCB +2F50>6BD4 +2F51>6BDB +2F52>6C0F +2F53>6C14 +2F54>6C34 +2F55>706B +2F56>722A +2F57>7236 +2F58>723B +2F59>723F +2F5A>7247 +2F5B>7259 +2F5C>725B +2F5D>72AC +2F5E>7384 +2F5F>7389 +2F60>74DC +2F61>74E6 +2F62>7518 +2F63>751F +2F64>7528 +2F65>7530 +2F66>758B +2F67>7592 +2F68>7676 +2F69>767D +2F6A>76AE +2F6B>76BF +2F6C>76EE +2F6D>77DB +2F6E>77E2 +2F6F>77F3 +2F70>793A +2F71>79B8 +2F72>79BE +2F73>7A74 +2F74>7ACB +2F75>7AF9 +2F76>7C73 +2F77>7CF8 +2F78>7F36 +2F79>7F51 +2F7A>7F8A +2F7B>7FBD +2F7C>8001 +2F7D>800C +2F7E>8012 +2F7F>8033 +2F80>807F +2F81>8089 +2F82>81E3 +2F83>81EA +2F84>81F3 +2F85>81FC +2F86>820C +2F87>821B +2F88>821F +2F89>826E +2F8A>8272 +2F8B>8278 +2F8C>864D +2F8D>866B +2F8E>8840 +2F8F>884C +2F90>8863 +2F91>897E +2F92>898B +2F93>89D2 +2F94>8A00 +2F95>8C37 +2F96>8C46 +2F97>8C55 +2F98>8C78 +2F99>8C9D +2F9A>8D64 +2F9B>8D70 
+2F9C>8DB3 +2F9D>8EAB +2F9E>8ECA +2F9F>8F9B +2FA0>8FB0 +2FA1>8FB5 +2FA2>9091 +2FA3>9149 +2FA4>91C6 +2FA5>91CC +2FA6>91D1 +2FA7>9577 +2FA8>9580 +2FA9>961C +2FAA>96B6 +2FAB>96B9 +2FAC>96E8 +2FAD>9751 +2FAE>975E +2FAF>9762 +2FB0>9769 +2FB1>97CB +2FB2>97ED +2FB3>97F3 +2FB4>9801 +2FB5>98A8 +2FB6>98DB +2FB7>98DF +2FB8>9996 +2FB9>9999 +2FBA>99AC +2FBB>9AA8 +2FBC>9AD8 +2FBD>9ADF +2FBE>9B25 +2FBF>9B2F +2FC0>9B32 +2FC1>9B3C +2FC2>9B5A +2FC3>9CE5 +2FC4>9E75 +2FC5>9E7F +2FC6>9EA5 +2FC7>9EBB +2FC8>9EC3 +2FC9>9ECD +2FCA>9ED1 +2FCB>9EF9 +2FCC>9EFD +2FCD>9F0E +2FCE>9F13 +2FCF>9F20 +2FD0>9F3B +2FD1>9F4A +2FD2>9F52 +2FD3>9F8D +2FD4>9F9C +2FD5>9FA0 +3000>0020 +3036>3012 +3038>5341 +3039>5344 +303A>5345 +309B>0020 3099 +309C>0020 309A +309F>3088 308A +30FF>30B3 30C8 +3131>1100 +3132>1101 +3133>11AA +3134>1102 +3135>11AC +3136>11AD +3137>1103 +3138>1104 +3139>1105 +313A>11B0 +313B>11B1 +313C>11B2 +313D>11B3 +313E>11B4 +313F>11B5 +3140>111A +3141>1106 +3142>1107 +3143>1108 +3144>1121 +3145>1109 +3146>110A +3147>110B +3148>110C +3149>110D +314A>110E +314B>110F +314C>1110 +314D>1111 +314E>1112 +314F>1161 +3150>1162 +3151>1163 +3152>1164 +3153>1165 +3154>1166 +3155>1167 +3156>1168 +3157>1169 +3158>116A +3159>116B +315A>116C +315B>116D +315C>116E +315D>116F +315E>1170 +315F>1171 +3160>1172 +3161>1173 +3162>1174 +3163>1175 +3164> +3165>1114 +3166>1115 +3167>11C7 +3168>11C8 +3169>11CC +316A>11CE +316B>11D3 +316C>11D7 +316D>11D9 +316E>111C +316F>11DD +3170>11DF +3171>111D +3172>111E +3173>1120 +3174>1122 +3175>1123 +3176>1127 +3177>1129 +3178>112B +3179>112C +317A>112D +317B>112E +317C>112F +317D>1132 +317E>1136 +317F>1140 +3180>1147 +3181>114C +3182>11F1 +3183>11F2 +3184>1157 +3185>1158 +3186>1159 +3187>1184 +3188>1185 +3189>1188 +318A>1191 +318B>1192 +318C>1194 +318D>119E +318E>11A1 +3192>4E00 +3193>4E8C +3194>4E09 +3195>56DB +3196>4E0A +3197>4E2D +3198>4E0B +3199>7532 +319A>4E59 +319B>4E19 +319C>4E01 +319D>5929 +319E>5730 +319F>4EBA +3200>0028 1100 0029 +3201>0028 1102 0029 +3202>0028 1103 
0029 +3203>0028 1105 0029 +3204>0028 1106 0029 +3205>0028 1107 0029 +3206>0028 1109 0029 +3207>0028 110B 0029 +3208>0028 110C 0029 +3209>0028 110E 0029 +320A>0028 110F 0029 +320B>0028 1110 0029 +320C>0028 1111 0029 +320D>0028 1112 0029 +320E>0028 AC00 0029 +320F>0028 B098 0029 +3210>0028 B2E4 0029 +3211>0028 B77C 0029 +3212>0028 B9C8 0029 +3213>0028 BC14 0029 +3214>0028 C0AC 0029 +3215>0028 C544 0029 +3216>0028 C790 0029 +3217>0028 CC28 0029 +3218>0028 CE74 0029 +3219>0028 D0C0 0029 +321A>0028 D30C 0029 +321B>0028 D558 0029 +321C>0028 C8FC 0029 +321D>0028 C624 C804 0029 +321E>0028 C624 D6C4 0029 +3220>0028 4E00 0029 +3221>0028 4E8C 0029 +3222>0028 4E09 0029 +3223>0028 56DB 0029 +3224>0028 4E94 0029 +3225>0028 516D 0029 +3226>0028 4E03 0029 +3227>0028 516B 0029 +3228>0028 4E5D 0029 +3229>0028 5341 0029 +322A>0028 6708 0029 +322B>0028 706B 0029 +322C>0028 6C34 0029 +322D>0028 6728 0029 +322E>0028 91D1 0029 +322F>0028 571F 0029 +3230>0028 65E5 0029 +3231>0028 682A 0029 +3232>0028 6709 0029 +3233>0028 793E 0029 +3234>0028 540D 0029 +3235>0028 7279 0029 +3236>0028 8CA1 0029 +3237>0028 795D 0029 +3238>0028 52B4 0029 +3239>0028 4EE3 0029 +323A>0028 547C 0029 +323B>0028 5B66 0029 +323C>0028 76E3 0029 +323D>0028 4F01 0029 +323E>0028 8CC7 0029 +323F>0028 5354 0029 +3240>0028 796D 0029 +3241>0028 4F11 0029 +3242>0028 81EA 0029 +3243>0028 81F3 0029 +3244>554F +3245>5E7C +3246>6587 +3247>7B8F +3250>0070 0074 0065 +3251>0032 0031 +3252>0032 0032 +3253>0032 0033 +3254>0032 0034 +3255>0032 0035 +3256>0032 0036 +3257>0032 0037 +3258>0032 0038 +3259>0032 0039 +325A>0033 0030 +325B>0033 0031 +325C>0033 0032 +325D>0033 0033 +325E>0033 0034 +325F>0033 0035 +3260>1100 +3261>1102 +3262>1103 +3263>1105 +3264>1106 +3265>1107 +3266>1109 +3267>110B +3268>110C +3269>110E +326A>110F +326B>1110 +326C>1111 +326D>1112 +326E>AC00 +326F>B098 +3270>B2E4 +3271>B77C +3272>B9C8 +3273>BC14 +3274>C0AC +3275>C544 +3276>C790 +3277>CC28 +3278>CE74 +3279>D0C0 +327A>D30C +327B>D558 +327C>CC38 ACE0 +327D>C8FC 
C758 +327E>C6B0 +3280>4E00 +3281>4E8C +3282>4E09 +3283>56DB +3284>4E94 +3285>516D +3286>4E03 +3287>516B +3288>4E5D +3289>5341 +328A>6708 +328B>706B +328C>6C34 +328D>6728 +328E>91D1 +328F>571F +3290>65E5 +3291>682A +3292>6709 +3293>793E +3294>540D +3295>7279 +3296>8CA1 +3297>795D +3298>52B4 +3299>79D8 +329A>7537 +329B>5973 +329C>9069 +329D>512A +329E>5370 +329F>6CE8 +32A0>9805 +32A1>4F11 +32A2>5199 +32A3>6B63 +32A4>4E0A +32A5>4E2D +32A6>4E0B +32A7>5DE6 +32A8>53F3 +32A9>533B +32AA>5B97 +32AB>5B66 +32AC>76E3 +32AD>4F01 +32AE>8CC7 +32AF>5354 +32B0>591C +32B1>0033 0036 +32B2>0033 0037 +32B3>0033 0038 +32B4>0033 0039 +32B5>0034 0030 +32B6>0034 0031 +32B7>0034 0032 +32B8>0034 0033 +32B9>0034 0034 +32BA>0034 0035 +32BB>0034 0036 +32BC>0034 0037 +32BD>0034 0038 +32BE>0034 0039 +32BF>0035 0030 +32C0>0031 6708 +32C1>0032 6708 +32C2>0033 6708 +32C3>0034 6708 +32C4>0035 6708 +32C5>0036 6708 +32C6>0037 6708 +32C7>0038 6708 +32C8>0039 6708 +32C9>0031 0030 6708 +32CA>0031 0031 6708 +32CB>0031 0032 6708 +32CC>0068 0067 +32CD>0065 0072 0067 +32CE>0065 0076 +32CF>006C 0074 0064 +32D0>30A2 +32D1>30A4 +32D2>30A6 +32D3>30A8 +32D4>30AA +32D5>30AB +32D6>30AD +32D7>30AF +32D8>30B1 +32D9>30B3 +32DA>30B5 +32DB>30B7 +32DC>30B9 +32DD>30BB +32DE>30BD +32DF>30BF +32E0>30C1 +32E1>30C4 +32E2>30C6 +32E3>30C8 +32E4>30CA +32E5>30CB +32E6>30CC +32E7>30CD +32E8>30CE +32E9>30CF +32EA>30D2 +32EB>30D5 +32EC>30D8 +32ED>30DB +32EE>30DE +32EF>30DF +32F0>30E0 +32F1>30E1 +32F2>30E2 +32F3>30E4 +32F4>30E6 +32F5>30E8 +32F6>30E9 +32F7>30EA +32F8>30EB +32F9>30EC +32FA>30ED +32FB>30EF +32FC>30F0 +32FD>30F1 +32FE>30F2 +3300>30A2 30D1 30FC 30C8 +3301>30A2 30EB 30D5 30A1 +3302>30A2 30F3 30DA 30A2 +3303>30A2 30FC 30EB +3304>30A4 30CB 30F3 30B0 +3305>30A4 30F3 30C1 +3306>30A6 30A9 30F3 +3307>30A8 30B9 30AF 30FC 30C9 +3308>30A8 30FC 30AB 30FC +3309>30AA 30F3 30B9 +330A>30AA 30FC 30E0 +330B>30AB 30A4 30EA +330C>30AB 30E9 30C3 30C8 +330D>30AB 30ED 30EA 30FC +330E>30AC 30ED 30F3 +330F>30AC 30F3 30DE +3310>30AE 30AC 
+3311>30AE 30CB 30FC +3312>30AD 30E5 30EA 30FC +3313>30AE 30EB 30C0 30FC +3314>30AD 30ED +3315>30AD 30ED 30B0 30E9 30E0 +3316>30AD 30ED 30E1 30FC 30C8 30EB +3317>30AD 30ED 30EF 30C3 30C8 +3318>30B0 30E9 30E0 +3319>30B0 30E9 30E0 30C8 30F3 +331A>30AF 30EB 30BC 30A4 30ED +331B>30AF 30ED 30FC 30CD +331C>30B1 30FC 30B9 +331D>30B3 30EB 30CA +331E>30B3 30FC 30DD +331F>30B5 30A4 30AF 30EB +3320>30B5 30F3 30C1 30FC 30E0 +3321>30B7 30EA 30F3 30B0 +3322>30BB 30F3 30C1 +3323>30BB 30F3 30C8 +3324>30C0 30FC 30B9 +3325>30C7 30B7 +3326>30C9 30EB +3327>30C8 30F3 +3328>30CA 30CE +3329>30CE 30C3 30C8 +332A>30CF 30A4 30C4 +332B>30D1 30FC 30BB 30F3 30C8 +332C>30D1 30FC 30C4 +332D>30D0 30FC 30EC 30EB +332E>30D4 30A2 30B9 30C8 30EB +332F>30D4 30AF 30EB +3330>30D4 30B3 +3331>30D3 30EB +3332>30D5 30A1 30E9 30C3 30C9 +3333>30D5 30A3 30FC 30C8 +3334>30D6 30C3 30B7 30A7 30EB +3335>30D5 30E9 30F3 +3336>30D8 30AF 30BF 30FC 30EB +3337>30DA 30BD +3338>30DA 30CB 30D2 +3339>30D8 30EB 30C4 +333A>30DA 30F3 30B9 +333B>30DA 30FC 30B8 +333C>30D9 30FC 30BF +333D>30DD 30A4 30F3 30C8 +333E>30DC 30EB 30C8 +333F>30DB 30F3 +3340>30DD 30F3 30C9 +3341>30DB 30FC 30EB +3342>30DB 30FC 30F3 +3343>30DE 30A4 30AF 30ED +3344>30DE 30A4 30EB +3345>30DE 30C3 30CF +3346>30DE 30EB 30AF +3347>30DE 30F3 30B7 30E7 30F3 +3348>30DF 30AF 30ED 30F3 +3349>30DF 30EA +334A>30DF 30EA 30D0 30FC 30EB +334B>30E1 30AC +334C>30E1 30AC 30C8 30F3 +334D>30E1 30FC 30C8 30EB +334E>30E4 30FC 30C9 +334F>30E4 30FC 30EB +3350>30E6 30A2 30F3 +3351>30EA 30C3 30C8 30EB +3352>30EA 30E9 +3353>30EB 30D4 30FC +3354>30EB 30FC 30D6 30EB +3355>30EC 30E0 +3356>30EC 30F3 30C8 30B2 30F3 +3357>30EF 30C3 30C8 +3358>0030 70B9 +3359>0031 70B9 +335A>0032 70B9 +335B>0033 70B9 +335C>0034 70B9 +335D>0035 70B9 +335E>0036 70B9 +335F>0037 70B9 +3360>0038 70B9 +3361>0039 70B9 +3362>0031 0030 70B9 +3363>0031 0031 70B9 +3364>0031 0032 70B9 +3365>0031 0033 70B9 +3366>0031 0034 70B9 +3367>0031 0035 70B9 +3368>0031 0036 70B9 +3369>0031 0037 70B9 +336A>0031 0038 70B9 
+336B>0031 0039 70B9 +336C>0032 0030 70B9 +336D>0032 0031 70B9 +336E>0032 0032 70B9 +336F>0032 0033 70B9 +3370>0032 0034 70B9 +3371>0068 0070 0061 +3372>0064 0061 +3373>0061 0075 +3374>0062 0061 0072 +3375>006F 0076 +3376>0070 0063 +3377>0064 006D +3378>0064 006D 0032 +3379>0064 006D 0033 +337A>0069 0075 +337B>5E73 6210 +337C>662D 548C +337D>5927 6B63 +337E>660E 6CBB +337F>682A 5F0F 4F1A 793E +3380>0070 0061 +3381>006E 0061 +3382>03BC 0061 +3383>006D 0061 +3384>006B 0061 +3385>006B 0062 +3386>006D 0062 +3387>0067 0062 +3388>0063 0061 006C +3389>006B 0063 0061 006C +338A>0070 0066 +338B>006E 0066 +338C>03BC 0066 +338D>03BC 0067 +338E>006D 0067 +338F>006B 0067 +3390>0068 007A +3391>006B 0068 007A +3392>006D 0068 007A +3393>0067 0068 007A +3394>0074 0068 007A +3395>03BC 006C +3396>006D 006C +3397>0064 006C +3398>006B 006C +3399>0066 006D +339A>006E 006D +339B>03BC 006D +339C>006D 006D +339D>0063 006D +339E>006B 006D +339F>006D 006D 0032 +33A0>0063 006D 0032 +33A1>006D 0032 +33A2>006B 006D 0032 +33A3>006D 006D 0033 +33A4>0063 006D 0033 +33A5>006D 0033 +33A6>006B 006D 0033 +33A7>006D 2215 0073 +33A8>006D 2215 0073 0032 +33A9>0070 0061 +33AA>006B 0070 0061 +33AB>006D 0070 0061 +33AC>0067 0070 0061 +33AD>0072 0061 0064 +33AE>0072 0061 0064 2215 0073 +33AF>0072 0061 0064 2215 0073 0032 +33B0>0070 0073 +33B1>006E 0073 +33B2>03BC 0073 +33B3>006D 0073 +33B4>0070 0076 +33B5>006E 0076 +33B6>03BC 0076 +33B7>006D 0076 +33B8>006B 0076 +33B9>006D 0076 +33BA>0070 0077 +33BB>006E 0077 +33BC>03BC 0077 +33BD>006D 0077 +33BE>006B 0077 +33BF>006D 0077 +33C0>006B 03C9 +33C1>006D 03C9 +33C2>0061 002E 006D 002E +33C3>0062 0071 +33C4>0063 0063 +33C5>0063 0064 +33C6>0063 2215 006B 0067 +33C7>0063 006F 002E +33C8>0064 0062 +33C9>0067 0079 +33CA>0068 0061 +33CB>0068 0070 +33CC>0069 006E +33CD>006B 006B +33CE>006B 006D +33CF>006B 0074 +33D0>006C 006D +33D1>006C 006E +33D2>006C 006F 0067 +33D3>006C 0078 +33D4>006D 0062 +33D5>006D 0069 006C +33D6>006D 006F 006C +33D7>0070 0068 +33D8>0070 002E 006D 
002E +33D9>0070 0070 006D +33DA>0070 0072 +33DB>0073 0072 +33DC>0073 0076 +33DD>0077 0062 +33DE>0076 2215 006D +33DF>0061 2215 006D +33E0>0031 65E5 +33E1>0032 65E5 +33E2>0033 65E5 +33E3>0034 65E5 +33E4>0035 65E5 +33E5>0036 65E5 +33E6>0037 65E5 +33E7>0038 65E5 +33E8>0039 65E5 +33E9>0031 0030 65E5 +33EA>0031 0031 65E5 +33EB>0031 0032 65E5 +33EC>0031 0033 65E5 +33ED>0031 0034 65E5 +33EE>0031 0035 65E5 +33EF>0031 0036 65E5 +33F0>0031 0037 65E5 +33F1>0031 0038 65E5 +33F2>0031 0039 65E5 +33F3>0032 0030 65E5 +33F4>0032 0031 65E5 +33F5>0032 0032 65E5 +33F6>0032 0033 65E5 +33F7>0032 0034 65E5 +33F8>0032 0035 65E5 +33F9>0032 0036 65E5 +33FA>0032 0037 65E5 +33FB>0032 0038 65E5 +33FC>0032 0039 65E5 +33FD>0033 0030 65E5 +33FE>0033 0031 65E5 +33FF>0067 0061 006C +A640>A641 +A642>A643 +A644>A645 +A646>A647 +A648>A649 +A64A>A64B +A64C>A64D +A64E>A64F +A650>A651 +A652>A653 +A654>A655 +A656>A657 +A658>A659 +A65A>A65B +A65C>A65D +A65E>A65F +A662>A663 +A664>A665 +A666>A667 +A668>A669 +A66A>A66B +A66C>A66D +A680>A681 +A682>A683 +A684>A685 +A686>A687 +A688>A689 +A68A>A68B +A68C>A68D +A68E>A68F +A690>A691 +A692>A693 +A694>A695 +A696>A697 +A722>A723 +A724>A725 +A726>A727 +A728>A729 +A72A>A72B +A72C>A72D +A72E>A72F +A732>A733 +A734>A735 +A736>A737 +A738>A739 +A73A>A73B +A73C>A73D +A73E>A73F +A740>A741 +A742>A743 +A744>A745 +A746>A747 +A748>A749 +A74A>A74B +A74C>A74D +A74E>A74F +A750>A751 +A752>A753 +A754>A755 +A756>A757 +A758>A759 +A75A>A75B +A75C>A75D +A75E>A75F +A760>A761 +A762>A763 +A764>A765 +A766>A767 +A768>A769 +A76A>A76B +A76C>A76D +A76E>A76F +A770>A76F +A779>A77A +A77B>A77C +A77D>1D79 +A77E>A77F +A780>A781 +A782>A783 +A784>A785 +A786>A787 +A78B>A78C +F900>8C48 +F901>66F4 +F902>8ECA +F903>8CC8 +F904>6ED1 +F905>4E32 +F906>53E5 +F907..F908>9F9C +F909>5951 +F90A>91D1 +F90B>5587 +F90C>5948 +F90D>61F6 +F90E>7669 +F90F>7F85 +F910>863F +F911>87BA +F912>88F8 +F913>908F +F914>6A02 +F915>6D1B +F916>70D9 +F917>73DE +F918>843D +F919>916A +F91A>99F1 +F91B>4E82 +F91C>5375 +F91D>6B04 +F91E>721B 
+F91F>862D +F920>9E1E +F921>5D50 +F922>6FEB +F923>85CD +F924>8964 +F925>62C9 +F926>81D8 +F927>881F +F928>5ECA +F929>6717 +F92A>6D6A +F92B>72FC +F92C>90CE +F92D>4F86 +F92E>51B7 +F92F>52DE +F930>64C4 +F931>6AD3 +F932>7210 +F933>76E7 +F934>8001 +F935>8606 +F936>865C +F937>8DEF +F938>9732 +F939>9B6F +F93A>9DFA +F93B>788C +F93C>797F +F93D>7DA0 +F93E>83C9 +F93F>9304 +F940>9E7F +F941>8AD6 +F942>58DF +F943>5F04 +F944>7C60 +F945>807E +F946>7262 +F947>78CA +F948>8CC2 +F949>96F7 +F94A>58D8 +F94B>5C62 +F94C>6A13 +F94D>6DDA +F94E>6F0F +F94F>7D2F +F950>7E37 +F951>964B +F952>52D2 +F953>808B +F954>51DC +F955>51CC +F956>7A1C +F957>7DBE +F958>83F1 +F959>9675 +F95A>8B80 +F95B>62CF +F95C>6A02 +F95D>8AFE +F95E>4E39 +F95F>5BE7 +F960>6012 +F961>7387 +F962>7570 +F963>5317 +F964>78FB +F965>4FBF +F966>5FA9 +F967>4E0D +F968>6CCC +F969>6578 +F96A>7D22 +F96B>53C3 +F96C>585E +F96D>7701 +F96E>8449 +F96F>8AAA +F970>6BBA +F971>8FB0 +F972>6C88 +F973>62FE +F974>82E5 +F975>63A0 +F976>7565 +F977>4EAE +F978>5169 +F979>51C9 +F97A>6881 +F97B>7CE7 +F97C>826F +F97D>8AD2 +F97E>91CF +F97F>52F5 +F980>5442 +F981>5973 +F982>5EEC +F983>65C5 +F984>6FFE +F985>792A +F986>95AD +F987>9A6A +F988>9E97 +F989>9ECE +F98A>529B +F98B>66C6 +F98C>6B77 +F98D>8F62 +F98E>5E74 +F98F>6190 +F990>6200 +F991>649A +F992>6F23 +F993>7149 +F994>7489 +F995>79CA +F996>7DF4 +F997>806F +F998>8F26 +F999>84EE +F99A>9023 +F99B>934A +F99C>5217 +F99D>52A3 +F99E>54BD +F99F>70C8 +F9A0>88C2 +F9A1>8AAA +F9A2>5EC9 +F9A3>5FF5 +F9A4>637B +F9A5>6BAE +F9A6>7C3E +F9A7>7375 +F9A8>4EE4 +F9A9>56F9 +F9AA>5BE7 +F9AB>5DBA +F9AC>601C +F9AD>73B2 +F9AE>7469 +F9AF>7F9A +F9B0>8046 +F9B1>9234 +F9B2>96F6 +F9B3>9748 +F9B4>9818 +F9B5>4F8B +F9B6>79AE +F9B7>91B4 +F9B8>96B8 +F9B9>60E1 +F9BA>4E86 +F9BB>50DA +F9BC>5BEE +F9BD>5C3F +F9BE>6599 +F9BF>6A02 +F9C0>71CE +F9C1>7642 +F9C2>84FC +F9C3>907C +F9C4>9F8D +F9C5>6688 +F9C6>962E +F9C7>5289 +F9C8>677B +F9C9>67F3 +F9CA>6D41 +F9CB>6E9C +F9CC>7409 +F9CD>7559 +F9CE>786B +F9CF>7D10 +F9D0>985E +F9D1>516D +F9D2>622E +F9D3>9678 
+F9D4>502B +F9D5>5D19 +F9D6>6DEA +F9D7>8F2A +F9D8>5F8B +F9D9>6144 +F9DA>6817 +F9DB>7387 +F9DC>9686 +F9DD>5229 +F9DE>540F +F9DF>5C65 +F9E0>6613 +F9E1>674E +F9E2>68A8 +F9E3>6CE5 +F9E4>7406 +F9E5>75E2 +F9E6>7F79 +F9E7>88CF +F9E8>88E1 +F9E9>91CC +F9EA>96E2 +F9EB>533F +F9EC>6EBA +F9ED>541D +F9EE>71D0 +F9EF>7498 +F9F0>85FA +F9F1>96A3 +F9F2>9C57 +F9F3>9E9F +F9F4>6797 +F9F5>6DCB +F9F6>81E8 +F9F7>7ACB +F9F8>7B20 +F9F9>7C92 +F9FA>72C0 +F9FB>7099 +F9FC>8B58 +F9FD>4EC0 +F9FE>8336 +F9FF>523A +FA00>5207 +FA01>5EA6 +FA02>62D3 +FA03>7CD6 +FA04>5B85 +FA05>6D1E +FA06>66B4 +FA07>8F3B +FA08>884C +FA09>964D +FA0A>898B +FA0B>5ED3 +FA0C>5140 +FA0D>55C0 +FA10>585A +FA12>6674 +FA15>51DE +FA16>732A +FA17>76CA +FA18>793C +FA19>795E +FA1A>7965 +FA1B>798F +FA1C>9756 +FA1D>7CBE +FA1E>7FBD +FA20>8612 +FA22>8AF8 +FA25>9038 +FA26>90FD +FA2A>98EF +FA2B>98FC +FA2C>9928 +FA2D>9DB4 +FA30>4FAE +FA31>50E7 +FA32>514D +FA33>52C9 +FA34>52E4 +FA35>5351 +FA36>559D +FA37>5606 +FA38>5668 +FA39>5840 +FA3A>58A8 +FA3B>5C64 +FA3C>5C6E +FA3D>6094 +FA3E>6168 +FA3F>618E +FA40>61F2 +FA41>654F +FA42>65E2 +FA43>6691 +FA44>6885 +FA45>6D77 +FA46>6E1A +FA47>6F22 +FA48>716E +FA49>722B +FA4A>7422 +FA4B>7891 +FA4C>793E +FA4D>7949 +FA4E>7948 +FA4F>7950 +FA50>7956 +FA51>795D +FA52>798D +FA53>798E +FA54>7A40 +FA55>7A81 +FA56>7BC0 +FA57>7DF4 +FA58>7E09 +FA59>7E41 +FA5A>7F72 +FA5B>8005 +FA5C>81ED +FA5D..FA5E>8279 +FA5F>8457 +FA60>8910 +FA61>8996 +FA62>8B01 +FA63>8B39 +FA64>8CD3 +FA65>8D08 +FA66>8FB6 +FA67>9038 +FA68>96E3 +FA69>97FF +FA6A>983B +FA6B>6075 +FA6C>242EE +FA6D>8218 +FA70>4E26 +FA71>51B5 +FA72>5168 +FA73>4F80 +FA74>5145 +FA75>5180 +FA76>52C7 +FA77>52FA +FA78>559D +FA79>5555 +FA7A>5599 +FA7B>55E2 +FA7C>585A +FA7D>58B3 +FA7E>5944 +FA7F>5954 +FA80>5A62 +FA81>5B28 +FA82>5ED2 +FA83>5ED9 +FA84>5F69 +FA85>5FAD +FA86>60D8 +FA87>614E +FA88>6108 +FA89>618E +FA8A>6160 +FA8B>61F2 +FA8C>6234 +FA8D>63C4 +FA8E>641C +FA8F>6452 +FA90>6556 +FA91>6674 +FA92>6717 +FA93>671B +FA94>6756 +FA95>6B79 +FA96>6BBA +FA97>6D41 +FA98>6EDB +FA99>6ECB 
+FA9A>6F22 +FA9B>701E +FA9C>716E +FA9D>77A7 +FA9E>7235 +FA9F>72AF +FAA0>732A +FAA1>7471 +FAA2>7506 +FAA3>753B +FAA4>761D +FAA5>761F +FAA6>76CA +FAA7>76DB +FAA8>76F4 +FAA9>774A +FAAA>7740 +FAAB>78CC +FAAC>7AB1 +FAAD>7BC0 +FAAE>7C7B +FAAF>7D5B +FAB0>7DF4 +FAB1>7F3E +FAB2>8005 +FAB3>8352 +FAB4>83EF +FAB5>8779 +FAB6>8941 +FAB7>8986 +FAB8>8996 +FAB9>8ABF +FABA>8AF8 +FABB>8ACB +FABC>8B01 +FABD>8AFE +FABE>8AED +FABF>8B39 +FAC0>8B8A +FAC1>8D08 +FAC2>8F38 +FAC3>9072 +FAC4>9199 +FAC5>9276 +FAC6>967C +FAC7>96E3 +FAC8>9756 +FAC9>97DB +FACA>97FF +FACB>980B +FACC>983B +FACD>9B12 +FACE>9F9C +FACF>2284A +FAD0>22844 +FAD1>233D5 +FAD2>3B9D +FAD3>4018 +FAD4>4039 +FAD5>25249 +FAD6>25CD0 +FAD7>27ED3 +FAD8>9F43 +FAD9>9F8E +FB00>0066 0066 +FB01>0066 0069 +FB02>0066 006C +FB03>0066 0066 0069 +FB04>0066 0066 006C +FB05..FB06>0073 0074 +FB13>0574 0576 +FB14>0574 0565 +FB15>0574 056B +FB16>057E 0576 +FB17>0574 056D +FB1D>05D9 05B4 +FB1F>05F2 05B7 +FB20>05E2 +FB21>05D0 +FB22>05D3 +FB23>05D4 +FB24>05DB +FB25>05DC +FB26>05DD +FB27>05E8 +FB28>05EA +FB29>002B +FB2A>05E9 05C1 +FB2B>05E9 05C2 +FB2C>05E9 05BC 05C1 +FB2D>05E9 05BC 05C2 +FB2E>05D0 05B7 +FB2F>05D0 05B8 +FB30>05D0 05BC +FB31>05D1 05BC +FB32>05D2 05BC +FB33>05D3 05BC +FB34>05D4 05BC +FB35>05D5 05BC +FB36>05D6 05BC +FB38>05D8 05BC +FB39>05D9 05BC +FB3A>05DA 05BC +FB3B>05DB 05BC +FB3C>05DC 05BC +FB3E>05DE 05BC +FB40>05E0 05BC +FB41>05E1 05BC +FB43>05E3 05BC +FB44>05E4 05BC +FB46>05E6 05BC +FB47>05E7 05BC +FB48>05E8 05BC +FB49>05E9 05BC +FB4A>05EA 05BC +FB4B>05D5 05B9 +FB4C>05D1 05BF +FB4D>05DB 05BF +FB4E>05E4 05BF +FB4F>05D0 05DC +FB50..FB51>0671 +FB52..FB55>067B +FB56..FB59>067E +FB5A..FB5D>0680 +FB5E..FB61>067A +FB62..FB65>067F +FB66..FB69>0679 +FB6A..FB6D>06A4 +FB6E..FB71>06A6 +FB72..FB75>0684 +FB76..FB79>0683 +FB7A..FB7D>0686 +FB7E..FB81>0687 +FB82..FB83>068D +FB84..FB85>068C +FB86..FB87>068E +FB88..FB89>0688 +FB8A..FB8B>0698 +FB8C..FB8D>0691 +FB8E..FB91>06A9 +FB92..FB95>06AF +FB96..FB99>06B3 +FB9A..FB9D>06B1 +FB9E..FB9F>06BA 
+FBA0..FBA3>06BB +FBA4..FBA5>06C0 +FBA6..FBA9>06C1 +FBAA..FBAD>06BE +FBAE..FBAF>06D2 +FBB0..FBB1>06D3 +FBD3..FBD6>06AD +FBD7..FBD8>06C7 +FBD9..FBDA>06C6 +FBDB..FBDC>06C8 +FBDD>06C7 0674 +FBDE..FBDF>06CB +FBE0..FBE1>06C5 +FBE2..FBE3>06C9 +FBE4..FBE7>06D0 +FBE8..FBE9>0649 +FBEA..FBEB>0626 0627 +FBEC..FBED>0626 06D5 +FBEE..FBEF>0626 0648 +FBF0..FBF1>0626 06C7 +FBF2..FBF3>0626 06C6 +FBF4..FBF5>0626 06C8 +FBF6..FBF8>0626 06D0 +FBF9..FBFB>0626 0649 +FBFC..FBFF>06CC +FC00>0626 062C +FC01>0626 062D +FC02>0626 0645 +FC03>0626 0649 +FC04>0626 064A +FC05>0628 062C +FC06>0628 062D +FC07>0628 062E +FC08>0628 0645 +FC09>0628 0649 +FC0A>0628 064A +FC0B>062A 062C +FC0C>062A 062D +FC0D>062A 062E +FC0E>062A 0645 +FC0F>062A 0649 +FC10>062A 064A +FC11>062B 062C +FC12>062B 0645 +FC13>062B 0649 +FC14>062B 064A +FC15>062C 062D +FC16>062C 0645 +FC17>062D 062C +FC18>062D 0645 +FC19>062E 062C +FC1A>062E 062D +FC1B>062E 0645 +FC1C>0633 062C +FC1D>0633 062D +FC1E>0633 062E +FC1F>0633 0645 +FC20>0635 062D +FC21>0635 0645 +FC22>0636 062C +FC23>0636 062D +FC24>0636 062E +FC25>0636 0645 +FC26>0637 062D +FC27>0637 0645 +FC28>0638 0645 +FC29>0639 062C +FC2A>0639 0645 +FC2B>063A 062C +FC2C>063A 0645 +FC2D>0641 062C +FC2E>0641 062D +FC2F>0641 062E +FC30>0641 0645 +FC31>0641 0649 +FC32>0641 064A +FC33>0642 062D +FC34>0642 0645 +FC35>0642 0649 +FC36>0642 064A +FC37>0643 0627 +FC38>0643 062C +FC39>0643 062D +FC3A>0643 062E +FC3B>0643 0644 +FC3C>0643 0645 +FC3D>0643 0649 +FC3E>0643 064A +FC3F>0644 062C +FC40>0644 062D +FC41>0644 062E +FC42>0644 0645 +FC43>0644 0649 +FC44>0644 064A +FC45>0645 062C +FC46>0645 062D +FC47>0645 062E +FC48>0645 0645 +FC49>0645 0649 +FC4A>0645 064A +FC4B>0646 062C +FC4C>0646 062D +FC4D>0646 062E +FC4E>0646 0645 +FC4F>0646 0649 +FC50>0646 064A +FC51>0647 062C +FC52>0647 0645 +FC53>0647 0649 +FC54>0647 064A +FC55>064A 062C +FC56>064A 062D +FC57>064A 062E +FC58>064A 0645 +FC59>064A 0649 +FC5A>064A 064A +FC5B>0630 0670 +FC5C>0631 0670 +FC5D>0649 0670 +FC5E>0020 064C 0651 +FC5F>0020 
064D 0651 +FC60>0020 064E 0651 +FC61>0020 064F 0651 +FC62>0020 0650 0651 +FC63>0020 0651 0670 +FC64>0626 0631 +FC65>0626 0632 +FC66>0626 0645 +FC67>0626 0646 +FC68>0626 0649 +FC69>0626 064A +FC6A>0628 0631 +FC6B>0628 0632 +FC6C>0628 0645 +FC6D>0628 0646 +FC6E>0628 0649 +FC6F>0628 064A +FC70>062A 0631 +FC71>062A 0632 +FC72>062A 0645 +FC73>062A 0646 +FC74>062A 0649 +FC75>062A 064A +FC76>062B 0631 +FC77>062B 0632 +FC78>062B 0645 +FC79>062B 0646 +FC7A>062B 0649 +FC7B>062B 064A +FC7C>0641 0649 +FC7D>0641 064A +FC7E>0642 0649 +FC7F>0642 064A +FC80>0643 0627 +FC81>0643 0644 +FC82>0643 0645 +FC83>0643 0649 +FC84>0643 064A +FC85>0644 0645 +FC86>0644 0649 +FC87>0644 064A +FC88>0645 0627 +FC89>0645 0645 +FC8A>0646 0631 +FC8B>0646 0632 +FC8C>0646 0645 +FC8D>0646 0646 +FC8E>0646 0649 +FC8F>0646 064A +FC90>0649 0670 +FC91>064A 0631 +FC92>064A 0632 +FC93>064A 0645 +FC94>064A 0646 +FC95>064A 0649 +FC96>064A 064A +FC97>0626 062C +FC98>0626 062D +FC99>0626 062E +FC9A>0626 0645 +FC9B>0626 0647 +FC9C>0628 062C +FC9D>0628 062D +FC9E>0628 062E +FC9F>0628 0645 +FCA0>0628 0647 +FCA1>062A 062C +FCA2>062A 062D +FCA3>062A 062E +FCA4>062A 0645 +FCA5>062A 0647 +FCA6>062B 0645 +FCA7>062C 062D +FCA8>062C 0645 +FCA9>062D 062C +FCAA>062D 0645 +FCAB>062E 062C +FCAC>062E 0645 +FCAD>0633 062C +FCAE>0633 062D +FCAF>0633 062E +FCB0>0633 0645 +FCB1>0635 062D +FCB2>0635 062E +FCB3>0635 0645 +FCB4>0636 062C +FCB5>0636 062D +FCB6>0636 062E +FCB7>0636 0645 +FCB8>0637 062D +FCB9>0638 0645 +FCBA>0639 062C +FCBB>0639 0645 +FCBC>063A 062C +FCBD>063A 0645 +FCBE>0641 062C +FCBF>0641 062D +FCC0>0641 062E +FCC1>0641 0645 +FCC2>0642 062D +FCC3>0642 0645 +FCC4>0643 062C +FCC5>0643 062D +FCC6>0643 062E +FCC7>0643 0644 +FCC8>0643 0645 +FCC9>0644 062C +FCCA>0644 062D +FCCB>0644 062E +FCCC>0644 0645 +FCCD>0644 0647 +FCCE>0645 062C +FCCF>0645 062D +FCD0>0645 062E +FCD1>0645 0645 +FCD2>0646 062C +FCD3>0646 062D +FCD4>0646 062E +FCD5>0646 0645 +FCD6>0646 0647 +FCD7>0647 062C +FCD8>0647 0645 +FCD9>0647 0670 +FCDA>064A 062C 
+FCDB>064A 062D +FCDC>064A 062E +FCDD>064A 0645 +FCDE>064A 0647 +FCDF>0626 0645 +FCE0>0626 0647 +FCE1>0628 0645 +FCE2>0628 0647 +FCE3>062A 0645 +FCE4>062A 0647 +FCE5>062B 0645 +FCE6>062B 0647 +FCE7>0633 0645 +FCE8>0633 0647 +FCE9>0634 0645 +FCEA>0634 0647 +FCEB>0643 0644 +FCEC>0643 0645 +FCED>0644 0645 +FCEE>0646 0645 +FCEF>0646 0647 +FCF0>064A 0645 +FCF1>064A 0647 +FCF2>0640 064E 0651 +FCF3>0640 064F 0651 +FCF4>0640 0650 0651 +FCF5>0637 0649 +FCF6>0637 064A +FCF7>0639 0649 +FCF8>0639 064A +FCF9>063A 0649 +FCFA>063A 064A +FCFB>0633 0649 +FCFC>0633 064A +FCFD>0634 0649 +FCFE>0634 064A +FCFF>062D 0649 +FD00>062D 064A +FD01>062C 0649 +FD02>062C 064A +FD03>062E 0649 +FD04>062E 064A +FD05>0635 0649 +FD06>0635 064A +FD07>0636 0649 +FD08>0636 064A +FD09>0634 062C +FD0A>0634 062D +FD0B>0634 062E +FD0C>0634 0645 +FD0D>0634 0631 +FD0E>0633 0631 +FD0F>0635 0631 +FD10>0636 0631 +FD11>0637 0649 +FD12>0637 064A +FD13>0639 0649 +FD14>0639 064A +FD15>063A 0649 +FD16>063A 064A +FD17>0633 0649 +FD18>0633 064A +FD19>0634 0649 +FD1A>0634 064A +FD1B>062D 0649 +FD1C>062D 064A +FD1D>062C 0649 +FD1E>062C 064A +FD1F>062E 0649 +FD20>062E 064A +FD21>0635 0649 +FD22>0635 064A +FD23>0636 0649 +FD24>0636 064A +FD25>0634 062C +FD26>0634 062D +FD27>0634 062E +FD28>0634 0645 +FD29>0634 0631 +FD2A>0633 0631 +FD2B>0635 0631 +FD2C>0636 0631 +FD2D>0634 062C +FD2E>0634 062D +FD2F>0634 062E +FD30>0634 0645 +FD31>0633 0647 +FD32>0634 0647 +FD33>0637 0645 +FD34>0633 062C +FD35>0633 062D +FD36>0633 062E +FD37>0634 062C +FD38>0634 062D +FD39>0634 062E +FD3A>0637 0645 +FD3B>0638 0645 +FD3C..FD3D>0627 064B +FD50>062A 062C 0645 +FD51..FD52>062A 062D 062C +FD53>062A 062D 0645 +FD54>062A 062E 0645 +FD55>062A 0645 062C +FD56>062A 0645 062D +FD57>062A 0645 062E +FD58..FD59>062C 0645 062D +FD5A>062D 0645 064A +FD5B>062D 0645 0649 +FD5C>0633 062D 062C +FD5D>0633 062C 062D +FD5E>0633 062C 0649 +FD5F..FD60>0633 0645 062D +FD61>0633 0645 062C +FD62..FD63>0633 0645 0645 +FD64..FD65>0635 062D 062D +FD66>0635 0645 0645 
+FD67..FD68>0634 062D 0645 +FD69>0634 062C 064A +FD6A..FD6B>0634 0645 062E +FD6C..FD6D>0634 0645 0645 +FD6E>0636 062D 0649 +FD6F..FD70>0636 062E 0645 +FD71..FD72>0637 0645 062D +FD73>0637 0645 0645 +FD74>0637 0645 064A +FD75>0639 062C 0645 +FD76..FD77>0639 0645 0645 +FD78>0639 0645 0649 +FD79>063A 0645 0645 +FD7A>063A 0645 064A +FD7B>063A 0645 0649 +FD7C..FD7D>0641 062E 0645 +FD7E>0642 0645 062D +FD7F>0642 0645 0645 +FD80>0644 062D 0645 +FD81>0644 062D 064A +FD82>0644 062D 0649 +FD83..FD84>0644 062C 062C +FD85..FD86>0644 062E 0645 +FD87..FD88>0644 0645 062D +FD89>0645 062D 062C +FD8A>0645 062D 0645 +FD8B>0645 062D 064A +FD8C>0645 062C 062D +FD8D>0645 062C 0645 +FD8E>0645 062E 062C +FD8F>0645 062E 0645 +FD92>0645 062C 062E +FD93>0647 0645 062C +FD94>0647 0645 0645 +FD95>0646 062D 0645 +FD96>0646 062D 0649 +FD97..FD98>0646 062C 0645 +FD99>0646 062C 0649 +FD9A>0646 0645 064A +FD9B>0646 0645 0649 +FD9C..FD9D>064A 0645 0645 +FD9E>0628 062E 064A +FD9F>062A 062C 064A +FDA0>062A 062C 0649 +FDA1>062A 062E 064A +FDA2>062A 062E 0649 +FDA3>062A 0645 064A +FDA4>062A 0645 0649 +FDA5>062C 0645 064A +FDA6>062C 062D 0649 +FDA7>062C 0645 0649 +FDA8>0633 062E 0649 +FDA9>0635 062D 064A +FDAA>0634 062D 064A +FDAB>0636 062D 064A +FDAC>0644 062C 064A +FDAD>0644 0645 064A +FDAE>064A 062D 064A +FDAF>064A 062C 064A +FDB0>064A 0645 064A +FDB1>0645 0645 064A +FDB2>0642 0645 064A +FDB3>0646 062D 064A +FDB4>0642 0645 062D +FDB5>0644 062D 0645 +FDB6>0639 0645 064A +FDB7>0643 0645 064A +FDB8>0646 062C 062D +FDB9>0645 062E 064A +FDBA>0644 062C 0645 +FDBB>0643 0645 0645 +FDBC>0644 062C 0645 +FDBD>0646 062C 062D +FDBE>062C 062D 064A +FDBF>062D 062C 064A +FDC0>0645 062C 064A +FDC1>0641 0645 064A +FDC2>0628 062D 064A +FDC3>0643 0645 0645 +FDC4>0639 062C 0645 +FDC5>0635 0645 0645 +FDC6>0633 062E 064A +FDC7>0646 062C 064A +FDF0>0635 0644 06D2 +FDF1>0642 0644 06D2 +FDF2>0627 0644 0644 0647 +FDF3>0627 0643 0628 0631 +FDF4>0645 062D 0645 062F +FDF5>0635 0644 0639 0645 +FDF6>0631 0633 0648 0644 +FDF7>0639 
0644 064A 0647 +FDF8>0648 0633 0644 0645 +FDF9>0635 0644 0649 +FDFA>0635 0644 0649 0020 0627 0644 0644 0647 0020 0639 0644 064A 0647 0020 0648 0633 0644 0645 +FDFB>062C 0644 0020 062C 0644 0627 0644 0647 +FDFC>0631 06CC 0627 0644 +FE00..FE0F> +FE10>002C +FE11>3001 +FE12>3002 +FE13>003A +FE14>003B +FE15>0021 +FE16>003F +FE17>3016 +FE18>3017 +FE19>002E 002E 002E +FE30>002E 002E +FE31>2014 +FE32>2013 +FE33..FE34>005F +FE35>0028 +FE36>0029 +FE37>007B +FE38>007D +FE39>3014 +FE3A>3015 +FE3B>3010 +FE3C>3011 +FE3D>300A +FE3E>300B +FE3F>3008 +FE40>3009 +FE41>300C +FE42>300D +FE43>300E +FE44>300F +FE47>005B +FE48>005D +FE49..FE4C>0020 0305 +FE4D..FE4F>005F +FE50>002C +FE51>3001 +FE52>002E +FE54>003B +FE55>003A +FE56>003F +FE57>0021 +FE58>2014 +FE59>0028 +FE5A>0029 +FE5B>007B +FE5C>007D +FE5D>3014 +FE5E>3015 +FE5F>0023 +FE60>0026 +FE61>002A +FE62>002B +FE63>002D +FE64>003C +FE65>003E +FE66>003D +FE68>005C +FE69>0024 +FE6A>0025 +FE6B>0040 +FE70>0020 064B +FE71>0640 064B +FE72>0020 064C +FE74>0020 064D +FE76>0020 064E +FE77>0640 064E +FE78>0020 064F +FE79>0640 064F +FE7A>0020 0650 +FE7B>0640 0650 +FE7C>0020 0651 +FE7D>0640 0651 +FE7E>0020 0652 +FE7F>0640 0652 +FE80>0621 +FE81..FE82>0622 +FE83..FE84>0623 +FE85..FE86>0624 +FE87..FE88>0625 +FE89..FE8C>0626 +FE8D..FE8E>0627 +FE8F..FE92>0628 +FE93..FE94>0629 +FE95..FE98>062A +FE99..FE9C>062B +FE9D..FEA0>062C +FEA1..FEA4>062D +FEA5..FEA8>062E +FEA9..FEAA>062F +FEAB..FEAC>0630 +FEAD..FEAE>0631 +FEAF..FEB0>0632 +FEB1..FEB4>0633 +FEB5..FEB8>0634 +FEB9..FEBC>0635 +FEBD..FEC0>0636 +FEC1..FEC4>0637 +FEC5..FEC8>0638 +FEC9..FECC>0639 +FECD..FED0>063A +FED1..FED4>0641 +FED5..FED8>0642 +FED9..FEDC>0643 +FEDD..FEE0>0644 +FEE1..FEE4>0645 +FEE5..FEE8>0646 +FEE9..FEEC>0647 +FEED..FEEE>0648 +FEEF..FEF0>0649 +FEF1..FEF4>064A +FEF5..FEF6>0644 0622 +FEF7..FEF8>0644 0623 +FEF9..FEFA>0644 0625 +FEFB..FEFC>0644 0627 +FEFF> +FF01>0021 +FF02>0022 +FF03>0023 +FF04>0024 +FF05>0025 +FF06>0026 +FF07>0027 +FF08>0028 +FF09>0029 +FF0A>002A +FF0B>002B +FF0C>002C 
+FF0D>002D +FF0E>002E +FF0F>002F +FF10>0030 +FF11>0031 +FF12>0032 +FF13>0033 +FF14>0034 +FF15>0035 +FF16>0036 +FF17>0037 +FF18>0038 +FF19>0039 +FF1A>003A +FF1B>003B +FF1C>003C +FF1D>003D +FF1E>003E +FF1F>003F +FF20>0040 +FF21>0061 +FF22>0062 +FF23>0063 +FF24>0064 +FF25>0065 +FF26>0066 +FF27>0067 +FF28>0068 +FF29>0069 +FF2A>006A +FF2B>006B +FF2C>006C +FF2D>006D +FF2E>006E +FF2F>006F +FF30>0070 +FF31>0071 +FF32>0072 +FF33>0073 +FF34>0074 +FF35>0075 +FF36>0076 +FF37>0077 +FF38>0078 +FF39>0079 +FF3A>007A +FF3B>005B +FF3C>005C +FF3D>005D +FF3E>005E +FF3F>005F +FF40>0060 +FF41>0061 +FF42>0062 +FF43>0063 +FF44>0064 +FF45>0065 +FF46>0066 +FF47>0067 +FF48>0068 +FF49>0069 +FF4A>006A +FF4B>006B +FF4C>006C +FF4D>006D +FF4E>006E +FF4F>006F +FF50>0070 +FF51>0071 +FF52>0072 +FF53>0073 +FF54>0074 +FF55>0075 +FF56>0076 +FF57>0077 +FF58>0078 +FF59>0079 +FF5A>007A +FF5B>007B +FF5C>007C +FF5D>007D +FF5E>007E +FF5F>2985 +FF60>2986 +FF61>3002 +FF62>300C +FF63>300D +FF64>3001 +FF65>30FB +FF66>30F2 +FF67>30A1 +FF68>30A3 +FF69>30A5 +FF6A>30A7 +FF6B>30A9 +FF6C>30E3 +FF6D>30E5 +FF6E>30E7 +FF6F>30C3 +FF70>30FC +FF71>30A2 +FF72>30A4 +FF73>30A6 +FF74>30A8 +FF75>30AA +FF76>30AB +FF77>30AD +FF78>30AF +FF79>30B1 +FF7A>30B3 +FF7B>30B5 +FF7C>30B7 +FF7D>30B9 +FF7E>30BB +FF7F>30BD +FF80>30BF +FF81>30C1 +FF82>30C4 +FF83>30C6 +FF84>30C8 +FF85>30CA +FF86>30CB +FF87>30CC +FF88>30CD +FF89>30CE +FF8A>30CF +FF8B>30D2 +FF8C>30D5 +FF8D>30D8 +FF8E>30DB +FF8F>30DE +FF90>30DF +FF91>30E0 +FF92>30E1 +FF93>30E2 +FF94>30E4 +FF95>30E6 +FF96>30E8 +FF97>30E9 +FF98>30EA +FF99>30EB +FF9A>30EC +FF9B>30ED +FF9C>30EF +FF9D>30F3 +FF9E>3099 +FF9F>309A +FFA0> +FFA1>1100 +FFA2>1101 +FFA3>11AA +FFA4>1102 +FFA5>11AC +FFA6>11AD +FFA7>1103 +FFA8>1104 +FFA9>1105 +FFAA>11B0 +FFAB>11B1 +FFAC>11B2 +FFAD>11B3 +FFAE>11B4 +FFAF>11B5 +FFB0>111A +FFB1>1106 +FFB2>1107 +FFB3>1108 +FFB4>1121 +FFB5>1109 +FFB6>110A +FFB7>110B +FFB8>110C +FFB9>110D +FFBA>110E +FFBB>110F +FFBC>1110 +FFBD>1111 +FFBE>1112 +FFC2>1161 +FFC3>1162 +FFC4>1163 +FFC5>1164 
+FFC6>1165 +FFC7>1166 +FFCA>1167 +FFCB>1168 +FFCC>1169 +FFCD>116A +FFCE>116B +FFCF>116C +FFD2>116D +FFD3>116E +FFD4>116F +FFD5>1170 +FFD6>1171 +FFD7>1172 +FFDA>1173 +FFDB>1174 +FFDC>1175 +FFE0>00A2 +FFE1>00A3 +FFE2>00AC +FFE3>0020 0304 +FFE4>00A6 +FFE5>00A5 +FFE6>20A9 +FFE8>2502 +FFE9>2190 +FFEA>2191 +FFEB>2192 +FFEC>2193 +FFED>25A0 +FFEE>25CB +FFF0..FFF8> +10400>10428 +10401>10429 +10402>1042A +10403>1042B +10404>1042C +10405>1042D +10406>1042E +10407>1042F +10408>10430 +10409>10431 +1040A>10432 +1040B>10433 +1040C>10434 +1040D>10435 +1040E>10436 +1040F>10437 +10410>10438 +10411>10439 +10412>1043A +10413>1043B +10414>1043C +10415>1043D +10416>1043E +10417>1043F +10418>10440 +10419>10441 +1041A>10442 +1041B>10443 +1041C>10444 +1041D>10445 +1041E>10446 +1041F>10447 +10420>10448 +10421>10449 +10422>1044A +10423>1044B +10424>1044C +10425>1044D +10426>1044E +10427>1044F +1D15E>1D157 1D165 +1D15F>1D158 1D165 +1D160>1D158 1D165 1D16E +1D161>1D158 1D165 1D16F +1D162>1D158 1D165 1D170 +1D163>1D158 1D165 1D171 +1D164>1D158 1D165 1D172 +1D173..1D17A> +1D1BB>1D1B9 1D165 +1D1BC>1D1BA 1D165 +1D1BD>1D1B9 1D165 1D16E +1D1BE>1D1BA 1D165 1D16E +1D1BF>1D1B9 1D165 1D16F +1D1C0>1D1BA 1D165 1D16F +1D400>0061 +1D401>0062 +1D402>0063 +1D403>0064 +1D404>0065 +1D405>0066 +1D406>0067 +1D407>0068 +1D408>0069 +1D409>006A +1D40A>006B +1D40B>006C +1D40C>006D +1D40D>006E +1D40E>006F +1D40F>0070 +1D410>0071 +1D411>0072 +1D412>0073 +1D413>0074 +1D414>0075 +1D415>0076 +1D416>0077 +1D417>0078 +1D418>0079 +1D419>007A +1D41A>0061 +1D41B>0062 +1D41C>0063 +1D41D>0064 +1D41E>0065 +1D41F>0066 +1D420>0067 +1D421>0068 +1D422>0069 +1D423>006A +1D424>006B +1D425>006C +1D426>006D +1D427>006E +1D428>006F +1D429>0070 +1D42A>0071 +1D42B>0072 +1D42C>0073 +1D42D>0074 +1D42E>0075 +1D42F>0076 +1D430>0077 +1D431>0078 +1D432>0079 +1D433>007A +1D434>0061 +1D435>0062 +1D436>0063 +1D437>0064 +1D438>0065 +1D439>0066 +1D43A>0067 +1D43B>0068 +1D43C>0069 +1D43D>006A +1D43E>006B +1D43F>006C +1D440>006D +1D441>006E +1D442>006F 
+1D443>0070 +1D444>0071 +1D445>0072 +1D446>0073 +1D447>0074 +1D448>0075 +1D449>0076 +1D44A>0077 +1D44B>0078 +1D44C>0079 +1D44D>007A +1D44E>0061 +1D44F>0062 +1D450>0063 +1D451>0064 +1D452>0065 +1D453>0066 +1D454>0067 +1D456>0069 +1D457>006A +1D458>006B +1D459>006C +1D45A>006D +1D45B>006E +1D45C>006F +1D45D>0070 +1D45E>0071 +1D45F>0072 +1D460>0073 +1D461>0074 +1D462>0075 +1D463>0076 +1D464>0077 +1D465>0078 +1D466>0079 +1D467>007A +1D468>0061 +1D469>0062 +1D46A>0063 +1D46B>0064 +1D46C>0065 +1D46D>0066 +1D46E>0067 +1D46F>0068 +1D470>0069 +1D471>006A +1D472>006B +1D473>006C +1D474>006D +1D475>006E +1D476>006F +1D477>0070 +1D478>0071 +1D479>0072 +1D47A>0073 +1D47B>0074 +1D47C>0075 +1D47D>0076 +1D47E>0077 +1D47F>0078 +1D480>0079 +1D481>007A +1D482>0061 +1D483>0062 +1D484>0063 +1D485>0064 +1D486>0065 +1D487>0066 +1D488>0067 +1D489>0068 +1D48A>0069 +1D48B>006A +1D48C>006B +1D48D>006C +1D48E>006D +1D48F>006E +1D490>006F +1D491>0070 +1D492>0071 +1D493>0072 +1D494>0073 +1D495>0074 +1D496>0075 +1D497>0076 +1D498>0077 +1D499>0078 +1D49A>0079 +1D49B>007A +1D49C>0061 +1D49E>0063 +1D49F>0064 +1D4A2>0067 +1D4A5>006A +1D4A6>006B +1D4A9>006E +1D4AA>006F +1D4AB>0070 +1D4AC>0071 +1D4AE>0073 +1D4AF>0074 +1D4B0>0075 +1D4B1>0076 +1D4B2>0077 +1D4B3>0078 +1D4B4>0079 +1D4B5>007A +1D4B6>0061 +1D4B7>0062 +1D4B8>0063 +1D4B9>0064 +1D4BB>0066 +1D4BD>0068 +1D4BE>0069 +1D4BF>006A +1D4C0>006B +1D4C1>006C +1D4C2>006D +1D4C3>006E +1D4C5>0070 +1D4C6>0071 +1D4C7>0072 +1D4C8>0073 +1D4C9>0074 +1D4CA>0075 +1D4CB>0076 +1D4CC>0077 +1D4CD>0078 +1D4CE>0079 +1D4CF>007A +1D4D0>0061 +1D4D1>0062 +1D4D2>0063 +1D4D3>0064 +1D4D4>0065 +1D4D5>0066 +1D4D6>0067 +1D4D7>0068 +1D4D8>0069 +1D4D9>006A +1D4DA>006B +1D4DB>006C +1D4DC>006D +1D4DD>006E +1D4DE>006F +1D4DF>0070 +1D4E0>0071 +1D4E1>0072 +1D4E2>0073 +1D4E3>0074 +1D4E4>0075 +1D4E5>0076 +1D4E6>0077 +1D4E7>0078 +1D4E8>0079 +1D4E9>007A +1D4EA>0061 +1D4EB>0062 +1D4EC>0063 +1D4ED>0064 +1D4EE>0065 +1D4EF>0066 +1D4F0>0067 +1D4F1>0068 +1D4F2>0069 +1D4F3>006A +1D4F4>006B 
+1D4F5>006C +1D4F6>006D +1D4F7>006E +1D4F8>006F +1D4F9>0070 +1D4FA>0071 +1D4FB>0072 +1D4FC>0073 +1D4FD>0074 +1D4FE>0075 +1D4FF>0076 +1D500>0077 +1D501>0078 +1D502>0079 +1D503>007A +1D504>0061 +1D505>0062 +1D507>0064 +1D508>0065 +1D509>0066 +1D50A>0067 +1D50D>006A +1D50E>006B +1D50F>006C +1D510>006D +1D511>006E +1D512>006F +1D513>0070 +1D514>0071 +1D516>0073 +1D517>0074 +1D518>0075 +1D519>0076 +1D51A>0077 +1D51B>0078 +1D51C>0079 +1D51E>0061 +1D51F>0062 +1D520>0063 +1D521>0064 +1D522>0065 +1D523>0066 +1D524>0067 +1D525>0068 +1D526>0069 +1D527>006A +1D528>006B +1D529>006C +1D52A>006D +1D52B>006E +1D52C>006F +1D52D>0070 +1D52E>0071 +1D52F>0072 +1D530>0073 +1D531>0074 +1D532>0075 +1D533>0076 +1D534>0077 +1D535>0078 +1D536>0079 +1D537>007A +1D538>0061 +1D539>0062 +1D53B>0064 +1D53C>0065 +1D53D>0066 +1D53E>0067 +1D540>0069 +1D541>006A +1D542>006B +1D543>006C +1D544>006D +1D546>006F +1D54A>0073 +1D54B>0074 +1D54C>0075 +1D54D>0076 +1D54E>0077 +1D54F>0078 +1D550>0079 +1D552>0061 +1D553>0062 +1D554>0063 +1D555>0064 +1D556>0065 +1D557>0066 +1D558>0067 +1D559>0068 +1D55A>0069 +1D55B>006A +1D55C>006B +1D55D>006C +1D55E>006D +1D55F>006E +1D560>006F +1D561>0070 +1D562>0071 +1D563>0072 +1D564>0073 +1D565>0074 +1D566>0075 +1D567>0076 +1D568>0077 +1D569>0078 +1D56A>0079 +1D56B>007A +1D56C>0061 +1D56D>0062 +1D56E>0063 +1D56F>0064 +1D570>0065 +1D571>0066 +1D572>0067 +1D573>0068 +1D574>0069 +1D575>006A +1D576>006B +1D577>006C +1D578>006D +1D579>006E +1D57A>006F +1D57B>0070 +1D57C>0071 +1D57D>0072 +1D57E>0073 +1D57F>0074 +1D580>0075 +1D581>0076 +1D582>0077 +1D583>0078 +1D584>0079 +1D585>007A +1D586>0061 +1D587>0062 +1D588>0063 +1D589>0064 +1D58A>0065 +1D58B>0066 +1D58C>0067 +1D58D>0068 +1D58E>0069 +1D58F>006A +1D590>006B +1D591>006C +1D592>006D +1D593>006E +1D594>006F +1D595>0070 +1D596>0071 +1D597>0072 +1D598>0073 +1D599>0074 +1D59A>0075 +1D59B>0076 +1D59C>0077 +1D59D>0078 +1D59E>0079 +1D59F>007A +1D5A0>0061 +1D5A1>0062 +1D5A2>0063 +1D5A3>0064 +1D5A4>0065 +1D5A5>0066 +1D5A6>0067 
+1D5A7>0068 +1D5A8>0069 +1D5A9>006A +1D5AA>006B +1D5AB>006C +1D5AC>006D +1D5AD>006E +1D5AE>006F +1D5AF>0070 +1D5B0>0071 +1D5B1>0072 +1D5B2>0073 +1D5B3>0074 +1D5B4>0075 +1D5B5>0076 +1D5B6>0077 +1D5B7>0078 +1D5B8>0079 +1D5B9>007A +1D5BA>0061 +1D5BB>0062 +1D5BC>0063 +1D5BD>0064 +1D5BE>0065 +1D5BF>0066 +1D5C0>0067 +1D5C1>0068 +1D5C2>0069 +1D5C3>006A +1D5C4>006B +1D5C5>006C +1D5C6>006D +1D5C7>006E +1D5C8>006F +1D5C9>0070 +1D5CA>0071 +1D5CB>0072 +1D5CC>0073 +1D5CD>0074 +1D5CE>0075 +1D5CF>0076 +1D5D0>0077 +1D5D1>0078 +1D5D2>0079 +1D5D3>007A +1D5D4>0061 +1D5D5>0062 +1D5D6>0063 +1D5D7>0064 +1D5D8>0065 +1D5D9>0066 +1D5DA>0067 +1D5DB>0068 +1D5DC>0069 +1D5DD>006A +1D5DE>006B +1D5DF>006C +1D5E0>006D +1D5E1>006E +1D5E2>006F +1D5E3>0070 +1D5E4>0071 +1D5E5>0072 +1D5E6>0073 +1D5E7>0074 +1D5E8>0075 +1D5E9>0076 +1D5EA>0077 +1D5EB>0078 +1D5EC>0079 +1D5ED>007A +1D5EE>0061 +1D5EF>0062 +1D5F0>0063 +1D5F1>0064 +1D5F2>0065 +1D5F3>0066 +1D5F4>0067 +1D5F5>0068 +1D5F6>0069 +1D5F7>006A +1D5F8>006B +1D5F9>006C +1D5FA>006D +1D5FB>006E +1D5FC>006F +1D5FD>0070 +1D5FE>0071 +1D5FF>0072 +1D600>0073 +1D601>0074 +1D602>0075 +1D603>0076 +1D604>0077 +1D605>0078 +1D606>0079 +1D607>007A +1D608>0061 +1D609>0062 +1D60A>0063 +1D60B>0064 +1D60C>0065 +1D60D>0066 +1D60E>0067 +1D60F>0068 +1D610>0069 +1D611>006A +1D612>006B +1D613>006C +1D614>006D +1D615>006E +1D616>006F +1D617>0070 +1D618>0071 +1D619>0072 +1D61A>0073 +1D61B>0074 +1D61C>0075 +1D61D>0076 +1D61E>0077 +1D61F>0078 +1D620>0079 +1D621>007A +1D622>0061 +1D623>0062 +1D624>0063 +1D625>0064 +1D626>0065 +1D627>0066 +1D628>0067 +1D629>0068 +1D62A>0069 +1D62B>006A +1D62C>006B +1D62D>006C +1D62E>006D +1D62F>006E +1D630>006F +1D631>0070 +1D632>0071 +1D633>0072 +1D634>0073 +1D635>0074 +1D636>0075 +1D637>0076 +1D638>0077 +1D639>0078 +1D63A>0079 +1D63B>007A +1D63C>0061 +1D63D>0062 +1D63E>0063 +1D63F>0064 +1D640>0065 +1D641>0066 +1D642>0067 +1D643>0068 +1D644>0069 +1D645>006A +1D646>006B +1D647>006C +1D648>006D +1D649>006E +1D64A>006F +1D64B>0070 +1D64C>0071 
+1D64D>0072 +1D64E>0073 +1D64F>0074 +1D650>0075 +1D651>0076 +1D652>0077 +1D653>0078 +1D654>0079 +1D655>007A +1D656>0061 +1D657>0062 +1D658>0063 +1D659>0064 +1D65A>0065 +1D65B>0066 +1D65C>0067 +1D65D>0068 +1D65E>0069 +1D65F>006A +1D660>006B +1D661>006C +1D662>006D +1D663>006E +1D664>006F +1D665>0070 +1D666>0071 +1D667>0072 +1D668>0073 +1D669>0074 +1D66A>0075 +1D66B>0076 +1D66C>0077 +1D66D>0078 +1D66E>0079 +1D66F>007A +1D670>0061 +1D671>0062 +1D672>0063 +1D673>0064 +1D674>0065 +1D675>0066 +1D676>0067 +1D677>0068 +1D678>0069 +1D679>006A +1D67A>006B +1D67B>006C +1D67C>006D +1D67D>006E +1D67E>006F +1D67F>0070 +1D680>0071 +1D681>0072 +1D682>0073 +1D683>0074 +1D684>0075 +1D685>0076 +1D686>0077 +1D687>0078 +1D688>0079 +1D689>007A +1D68A>0061 +1D68B>0062 +1D68C>0063 +1D68D>0064 +1D68E>0065 +1D68F>0066 +1D690>0067 +1D691>0068 +1D692>0069 +1D693>006A +1D694>006B +1D695>006C +1D696>006D +1D697>006E +1D698>006F +1D699>0070 +1D69A>0071 +1D69B>0072 +1D69C>0073 +1D69D>0074 +1D69E>0075 +1D69F>0076 +1D6A0>0077 +1D6A1>0078 +1D6A2>0079 +1D6A3>007A +1D6A4>0131 +1D6A5>0237 +1D6A8>03B1 +1D6A9>03B2 +1D6AA>03B3 +1D6AB>03B4 +1D6AC>03B5 +1D6AD>03B6 +1D6AE>03B7 +1D6AF>03B8 +1D6B0>03B9 +1D6B1>03BA +1D6B2>03BB +1D6B3>03BC +1D6B4>03BD +1D6B5>03BE +1D6B6>03BF +1D6B7>03C0 +1D6B8>03C1 +1D6B9>03B8 +1D6BA>03C3 +1D6BB>03C4 +1D6BC>03C5 +1D6BD>03C6 +1D6BE>03C7 +1D6BF>03C8 +1D6C0>03C9 +1D6C1>2207 +1D6C2>03B1 +1D6C3>03B2 +1D6C4>03B3 +1D6C5>03B4 +1D6C6>03B5 +1D6C7>03B6 +1D6C8>03B7 +1D6C9>03B8 +1D6CA>03B9 +1D6CB>03BA +1D6CC>03BB +1D6CD>03BC +1D6CE>03BD +1D6CF>03BE +1D6D0>03BF +1D6D1>03C0 +1D6D2>03C1 +1D6D3..1D6D4>03C3 +1D6D5>03C4 +1D6D6>03C5 +1D6D7>03C6 +1D6D8>03C7 +1D6D9>03C8 +1D6DA>03C9 +1D6DB>2202 +1D6DC>03B5 +1D6DD>03B8 +1D6DE>03BA +1D6DF>03C6 +1D6E0>03C1 +1D6E1>03C0 +1D6E2>03B1 +1D6E3>03B2 +1D6E4>03B3 +1D6E5>03B4 +1D6E6>03B5 +1D6E7>03B6 +1D6E8>03B7 +1D6E9>03B8 +1D6EA>03B9 +1D6EB>03BA +1D6EC>03BB +1D6ED>03BC +1D6EE>03BD +1D6EF>03BE +1D6F0>03BF +1D6F1>03C0 +1D6F2>03C1 +1D6F3>03B8 +1D6F4>03C3 +1D6F5>03C4 
+1D6F6>03C5 +1D6F7>03C6 +1D6F8>03C7 +1D6F9>03C8 +1D6FA>03C9 +1D6FB>2207 +1D6FC>03B1 +1D6FD>03B2 +1D6FE>03B3 +1D6FF>03B4 +1D700>03B5 +1D701>03B6 +1D702>03B7 +1D703>03B8 +1D704>03B9 +1D705>03BA +1D706>03BB +1D707>03BC +1D708>03BD +1D709>03BE +1D70A>03BF +1D70B>03C0 +1D70C>03C1 +1D70D..1D70E>03C3 +1D70F>03C4 +1D710>03C5 +1D711>03C6 +1D712>03C7 +1D713>03C8 +1D714>03C9 +1D715>2202 +1D716>03B5 +1D717>03B8 +1D718>03BA +1D719>03C6 +1D71A>03C1 +1D71B>03C0 +1D71C>03B1 +1D71D>03B2 +1D71E>03B3 +1D71F>03B4 +1D720>03B5 +1D721>03B6 +1D722>03B7 +1D723>03B8 +1D724>03B9 +1D725>03BA +1D726>03BB +1D727>03BC +1D728>03BD +1D729>03BE +1D72A>03BF +1D72B>03C0 +1D72C>03C1 +1D72D>03B8 +1D72E>03C3 +1D72F>03C4 +1D730>03C5 +1D731>03C6 +1D732>03C7 +1D733>03C8 +1D734>03C9 +1D735>2207 +1D736>03B1 +1D737>03B2 +1D738>03B3 +1D739>03B4 +1D73A>03B5 +1D73B>03B6 +1D73C>03B7 +1D73D>03B8 +1D73E>03B9 +1D73F>03BA +1D740>03BB +1D741>03BC +1D742>03BD +1D743>03BE +1D744>03BF +1D745>03C0 +1D746>03C1 +1D747..1D748>03C3 +1D749>03C4 +1D74A>03C5 +1D74B>03C6 +1D74C>03C7 +1D74D>03C8 +1D74E>03C9 +1D74F>2202 +1D750>03B5 +1D751>03B8 +1D752>03BA +1D753>03C6 +1D754>03C1 +1D755>03C0 +1D756>03B1 +1D757>03B2 +1D758>03B3 +1D759>03B4 +1D75A>03B5 +1D75B>03B6 +1D75C>03B7 +1D75D>03B8 +1D75E>03B9 +1D75F>03BA +1D760>03BB +1D761>03BC +1D762>03BD +1D763>03BE +1D764>03BF +1D765>03C0 +1D766>03C1 +1D767>03B8 +1D768>03C3 +1D769>03C4 +1D76A>03C5 +1D76B>03C6 +1D76C>03C7 +1D76D>03C8 +1D76E>03C9 +1D76F>2207 +1D770>03B1 +1D771>03B2 +1D772>03B3 +1D773>03B4 +1D774>03B5 +1D775>03B6 +1D776>03B7 +1D777>03B8 +1D778>03B9 +1D779>03BA +1D77A>03BB +1D77B>03BC +1D77C>03BD +1D77D>03BE +1D77E>03BF +1D77F>03C0 +1D780>03C1 +1D781..1D782>03C3 +1D783>03C4 +1D784>03C5 +1D785>03C6 +1D786>03C7 +1D787>03C8 +1D788>03C9 +1D789>2202 +1D78A>03B5 +1D78B>03B8 +1D78C>03BA +1D78D>03C6 +1D78E>03C1 +1D78F>03C0 +1D790>03B1 +1D791>03B2 +1D792>03B3 +1D793>03B4 +1D794>03B5 +1D795>03B6 +1D796>03B7 +1D797>03B8 +1D798>03B9 +1D799>03BA +1D79A>03BB +1D79B>03BC +1D79C>03BD 
+1D79D>03BE +1D79E>03BF +1D79F>03C0 +1D7A0>03C1 +1D7A1>03B8 +1D7A2>03C3 +1D7A3>03C4 +1D7A4>03C5 +1D7A5>03C6 +1D7A6>03C7 +1D7A7>03C8 +1D7A8>03C9 +1D7A9>2207 +1D7AA>03B1 +1D7AB>03B2 +1D7AC>03B3 +1D7AD>03B4 +1D7AE>03B5 +1D7AF>03B6 +1D7B0>03B7 +1D7B1>03B8 +1D7B2>03B9 +1D7B3>03BA +1D7B4>03BB +1D7B5>03BC +1D7B6>03BD +1D7B7>03BE +1D7B8>03BF +1D7B9>03C0 +1D7BA>03C1 +1D7BB..1D7BC>03C3 +1D7BD>03C4 +1D7BE>03C5 +1D7BF>03C6 +1D7C0>03C7 +1D7C1>03C8 +1D7C2>03C9 +1D7C3>2202 +1D7C4>03B5 +1D7C5>03B8 +1D7C6>03BA +1D7C7>03C6 +1D7C8>03C1 +1D7C9>03C0 +1D7CA..1D7CB>03DD +1D7CE>0030 +1D7CF>0031 +1D7D0>0032 +1D7D1>0033 +1D7D2>0034 +1D7D3>0035 +1D7D4>0036 +1D7D5>0037 +1D7D6>0038 +1D7D7>0039 +1D7D8>0030 +1D7D9>0031 +1D7DA>0032 +1D7DB>0033 +1D7DC>0034 +1D7DD>0035 +1D7DE>0036 +1D7DF>0037 +1D7E0>0038 +1D7E1>0039 +1D7E2>0030 +1D7E3>0031 +1D7E4>0032 +1D7E5>0033 +1D7E6>0034 +1D7E7>0035 +1D7E8>0036 +1D7E9>0037 +1D7EA>0038 +1D7EB>0039 +1D7EC>0030 +1D7ED>0031 +1D7EE>0032 +1D7EF>0033 +1D7F0>0034 +1D7F1>0035 +1D7F2>0036 +1D7F3>0037 +1D7F4>0038 +1D7F5>0039 +1D7F6>0030 +1D7F7>0031 +1D7F8>0032 +1D7F9>0033 +1D7FA>0034 +1D7FB>0035 +1D7FC>0036 +1D7FD>0037 +1D7FE>0038 +1D7FF>0039 +1F100>0030 002E +1F101>0030 002C +1F102>0031 002C +1F103>0032 002C +1F104>0033 002C +1F105>0034 002C +1F106>0035 002C +1F107>0036 002C +1F108>0037 002C +1F109>0038 002C +1F10A>0039 002C +1F110>0028 0061 0029 +1F111>0028 0062 0029 +1F112>0028 0063 0029 +1F113>0028 0064 0029 +1F114>0028 0065 0029 +1F115>0028 0066 0029 +1F116>0028 0067 0029 +1F117>0028 0068 0029 +1F118>0028 0069 0029 +1F119>0028 006A 0029 +1F11A>0028 006B 0029 +1F11B>0028 006C 0029 +1F11C>0028 006D 0029 +1F11D>0028 006E 0029 +1F11E>0028 006F 0029 +1F11F>0028 0070 0029 +1F120>0028 0071 0029 +1F121>0028 0072 0029 +1F122>0028 0073 0029 +1F123>0028 0074 0029 +1F124>0028 0075 0029 +1F125>0028 0076 0029 +1F126>0028 0077 0029 +1F127>0028 0078 0029 +1F128>0028 0079 0029 +1F129>0028 007A 0029 +1F12A>3014 0073 3015 +1F12B>0063 +1F12C>0072 +1F12D>0063 0064 +1F12E>0077 007A 
+1F131>0062 +1F13D>006E +1F13F>0070 +1F142>0073 +1F146>0077 +1F14A>0068 0076 +1F14B>006D 0076 +1F14C>0073 0064 +1F14D>0073 0073 +1F14E>0070 0070 0076 +1F190>0064 006A +1F200>307B 304B +1F210>624B +1F211>5B57 +1F212>53CC +1F213>30C7 +1F214>4E8C +1F215>591A +1F216>89E3 +1F217>5929 +1F218>4EA4 +1F219>6620 +1F21A>7121 +1F21B>6599 +1F21C>524D +1F21D>5F8C +1F21E>518D +1F21F>65B0 +1F220>521D +1F221>7D42 +1F222>751F +1F223>8CA9 +1F224>58F0 +1F225>5439 +1F226>6F14 +1F227>6295 +1F228>6355 +1F229>4E00 +1F22A>4E09 +1F22B>904A +1F22C>5DE6 +1F22D>4E2D +1F22E>53F3 +1F22F>6307 +1F230>8D70 +1F231>6253 +1F240>3014 672C 3015 +1F241>3014 4E09 3015 +1F242>3014 4E8C 3015 +1F243>3014 5B89 3015 +1F244>3014 70B9 3015 +1F245>3014 6253 3015 +1F246>3014 76D7 3015 +1F247>3014 52DD 3015 +1F248>3014 6557 3015 +2F800>4E3D +2F801>4E38 +2F802>4E41 +2F803>20122 +2F804>4F60 +2F805>4FAE +2F806>4FBB +2F807>5002 +2F808>507A +2F809>5099 +2F80A>50E7 +2F80B>50CF +2F80C>349E +2F80D>2063A +2F80E>514D +2F80F>5154 +2F810>5164 +2F811>5177 +2F812>2051C +2F813>34B9 +2F814>5167 +2F815>518D +2F816>2054B +2F817>5197 +2F818>51A4 +2F819>4ECC +2F81A>51AC +2F81B>51B5 +2F81C>291DF +2F81D>51F5 +2F81E>5203 +2F81F>34DF +2F820>523B +2F821>5246 +2F822>5272 +2F823>5277 +2F824>3515 +2F825>52C7 +2F826>52C9 +2F827>52E4 +2F828>52FA +2F829>5305 +2F82A>5306 +2F82B>5317 +2F82C>5349 +2F82D>5351 +2F82E>535A +2F82F>5373 +2F830>537D +2F831..2F833>537F +2F834>20A2C +2F835>7070 +2F836>53CA +2F837>53DF +2F838>20B63 +2F839>53EB +2F83A>53F1 +2F83B>5406 +2F83C>549E +2F83D>5438 +2F83E>5448 +2F83F>5468 +2F840>54A2 +2F841>54F6 +2F842>5510 +2F843>5553 +2F844>5563 +2F845..2F846>5584 +2F847>5599 +2F848>55AB +2F849>55B3 +2F84A>55C2 +2F84B>5716 +2F84C>5606 +2F84D>5717 +2F84E>5651 +2F84F>5674 +2F850>5207 +2F851>58EE +2F852>57CE +2F853>57F4 +2F854>580D +2F855>578B +2F856>5832 +2F857>5831 +2F858>58AC +2F859>214E4 +2F85A>58F2 +2F85B>58F7 +2F85C>5906 +2F85D>591A +2F85E>5922 +2F85F>5962 +2F860>216A8 +2F861>216EA +2F862>59EC +2F863>5A1B +2F864>5A27 
+2F865>59D8 +2F866>5A66 +2F867>36EE +2F868>36FC +2F869>5B08 +2F86A..2F86B>5B3E +2F86C>219C8 +2F86D>5BC3 +2F86E>5BD8 +2F86F>5BE7 +2F870>5BF3 +2F871>21B18 +2F872>5BFF +2F873>5C06 +2F874>5F53 +2F875>5C22 +2F876>3781 +2F877>5C60 +2F878>5C6E +2F879>5CC0 +2F87A>5C8D +2F87B>21DE4 +2F87C>5D43 +2F87D>21DE6 +2F87E>5D6E +2F87F>5D6B +2F880>5D7C +2F881>5DE1 +2F882>5DE2 +2F883>382F +2F884>5DFD +2F885>5E28 +2F886>5E3D +2F887>5E69 +2F888>3862 +2F889>22183 +2F88A>387C +2F88B>5EB0 +2F88C>5EB3 +2F88D>5EB6 +2F88E>5ECA +2F88F>2A392 +2F890>5EFE +2F891..2F892>22331 +2F893>8201 +2F894..2F895>5F22 +2F896>38C7 +2F897>232B8 +2F898>261DA +2F899>5F62 +2F89A>5F6B +2F89B>38E3 +2F89C>5F9A +2F89D>5FCD +2F89E>5FD7 +2F89F>5FF9 +2F8A0>6081 +2F8A1>393A +2F8A2>391C +2F8A3>6094 +2F8A4>226D4 +2F8A5>60C7 +2F8A6>6148 +2F8A7>614C +2F8A8>614E +2F8A9>614C +2F8AA>617A +2F8AB>618E +2F8AC>61B2 +2F8AD>61A4 +2F8AE>61AF +2F8AF>61DE +2F8B0>61F2 +2F8B1>61F6 +2F8B2>6210 +2F8B3>621B +2F8B4>625D +2F8B5>62B1 +2F8B6>62D4 +2F8B7>6350 +2F8B8>22B0C +2F8B9>633D +2F8BA>62FC +2F8BB>6368 +2F8BC>6383 +2F8BD>63E4 +2F8BE>22BF1 +2F8BF>6422 +2F8C0>63C5 +2F8C1>63A9 +2F8C2>3A2E +2F8C3>6469 +2F8C4>647E +2F8C5>649D +2F8C6>6477 +2F8C7>3A6C +2F8C8>654F +2F8C9>656C +2F8CA>2300A +2F8CB>65E3 +2F8CC>66F8 +2F8CD>6649 +2F8CE>3B19 +2F8CF>6691 +2F8D0>3B08 +2F8D1>3AE4 +2F8D2>5192 +2F8D3>5195 +2F8D4>6700 +2F8D5>669C +2F8D6>80AD +2F8D7>43D9 +2F8D8>6717 +2F8D9>671B +2F8DA>6721 +2F8DB>675E +2F8DC>6753 +2F8DD>233C3 +2F8DE>3B49 +2F8DF>67FA +2F8E0>6785 +2F8E1>6852 +2F8E2>6885 +2F8E3>2346D +2F8E4>688E +2F8E5>681F +2F8E6>6914 +2F8E7>3B9D +2F8E8>6942 +2F8E9>69A3 +2F8EA>69EA +2F8EB>6AA8 +2F8EC>236A3 +2F8ED>6ADB +2F8EE>3C18 +2F8EF>6B21 +2F8F0>238A7 +2F8F1>6B54 +2F8F2>3C4E +2F8F3>6B72 +2F8F4>6B9F +2F8F5>6BBA +2F8F6>6BBB +2F8F7>23A8D +2F8F8>21D0B +2F8F9>23AFA +2F8FA>6C4E +2F8FB>23CBC +2F8FC>6CBF +2F8FD>6CCD +2F8FE>6C67 +2F8FF>6D16 +2F900>6D3E +2F901>6D77 +2F902>6D41 +2F903>6D69 +2F904>6D78 +2F905>6D85 +2F906>23D1E +2F907>6D34 +2F908>6E2F +2F909>6E6E +2F90A>3D33 
+2F90B>6ECB +2F90C>6EC7 +2F90D>23ED1 +2F90E>6DF9 +2F90F>6F6E +2F910>23F5E +2F911>23F8E +2F912>6FC6 +2F913>7039 +2F914>701E +2F915>701B +2F916>3D96 +2F917>704A +2F918>707D +2F919>7077 +2F91A>70AD +2F91B>20525 +2F91C>7145 +2F91D>24263 +2F91E>719C +2F91F>243AB +2F920>7228 +2F921>7235 +2F922>7250 +2F923>24608 +2F924>7280 +2F925>7295 +2F926>24735 +2F927>24814 +2F928>737A +2F929>738B +2F92A>3EAC +2F92B>73A5 +2F92C..2F92D>3EB8 +2F92E>7447 +2F92F>745C +2F930>7471 +2F931>7485 +2F932>74CA +2F933>3F1B +2F934>7524 +2F935>24C36 +2F936>753E +2F937>24C92 +2F938>7570 +2F939>2219F +2F93A>7610 +2F93B>24FA1 +2F93C>24FB8 +2F93D>25044 +2F93E>3FFC +2F93F>4008 +2F940>76F4 +2F941>250F3 +2F942>250F2 +2F943>25119 +2F944>25133 +2F945>771E +2F946..2F947>771F +2F948>774A +2F949>4039 +2F94A>778B +2F94B>4046 +2F94C>4096 +2F94D>2541D +2F94E>784E +2F94F>788C +2F950>78CC +2F951>40E3 +2F952>25626 +2F953>7956 +2F954>2569A +2F955>256C5 +2F956>798F +2F957>79EB +2F958>412F +2F959>7A40 +2F95A>7A4A +2F95B>7A4F +2F95C>2597C +2F95D..2F95E>25AA7 +2F95F>7AEE +2F960>4202 +2F961>25BAB +2F962>7BC6 +2F963>7BC9 +2F964>4227 +2F965>25C80 +2F966>7CD2 +2F967>42A0 +2F968>7CE8 +2F969>7CE3 +2F96A>7D00 +2F96B>25F86 +2F96C>7D63 +2F96D>4301 +2F96E>7DC7 +2F96F>7E02 +2F970>7E45 +2F971>4334 +2F972>26228 +2F973>26247 +2F974>4359 +2F975>262D9 +2F976>7F7A +2F977>2633E +2F978>7F95 +2F979>7FFA +2F97A>8005 +2F97B>264DA +2F97C>26523 +2F97D>8060 +2F97E>265A8 +2F97F>8070 +2F980>2335F +2F981>43D5 +2F982>80B2 +2F983>8103 +2F984>440B +2F985>813E +2F986>5AB5 +2F987>267A7 +2F988>267B5 +2F989>23393 +2F98A>2339C +2F98B>8201 +2F98C>8204 +2F98D>8F9E +2F98E>446B +2F98F>8291 +2F990>828B +2F991>829D +2F992>52B3 +2F993>82B1 +2F994>82B3 +2F995>82BD +2F996>82E6 +2F997>26B3C +2F998>82E5 +2F999>831D +2F99A>8363 +2F99B>83AD +2F99C>8323 +2F99D>83BD +2F99E>83E7 +2F99F>8457 +2F9A0>8353 +2F9A1>83CA +2F9A2>83CC +2F9A3>83DC +2F9A4>26C36 +2F9A5>26D6B +2F9A6>26CD5 +2F9A7>452B +2F9A8>84F1 +2F9A9>84F3 +2F9AA>8516 +2F9AB>273CA +2F9AC>8564 +2F9AD>26F2C +2F9AE>455D 
+2F9AF>4561 +2F9B0>26FB1 +2F9B1>270D2 +2F9B2>456B +2F9B3>8650 +2F9B4>865C +2F9B5>8667 +2F9B6>8669 +2F9B7>86A9 +2F9B8>8688 +2F9B9>870E +2F9BA>86E2 +2F9BB>8779 +2F9BC>8728 +2F9BD>876B +2F9BE>8786 +2F9BF>45D7 +2F9C0>87E1 +2F9C1>8801 +2F9C2>45F9 +2F9C3>8860 +2F9C4>8863 +2F9C5>27667 +2F9C6>88D7 +2F9C7>88DE +2F9C8>4635 +2F9C9>88FA +2F9CA>34BB +2F9CB>278AE +2F9CC>27966 +2F9CD>46BE +2F9CE>46C7 +2F9CF>8AA0 +2F9D0>8AED +2F9D1>8B8A +2F9D2>8C55 +2F9D3>27CA8 +2F9D4>8CAB +2F9D5>8CC1 +2F9D6>8D1B +2F9D7>8D77 +2F9D8>27F2F +2F9D9>20804 +2F9DA>8DCB +2F9DB>8DBC +2F9DC>8DF0 +2F9DD>208DE +2F9DE>8ED4 +2F9DF>8F38 +2F9E0>285D2 +2F9E1>285ED +2F9E2>9094 +2F9E3>90F1 +2F9E4>9111 +2F9E5>2872E +2F9E6>911B +2F9E7>9238 +2F9E8>92D7 +2F9E9>92D8 +2F9EA>927C +2F9EB>93F9 +2F9EC>9415 +2F9ED>28BFA +2F9EE>958B +2F9EF>4995 +2F9F0>95B7 +2F9F1>28D77 +2F9F2>49E6 +2F9F3>96C3 +2F9F4>5DB2 +2F9F5>9723 +2F9F6>29145 +2F9F7>2921A +2F9F8>4A6E +2F9F9>4A76 +2F9FA>97E0 +2F9FB>2940A +2F9FC>4AB2 +2F9FD>29496 +2F9FE..2F9FF>980B +2FA00>9829 +2FA01>295B6 +2FA02>98E2 +2FA03>4B33 +2FA04>9929 +2FA05>99A7 +2FA06>99C2 +2FA07>99FE +2FA08>4BCE +2FA09>29B30 +2FA0A>9B12 +2FA0B>9C40 +2FA0C>9CFD +2FA0D>4CCE +2FA0E>4CED +2FA0F>9D67 +2FA10>2A0CE +2FA11>4CF8 +2FA12>2A105 +2FA13>2A20E +2FA14>2A291 +2FA15>9EBB +2FA16>4D56 +2FA17>9EF9 +2FA18>9EFE +2FA19>9F05 +2FA1A>9F0F +2FA1B>9F16 +2FA1C>9F3B +2FA1D>2A600 +E0000> +E0001> +E0002..E001F> +E0020..E007F> +E0080..E00FF> +E0100..E01EF> +E01F0..E0FFF> + +# Total code points: 9740 diff --git a/icu4c/source/i18n/bmsearch.cpp b/icu4c/source/i18n/bmsearch.cpp index 7dc5678fccf..d7aa5c82859 100644 --- a/icu4c/source/i18n/bmsearch.cpp +++ b/icu4c/source/i18n/bmsearch.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** - * Copyright (C) 1996-2009, International Business Machines * + * Copyright (C) 1996-2010, International Business Machines * * Corporation and others. All Rights Reserved. 
* ****************************************************************************** */ @@ -27,7 +27,7 @@ #include "hash.h" #include "uhash.h" #include "ucol_imp.h" -#include "unormimp.h" +#include "normalizer2impl.h" #include "unicode/colldata.h" #include "unicode/bmsearch.h" @@ -81,6 +81,7 @@ private: uint32_t variableTop; UBool toShift; UCollator *coll; + const Normalizer2 &nfd; const UnicodeString *targetString; const UChar *targetBuffer; @@ -93,6 +94,7 @@ private: Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status) : bufferSize(0), bufferMin(0), bufferMax(0), strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator), + nfd(*Normalizer2Factory::getNFDInstance(status)), targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL) { strength = ucol_getStrength(coll); @@ -348,63 +350,14 @@ UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end) return TRUE; } - UChar t2[32], p2[32]; - const UChar *pBuffer = pattern.getBuffer(); - int32_t pLength = pattern.length(); - int32_t length = end - start; - - UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR; - - int32_t decomplength = unorm_decompose(t2, ARRAY_SIZE(t2), - targetBuffer + start, length, - FALSE, 0, &status); - - // use separate status2 in case of buffer overflow - if (decomplength != unorm_decompose(p2, ARRAY_SIZE(p2), - pBuffer, pLength, - FALSE, 0, &status2)) { - return FALSE; // lengths are different - } - - // compare contents - UChar *text, *pat; - - if(U_SUCCESS(status)) { - text = t2; - pat = p2; - } else if(status == U_BUFFER_OVERFLOW_ERROR) { - status = U_ZERO_ERROR; - - // allocate one buffer for both decompositions - text = NEW_ARRAY(UChar, decomplength * 2); - - // Check for allocation failure. 
- if (text == NULL) { - return FALSE; - } - - pat = text + decomplength; - - unorm_decompose(text, decomplength, targetBuffer + start, - length, FALSE, 0, &status); - - unorm_decompose(pat, decomplength, pBuffer, - pLength, FALSE, 0, &status); - } else { - // NFD failed, make sure that u_memcmp() does not overrun t2 & p2 - // and that we don't uprv_free() an undefined text pointer - text = pat = t2; - decomplength = 0; - } - - UBool result = (UBool)(u_memcmp(pat, text, decomplength) == 0); - - if(text != t2) { - DELETE_ARRAY(text); - } - + // Note: We could use Normalizer::compare() or similar, but for short strings + // which may not be in FCD it might be faster to just NFD them. + UErrorCode status = U_ZERO_ERROR; + UnicodeString t2, p2; + nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), t2, status); + nfd.normalize(pattern, p2, status); // return FALSE if NFD failed - return U_SUCCESS(status) && result; + return U_SUCCESS(status) && t2 == p2; } #define HASH_TABLE_SIZE 257 diff --git a/icu4c/source/i18n/coleitr.cpp b/icu4c/source/i18n/coleitr.cpp index 1dc79d9c093..173166af622 100644 --- a/icu4c/source/i18n/coleitr.cpp +++ b/icu4c/source/i18n/coleitr.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 1996-2009, International Business Machines Corporation and * +* Copyright (C) 1996-2010, International Business Machines Corporation and * * others. All Rights Reserved. 
* ******************************************************************************* */ @@ -122,9 +122,9 @@ UBool CollationElementIterator::operator==( } // both are in the normalization buffer if (m_data_->iteratordata_.pos - - m_data_->iteratordata_.writableBuffer + - m_data_->iteratordata_.writableBuffer.getBuffer() != that.m_data_->iteratordata_.pos - - that.m_data_->iteratordata_.writableBuffer) { + - that.m_data_->iteratordata_.writableBuffer.getBuffer()) { // not in the same position in the normalization buffer return FALSE; } @@ -176,7 +176,7 @@ void CollationElementIterator::setText(const UnicodeString& source, int32_t length = source.length(); UChar *string = NULL; if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { - uprv_free(m_data_->iteratordata_.string); + uprv_free((UChar *)m_data_->iteratordata_.string); } m_data_->isWritable = TRUE; if (length > 0) { @@ -200,7 +200,7 @@ void CollationElementIterator::setText(const UnicodeString& source, /* Free offsetBuffer before initializing it. */ ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, - &m_data_->iteratordata_); + &m_data_->iteratordata_, &status); m_data_->reset_ = TRUE; } @@ -241,13 +241,13 @@ void CollationElementIterator::setText(CharacterIterator& source, } if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { - uprv_free(m_data_->iteratordata_.string); + uprv_free((UChar *)m_data_->iteratordata_.string); } m_data_->isWritable = TRUE; /* Free offsetBuffer before initializing it. 
*/ ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, - &m_data_->iteratordata_); + &m_data_->iteratordata_, &status); m_data_->reset_ = TRUE; } @@ -407,7 +407,7 @@ const CollationElementIterator& CollationElementIterator::operator=( if (length > 0) { coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR); if(coliter->string != NULL) { - uprv_memcpy(coliter->string, othercoliter->string, + uprv_memcpy((UChar *)coliter->string, othercoliter->string, length * U_SIZEOF_UCHAR); } else { // Error: couldn't allocate memory. No copying should be done length = 0; @@ -423,27 +423,8 @@ const CollationElementIterator& CollationElementIterator::operator=( /* handle writable buffer here */ if (othercoliter->flags & UCOL_ITER_INNORMBUF) { - uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1; - if (wlength < coliter->writableBufSize) { - uprv_memcpy(coliter->stackWritableBuffer, - othercoliter->stackWritableBuffer, - wlength * U_SIZEOF_UCHAR); - } - else { - if (coliter->writableBuffer != coliter->stackWritableBuffer) { - uprv_free(coliter->writableBuffer); - } - coliter->writableBuffer = (UChar *)uprv_malloc( - wlength * U_SIZEOF_UCHAR); - if(coliter->writableBuffer != NULL) { - uprv_memcpy(coliter->writableBuffer, - othercoliter->writableBuffer, - wlength * U_SIZEOF_UCHAR); - coliter->writableBufSize = wlength; - } else { // Error: couldn't allocate memory for writableBuffer - coliter->writableBufSize = 0; - } - } + coliter->writableBuffer = othercoliter->writableBuffer; + coliter->writableBuffer.getTerminatedBuffer(); } /* current position */ @@ -453,13 +434,9 @@ const CollationElementIterator& CollationElementIterator::operator=( coliter->pos = coliter->string + (othercoliter->pos - othercoliter->string); } - else if (coliter->writableBuffer != NULL) { - coliter->pos = coliter->writableBuffer + - (othercoliter->pos - othercoliter->writableBuffer); - } else { - // Error: couldn't allocate 
memory for writableBuffer - coliter->pos = NULL; + coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + + (othercoliter->pos - othercoliter->writableBuffer.getBuffer()); } /* CE buffer */ diff --git a/icu4c/source/i18n/i18n.vcproj b/icu4c/source/i18n/i18n.vcproj index c2d460d9bb4..c5d221c8463 100644 --- a/icu4c/source/i18n/i18n.vcproj +++ b/icu4c/source/i18n/i18n.vcproj @@ -895,7 +895,7 @@ >
= limit) { return; } - // a C code unit iterator, implemented around the Replaceable - UCharIterator iter; - uiter_setReplaceable(&iter, &text); - - // the output string and buffer pointer - UnicodeString output; - UChar *buffer; - UBool neededToNormalize; - - UErrorCode errorCode; - /* * Normalize as short chunks at a time as possible even in * bulk mode, so that styled text is minimally disrupted. @@ -129,101 +113,62 @@ void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransP * * If it was known that the input text is not styled, then * a bulk mode normalization could look like this: - * - - UChar staticChars[256]; - UnicodeString input; - - length = limit - start; - input.setTo(staticChars, 0, sizeof(staticChars)/U_SIZEOF_UCHAR); // writable alias + UnicodeString input, normalized; + int32_t length = limit - start; _Replaceable_extractBetween(text, start, limit, input.getBuffer(length)); input.releaseBuffer(length); UErrorCode status = U_ZERO_ERROR; - Normalizer::normalize(input, fMode, options, output, status); + fNorm2.normalize(input, normalized, status); - text.handleReplaceBetween(start, limit, output); + text.handleReplaceBetween(start, limit, normalized); - int32_t delta = output.length() - length; + int32_t delta = normalized.length() - length; offsets.contextLimit += delta; offsets.limit += delta; offsets.start = limit + delta; - * */ - while(start < limit) { - // set the iterator limits for the remaining input range - // this is a moving target because of the replacements in the text object - iter.start = iter.index = start; - iter.limit = limit; - - // incrementally normalize a small chunk of the input - buffer = output.getBuffer(-1); - errorCode = U_ZERO_ERROR; - length = unorm_next(&iter, buffer, output.getCapacity(), - fMode, 0, - TRUE, &neededToNormalize, - &errorCode); - output.releaseBuffer(U_SUCCESS(errorCode) ? 
length : 0); - - if(errorCode == U_BUFFER_OVERFLOW_ERROR) { - // use a larger output string buffer and do it again from the start - iter.index = start; - buffer = output.getBuffer(length); - errorCode = U_ZERO_ERROR; - length = unorm_next(&iter, buffer, output.getCapacity(), - fMode, 0, - TRUE, &neededToNormalize, - &errorCode); - output.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + UErrorCode errorCode = U_ZERO_ERROR; + UnicodeString segment; + UnicodeString normalized; + UChar32 c = text.char32At(start); + do { + int32_t prev = start; + // Skip at least one character so we make progress. + // c holds the character at start. + segment.setTo(c); + start += U16_LENGTH(c); + while(start < limit && !fNorm2.hasBoundaryBefore(c = text.char32At(start))) { + segment.append(c); + start += U16_LENGTH(c); } - - if(U_FAILURE(errorCode)) { - break; - } - - limit = iter.index; - if(isIncremental && limit == iter.limit) { + if(start == limit && isIncremental && !fNorm2.hasBoundaryAfter(c)) { // stop in incremental mode when we reach the input limit // in case there are additional characters that could change the // normalization result - - // UNLESS all characters in the result of the normalization of - // the last run are in the skippable set - const UChar *s=output.getBuffer(); - int32_t i=0, outLength=output.length(); - UChar32 c; - - while(istring = (s)->pos = (UChar *)(sourceString); + (s)->string = (s)->pos = sourceString; (s)->origFlags = 0; (s)->flags = 0; if (sourceLen >= 0) { @@ -102,9 +106,8 @@ inline void IInit_collIterate(const UCollator *collator, const UChar *sourceStri (s)->offsetBufferSize = 0; (s)->offsetReturn = (s)->offsetStore = NULL; (s)->offsetRepeatCount = (s)->offsetRepeatValue = 0; - (s)->writableBuffer = (s)->stackWritableBuffer; - (s)->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE; (s)->coll = (collator); + (s)->nfd = Normalizer2Factory::getNFDInstance(*status); (s)->fcdPosition = 0; if(collator->normalizationMode == UCOL_ON) { (s)->flags |= 
UCOL_ITER_NORM; @@ -118,9 +121,33 @@ inline void IInit_collIterate(const UCollator *collator, const UChar *sourceStri U_CAPI void U_EXPORT2 uprv_init_collIterate(const UCollator *collator, const UChar *sourceString, - int32_t sourceLen, collIterate *s){ + int32_t sourceLen, collIterate *s, + UErrorCode *status) { /* Out-of-line version for use from other files. */ - IInit_collIterate(collator, sourceString, sourceLen, s); + IInit_collIterate(collator, sourceString, sourceLen, s, status); +} + +U_CAPI collIterate * U_EXPORT2 +uprv_new_collIterate(UErrorCode *status) { + if(U_FAILURE(*status)) { + return NULL; + } + collIterate *s = new collIterate; + if(s == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + return s; +} + +U_CAPI void U_EXPORT2 +uprv_delete_collIterate(collIterate *s) { + delete s; +} + +U_CAPI UBool U_EXPORT2 +uprv_collIterateAtEnd(collIterate *s) { + return s == NULL || s->pos == s->endp; } /** @@ -135,8 +162,8 @@ inline void backupState(const collIterate *data, collIterateState *backup) backup->flags = data->flags; backup->origFlags = data->origFlags; backup->pos = data->pos; - backup->bufferaddress = data->writableBuffer; - backup->buffersize = data->writableBufSize; + backup->bufferaddress = data->writableBuffer.getBuffer(); + backup->buffersize = data->writableBuffer.length(); backup->iteratorMove = 0; backup->iteratorIndex = 0; if(data->iterator != NULL) { @@ -177,21 +204,21 @@ inline void loadState(collIterate *data, const collIterateState *backup, data->pos = backup->pos; if ((data->flags & UCOL_ITER_INNORMBUF) && - data->writableBuffer != backup->bufferaddress) { + data->writableBuffer.getBuffer() != backup->bufferaddress) { /* this is when a new buffer has been reallocated and we'll have to calculate the new position. note the new buffer has to contain the contents of the old buffer. 
*/ if (forwards) { - data->pos = data->writableBuffer + + data->pos = data->writableBuffer.getTerminatedBuffer() + (data->pos - backup->bufferaddress); } else { /* backwards direction */ - uint32_t temp = backup->buffersize - - (uint32_t)(data->pos - backup->bufferaddress); - data->pos = data->writableBuffer + (data->writableBufSize - temp); + int32_t temp = backup->buffersize - + (int32_t)(data->pos - backup->bufferaddress); + data->pos = data->writableBuffer.getTerminatedBuffer() + (data->writableBuffer.length() - temp); } } if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { @@ -211,6 +238,50 @@ inline void loadState(collIterate *data, const collIterateState *backup, } } +static UBool +reallocCEs(collIterate *data, int32_t newCapacity) { + uint32_t *oldCEs = data->extendCEs; + if(oldCEs == NULL) { + oldCEs = data->CEs; + } + int32_t length = data->CEpos - oldCEs; + uint32_t *newCEs = (uint32_t *)uprv_malloc(newCapacity * 4); + if(newCEs == NULL) { + return FALSE; + } + uprv_memcpy(newCEs, oldCEs, length * 4); + uprv_free(data->extendCEs); + data->extendCEs = newCEs; + data->extendCEsSize = newCapacity; + data->CEpos = newCEs + length; + return TRUE; +} + +static UBool +increaseCEsCapacity(collIterate *data) { + int32_t oldCapacity; + if(data->extendCEs != NULL) { + oldCapacity = data->extendCEsSize; + } else { + oldCapacity = LENGTHOF(data->CEs); + } + return reallocCEs(data, 2 * oldCapacity); +} + +static UBool +ensureCEsCapacity(collIterate *data, int32_t minCapacity) { + int32_t oldCapacity; + if(data->extendCEs != NULL) { + oldCapacity = data->extendCEsSize; + } else { + oldCapacity = LENGTHOF(data->CEs); + } + if(minCapacity <= oldCapacity) { + return TRUE; + } + oldCapacity *= 2; + return reallocCEs(data, minCapacity > oldCapacity ? 
minCapacity : oldCapacity); +} /* * collIter_eos() @@ -291,20 +362,6 @@ inline UBool collIter_SimpleBos(collIterate *source) { //return (data->pos == data->string) || -/** -* Checks and free writable buffer if it is not the original stack buffer -* in collIterate. This function does not reassign the writable buffer. -* @param data collIterate struct to determine and free the writable buffer -*/ -static -inline void freeHeapWritableBuffer(collIterate *data) -{ - if (data->writableBuffer != data->stackWritableBuffer) { - uprv_free(data->writableBuffer); - } -} - - /****************************************************************************/ /* Following are the open/close functions */ /* */ @@ -1190,44 +1247,20 @@ static void collIterNormalize(collIterate *collationSource) { UErrorCode status = U_ZERO_ERROR; + const UChar *srcP = collationSource->pos - 1; /* Start of chars to normalize */ + const UChar *endP = collationSource->fcdPosition; /* End of region to normalize+1 */ - int32_t normLen; - UChar *srcP = collationSource->pos - 1; /* Start of chars to normalize */ - UChar *endP = collationSource->fcdPosition; /* End of region to normalize+1 */ - - normLen = unorm_decompose(collationSource->writableBuffer, (int32_t)collationSource->writableBufSize, - srcP, (int32_t)(endP - srcP), - FALSE, 0, - &status); - if(status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) { - // reallocate and terminate - if(!u_growBufferFromStatic(collationSource->stackWritableBuffer, - &collationSource->writableBuffer, - (int32_t *)&collationSource->writableBufSize, normLen + 1, - 0) - ) { -#ifdef UCOL_DEBUG - fprintf(stderr, "collIterNormalize(), out of memory\n"); -#endif - return; - } - status = U_ZERO_ERROR; - normLen = unorm_decompose(collationSource->writableBuffer, (int32_t)collationSource->writableBufSize, - srcP, (int32_t)(endP - srcP), - FALSE, 0, - &status); - } + collationSource->nfd->normalize(UnicodeString(FALSE, srcP, (int32_t)(endP - srcP)), + 
collationSource->writableBuffer, + status); if (U_FAILURE(status)) { #ifdef UCOL_DEBUG - fprintf(stderr, "collIterNormalize(), unorm_decompose() failed, status = %s\n", u_errorName(status)); + fprintf(stderr, "collIterNormalize(), NFD failed, status = %s\n", u_errorName(status)); #endif return; } - if(collationSource->writableBuffer != collationSource->stackWritableBuffer) { - collationSource->flags |= UCOL_ITER_ALLOCATED; - } - collationSource->pos = collationSource->writableBuffer; + collationSource->pos = collationSource->writableBuffer.getTerminatedBuffer(); collationSource->origFlags = collationSource->flags; collationSource->flags |= UCOL_ITER_INNORMBUF; collationSource->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR); @@ -1415,7 +1448,7 @@ inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSou // Usually this means the end of the normalized data, // except for one odd case: a null followed by combining chars, // which is the case if we are at the start of the buffer. 
- if (collationSource->pos == collationSource->writableBuffer+1) { + if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) { break; } @@ -1554,10 +1587,8 @@ static void collPrevIterNormalize(collIterate *data) { UErrorCode status = U_ZERO_ERROR; - UChar *pEnd = data->pos; /* End normalize + 1 */ - UChar *pStart; - uint32_t normLen; - UChar *pStartNorm; + const UChar *pEnd = data->pos; /* End normalize + 1 */ + const UChar *pStart; /* Start normalize */ if (data->fcdPosition == NULL) { @@ -1567,30 +1598,19 @@ void collPrevIterNormalize(collIterate *data) pStart = data->fcdPosition + 1; } - normLen = unorm_normalize(pStart, (int32_t)((pEnd - pStart) + 1), UNORM_NFD, 0, - data->writableBuffer, 0, &status); - - if (data->writableBufSize <= normLen) { - freeHeapWritableBuffer(data); - data->writableBuffer = (UChar *)uprv_malloc((normLen + 1) * - sizeof(UChar)); - if(data->writableBuffer == NULL) { // something is wrong here, return - data->writableBufSize = 0; // Reset writableBufSize - return; - } - data->flags |= UCOL_ITER_ALLOCATED; - /* to handle the zero termination */ - data->writableBufSize = normLen + 1; + int32_t normLen = + data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)((pEnd - pStart) + 1)), + data->writableBuffer, + status). + length(); + if(U_FAILURE(status)) { + return; } - status = U_ZERO_ERROR; /* this puts the null termination infront of the normalized string instead of the end */ - pStartNorm = data->writableBuffer + (data->writableBufSize - normLen); - *(pStartNorm - 1) = 0; - unorm_normalize(pStart, (int32_t)((pEnd - pStart) + 1), UNORM_NFD, 0, pStartNorm, - normLen, &status); + data->writableBuffer.insert(0, (UChar)0); if (data->offsetBuffer == NULL) { int32_t len = normLen >= UCOL_EXPAND_CE_BUFFER_SIZE ? 
normLen + 1 : UCOL_EXPAND_CE_BUFFER_SIZE; @@ -1598,7 +1618,7 @@ void collPrevIterNormalize(collIterate *data) data->offsetBufferSize = len; data->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * len); data->offsetStore = data->offsetBuffer; - } else if(data->offsetBufferSize < (int32_t) normLen) { + } else if(data->offsetBufferSize < normLen) { int32_t storeIX = (int32_t)(data->offsetStore - data->offsetBuffer); int32_t *tob = (int32_t *) uprv_realloc(data->offsetBuffer, sizeof(int32_t) * (normLen + 1)); @@ -1669,7 +1689,7 @@ void collPrevIterNormalize(collIterate *data) data->offsetStore = data->offsetBuffer; } - data->pos = data->writableBuffer + data->writableBufSize; + data->pos = data->writableBuffer.getTerminatedBuffer() + 1 + normLen; data->origFlags = data->flags; data->flags |= UCOL_ITER_INNORMBUF; data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); @@ -2019,92 +2039,40 @@ ucol_getPrevCE(const UCollator *coll, collIterate *data, U_CFUNC uint32_t U_EXPORT2 ucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status) { collIterate colIt; - uint32_t order; - IInit_collIterate(coll, &u, 1, &colIt); - order = ucol_IGetNextCE(coll, &colIt, status); - /*UCOL_GETNEXTCE(order, coll, colIt, status);*/ - return order; + IInit_collIterate(coll, &u, 1, &colIt, status); + if(U_FAILURE(*status)) { + return 0; + } + return ucol_IGetNextCE(coll, &colIt, status); } /** * Inserts the argument character into the end of the buffer pushing back the * null terminator. 
* @param data collIterate struct data -* @param pNull pointer to the null termination * @param ch character to be appended * @return the position of the new addition */ static -inline UChar * insertBufferEnd(collIterate *data, UChar *pNull, UChar ch) +inline const UChar * insertBufferEnd(collIterate *data, UChar ch) { - uint32_t size = data->writableBufSize; - UChar *newbuffer; - static const uint32_t INCSIZE = 5; - - if ((data->writableBuffer + size) > (pNull + 1)) { - *pNull = ch; - *(pNull + 1) = 0; - return pNull; - } - - /* - buffer will always be null terminated at the end. - giving extra space since it is likely that more characters will be added. - */ - size += INCSIZE; - newbuffer = (UChar *)uprv_malloc(sizeof(UChar) * size); - if(newbuffer != NULL) { // something wrong, but no status - uprv_memcpy(newbuffer, data->writableBuffer, - data->writableBufSize * sizeof(UChar)); - - freeHeapWritableBuffer(data); - data->writableBufSize = size; - data->writableBuffer = newbuffer; - - newbuffer = newbuffer + data->writableBufSize; - *newbuffer = ch; - *(newbuffer + 1) = 0; - } - return newbuffer; + int32_t oldLength = data->writableBuffer.length(); + return data->writableBuffer.append(ch).getTerminatedBuffer() + oldLength; } /** * Inserts the argument string into the end of the buffer pushing back the * null terminator. 
* @param data collIterate struct data -* @param pNull pointer to the null termination * @param string to be appended * @param length of the string to be appended * @return the position of the new addition */ static -inline UChar * insertBufferEnd(collIterate *data, UChar *pNull, UChar *str, - int32_t length) +inline const UChar * insertBufferEnd(collIterate *data, const UChar *str, int32_t length) { - uint32_t size = (uint32_t)(pNull - data->writableBuffer); - UChar *newbuffer; - - if (data->writableBuffer + data->writableBufSize > pNull + length + 1) { - uprv_memcpy(pNull, str, length * sizeof(UChar)); - *(pNull + length) = 0; - return pNull; - } - - /* - buffer will always be null terminated at the end. - giving extra space since it is likely that more characters will be added. - */ - newbuffer = (UChar *)uprv_malloc(sizeof(UChar) * (size + length + 1)); - if(newbuffer != NULL) { - uprv_memcpy(newbuffer, data->writableBuffer, size * sizeof(UChar)); - uprv_memcpy(newbuffer + size, str, length * sizeof(UChar)); - - freeHeapWritableBuffer(data); - data->writableBufSize = size + length + 1; - data->writableBuffer = newbuffer; - } - - return newbuffer; + int32_t oldLength = data->writableBuffer.length(); + return data->writableBuffer.append(str, length).getTerminatedBuffer() + oldLength; } /** @@ -2119,50 +2087,29 @@ inline UChar * insertBufferEnd(collIterate *data, UChar *pNull, UChar *str, static inline void normalizeNextContraction(collIterate *data) { - UChar *buffer = data->writableBuffer; - uint32_t buffersize = data->writableBufSize; - uint32_t strsize; + int32_t strsize; UErrorCode status = U_ZERO_ERROR; /* because the pointer points to the next character */ - UChar *pStart = data->pos - 1; - UChar *pEnd; - uint32_t normLen; - UChar *pStartNorm; + const UChar *pStart = data->pos - 1; + const UChar *pEnd; if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { - *data->writableBuffer = *(pStart - 1); + data->writableBuffer.setTo(*(pStart - 1)); strsize = 1; } else { - 
strsize = u_strlen(data->writableBuffer); + strsize = data->writableBuffer.length(); } pEnd = data->fcdPosition; - normLen = unorm_normalize(pStart, (int32_t)(pEnd - pStart), UNORM_NFD, 0, buffer, 0, - &status); - - if (buffersize <= normLen + strsize) { - uint32_t size = strsize + normLen + 1; - UChar *temp = (UChar *)uprv_malloc(size * sizeof(UChar)); - if(temp != NULL) { - uprv_memcpy(temp, buffer, sizeof(UChar) * strsize); - freeHeapWritableBuffer(data); - data->writableBuffer = temp; - data->writableBufSize = size; - data->flags |= UCOL_ITER_ALLOCATED; - } else { - return; // Avoid writing past bound of buffer->writableBuffer. - } + data->writableBuffer.append( + data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)), status)); + if(U_FAILURE(status)) { + return; } - status = U_ZERO_ERROR; - pStartNorm = buffer + strsize; - /* null-termination will be added here */ - unorm_normalize(pStart, (int32_t)(pEnd - pStart), UNORM_NFD, 0, pStartNorm, - normLen + 1, &status); - - data->pos = data->writableBuffer + strsize; + data->pos = data->writableBuffer.getTerminatedBuffer() + strsize; data->origFlags = data->flags; data->flags |= UCOL_ITER_INNORMBUF; data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); @@ -2202,7 +2149,6 @@ inline UChar getNextNormalizedChar(collIterate *data) //normalizeIterator(data); //} - UChar *pEndWritableBuffer = NULL; UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF); if ((innormbuf && *data->pos != 0) || (data->fcdPosition != NULL && !innormbuf && @@ -2241,15 +2187,13 @@ inline UChar getNextNormalizedChar(collIterate *data) if (*(data->fcdPosition + 1) == 0 || data->fcdPosition + 1 == data->endp) { /* at the end of the string, dump it into the normalizer */ - data->pos = insertBufferEnd(data, data->pos, - *(data->fcdPosition)) + 1; + data->pos = insertBufferEnd(data, *(data->fcdPosition)) + 1; // Check if data->pos received a null pointer if (data->pos == NULL) { return (UChar)-1; // Return to indicate 
error. } return *(data->fcdPosition ++); } - pEndWritableBuffer = data->pos; data->pos = data->fcdPosition; } else if(data->origFlags & UCOL_USE_ITERATOR) { // if we are here, we're using a normalizing iterator. @@ -2290,8 +2234,7 @@ inline UChar getNextNormalizedChar(collIterate *data) don't input the rest into this, we'll get the wrong position when we reach the end of the writableBuffer */ int32_t length = (int32_t)(data->fcdPosition - data->pos + 1); - data->pos = insertBufferEnd(data, pEndWritableBuffer, - data->pos - 1, length); + data->pos = insertBufferEnd(data, data->pos - 1, length); // Check if data->pos received a null pointer if (data->pos == NULL) { return (UChar)-1; // Return to indicate error. @@ -2305,7 +2248,7 @@ inline UChar getNextNormalizedChar(collIterate *data) no normalization is to be done hence only one character will be appended to the buffer. */ - data->pos = insertBufferEnd(data, pEndWritableBuffer, ch) + 1; + data->pos = insertBufferEnd(data, ch) + 1; // Check if data->pos received a null pointer if (data->pos == NULL) { return (UChar)-1; // Return to indicate error. @@ -2323,11 +2266,9 @@ inline UChar getNextNormalizedChar(collIterate *data) * the correct position * @param source data string source * @param buffer character buffer -* @param tempdb current position in buffer that has been used up */ static -inline void setDiscontiguosAttribute(collIterate *source, UChar *buffer, - UChar *tempdb) +inline void setDiscontiguosAttribute(collIterate *source, const UnicodeString &buffer) { /* okay confusing part here. to ensure that the skipped characters are considered later, we need to place it in the appropriate position in the @@ -2338,30 +2279,19 @@ inline void setDiscontiguosAttribute(collIterate *source, UChar *buffer, to the start of the normalization buffer. why am i doing these copies? well, so that the whole chunk of codes in the getNextCE, ucol_prv_getSpecialCE does not require any changes, which be really painful. 
*/ - uint32_t length = u_strlen(buffer);; if (source->flags & UCOL_ITER_INNORMBUF) { - u_strcpy(tempdb, source->pos); + int32_t replaceLength = source->pos - source->writableBuffer.getBuffer(); + source->writableBuffer.replace(0, replaceLength, buffer); } else { source->fcdPosition = source->pos; source->origFlags = source->flags; source->flags |= UCOL_ITER_INNORMBUF; source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR); + source->writableBuffer = buffer; } - if (length >= source->writableBufSize) { - freeHeapWritableBuffer(source); - source->writableBuffer = - (UChar *)uprv_malloc((length + 1) * sizeof(UChar)); - if(source->writableBuffer == NULL) { - source->writableBufSize = 0; // Reset size - return; - } - source->writableBufSize = length; - } - - u_strcpy(source->writableBuffer, buffer); - source->pos = source->writableBuffer; + source->pos = source->writableBuffer.getTerminatedBuffer(); } /** @@ -2378,20 +2308,16 @@ uint32_t getDiscontiguous(const UCollator *coll, collIterate *source, { /* source->pos currently points to the second combining character after the start character */ - UChar *temppos = source->pos; - UChar buffer[4*UCOL_MAX_BUFFER]; - UChar *tempdb = buffer; + const UChar *temppos = source->pos; + UnicodeString buffer; const UChar *tempconstart = constart; uint8_t tempflags = source->flags; UBool multicontraction = FALSE; - UChar *tempbufferpos = 0; collIterateState discState; backupState(source, &discState); - //*tempdb = *(source->pos - 1); - *tempdb = peekCharacter(source, -1); - tempdb++; + buffer.setTo(peekCharacter(source, -1)); for (;;) { UChar *UCharOffset; UChar schar, @@ -2413,9 +2339,8 @@ uint32_t getDiscontiguous(const UCollator *coll, collIterate *source, //u_getCombiningClass(*(source->pos)) == 0) { //constart = (UChar *)coll->image + getContractOffset(CE); if (multicontraction) { - *tempbufferpos = 0; source->pos = temppos - 1; - setDiscontiguosAttribute(source, buffer, tempdb); + 
setDiscontiguosAttribute(source, buffer); return *(coll->contractionCEs + (tempconstart - coll->contractionIndex)); } @@ -2433,22 +2358,19 @@ uint32_t getDiscontiguous(const UCollator *coll, collIterate *source, if (schar != tchar) { /* not the correct codepoint. we stuff the current codepoint into the discontiguos buffer and try the next character */ - *tempdb = schar; - tempdb ++; + buffer.append(schar); continue; } else { if (u_getCombiningClass(schar) == u_getCombiningClass(peekCharacter(source, -2))) { //u_getCombiningClass(*(source->pos - 2))) { - *tempdb = schar; - tempdb ++; + buffer.append(schar); continue; } result = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex)); } - *tempdb = 0; if (result == UCOL_NOT_FOUND) { break; @@ -2459,10 +2381,9 @@ uint32_t getDiscontiguous(const UCollator *coll, collIterate *source, != UCOL_NOT_FOUND) { multicontraction = TRUE; temppos = source->pos + 1; - tempbufferpos = buffer + u_strlen(buffer); } } else { - setDiscontiguosAttribute(source, buffer, tempdb); + setDiscontiguosAttribute(source, buffer); return result; } } @@ -2503,44 +2424,12 @@ inline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) { * Inserts the argument character into the front of the buffer replacing the * front null terminator. * @param data collation element iterator data -* @param pNull pointer to the null terminator * @param ch character to be appended -* @return positon of added character */ static -inline UChar * insertBufferFront(collIterate *data, UChar *pNull, UChar ch) +inline void insertBufferFront(collIterate *data, UChar ch) { - uint32_t size = data->writableBufSize; - UChar *end; - UChar *newbuffer; - static const uint32_t INCSIZE = 5; - - if (pNull > data->writableBuffer + 1) { - *pNull = ch; - *(pNull - 1) = 0; - return pNull; - } - - /* - buffer will always be null terminated infront. - giving extra space since it is likely that more characters will be added. 
- */ - size += INCSIZE; - newbuffer = (UChar *)uprv_malloc(sizeof(UChar) * size); - if(newbuffer == NULL) { - return NULL; - } - end = newbuffer + INCSIZE; - uprv_memcpy(end, data->writableBuffer, - data->writableBufSize * sizeof(UChar)); - *end = ch; - *(end - 1) = 0; - - freeHeapWritableBuffer(data); - - data->writableBufSize = size; - data->writableBuffer = newbuffer; - return end; + data->pos = data->writableBuffer.setCharAt(0, ch).insert(0, (UChar)0).getTerminatedBuffer() + 2; } /** @@ -2555,65 +2444,42 @@ inline UChar * insertBufferFront(collIterate *data, UChar *pNull, UChar ch) static inline void normalizePrevContraction(collIterate *data, UErrorCode *status) { - uint32_t nulltermsize; - UErrorCode localstatus = U_ZERO_ERROR; - UChar *pEnd = data->pos + 1; /* End normalize + 1 */ - UChar *pStart; - uint32_t normLen; - UChar *pStartNorm; + const UChar *pEnd = data->pos + 1; /* End normalize + 1 */ + const UChar *pStart; + UnicodeString endOfBuffer; if (data->flags & UCOL_ITER_HASLEN) { /* normalization buffer not used yet, we'll pull down the next character into the end of the buffer */ - *(data->writableBuffer + (data->writableBufSize - 1)) = *(data->pos + 1); - nulltermsize = data->writableBufSize - 1; + endOfBuffer.setTo(*pEnd); } else { - nulltermsize = data->writableBufSize; - UChar *temp = data->writableBuffer + (nulltermsize - 1); - while (*(temp --) != 0) { - nulltermsize --; - } + endOfBuffer.setTo(data->writableBuffer, 1); // after the leading NUL } - /* Start normalize */ if (data->fcdPosition == NULL) { pStart = data->string; } else { pStart = data->fcdPosition + 1; } - - normLen = unorm_normalize(pStart, (int32_t)(pEnd - pStart), UNORM_NFD, 0, data->writableBuffer, 0, - &localstatus); - - if (nulltermsize <= normLen) { - uint32_t size = data->writableBufSize - nulltermsize + normLen + 1; - UChar *temp = (UChar *)uprv_malloc(size * sizeof(UChar)); - if (temp == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return; - } - nulltermsize = normLen 
+ 1; - uprv_memcpy(temp + normLen, data->writableBuffer, - sizeof(UChar) * (data->writableBufSize - nulltermsize)); - freeHeapWritableBuffer(data); - data->writableBuffer = temp; - data->writableBufSize = size; + int32_t normLen = + data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)), + data->writableBuffer, + *status). + length(); + if(U_FAILURE(*status)) { + return; } - /* this puts the null termination infront of the normalized string instead of the end */ - pStartNorm = data->writableBuffer + (nulltermsize - normLen); - *(pStartNorm - 1) = 0; - unorm_normalize(pStart, (int32_t)(pEnd - pStart), UNORM_NFD, 0, pStartNorm, normLen, - status); - - data->pos = data->writableBuffer + nulltermsize; + data->pos = + data->writableBuffer.insert(0, (UChar)0).append(endOfBuffer).getTerminatedBuffer() + + 1 + normLen; data->origFlags = data->flags; data->flags |= UCOL_ITER_INNORMBUF; data->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); @@ -2637,9 +2503,8 @@ inline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status) { UChar prevch; UChar ch; - UChar *start; + const UChar *start; UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF); - UChar *pNull = NULL; if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 || (innormbuf && *(data->pos - 1) != 0)) { /* @@ -2672,11 +2537,10 @@ inline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status) */ if (data->fcdPosition == data->string) { /* at the start of the string, just dump it into the normalizer */ - insertBufferFront(data, data->pos - 1, *(data->fcdPosition)); + insertBufferFront(data, *(data->fcdPosition)); data->fcdPosition = NULL; return *(data->pos - 1); } - pNull = data->pos - 1; start = data->fcdPosition; ch = *start; prevch = *(start - 1); @@ -2692,7 +2556,7 @@ inline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status) Need a more complete FCD check and possible normalization. 
normalize substring will be appended to buffer */ - UChar *backuppos = data->pos; + const UChar *backuppos = data->pos; data->pos = start; if (collPrevIterFCD(data)) { normalizePrevContraction(data, status); @@ -2707,7 +2571,7 @@ inline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status) no normalization is to be done hence only one character will be appended to the buffer. */ - insertBufferFront(data, pNull, ch); + insertBufferFront(data, ch); data->fcdPosition --; } @@ -3251,18 +3115,21 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col source->pos = NULL; } // Move Jamos into normalization buffer - source->writableBuffer[0] = (UChar)L; - source->writableBuffer[1] = (UChar)V; + UChar *buffer = source->writableBuffer.getBuffer(4); + int32_t bufferLength; + buffer[0] = (UChar)L; + buffer[1] = (UChar)V; if (T != TBase) { - source->writableBuffer[2] = (UChar)T; - source->writableBuffer[3] = 0; + buffer[2] = (UChar)T; + bufferLength = 3; } else { - source->writableBuffer[2] = 0; + bufferLength = 2; } + source->writableBuffer.releaseBuffer(bufferLength); source->fcdPosition = source->pos; // Indicate where to continue in main input string // after exhausting the writableBuffer - source->pos = source->writableBuffer; + source->pos = source->writableBuffer.getTerminatedBuffer(); source->origFlags = source->flags; source->flags |= UCOL_ITER_INNORMBUF; source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); @@ -3490,7 +3357,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, break; } - case CONTRACTION_TAG: + case CONTRACTION_TAG: { /* to ensure that the backwards and forwards iteration matches, we take the current region of most possible match and pass it through the forward iteration. 
this will ensure that the obstinate problem of @@ -3569,7 +3436,10 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, collIterate temp; int32_t rawOffset; - IInit_collIterate(coll, UCharOffset, noChars, &temp); + IInit_collIterate(coll, UCharOffset, noChars, &temp, status); + if(U_FAILURE(*status)) { + return UCOL_NULLORDER; + } temp.flags &= ~UCOL_ITER_NORM; temp.flags |= source->flags & UCOL_FORCE_HAN_IMPLICIT; @@ -3603,33 +3473,8 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, If reallocation fails, reset pointers and bail out, there's no guarantee of the right character position after this bail*/ - if (source->extendCEs == NULL) { - source->extendCEs = (uint32_t *)uprv_malloc(sizeof(uint32_t) * - (source->extendCEsSize =UCOL_EXPAND_CE_BUFFER_SIZE + UCOL_EXPAND_CE_BUFFER_EXTEND_SIZE)); - if (source->extendCEs == NULL) { - // Handle error later. - CECount = -1; - } else { - source->extendCEs = (uint32_t *)uprv_memcpy(source->extendCEs, source->CEs, UCOL_EXPAND_CE_BUFFER_SIZE * sizeof(uint32_t)); - } - } else { - uint32_t *tempBufCE = (uint32_t *)uprv_realloc(source->extendCEs, - sizeof(uint32_t) * (source->extendCEsSize += UCOL_EXPAND_CE_BUFFER_EXTEND_SIZE)); - if (tempBufCE == NULL) { - // Handle error later. - CECount = -1; - } - else { - source->extendCEs = tempBufCE; - } - } - - if (CECount == -1) { + if (!increaseCEsCapacity(source)) { *status = U_MEMORY_ALLOCATION_ERROR; - source->extendCEsSize = 0; - source->CEpos = source->CEs; - freeHeapWritableBuffer(&temp); - if (strbuffer != buffer) { uprv_free(strbuffer); } @@ -3637,7 +3482,6 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, return (uint32_t)UCOL_NULLORDER; } - source->CEpos = source->extendCEs + CECount; endCEBuffer = source->extendCEs + source->extendCEsSize; } @@ -3654,7 +3498,6 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, // memory error... 
*status = U_MEMORY_ALLOCATION_ERROR; source->CEpos = source->CEs; - freeHeapWritableBuffer(&temp); if (strbuffer != buffer) { uprv_free(strbuffer); @@ -3682,8 +3525,6 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, } } - freeHeapWritableBuffer(&temp); - if (strbuffer != buffer) { uprv_free(strbuffer); } @@ -3701,7 +3542,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, } return *(source->toReturn); - + } case LONG_PRIMARY_TAG: { *(source->CEpos++) = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON; @@ -3810,7 +3651,6 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, We do a check to see if we want to collate digits as numbers; if so we generate a custom collation key. Otherwise we pull out the value stored in the expansion table. */ - //uint32_t size; uint32_t i; /* general counter */ if (source->coll->numericCollation == UCOL_ON){ @@ -4013,8 +3853,11 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F)); // Now transfer the collation key to our collIterate struct. - // The total size for our collation key is endIndx bumped up to the next largest even value divided by two. - //size = ((endIndex+1) & ~1)/2; + // The total size for our collation key is half of endIndex, rounded up. + int32_t size = (endIndex+1)/2; + if(!ensureCEsCapacity(source, size)) { + return UCOL_NULLORDER; + } *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight UCOL_BYTE_COMMON; // Tertiary weight. 
@@ -4101,19 +3944,18 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, Move the Jamos into the normalization buffer */ - UChar *tempbuffer = source->writableBuffer + - (source->writableBufSize - 1); - *(tempbuffer) = 0; + UChar *tempbuffer = source->writableBuffer.getBuffer(5); + int32_t tempbufferLength; + tempbuffer[0] = 0; + tempbuffer[1] = (UChar)L; + tempbuffer[2] = (UChar)V; if (T != TBase) { - *(tempbuffer - 1) = (UChar)T; - *(tempbuffer - 2) = (UChar)V; - *(tempbuffer - 3) = (UChar)L; - *(tempbuffer - 4) = 0; + tempbuffer[3] = (UChar)T; + tempbufferLength = 4; } else { - *(tempbuffer - 1) = (UChar)V; - *(tempbuffer - 2) = (UChar)L; - *(tempbuffer - 3) = 0; + tempbufferLength = 3; } + source->writableBuffer.releaseBuffer(tempbufferLength); /* Indicate where to continue in main input string after exhausting @@ -4125,7 +3967,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, source->fcdPosition = source->pos-1; } - source->pos = tempbuffer; + source->pos = source->writableBuffer.getTerminatedBuffer() + tempbufferLength; source->origFlags = source->flags; source->flags |= UCOL_ITER_INNORMBUF; source->flags &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); @@ -4154,12 +3996,12 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, { UChar32 cp = 0; UChar prevChar; - UChar *prev; + const UChar *prev; if (isAtStartPrevIterate(source)) { /* we are at the start of the string, wrong place to be at */ return 0; } - if (source->pos != source->writableBuffer) { + if (source->pos != source->writableBuffer.getBuffer()) { prev = source->pos - 1; } else { prev = source->fcdPosition; @@ -4789,9 +4631,7 @@ ucol_calcSortKey(const UCollator *coll, uint32_t sortKeySize = 1; /* it is always \0 terminated */ - UChar normBuffer[UCOL_NORMALIZATION_MAX_BUFFER]; - UChar *normSource = normBuffer; - int32_t normSourceLen = UCOL_NORMALIZATION_MAX_BUFFER; + UnicodeString normSource; int32_t len = 
(sourceLength == -1 ? u_strlen(source) : sourceLength); @@ -4827,52 +4667,36 @@ ucol_calcSortKey(const UCollator *coll, sortKeySize += ((compareSec?0:1) + (compareTer?0:1) + (doCase?1:0) + /*(qShifted?1:0)*/(compareQuad?0:1) + (compareIdent?1:0)); /* If we need to normalize, we'll do it all at once at the beginning! */ - UNormalizationMode normMode; + const Normalizer2 *norm2; if(compareIdent) { - normMode = UNORM_NFD; + norm2 = Normalizer2Factory::getNFDInstance(*status); } else if(coll->normalizationMode != UCOL_OFF) { - normMode = UNORM_FCD; + norm2 = Normalizer2Factory::getFCDInstance(*status); } else { - normMode = UNORM_NONE; + norm2 = NULL; } - - if(normMode != UNORM_NONE && UNORM_YES != unorm_quickCheck(source, len, normMode, status)) { - len = unorm_internalNormalize(normSource, normSourceLen, - source, len, - normMode, FALSE, - status); - if(*status == U_BUFFER_OVERFLOW_ERROR) { - normSourceLen = len; - normSource = (UChar *)uprv_malloc(len*U_SIZEOF_UCHAR); - if(normSource == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - *status = U_ZERO_ERROR; - len = unorm_internalNormalize(normSource, normSourceLen, - source, len, - normMode, FALSE, - status); + if(norm2 != NULL) { + normSource.setTo(FALSE, source, len); + int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status); + if(qcYesLength != len) { + UnicodeString unnormalized = normSource.tempSubString(qcYesLength); + normSource.truncate(qcYesLength); + norm2->normalizeSecondAndAppend(normSource, unnormalized, *status); + source = normSource.getBuffer(); + len = normSource.length(); } - - if(U_FAILURE(*status)) { - return 0; - } - source = normSource; } - collIterate s; - IInit_collIterate(coll, (UChar *)source, len, &s); - if(source == normSource) { + IInit_collIterate(coll, source, len, &s, status); + if(U_FAILURE(*status)) { + return 0; + } + if(source == normSource.getBuffer()) { s.flags &= ~UCOL_ITER_NORM; } if(resultLength == 0 || primaries == NULL) { - int32_t keyLen = 
ucol_getSortKeySize(coll, &s, sortKeySize, strength, len); - if(normSource != normBuffer) { - uprv_free(normSource); - } - return keyLen; + return ucol_getSortKeySize(coll, &s, sortKeySize, strength, len); } uint8_t *primarySafeEnd = primaries + resultLength - 1; if(strength > UCOL_PRIMARY) { @@ -5140,8 +4964,13 @@ ucol_calcSortKey(const UCollator *coll, if(primaries > primarySafeEnd) { /* We have stepped over the primary buffer */ if(allocateSKBuffer == FALSE) { /* need to save our butts if we cannot reallocate */ - IInit_collIterate(coll, (UChar *)source, len, &s); - if(source == normSource) { + IInit_collIterate(coll, (UChar *)source, len, &s, status); + if(U_FAILURE(*status)) { + sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; + finished = TRUE; + break; + } + if(source == normSource.getBuffer()) { s.flags &= ~UCOL_ITER_NORM; } sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, strength, len); @@ -5409,10 +5238,6 @@ cleanup: /* To avoid memory leak, free the offset buffer if necessary. */ ucol_freeOffsetBuffer(&s); - if(normSource != normBuffer) { - uprv_free(normSource); - } - return sortKeySize; } @@ -5449,55 +5274,34 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll, uint32_t sortKeySize = 3; /* it is always \0 terminated plus separators for secondary and tertiary */ - UChar normBuffer[UCOL_NORMALIZATION_MAX_BUFFER]; - UChar *normSource = normBuffer; - int32_t normSourceLen = UCOL_NORMALIZATION_MAX_BUFFER; + UnicodeString normSource; int32_t len = sourceLength; /* If we need to normalize, we'll do it all at once at the beginning! 
*/ - if(coll->normalizationMode != UCOL_OFF && UNORM_YES != unorm_quickCheck(source, len, UNORM_FCD, status)) { - len = unorm_internalNormalize(normSource, normSourceLen, - source, len, - UNORM_FCD, FALSE, - status); - if(*status == U_BUFFER_OVERFLOW_ERROR) { - normSourceLen = len; - normSource = (UChar *)uprv_malloc(len*U_SIZEOF_UCHAR); - if(normSource == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - *status = U_ZERO_ERROR; - len = unorm_internalNormalize(normSource, normSourceLen, - source, len, - UNORM_FCD, FALSE, - status); - if(U_FAILURE(*status)) { - /* Should never happen. */ - uprv_free(normSource); - normSource = normBuffer; - } + if(coll->normalizationMode != UCOL_OFF) { + normSource.setTo(len < 0, source, len); + const Normalizer2 *norm2 = Normalizer2Factory::getFCDInstance(*status); + int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status); + if(qcYesLength != normSource.length()) { + UnicodeString unnormalized = normSource.tempSubString(qcYesLength); + normSource.truncate(qcYesLength); + norm2->normalizeSecondAndAppend(normSource, unnormalized, *status); + source = normSource.getBuffer(); + len = normSource.length(); } - - if(U_FAILURE(*status)) { - return 0; - } - source = normSource; } - collIterate s; - IInit_collIterate(coll, (UChar *)source, len, &s); - if(source == normSource) { + IInit_collIterate(coll, (UChar *)source, len, &s, status); + if(U_FAILURE(*status)) { + return 0; + } + if(source == normSource.getBuffer()) { s.flags &= ~UCOL_ITER_NORM; } if(resultLength == 0 || primaries == NULL) { - int32_t t = ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len); - if(normSource != normBuffer) { - uprv_free(normSource); - } - return t; + return ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len); } uint8_t *primarySafeEnd = primaries + resultLength - 2; @@ -5653,8 +5457,13 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll, if(primaries > primarySafeEnd) { /* We have stepped over the 
primary buffer */ if(allocateSKBuffer == FALSE) { /* need to save our butts if we cannot reallocate */ - IInit_collIterate(coll, (UChar *)source, len, &s); - if(source == normSource) { + IInit_collIterate(coll, (UChar *)source, len, &s, status); + if(U_FAILURE(*status)) { + sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY; + finished = TRUE; + break; + } + if(source == normSource.getBuffer()) { s.flags &= ~UCOL_ITER_NORM; } sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len); @@ -5793,10 +5602,6 @@ cleanup: /* To avoid memory leak, free the offset buffer if necessary. */ ucol_freeOffsetBuffer(&s); - if(normSource != normBuffer) { - uprv_free(normSource); - } - return sortKeySize; } @@ -5996,7 +5801,11 @@ ucol_nextSortKeyPart(const UCollator *coll, uint32_t CE = UCOL_NO_MORE_CES; collIterate s; - IInit_collIterate(coll, NULL, -1, &s); + IInit_collIterate(coll, NULL, -1, &s, status); + if(U_FAILURE(*status)) { + UTRACE_EXIT_STATUS(*status); + return 0; + } s.iterator = iter; s.flags |= UCOL_USE_ITERATOR; // This variable tells us whether we have produced some other levels in this iteration @@ -7042,7 +6851,10 @@ ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCod } collIterate s; - IInit_collIterate(coll, varTop, len, &s); + IInit_collIterate(coll, varTop, len, &s, status); + if(U_FAILURE(*status)) { + return 0; + } uint32_t CE = ucol_IGetNextCE(coll, &s, status); @@ -7339,42 +7151,24 @@ ucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status) { /* ucol_checkIdent internal function. Does byte level string compare. */ /* Used by strcoll if strength == identical and strings */ -/* are otherwise equal. Moved out-of-line because this */ -/* is a rare case. */ +/* are otherwise equal. */ /* */ /* Comparison must be done on NFD normalized strings. */ /* FCD is not good enough. 
*/ -/* */ -/* TODO: make an incremental NFD Comparison function, which could */ -/* be of general use */ static UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBool normalize, UErrorCode *status) { - - // TODO: When we have an UChar iterator, we need to access the whole string. One - // useful modification would be a UChar iterator extract API, since reset next next... - // is not optimal. - // TODO: Handle long strings. Do the same in compareUsingSortKeys. - - // When we arrive here, we can have normal strings or UCharIterators. Currently they are both - // of same type, but that doesn't really mean that it will stay that way. - - // The division for the array length may truncate the array size to - // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high - // for all platforms anyway. - UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; - UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; - //UChar sStackBuf[256], tStackBuf[256]; - //int32_t sBufSize = 256, tBufSize = 256; + // When we arrive here, we can have normal strings or UCharIterators. Currently they are both + // of same type, but that doesn't really mean that it will stay that way. int32_t comparison; - int32_t sLen = 0; - UChar *sBuf = NULL; - int32_t tLen = 0; - UChar *tBuf = NULL; - UBool freeSBuf = FALSE, freeTBuf = FALSE; if (sColl->flags & UCOL_USE_ITERATOR) { + // The division for the array length may truncate the array size to + // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high + // for all platforms anyway. 
+ UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; + UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; UNormIterator *sNIt = NULL, *tNIt = NULL; sNIt = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status); tNIt = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status); @@ -7386,90 +7180,30 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo unorm_closeIter(sNIt); unorm_closeIter(tNIt); } else { - sLen = (sColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(sColl->endp - sColl->string) : -1; - sBuf = sColl->string; - tLen = (tColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(tColl->endp - tColl->string) : -1; - tBuf = tColl->string; + int32_t sLen = (sColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(sColl->endp - sColl->string) : -1; + const UChar *sBuf = sColl->string; + int32_t tLen = (tColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(tColl->endp - tColl->string) : -1; + const UChar *tBuf = tColl->string; if (normalize) { *status = U_ZERO_ERROR; - if (unorm_quickCheck(sBuf, sLen, UNORM_NFD, status) != UNORM_YES) { - sLen = unorm_decompose(sColl->writableBuffer, (int32_t)sColl->writableBufSize, - sBuf, sLen, - FALSE, 0, - status); - if(*status == U_BUFFER_OVERFLOW_ERROR) { - if(!u_growBufferFromStatic(sColl->stackWritableBuffer, - &sColl->writableBuffer, - (int32_t *)&sColl->writableBufSize, sLen, - 0) - ) - { - *status = U_MEMORY_ALLOCATION_ERROR; - return UCOL_LESS; /* TODO set *status = U_MEMORY_ALLOCATION_ERROR; */ - } - *status = U_ZERO_ERROR; - sLen = unorm_decompose(sColl->writableBuffer, (int32_t)sColl->writableBufSize, - sBuf, sLen, - FALSE, 0, - status); - } - if(freeSBuf) { - uprv_free(sBuf); - freeSBuf = FALSE; - } - sBuf = sColl->writableBuffer; - if (sBuf != sColl->stackWritableBuffer) { - sColl->flags |= UCOL_ITER_ALLOCATED; - } + // Note: We could use Normalizer::compare() or similar, but for short strings + // which may not be in FCD it might be faster to just NFD them. 
+ // Note: spanQuickCheckYes() + normalizeSecondAndAppend() rather than + // NFD'ing immediately might be faster for long strings, + // but string comparison is usually done on relatively short strings. + sColl->nfd->normalize(UnicodeString((sColl->flags & UCOL_ITER_HASLEN) == 0, sBuf, sLen), + sColl->writableBuffer, + *status); + tColl->nfd->normalize(UnicodeString((tColl->flags & UCOL_ITER_HASLEN) == 0, tBuf, tLen), + tColl->writableBuffer, + *status); + if(U_FAILURE(*status)) { + return UCOL_LESS; } - - *status = U_ZERO_ERROR; - if (unorm_quickCheck(tBuf, tLen, UNORM_NFD, status) != UNORM_YES) { - tLen = unorm_decompose(tColl->writableBuffer, (int32_t)tColl->writableBufSize, - tBuf, tLen, - FALSE, 0, - status); - if(*status == U_BUFFER_OVERFLOW_ERROR) { - if(!u_growBufferFromStatic(tColl->stackWritableBuffer, - &tColl->writableBuffer, - (int32_t *)&tColl->writableBufSize, tLen, - 0) - ) - { - *status = U_MEMORY_ALLOCATION_ERROR; - return UCOL_LESS; /* TODO set *status = U_MEMORY_ALLOCATION_ERROR; */ - } - *status = U_ZERO_ERROR; - tLen = unorm_decompose(tColl->writableBuffer, (int32_t)tColl->writableBufSize, - tBuf, tLen, - FALSE, 0, - status); - } - if(freeTBuf) { - uprv_free(tBuf); - freeTBuf = FALSE; - } - tBuf = tColl->writableBuffer; - if (tBuf != tColl->stackWritableBuffer) { - tColl->flags |= UCOL_ITER_ALLOCATED; - } - } - } - - if (sLen == -1 && tLen == -1) { - comparison = u_strcmpCodePointOrder(sBuf, tBuf); + comparison = sColl->writableBuffer.compareCodePointOrder(tColl->writableBuffer); } else { - if (sLen == -1) { - sLen = u_strlen(sBuf); - } - if (tLen == -1) { - tLen = u_strlen(tBuf); - } - comparison = u_memcmpCodePointOrder(sBuf, tBuf, uprv_min(sLen, tLen)); - if (comparison == 0) { - comparison = sLen - tLen; - } + comparison = u_strCompare(sBuf, sLen, tBuf, tLen, TRUE); } } @@ -7545,29 +7279,27 @@ static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl, uint8_t *targetKeyP = targetKey; int32_t sourceKeyLen = UCOL_MAX_BUFFER, 
targetKeyLen = UCOL_MAX_BUFFER; const UCollator *coll = sColl->coll; - UChar *source = NULL; - UChar *target = NULL; + const UChar *source = NULL; + const UChar *target = NULL; int32_t result = UCOL_EQUAL; - UChar sStackBuf[256], tStackBuf[256]; - int32_t sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(int32_t)(sColl->endp-sColl->string):-1; - int32_t targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(int32_t)(tColl->endp-tColl->string):-1; + UnicodeString sourceString, targetString; + int32_t sourceLength; + int32_t targetLength; - // TODO: Handle long strings. Do the same in ucol_checkIdent. if(sColl->flags & UCOL_USE_ITERATOR) { sColl->iterator->move(sColl->iterator, 0, UITER_START); tColl->iterator->move(tColl->iterator, 0, UITER_START); - source = sStackBuf; - UChar *sBufp = source; - target = tStackBuf; - UChar *tBufp = target; - while(sColl->iterator->hasNext(sColl->iterator)) { - *sBufp++ = (UChar)sColl->iterator->next(sColl->iterator); + UChar32 c; + while((c=sColl->iterator->next(sColl->iterator))>=0) { + sourceString.append((UChar)c); } - while(tColl->iterator->hasNext(tColl->iterator)) { - *tBufp++ = (UChar)tColl->iterator->next(tColl->iterator); + while((c=tColl->iterator->next(tColl->iterator))>=0) { + targetString.append((UChar)c); } - sourceLength = (int32_t)(sBufp - source); - targetLength = (int32_t)(tBufp - target); + source = sourceString.getBuffer(); + sourceLength = sourceString.length(); + target = targetString.getBuffer(); + targetLength = targetString.length(); } else { // no iterators sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(int32_t)(sColl->endp-sColl->string):-1; targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(int32_t)(tColl->endp-tColl->string):-1; @@ -7618,14 +7350,8 @@ cleanup_and_do_compare: } -static inline UCollationResult -ucol_strcollRegular( collIterate *sColl, collIterate *tColl, -// const UCollator *coll, -// const UChar *source, -// int32_t sourceLength, -// const UChar *target, -// int32_t targetLength, - UErrorCode 
*status) +static UCollationResult +ucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status) { U_ALIGN_CODE(16); @@ -8088,9 +7814,6 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl, commonReturn: if ((sColl->flags | tColl->flags) & UCOL_ITER_ALLOCATED) { - freeHeapWritableBuffer(sColl); - freeHeapWritableBuffer(tColl); - if (sCEs.buf != sCEs.localArray ) { uprv_free(sCEs.buf); } @@ -8102,6 +7825,20 @@ commonReturn: return result; } +static UCollationResult +ucol_strcollRegular(const UCollator *coll, + const UChar *source, int32_t sourceLength, + const UChar *target, int32_t targetLength, + UErrorCode *status) { + collIterate sColl, tColl; + // Preparing the context objects for iterating over strings + IInit_collIterate(coll, source, sourceLength, &sColl, status); + IInit_collIterate(coll, target, targetLength, &tColl, status); + if(U_FAILURE(*status)) { + return UCOL_LESS; + } + return ucol_strcollRegular(&sColl, &tColl, status); +} static inline uint32_t ucol_getLatinOneContraction(const UCollator *coll, int32_t strength, @@ -8162,7 +7899,7 @@ ucol_getLatinOneContraction(const UCollator *coll, int32_t strength, * doesn't understand something, it will go to the regular * strcoll. 
*/ -static inline UCollationResult +static UCollationResult ucol_strcollUseLatin1( const UCollator *coll, const UChar *source, int32_t sLen, @@ -8203,8 +7940,7 @@ ucol_strcollUseLatin1( const UCollator *coll, } if(sChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32) //fprintf(stderr, "R"); - goto returnRegular; - //return ucol_strcollRegular(coll, source, sLen, target, tLen, status); + return ucol_strcollRegular(coll, source, sLen, target, tLen, status); } sOrder = elements[sChar]; if(sOrder >= UCOL_NOT_FOUND) { // if we got a special @@ -8218,8 +7954,7 @@ ucol_strcollUseLatin1( const UCollator *coll, } if(sOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) { //fprintf(stderr, "S"); - goto returnRegular; - //return ucol_strcollRegular(coll, source, sLen, target, tLen, status); + return ucol_strcollRegular(coll, source, sLen, target, tLen, status); } } } @@ -8251,8 +7986,7 @@ ucol_strcollUseLatin1( const UCollator *coll, } if(tChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32) //fprintf(stderr, "R"); - goto returnRegular; - //return ucol_strcollRegular(coll, source, sLen, target, tLen, status); + return ucol_strcollRegular(coll, source, sLen, target, tLen, status); } tOrder = elements[tChar]; if(tOrder >= UCOL_NOT_FOUND) { @@ -8263,8 +7997,7 @@ ucol_strcollUseLatin1( const UCollator *coll, } if(tOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) { //fprintf(stderr, "S"); - goto returnRegular; - //return ucol_strcollRegular(coll, source, sLen, target, tLen, status); + return ucol_strcollRegular(coll, source, sLen, target, tLen, status); } } } @@ -8359,8 +8092,7 @@ endOfPrimLoop: } else { // French if(haveContractions) { // if we have contractions, we have to bail out // since we don't really know how to handle them here - goto returnRegular; - //return ucol_strcollRegular(coll, source, sLen, target, tLen, status); + return ucol_strcollRegular(coll, source, sLen, target, 
tLen, status); } // For French, we go backwards sIndex = sLen; tIndex = tLen; @@ -8462,14 +8194,6 @@ endOfSecLoop: } } return UCOL_EQUAL; - -returnRegular: - // Preparing the context objects for iterating over strings - collIterate sColl, tColl; - - IInit_collIterate(coll, source, sLen, &sColl); - IInit_collIterate(coll, target, tLen, &tColl); - return ucol_strcollRegular(&sColl, &tColl, status); } @@ -8500,6 +8224,12 @@ ucol_strcollIter( const UCollator *coll, // Preparing the context objects for iterating over strings collIterate sColl, tColl; + IInit_collIterate(coll, NULL, -1, &sColl, status); + IInit_collIterate(coll, NULL, -1, &tColl, status); + if(U_FAILURE(*status)) { + UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) + return UCOL_EQUAL; + } // The division for the array length may truncate the array size to // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high // for all platforms anyway. @@ -8507,10 +8237,8 @@ ucol_strcollIter( const UCollator *coll, UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; UNormIterator *sNormIter = NULL, *tNormIter = NULL; - IInit_collIterate(coll, NULL, -1, &sColl); sColl.iterator = sIter; sColl.flags |= UCOL_USE_ITERATOR; - IInit_collIterate(coll, NULL, -1, &tColl); tColl.flags |= UCOL_USE_ITERATOR; tColl.iterator = tIter; @@ -8692,11 +8420,7 @@ ucol_strcoll( const UCollator *coll, UErrorCode status = U_ZERO_ERROR; UCollationResult returnVal; if(!coll->latinOneUse || (sourceLength > 0 && *source&0xff00) || (targetLength > 0 && *target&0xff00)) { - collIterate sColl, tColl; - // Preparing the context objects for iterating over strings - IInit_collIterate(coll, source, sourceLength, &sColl); - IInit_collIterate(coll, target, targetLength, &tColl); - returnVal = ucol_strcollRegular(&sColl, &tColl, &status); + returnVal = ucol_strcollRegular(coll, source, sourceLength, target, targetLength, &status); } else { returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, 
targetLength, &status); } diff --git a/icu4c/source/i18n/ucol_bld.cpp b/icu4c/source/i18n/ucol_bld.cpp index fcaa9e31b48..ac0a45e795a 100644 --- a/icu4c/source/i18n/ucol_bld.cpp +++ b/icu4c/source/i18n/ucol_bld.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2008, International Business Machines +* Copyright (C) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -25,12 +25,12 @@ #include "unicode/udata.h" #include "unicode/uchar.h" #include "unicode/uniset.h" +#include "normalizer2impl.h" #include "ucol_bld.h" #include "ucol_elm.h" #include "ucol_cnt.h" #include "ucln_in.h" #include "umutex.h" -#include "unormimp.h" #include "cmemory.h" static const InverseUCATableHeader* _staticInvUCA = NULL; @@ -626,7 +626,7 @@ uint8_t ucol_uprv_getCaseBits(const UCollator *UCA, const UChar *src, uint32_t l nLen = unorm_normalize(src, len, UNORM_NFKD, 0, n, 128, status); if(U_SUCCESS(*status)) { for(i = 0; i < nLen; i++) { - uprv_init_collIterate(UCA, &n[i], 1, &s); + uprv_init_collIterate(UCA, &n[i], 1, &s, status); order = ucol_getNextCE(UCA, &s, status); if(isContinuation(order)) { *status = U_INTERNAL_PROGRAM_ERROR; @@ -878,7 +878,7 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL /* then pick CEs out until there is no more and stuff them into expansion */ collIterate s; uint32_t order = 0; - uprv_init_collIterate(src->UCA, expOffset + src->source, 1, &s); + uprv_init_collIterate(src->UCA, expOffset + src->source, 1, &s, status); for(;;) { order = ucol_getNextCE(src->UCA, &s, status); @@ -1045,7 +1045,7 @@ ucol_uprv_bld_copyRangeFromUCA(UColTokenParser *src, tempUCATable *t, // it doesn't make any difference whether we have to go to the UCA // or not. 
{ - uprv_init_collIterate(src->UCA, el.uchars, el.cSize, &colIt); + uprv_init_collIterate(src->UCA, el.uchars, el.cSize, &colIt, status); while(CE != UCOL_NO_MORE_CES) { CE = ucol_getNextCE(src->UCA, &colIt, status); if(CE != UCOL_NO_MORE_CES) { diff --git a/icu4c/source/i18n/ucol_elm.cpp b/icu4c/source/i18n/ucol_elm.cpp index b01d1ff1b59..f28d03dc939 100644 --- a/icu4c/source/i18n/ucol_elm.cpp +++ b/icu4c/source/i18n/ucol_elm.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2009, International Business Machines +* Copyright (C) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -31,6 +31,7 @@ #include "unicode/unistr.h" #include "unicode/ucoleitr.h" #include "unicode/normlzr.h" +#include "normalizer2impl.h" #include "ucol_elm.h" #include "ucol_tok.h" #include "ucol_cnt.h" @@ -1602,6 +1603,7 @@ struct enumStruct { tempUCATable *t; UCollator *tempColl; UCollationElements* colEl; + const Normalizer2Impl *nfcImpl; int32_t noOfClosures; UErrorCode *status; }; @@ -1615,7 +1617,8 @@ _enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 li UCollator *tempColl = ((enumStruct *)context)->tempColl; UCollationElements* colEl = ((enumStruct *)context)->colEl; UCAElements el; - UChar decomp[256] = { 0 }; + UChar decompBuffer[4]; + const UChar *decomp; int32_t noOfDec = 0; UChar32 u32 = 0; @@ -1623,13 +1626,14 @@ _enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 li uint32_t len = 0; for(u32 = start; u32 < limit; u32++) { - noOfDec = unorm_getDecomposition(u32, FALSE, decomp, 256); + decomp = ((enumStruct *)context)->nfcImpl-> + getDecomposition(u32, decompBuffer, noOfDec); //if((noOfDec = unorm_normalize(comp, len, UNORM_NFD, 0, decomp, 256, status)) > 1 //|| (noOfDec == 1 && *decomp != (UChar)u32)) - if(noOfDec > 0) // if we're 
positive, that means there is no decomposition + if(decomp != NULL) { len = 0; - UTF_APPEND_CHAR_UNSAFE(comp, len, u32); + U16_APPEND_UNSAFE(comp, len, u32); if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) != UCOL_EQUAL) { #ifdef UCOL_DEBUG fprintf(stderr, "Closure: %08X -> ", u32); @@ -1640,7 +1644,7 @@ _enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 li fprintf(stderr, "\n"); #endif ((enumStruct *)context)->noOfClosures++; - el.cPoints = decomp; + el.cPoints = (UChar *)decomp; el.cSize = noOfDec; el.noOfCEs = 0; el.prefix = el.prefixChars; @@ -1938,7 +1942,7 @@ uprv_uca_canonicalClosure(tempUCATable *t, UChar baseChar, firstCM; UChar32 fcdHighStart; const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); - + context.nfcImpl=Normalizer2Factory::getNFCImpl(*status); if(U_FAILURE(*status)) { return 0; } diff --git a/icu4c/source/i18n/ucol_imp.h b/icu4c/source/i18n/ucol_imp.h index e26fab2c905..491bcdf7f54 100644 --- a/icu4c/source/i18n/ucol_imp.h +++ b/icu4c/source/i18n/ucol_imp.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1998-2009, International Business Machines +* Copyright (C) 1998-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -41,6 +41,10 @@ #if !UCONFIG_NO_COLLATION +#ifdef XP_CPLUSPLUS +#include "unicode/normalizer2.h" +#include "unicode/unistr.h" +#endif #include "unicode/ucol.h" #include "utrie.h" #include "cmemory.h" @@ -264,12 +268,14 @@ minimum number for special Jamo #define NFC_ZERO_CC_BLOCK_LIMIT_ 0x300 -typedef struct collIterate { - UChar *string; /* Original string */ +#ifdef XP_CPLUSPLUS + +typedef struct collIterate : public UMemory { + const UChar *string; /* Original string */ /* UChar *start; Pointer to the start of the source string. 
Either points to string or to writableBuffer */ - UChar *endp; /* string end ptr. Is undefined for null terminated strings */ - UChar *pos; /* This is position in the string. Can be to original or writable buf */ + const UChar *endp; /* string end ptr. Is undefined for null terminated strings */ + const UChar *pos; /* This is position in the string. Can be to original or writable buf */ uint32_t *toReturn; /* This is the CE from CEs buffer that should be returned */ uint32_t *CEpos; /* This is the position to which we have stored processed CEs */ @@ -279,16 +285,15 @@ typedef struct collIterate { int32_t offsetRepeatCount; /* Repeat stored offset if non-zero */ int32_t offsetRepeatValue; /* offset value to repeat */ - UChar *writableBuffer; - uint32_t writableBufSize; - UChar *fcdPosition; /* Position in the original string to continue FCD check from. */ + UnicodeString writableBuffer; + const UChar *fcdPosition; /* Position in the original string to continue FCD check from. */ const UCollator *coll; + const Normalizer2 *nfd; uint8_t flags; uint8_t origFlags; uint32_t *extendCEs; /* This is use if CEs is not big enough */ int32_t extendCEsSize; /* Holds the size of the dynamic CEs buffer */ uint32_t CEs[UCOL_EXPAND_CE_BUFFER_SIZE]; /* This is where we store CEs */ - UChar stackWritableBuffer[UCOL_WRITABLE_BUFFER_SIZE]; /* A writable buffer. */ int32_t *offsetBuffer; /* A dynamic buffer to hold offsets */ int32_t offsetBufferSize; /* The size of the offset buffer */ @@ -297,6 +302,12 @@ typedef struct collIterate { /*int32_t iteratorIndex;*/ } collIterate; +#else + +typedef struct collIterate collIterate; + +#endif + #define paddedsize(something) ((something)+((((something)%4)!=0)?(4-(something)%4):0)) #define headersize (paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet))) @@ -305,19 +316,34 @@ struct used internally in getSpecial*CE. data similar to collIterate. */ struct collIterateState { - UChar *pos; /* This is position in the string. 
Can be to original or writable buf */ - UChar *returnPos; - UChar *fcdPosition; /* Position in the original string to continue FCD check from. */ - UChar *bufferaddress; /* address of the normalization buffer */ - uint32_t buffersize; + const UChar *pos; /* This is position in the string. Can be to original or writable buf */ + const UChar *returnPos; + const UChar *fcdPosition; /* Position in the original string to continue FCD check from. */ + const UChar *bufferaddress; /* address of the normalization buffer */ + int32_t buffersize; uint8_t flags; uint8_t origFlags; uint32_t iteratorIndex; int32_t iteratorMove; }; -U_CAPI void U_EXPORT2 -uprv_init_collIterate(const UCollator *collator, const UChar *sourceString, int32_t sourceLen, collIterate *s); +U_CAPI void U_EXPORT2 +uprv_init_collIterate(const UCollator *collator, + const UChar *sourceString, int32_t sourceLen, + collIterate *s, UErrorCode *status); + +/* Internal functions for C test code. */ +U_CAPI collIterate * U_EXPORT2 +uprv_new_collIterate(UErrorCode *status); + +U_CAPI void U_EXPORT2 +uprv_delete_collIterate(collIterate *s); + +/* @return s->pos == s->endp */ +U_CAPI UBool U_EXPORT2 +uprv_collIterateAtEnd(collIterate *s); + +#ifdef XP_CPLUSPLUS U_NAMESPACE_BEGIN @@ -326,7 +352,7 @@ typedef struct UCollationPCE UCollationPCE; U_NAMESPACE_END -struct UCollationElements +struct UCollationElements : public UMemory { /** * Struct wrapper for source data @@ -351,6 +377,8 @@ struct UCollationElements U_CAPI void U_EXPORT2 uprv_init_pce(const struct UCollationElements *elems); +#endif + #define UCOL_LEVELTERMINATOR 1 /* mask off anything but primary order */ @@ -1066,7 +1094,6 @@ static inline UBool ucol_unsafeCP(UChar c, const UCollator *coll) { /* The offsetBuffer in collIterate might need to be freed to avoid memory leaks. 
*/ void ucol_freeOffsetBuffer(collIterate *s); - #endif /* #if !UCONFIG_NO_COLLATION */ #endif diff --git a/icu4c/source/i18n/ucol_tok.cpp b/icu4c/source/i18n/ucol_tok.cpp index 11e1bd6e6f6..193a422028a 100644 --- a/icu4c/source/i18n/ucol_tok.cpp +++ b/icu4c/source/i18n/ucol_tok.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2009, International Business Machines +* Copyright (C) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -1108,7 +1108,7 @@ reset may be null. handled. */ -static UColToken *ucol_tok_initAReset(UColTokenParser *src, UChar *expand, uint32_t *expandNext, +static UColToken *ucol_tok_initAReset(UColTokenParser *src, const UChar *expand, uint32_t *expandNext, UParseError *parseError, UErrorCode *status) { if(src->resultLen == src->listCapacity) { @@ -1200,9 +1200,12 @@ inline UColToken *getVirginBefore(UColTokenParser *src, UColToken *sourceToken, uint32_t CE, SecondCE; uint32_t invPos; if(sourceToken != NULL) { - uprv_init_collIterate(src->UCA, src->source+((sourceToken->source)&0xFFFFFF), 1, &s); + uprv_init_collIterate(src->UCA, src->source+((sourceToken->source)&0xFFFFFF), 1, &s, status); } else { - uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset /**charsOffset*/, 1, &s); + uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset /**charsOffset*/, 1, &s, status); + } + if(U_FAILURE(*status)) { + return NULL; } baseCE = ucol_getNextCE(src->UCA, &s, status) & 0xFFFFFF3F; @@ -1684,10 +1687,10 @@ uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, UParseError *parseErro collIterate s; uint32_t CE = UCOL_NOT_FOUND, SecondCE = UCOL_NOT_FOUND; - uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset, src->parsedToken.charsLen, &s); + uprv_init_collIterate(src->UCA, 
src->source+src->parsedToken.charsOffset, src->parsedToken.charsLen, &s, status); CE = ucol_getNextCE(src->UCA, &s, status); - UChar *expand = s.pos; + const UChar *expand = s.pos; SecondCE = ucol_getNextCE(src->UCA, &s, status); ListList[src->resultLen].baseCE = CE & 0xFFFFFF3F; diff --git a/icu4c/source/i18n/ucol_wgt.c b/icu4c/source/i18n/ucol_wgt.cpp similarity index 99% rename from icu4c/source/i18n/ucol_wgt.c rename to icu4c/source/i18n/ucol_wgt.cpp index 9e212792bdc..56531d2156f 100644 --- a/icu4c/source/i18n/ucol_wgt.c +++ b/icu4c/source/i18n/ucol_wgt.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2009, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* diff --git a/icu4c/source/i18n/ucoleitr.cpp b/icu4c/source/i18n/ucoleitr.cpp index f70892e56e2..ed3ebb2b6cf 100644 --- a/icu4c/source/i18n/ucoleitr.cpp +++ b/icu4c/source/i18n/ucoleitr.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 2001-2009, International Business Machines +* Copyright (C) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. 
****************************************************************************** * @@ -313,19 +313,16 @@ ucol_openElements(const UCollator *coll, int32_t textLength, UErrorCode *status) { - UCollationElements *result; - if (U_FAILURE(*status)) { return NULL; } - result = (UCollationElements *)uprv_malloc(sizeof(UCollationElements)); - /* test for NULL */ + UCollationElements *result = new UCollationElements; if (result == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; return NULL; } - + result->reset_ = TRUE; result->isWritable = FALSE; result->pce = NULL; @@ -333,7 +330,7 @@ ucol_openElements(const UCollator *coll, if (text == NULL) { textLength = 0; } - uprv_init_collIterate(coll, text, textLength, &result->iteratordata_); + uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status); return result; } @@ -345,30 +342,24 @@ ucol_closeElements(UCollationElements *elems) if (elems != NULL) { collIterate *ci = &elems->iteratordata_; - if (ci != NULL) { - if (ci->writableBuffer != ci->stackWritableBuffer) { - uprv_free(ci->writableBuffer); - } + if (ci->extendCEs) { + uprv_free(ci->extendCEs); + } - if (ci->extendCEs) { - uprv_free(ci->extendCEs); - } - - if (ci->offsetBuffer) { - uprv_free(ci->offsetBuffer); - } + if (ci->offsetBuffer) { + uprv_free(ci->offsetBuffer); } if (elems->isWritable && elems->iteratordata_.string != NULL) { - uprv_free(elems->iteratordata_.string); + uprv_free((UChar *)elems->iteratordata_.string); } if (elems->pce != NULL) { delete elems->pce; } - uprv_free(elems); + delete elems; } } @@ -387,11 +378,7 @@ ucol_reset(UCollationElements *elems) ci->flags |= UCOL_ITER_NORM; } - if (ci->stackWritableBuffer != ci->writableBuffer) { - uprv_free(ci->writableBuffer); - ci->writableBuffer = ci->stackWritableBuffer; - ci->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE; - } + ci->writableBuffer.remove(); ci->fcdPosition = NULL; //ci->offsetReturn = ci->offsetStore = NULL; @@ -686,7 +673,7 @@ ucol_setText( UCollationElements *elems, if 
(elems->isWritable && elems->iteratordata_.string != NULL) { - uprv_free(elems->iteratordata_.string); + uprv_free((UChar *)elems->iteratordata_.string); } if (text == NULL) { @@ -698,7 +685,7 @@ ucol_setText( UCollationElements *elems, /* free offset buffer to avoid memory leak before initializing. */ ucol_freeOffsetBuffer(&(elems->iteratordata_)); uprv_init_collIterate(elems->iteratordata_.coll, text, textLength, - &elems->iteratordata_); + &elems->iteratordata_, status); elems->reset_ = TRUE; } diff --git a/icu4c/source/i18n/usearch.cpp b/icu4c/source/i18n/usearch.cpp index c323599fc10..60d404937f3 100644 --- a/icu4c/source/i18n/usearch.cpp +++ b/icu4c/source/i18n/usearch.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2001-2009 IBM and others. All rights reserved. +* Copyright (C) 2001-2010 IBM and others. All rights reserved. ********************************************************************** * Date Name Description * 07/02/2001 synwee Creation. 
@@ -14,12 +14,14 @@ #include "unicode/usearch.h" #include "unicode/ustring.h" #include "unicode/uchar.h" +#include "normalizer2impl.h" #include "unormimp.h" #include "ucol_imp.h" #include "usrchimp.h" #include "cmemory.h" #include "ucln_in.h" #include "uassert.h" +#include "ustr_imp.h" U_NAMESPACE_USE @@ -311,7 +313,11 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch, else { uprv_init_collIterate(strsrch->collator, pattern->text, pattern->textLength, - &coleiter->iteratordata_); + &coleiter->iteratordata_, + status); + } + if(U_FAILURE(*status)) { + return 0; } if (pattern->CE != cetable && pattern->CE) { @@ -381,7 +387,11 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch, } else { uprv_init_collIterate(strsrch->collator, pattern->text, pattern->textLength, - &coleiter->iteratordata_); + &coleiter->iteratordata_, + status); + } + if(U_FAILURE(*status)) { + return 0; } if (pattern->PCE != pcetable && pattern->PCE != NULL) { @@ -1074,54 +1084,20 @@ static inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start, int32_t end) { - UChar t2[32], p2[32]; - int32_t length = end - start; if (strsrch->strength != UCOL_IDENTICAL) { return TRUE; } - UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR; - int32_t decomplength = unorm_decompose(t2, LENGTHOF(t2), - strsrch->search->text + start, length, - FALSE, 0, &status); - // use separate status2 in case of buffer overflow - if (decomplength != unorm_decompose(p2, LENGTHOF(p2), - strsrch->pattern.text, - strsrch->pattern.textLength, - FALSE, 0, &status2)) { - return FALSE; // lengths are different - } - - // compare contents - UChar *text, *pattern; - if(U_SUCCESS(status)) { - text = t2; - pattern = p2; - } else if(status==U_BUFFER_OVERFLOW_ERROR) { - status = U_ZERO_ERROR; - // allocate one buffer for both decompositions - text = (UChar *)uprv_malloc(decomplength * 2 * U_SIZEOF_UCHAR); - // Check for allocation failure. 
- if (text == NULL) { - return FALSE; - } - pattern = text + decomplength; - unorm_decompose(text, decomplength, strsrch->search->text + start, - length, FALSE, 0, &status); - unorm_decompose(pattern, decomplength, strsrch->pattern.text, - strsrch->pattern.textLength, FALSE, 0, &status); - } else { - // NFD failed, make sure that u_memcmp() does not overrun t2 & p2 - // and that we don't uprv_free() an undefined text pointer - text = pattern = t2; - decomplength = 0; - } - UBool result = (UBool)(u_memcmp(pattern, text, decomplength) == 0); - if(text != t2) { - uprv_free(text); - } + // Note: We could use Normalizer::compare() or similar, but for short strings + // which may not be in FCD it might be faster to just NFD them. + UErrorCode status = U_ZERO_ERROR; + UnicodeString t2, p2; + strsrch->nfd->normalize( + UnicodeString(FALSE, strsrch->search->text + start, end - start), t2, status); + strsrch->nfd->normalize( + UnicodeString(FALSE, strsrch->pattern.text, strsrch->pattern.textLength), p2, status); // return FALSE if NFD failed - return U_SUCCESS(status) && result; + return U_SUCCESS(status) && t2 == p2; } #if BOYER_MOORE @@ -2724,6 +2700,8 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator( UCOL_SHIFTED; result->variableTop = ucol_getVariableTop(collator, status); + result->nfd = Normalizer2Factory::getNFDInstance(*status); + if (U_FAILURE(*status)) { uprv_free(result); return NULL; @@ -3040,7 +3018,8 @@ U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch, ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_)); uprv_init_collIterate(collator, strsrch->search->text, strsrch->search->textLength, - &(strsrch->textIter->iteratordata_)); + &(strsrch->textIter->iteratordata_), + status); strsrch->utilIter->iteratordata_.coll = collator; } } @@ -3432,7 +3411,8 @@ U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch) ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_)); uprv_init_collIterate(strsrch->collator, 
strsrch->search->text, strsrch->search->textLength, - &(strsrch->textIter->iteratordata_)); + &(strsrch->textIter->iteratordata_), + &status); strsrch->search->matchedLength = 0; strsrch->search->matchedIndex = USEARCH_DONE; strsrch->search->isOverlap = FALSE; diff --git a/icu4c/source/i18n/usrchimp.h b/icu4c/source/i18n/usrchimp.h index ae95e94147c..964b76ee0b3 100644 --- a/icu4c/source/i18n/usrchimp.h +++ b/icu4c/source/i18n/usrchimp.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2001-2008 IBM and others. All rights reserved. +* Copyright (C) 2001-2010 IBM and others. All rights reserved. ********************************************************************** * Date Name Description * 08/13/2001 synwee Creation. @@ -13,6 +13,7 @@ #if !UCONFIG_NO_COLLATION +#include "unicode/normalizer2.h" #include "unicode/ucol.h" #include "unicode/ucoleitr.h" #include "unicode/ubrk.h" @@ -59,6 +60,7 @@ struct UStringSearch { struct USearch *search; struct UPattern pattern; const UCollator *collator; + const Normalizer2 *nfd; // positions within the collation element iterator is used to determine // if we are at the start of the text. UCollationElements *textIter; diff --git a/icu4c/source/test/cintltst/callcoll.c b/icu4c/source/test/cintltst/callcoll.c index 36068b521af..12b9a395adc 100644 --- a/icu4c/source/test/cintltst/callcoll.c +++ b/icu4c/source/test/cintltst/callcoll.c @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2009, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. 
********************************************************************/ /******************************************************************************* @@ -52,7 +52,6 @@ #include "calldata.h" #include "cstring.h" #include "cmemory.h" -#include "ucol_imp.h" /* set to 1 to test offsets in backAndForth() */ #define TEST_OFFSETS 0 @@ -148,13 +147,14 @@ static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sor int32_t strength = UCOL_PRIMARY; uint32_t res_size = 0; UBool doneCase = FALSE; + UErrorCode errorCode = U_ZERO_ERROR; char *current = buffer; const uint8_t *currentSk = sortkey; uprv_strcpy(current, "["); - while(strength <= UCOL_QUATERNARY && strength <= coll->strength) { + while(strength <= UCOL_QUATERNARY && strength <= ucol_getStrength(coll)) { if(strength > UCOL_PRIMARY) { uprv_strcat(current, " . "); } @@ -162,20 +162,20 @@ static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sor uprv_appendByteToHexString(current, *currentSk++); uprv_strcat(current, " "); } - if(coll->caseLevel == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) { + if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) { doneCase = TRUE; - } else if(coll->caseLevel == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) { + } else if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) { strength ++; } if (*currentSk) { uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */ } - if(strength == UCOL_QUATERNARY && coll->alternateHandling == UCOL_NON_IGNORABLE) { + if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) { break; } } - if(coll->strength == UCOL_IDENTICAL) { + if(ucol_getStrength(coll) == UCOL_IDENTICAL) { uprv_strcat(current, " . 
"); while(*currentSk != 0) { uprv_appendByteToHexString(current, *currentSk++); @@ -214,7 +214,7 @@ UBool hasCollationElements(const char *locName) { UErrorCode status = U_ZERO_ERROR; - UResourceBundle *loc = ures_open(U_ICUDATA_COLL, locName, &status);; + UResourceBundle *loc = ures_open(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll", locName, &status);; if(U_SUCCESS(status)) { status = U_ZERO_ERROR; diff --git a/icu4c/source/test/cintltst/citertst.c b/icu4c/source/test/cintltst/citertst.c index 250ee7d2881..eb159e63b9b 100644 --- a/icu4c/source/test/cintltst/citertst.c +++ b/icu4c/source/test/cintltst/citertst.c @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2009, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /******************************************************************************** @@ -994,11 +994,6 @@ static void TestSmallBuffer() free(orders); ucol_reset(testiter); - /* ensures that the writable buffer was cleared */ - if (testiter->iteratordata_.writableBuffer != - testiter->iteratordata_.stackWritableBuffer) { - log_err("Error Writable buffer in collation element iterator not reset\n"); - } /* ensures closing of elements done properly to clear writable buffer */ ucol_next(testiter, &status); diff --git a/icu4c/source/test/cintltst/cmsccoll.c b/icu4c/source/test/cintltst/cmsccoll.c index 1dc28ff0b60..f9364e39578 100644 --- a/icu4c/source/test/cintltst/cmsccoll.c +++ b/icu4c/source/test/cintltst/cmsccoll.c @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2001-2009, International Business Machines Corporation and + * Copyright (c) 2001-2010, International Business Machines Corporation and * others. All Rights Reserved. 
********************************************************************/ /******************************************************************************* @@ -1093,7 +1093,7 @@ static void testCEs(UCollator *coll, UErrorCode *status) { UColOptionSet opts; UParseError parseError; UChar *rulesCopy = NULL; - collIterate c; + collIterate *c = uprv_new_collIterate(status); UCAConstants *consts = NULL; uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */ UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT; @@ -1102,12 +1102,15 @@ static void testCEs(UCollator *coll, UErrorCode *status) { if (U_FAILURE(*status)) { log_err("Could not open root collator %s\n", u_errorName(*status)); + uprv_delete_collIterate(c); return; } colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status); if (U_FAILURE(*status)) { log_err("Could not get collator name: %s\n", u_errorName(*status)); + ucol_close(UCA); + uprv_delete_collIterate(c); return; } @@ -1183,15 +1186,15 @@ static void testCEs(UCollator *coll, UErrorCode *status) { varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); - uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c); + uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, c, status); - currCE = ucol_getNextCE(coll, &c, status); + currCE = ucol_getNextCE(coll, c, status); if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) { log_verbose("Thai prevowel detected. 
Will pick next CE\n"); - currCE = ucol_getNextCE(coll, &c, status); + currCE = ucol_getNextCE(coll, c, status); } - currContCE = ucol_getNextCE(coll, &c, status); + currContCE = ucol_getNextCE(coll, c, status); if(!isContinuation(currContCE)) { currContCE = 0; } @@ -1272,6 +1275,7 @@ static void testCEs(UCollator *coll, UErrorCode *status) { free(rulesCopy); } ucol_close(UCA); + uprv_delete_collIterate(c); } #if 0 @@ -2992,10 +2996,11 @@ static void TestVariableTopSetting(void) { uint32_t CE = UCOL_NO_MORE_CES; /* before we start screaming, let's see if there is a problem with the rules */ - collIterate s; - uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s); + UErrorCode collIterateStatus = U_ZERO_ERROR; + collIterate *s = uprv_new_collIterate(&collIterateStatus); + uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, s, &collIterateStatus); - CE = ucol_getNextCE(coll, &s, &status); + CE = ucol_getNextCE(coll, s, &status); for(i = 0; i < oldChLen; i++) { j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i)); @@ -3004,7 +3009,7 @@ static void TestVariableTopSetting(void) { if(status == U_PRIMARY_TOO_LONG_ERROR) { log_verbose("= Expected failure for %s =", buffer); } else { - if(s.pos == s.endp) { + if(uprv_collIterateAtEnd(s)) { log_err("Unexpected failure setting variable top at offset %d. Error %s. 
Codepoints: %s\n", oldChOffset, u_errorName(status), buffer); } else { @@ -3012,6 +3017,7 @@ static void TestVariableTopSetting(void) { buffer); } } + uprv_delete_collIterate(s); } varTop2 = ucol_getVariableTop(coll, &status); if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) { diff --git a/icu4c/source/test/cintltst/cnormtst.c b/icu4c/source/test/cintltst/cnormtst.c index 2d6d4d53025..bc61654d5f4 100644 --- a/icu4c/source/test/cintltst/cnormtst.c +++ b/icu4c/source/test/cintltst/cnormtst.c @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2009, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /******************************************************************************** @@ -1334,17 +1334,6 @@ TestNextPrevious() { log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode)); return; } - - /* missing pErrorCode */ - buffer[0]=5; - iter.index=1; - length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - UNORM_NFD, 0, TRUE, NULL, - NULL); - if(iter.index!=1 || buffer[0]!=5) { - log_err("error unorm_next(pErrorCode==NULL) %s\n", u_errorName(errorCode)); - return; - } } static void diff --git a/icu4c/source/test/cintltst/cucdtst.c b/icu4c/source/test/cintltst/cucdtst.c index dff63adfc83..643d8c51dd2 100644 --- a/icu4c/source/test/cintltst/cucdtst.c +++ b/icu4c/source/test/cintltst/cucdtst.c @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2009, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. 
********************************************************************/ /******************************************************************************* @@ -22,6 +22,7 @@ #include "unicode/putil.h" #include "unicode/ustring.h" #include "unicode/uloc.h" +#include "unicode/unorm2.h" #include "cintltst.h" #include "putilimp.h" @@ -2942,6 +2943,7 @@ TestConsistency() { UErrorCode errorCode; #if !UCONFIG_NO_NORMALIZATION + const UNormalizer2 *norm2; USerializedSet sset; #endif UChar32 start, end; @@ -3070,15 +3072,26 @@ TestConsistency() { * In general, the set for the middle such character should be a subset * of the set for the first. */ + errorCode=U_ZERO_ERROR; + norm2=unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, &errorCode); + if(U_FAILURE(errorCode)) { + log_data_err("unorm2_getInstance(NFD) failed - %s\n", u_errorName(errorCode)); + return; + } + set1=uset_open(1, 0); set2=uset_open(1, 0); if (unorm_getCanonStartSet(0x49, &sset)) { + UChar source[1]; + _setAddSerialized(set1, &sset); /* enumerate all characters that are plausible to be latin letters */ for(start=0xa0; start<0x2000; ++start) { - if(unorm_getDecomposition(start, FALSE, buffer16, LENGTHOF(buffer16))>1 && buffer16[0]==0x49) { + source[0]=(UChar)start; + length=unorm2_normalize(norm2, source, 1, buffer16, LENGTHOF(buffer16), &errorCode); + if(length>1 && buffer16[0]==0x49) { uset_add(set2, start); } } diff --git a/icu4c/source/test/hdrtst/cxxfiles.txt b/icu4c/source/test/hdrtst/cxxfiles.txt index b0c0bf4b011..a58d18bf20d 100644 --- a/icu4c/source/test/hdrtst/cxxfiles.txt +++ b/icu4c/source/test/hdrtst/cxxfiles.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2001-2009 International Business Machines +# Copyright (c) 2001-2010 International Business Machines # Corporation and others. All Rights Reserved.
# common & i18n bidi.h @@ -38,6 +38,7 @@ measfmt.h measunit.h measure.h msgfmt.h +normalizer2.h normlzr.h numfmt.h numsys.h diff --git a/icu4c/source/test/intltest/tstnorm.cpp b/icu4c/source/test/intltest/tstnorm.cpp index fa1771db0eb..3c9b3713aef 100644 --- a/icu4c/source/test/intltest/tstnorm.cpp +++ b/icu4c/source/test/intltest/tstnorm.cpp @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2009, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ @@ -776,38 +776,10 @@ void BasicNormalizerTest::TestConcatenate() { }, /* ### TODO: add more interesting cases */ { - "D", - "\\u0340\\u0341\\u0343\\u0344\\u0374\\u037E\\u0387\\u0958" - "\\u0959\\u095A\\u095B\\u095C\\u095D\\u095E\\u095F\\u09DC" - "\\u09DD\\u09DF\\u0A33\\u0A36\\u0A59\\u0A5A\\u0A5B\\u0A5E" - "\\u0B5C\\u0B5D\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69" - "\\u0F73\\u0F75\\u0F76\\u0F78\\u0F81\\u0F93\\u0F9D\\u0FA2" - "\\u0FA7\\u0FAC\\u0FB9\\u1F71\\u1F73\\u1F75\\u1F77\\u1F79" - "\\u1F7B\\u1F7D\\u1FBB\\u1FBE\\u1FC9\\u1FCB\\u1FD3\\u1FDB", - - "\\u1FE3\\u1FEB\\u1FEE\\u1FEF\\u1FF9\\u1FFB\\u1FFD\\u2000" - "\\u2001\\u2126\\u212A\\u212B\\u2329\\u232A\\uF900\\uFA10" - "\\uFA12\\uFA15\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A\\uFB1F" - "\\uFB2A\\uFB2B\\uFB2C\\uFB2D\\uFB2E\\uFB2F\\uFB30\\uFB31" - "\\uFB32\\uFB33\\uFB34\\uFB35\\uFB36\\uFB38\\uFB39\\uFB3A" - "\\uFB3B\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46" - "\\uFB47\\uFB48\\uFB49\\uFB4A\\uFB4B\\uFB4C\\uFB4D\\uFB4E", - - "\\u0340\\u0341\\u0343\\u0344\\u0374\\u037E\\u0387\\u0958" - "\\u0959\\u095A\\u095B\\u095C\\u095D\\u095E\\u095F\\u09DC" - "\\u09DD\\u09DF\\u0A33\\u0A36\\u0A59\\u0A5A\\u0A5B\\u0A5E" - "\\u0B5C\\u0B5D\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69" - "\\u0F73\\u0F75\\u0F76\\u0F78\\u0F81\\u0F93\\u0F9D\\u0FA2" - 
"\\u0FA7\\u0FAC\\u0FB9\\u1F71\\u1F73\\u1F75\\u1F77\\u1F79" - "\\u1F7B\\u1F7D\\u1FBB\\u1FBE\\u1FC9\\u1FCB\\u1FD3\\u0399" - "\\u0301\\u03C5\\u0308\\u0301\\u1FEB\\u1FEE\\u1FEF\\u1FF9" - "\\u1FFB\\u1FFD\\u2000\\u2001\\u2126\\u212A\\u212B\\u2329" - "\\u232A\\uF900\\uFA10\\uFA12\\uFA15\\uFA20\\uFA22\\uFA25" - "\\uFA26\\uFA2A\\uFB1F\\uFB2A\\uFB2B\\uFB2C\\uFB2D\\uFB2E" - "\\uFB2F\\uFB30\\uFB31\\uFB32\\uFB33\\uFB34\\uFB35\\uFB36" - "\\uFB38\\uFB39\\uFB3A\\uFB3B\\uFB3C\\uFB3E\\uFB40\\uFB41" - "\\uFB43\\uFB44\\uFB46\\uFB47\\uFB48\\uFB49\\uFB4A\\uFB4B" - "\\uFB4C\\uFB4D\\uFB4E" + "D", + "\\u03B1\\u0345", + "\\u0C4D\\U000110BA\\U0001D169", + "\\u03B1\\U0001D169\\U000110BA\\u0C4D\\u0345" } }; @@ -1743,72 +1715,23 @@ U_CDECL_END void BasicNormalizerTest::TestSkippable() { - UnicodeSet starts, diff, skipSets[UNORM_MODE_COUNT], expectSets[UNORM_MODE_COUNT]; - UnicodeSet *startsPtr = &starts; + UnicodeSet diff, skipSets[UNORM_MODE_COUNT], expectSets[UNORM_MODE_COUNT]; UnicodeString s, pattern; - UChar32 start, limit, rangeStart, rangeEnd; - int32_t i, range, count; - - UErrorCode status; /* build NF*Skippable sets from runtime data */ - status=U_ZERO_ERROR; - USetAdder sa = { - (USet *)startsPtr, - _set_add, - _set_addRange, - _set_addString, - NULL, // don't need remove() - NULL - }; - unorm_addPropertyStarts(&sa, &status); - if(U_FAILURE(status)) { - errln("unable to load normalization data for unorm_addPropertyStarts(() - %s\n", u_errorName(status)); + IcuTestErrorCode errorCode(*this, "TestSkippable"); + skipSets[UNORM_NFD].applyPattern(UNICODE_STRING_SIMPLE("[:NFD_Inert:]"), errorCode); + skipSets[UNORM_NFKD].applyPattern(UNICODE_STRING_SIMPLE("[:NFKD_Inert:]"), errorCode); + skipSets[UNORM_NFC].applyPattern(UNICODE_STRING_SIMPLE("[:NFC_Inert:]"), errorCode); + skipSets[UNORM_NFKC].applyPattern(UNICODE_STRING_SIMPLE("[:NFKC_Inert:]"), errorCode); + if(errorCode.logIfFailureAndReset("UnicodeSet(NF..._Inert) failed")) { return; } - count=starts.getRangeCount(); - - start=limit=0; 
- rangeStart=rangeEnd=0; - range=0; - for(;;) { - if(startrangeEnd) { - if(rangeputi(UnicodeString(ignorePropNames[i], -1, US_INV), 1, errorCode); + } } UnicodeTest::~UnicodeTest() @@ -76,7 +93,7 @@ getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) { } static const char *const -derivedCorePropsNames[]={ +derivedPropsNames[]={ "Math", "Alphabetic", "Lowercase", @@ -86,6 +103,7 @@ derivedCorePropsNames[]={ "XID_Start", "XID_Continue", "Default_Ignorable_Code_Point", + "Full_Composition_Exclusion", "Grapheme_Extend", "Grapheme_Link", /* Unicode 5 moves this property here from PropList.txt */ "Grapheme_Base", @@ -95,11 +113,12 @@ derivedCorePropsNames[]={ "Changes_When_Uppercased", "Changes_When_Titlecased", "Changes_When_Casefolded", - "Changes_When_Casemapped" + "Changes_When_Casemapped", + "Changes_When_NFKC_Casefolded" }; static const UProperty -derivedCorePropsIndex[]={ +derivedPropsIndex[]={ UCHAR_MATH, UCHAR_ALPHABETIC, UCHAR_LOWERCASE, @@ -109,6 +128,7 @@ derivedCorePropsIndex[]={ UCHAR_XID_START, UCHAR_XID_CONTINUE, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, + UCHAR_FULL_COMPOSITION_EXCLUSION, UCHAR_GRAPHEME_EXTEND, UCHAR_GRAPHEME_LINK, UCHAR_GRAPHEME_BASE, @@ -118,17 +138,18 @@ derivedCorePropsIndex[]={ UCHAR_CHANGES_WHEN_UPPERCASED, UCHAR_CHANGES_WHEN_TITLECASED, UCHAR_CHANGES_WHEN_CASEFOLDED, - UCHAR_CHANGES_WHEN_CASEMAPPED + UCHAR_CHANGES_WHEN_CASEMAPPED, + UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED }; -static int32_t numErrors[LENGTHOF(derivedCorePropsIndex)]={ 0 }; +static int32_t numErrors[LENGTHOF(derivedPropsIndex)]={ 0 }; enum { MAX_ERRORS=50 }; U_CFUNC void U_CALLCONV -derivedCorePropsLineFn(void *context, - char *fields[][2], int32_t /* fieldCount */, - UErrorCode *pErrorCode) +derivedPropsLineFn(void *context, + char *fields[][2], int32_t /* fieldCount */, + UErrorCode *pErrorCode) { UnicodeTest *me=(UnicodeTest *)context; uint32_t start, end; @@ -136,35 +157,35 @@ derivedCorePropsLineFn(void *context, 
u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode); if(U_FAILURE(*pErrorCode)) { - me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt field 0 at %s\n", fields[0][0]); + me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt or DerivedNormalizationProps.txt field 0 at %s\n", fields[0][0]); return; } /* parse derived binary property name, ignore unknown names */ - i=getTokenIndex(derivedCorePropsNames, LENGTHOF(derivedCorePropsNames), fields[1][0]); + i=getTokenIndex(derivedPropsNames, LENGTHOF(derivedPropsNames), fields[1][0]); if(i<0) { UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0])); propName.trim(); if(me->unknownPropertyNames->find(propName)==NULL) { UErrorCode errorCode=U_ZERO_ERROR; me->unknownPropertyNames->puti(propName, 1, errorCode); - me->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt\n", fields[1][0]); + me->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]); } return; } - me->derivedCoreProps[i].add(start, end); + me->derivedProps[i].add(start, end); } void UnicodeTest::TestAdditionalProperties() { - // test DerivedCoreProperties.txt - if(LENGTHOF(derivedCoreProps)=MAX_ERRORS) { errln("Too many errors, moving to the next test"); break; @@ -224,19 +254,19 @@ void UnicodeTest::TestAdditionalProperties() { } // invert all properties - for(i=0; i=MAX_ERRORS) { errln("Too many errors, moving to the next test"); break; diff --git a/icu4c/source/test/intltest/ucdtest.h b/icu4c/source/test/intltest/ucdtest.h index 63ba382989a..97cfab285cd 100644 --- a/icu4c/source/test/intltest/ucdtest.h +++ b/icu4c/source/test/intltest/ucdtest.h @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2009, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * 
others. All Rights Reserved. ********************************************************************/ @@ -13,9 +13,9 @@ U_CFUNC void U_CALLCONV unicodeDataLineFn(void *context, UErrorCode *pErrorCode); U_CFUNC void U_CALLCONV -derivedCorePropsLineFn(void *context, - char *fields[][2], int32_t fieldCount, - UErrorCode *pErrorCode); +derivedPropsLineFn(void *context, + char *fields[][2], int32_t fieldCount, + UErrorCode *pErrorCode); U_NAMESPACE_BEGIN @@ -43,11 +43,11 @@ private: UErrorCode *pErrorCode); friend void U_CALLCONV - derivedCorePropsLineFn(void *context, + derivedPropsLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode); - UnicodeSet derivedCoreProps[30]; + UnicodeSet derivedProps[30]; U_NAMESPACE_QUALIFIER Hashtable *unknownPropertyNames; }; diff --git a/icu4c/source/test/intltest/usettest.cpp b/icu4c/source/test/intltest/usettest.cpp index 12f5a22bdb4..f1db7c80274 100644 --- a/icu4c/source/test/intltest/usettest.cpp +++ b/icu4c/source/test/intltest/usettest.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************** -* Copyright (C) 1999-2009 International Business Machines Corporation and +* Copyright (C) 1999-2010 International Business Machines Corporation and * others. All Rights Reserved. 
******************************************************************************** * Date Name Description @@ -709,6 +709,37 @@ void UnicodeSetTest::TestAPI() { TEST_ASSERT((void *)constUSet == (void *)constSet); const UnicodeSet *constSetx = UnicodeSet::fromUSet(constUSet); TEST_ASSERT((void *)constSetx == (void *)constUSet); + + // span(UnicodeString) and spanBack(UnicodeString) convenience methods + UnicodeString longString=UNICODE_STRING_SIMPLE("aaaaaaaaaabbbbbbbbbbcccccccccc"); + UnicodeSet ac(0x61, 0x63); + ac.remove(0x62).freeze(); + if( ac.span(longString, -5, USET_SPAN_CONTAINED)!=10 || + ac.span(longString, 0, USET_SPAN_CONTAINED)!=10 || + ac.span(longString, 5, USET_SPAN_CONTAINED)!=10 || + ac.span(longString, 10, USET_SPAN_CONTAINED)!=10 || + ac.span(longString, 15, USET_SPAN_CONTAINED)!=15 || + ac.span(longString, 20, USET_SPAN_CONTAINED)!=30 || + ac.span(longString, 25, USET_SPAN_CONTAINED)!=30 || + ac.span(longString, 30, USET_SPAN_CONTAINED)!=30 || + ac.span(longString, 35, USET_SPAN_CONTAINED)!=30 || + ac.span(longString, INT32_MAX, USET_SPAN_CONTAINED)!=30 + ) { + errln("UnicodeSet.span(UnicodeString, ...) returns incorrect end indexes"); + } + if( ac.spanBack(longString, -5, USET_SPAN_CONTAINED)!=0 || + ac.spanBack(longString, 0, USET_SPAN_CONTAINED)!=0 || + ac.spanBack(longString, 5, USET_SPAN_CONTAINED)!=0 || + ac.spanBack(longString, 10, USET_SPAN_CONTAINED)!=0 || + ac.spanBack(longString, 15, USET_SPAN_CONTAINED)!=15 || + ac.spanBack(longString, 20, USET_SPAN_CONTAINED)!=20 || + ac.spanBack(longString, 25, USET_SPAN_CONTAINED)!=20 || + ac.spanBack(longString, 30, USET_SPAN_CONTAINED)!=20 || + ac.spanBack(longString, 35, USET_SPAN_CONTAINED)!=20 || + ac.spanBack(longString, INT32_MAX, USET_SPAN_CONTAINED)!=20 + ) { + errln("UnicodeSet.spanBack(UnicodeString, ...) 
returns incorrect start indexes"); + } } void UnicodeSetTest::TestIteration() { diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp index 66c7b11651d..689052ab81c 100644 --- a/icu4c/source/test/intltest/ustrtest.cpp +++ b/icu4c/source/test/intltest/ustrtest.cpp @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2009, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ @@ -62,6 +62,7 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* & case 17: name = "TestNameSpace"; if (exec) TestNameSpace(); break; case 18: name = "TestUTF32"; if (exec) TestUTF32(); break; case 19: name = "TestUTF8"; if (exec) TestUTF8(); break; + case 20: name = "TestReadOnlyAlias"; if (exec) TestReadOnlyAlias(); break; default: name = ""; break; //needed to end loop } @@ -1120,6 +1121,30 @@ UnicodeStringTest::TestMiscellaneous() if(test1.hasMetaData() || UnicodeString().hasMetaData()) { errln("UnicodeString::hasMetaData() returns TRUE"); } + + // test getTerminatedBuffer() on a truncated, shared, heap-allocated string + test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789."); + test1.truncate(36); // ensure length() - - - - - - - - + diff --git a/icu4c/source/tools/gennorm/gennorm.c b/icu4c/source/tools/gennorm/gennorm.c index 69a12e30db6..96874510770 100644 --- a/icu4c/source/tools/gennorm/gennorm.c +++ b/icu4c/source/tools/gennorm/gennorm.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2005, International Business Machines +* Copyright (C) 2001-2010, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -61,7 +61,8 @@ enum { UNICODE_VERSION, ICUDATADIR, CSOURCE, - STORE_FLAGS + STORE_FLAGS, + WRITE_NORM2 }; static UOption options[]={ @@ -74,7 +75,8 @@ static UOption options[]={ UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG), UOPTION_ICUDATADIR, UOPTION_DEF("csource", 'C', UOPT_NO_ARG), - UOPTION_DEF("prune", 'p', UOPT_REQUIRES_ARG) + UOPTION_DEF("prune", 'p', UOPT_REQUIRES_ARG), + UOPTION_DEF("write-norm2", '\1', UOPT_NO_ARG) }; extern int @@ -140,6 +142,8 @@ main(int argc, char* argv[]) { "\t to the source file basenames before opening;\n" "\t 'gennorm new' will read UnicodeData-new.txt etc.\n", u_getDataDirectory()); + fprintf(stderr, + "\t--write-norm2 write nfc.txt and nfkc.txt files for gennorm2\n"); return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } @@ -243,7 +247,7 @@ main(int argc, char* argv[]) { /* prepare the filename beginning with the source dir */ uprv_strcpy(filename, srcDir); basename=filename+uprv_strlen(filename); - if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { + if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR && *(basename-1)!=U_FILE_ALT_SEP_CHAR) { *basename++=U_FILE_SEP_CHAR; } @@ -286,6 +290,10 @@ main(int argc, char* argv[]) { /* process parsed data */ if(U_SUCCESS(errorCode)) { + if(options[WRITE_NORM2].doesOccur) { + writeNorm2(destDir); + } + processData(); /* write the properties data file */ diff --git a/icu4c/source/tools/gennorm/gennorm.h b/icu4c/source/tools/gennorm/gennorm.h index ea33d957098..aacadfd01bb 100644 --- a/icu4c/source/tools/gennorm/gennorm.h +++ b/icu4c/source/tools/gennorm/gennorm.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2005, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -83,6 +83,9 @@ setCompositionExclusion(uint32_t code); U_CFUNC void setFNC(uint32_t c, UChar *s); +extern void +writeNorm2(const char *dataDir); + extern void processData(void); diff --git a/icu4c/source/tools/gennorm/gennorm.vcproj b/icu4c/source/tools/gennorm/gennorm.vcproj index a57114ba54d..5d99c54ac35 100644 --- a/icu4c/source/tools/gennorm/gennorm.vcproj +++ b/icu4c/source/tools/gennorm/gennorm.vcproj @@ -389,33 +389,18 @@ - - - - - - - + - - - - + - + diff --git a/icu4c/source/tools/gennorm/store.c b/icu4c/source/tools/gennorm/store.c index c48403b3083..690239be067 100644 --- a/icu4c/source/tools/gennorm/store.c +++ b/icu4c/source/tools/gennorm/store.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2009, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -106,11 +106,13 @@ static UToolMemory *normMem, *utf32Mem, *extraMem, *combiningTriplesMem; static Norm *norms; +#if GENNORM_OBSOLETE /* * set a flag for each code point that was seen in decompositions - * avoid to decompose ones that have not been used before */ static uint32_t haveSeenFlags[256]; +#endif /* set of characters with NFD_QC=No (i.e., those with canonical decompositions) */ static USet *nfdQCNoSet; @@ -192,8 +194,10 @@ init() { /* allocate UTF-32 string memory */ utf32Mem=utm_open("gennorm UTF-32 strings", 30000, 30000, 4); +#if GENNORM_OBSOLETE /* reset all "have seen" flags */ uprv_memset(haveSeenFlags, 0, sizeof(haveSeenFlags)); +#endif /* open an empty set */ nfdQCNoSet=uset_open(1, 0); @@ -289,6 +293,7 @@ enumTrie(EnumTrieFn *fn, void *context) { return count; } +#if GENNORM_OBSOLETE static void setHaveSeenString(const uint32_t *s, int32_t length) { uint32_t c; @@ -301,6 +306,7 @@ setHaveSeenString(const uint32_t *s, int32_t length) { } #define HAVE_SEEN(c) (haveSeenFlags[((c)>>5)&0xff]&(1<<((c)&0x1f))) +#endif /* handle combining data ---------------------------------------------------- */ @@ -410,6 +416,7 @@ findCombiningCP(uint32_t code, UBool isLead) { return 0xffff; } +#if GENNORM_OBSOLETE static void addCombiningTriple(uint32_t lead, uint32_t trail, uint32_t combined) { CombiningTriple *triple; @@ -434,6 +441,7 @@ addCombiningTriple(uint32_t lead, uint32_t trail, uint32_t combined) { triple->trail=trail; triple->combined=combined; } +#endif static int compareTriples(const void *l, const void *r) { @@ -560,6 +568,7 @@ processCombining() { /* processing incoming normalization data ----------------------------------- */ +#if GENNORM_OBSOLETE /* * Decompose Hangul syllables algorithmically and fill a pseudo-Norm struct. * c must be a Hangul syllable code point. 
@@ -594,6 +603,7 @@ getHangulDecomposition(uint32_t c, Norm *pHangulNorm, uint32_t hangulBuffer[3]) pHangulNorm->lenNFKD=length; } } +#endif /* * decompose the one decomposition further, may generate two decompositions @@ -601,6 +611,20 @@ getHangulDecomposition(uint32_t c, Norm *pHangulNorm, uint32_t hangulBuffer[3]) */ static void decompStoreNewNF(uint32_t code, Norm *norm) { +#if !GENNORM_OBSOLETE + /* always allocate the original string */ + uint32_t *s32; + uint8_t length; + if((length=norm->lenNFD)!=0) { + s32=utm_allocN(utf32Mem, norm->lenNFD); + uprv_memcpy(s32, norm->nfd, norm->lenNFD*4); + norm->nfd=s32; + } else if((length=norm->lenNFKD)!=0) { + s32=utm_allocN(utf32Mem, norm->lenNFKD); + uprv_memcpy(s32, norm->nfkd, norm->lenNFKD*4); + norm->nfkd=s32; + } +#else uint32_t nfd[40], nfkd[40], hangulBuffer[3]; Norm hangulNorm; @@ -695,8 +719,10 @@ decompStoreNewNF(uint32_t code, Norm *norm) { norm->nfkd=s32; setHaveSeenString(nfkd, lenNFKD); } +#endif } +#if GENNORM_OBSOLETE typedef struct DecompSingle { uint32_t c; Norm *norm; @@ -800,6 +826,7 @@ decompWithSingleFn(void *context, uint32_t code, Norm *norm) { norm->nfkd=s32; } } +#endif /* * process the data for one code point listed in UnicodeData; @@ -807,7 +834,9 @@ decompWithSingleFn(void *context, uint32_t code, Norm *norm) { */ extern void storeNorm(uint32_t code, Norm *norm) { +#if GENNORM_OBSOLETE DecompSingle decompSingle; +#endif Norm *p; if(DO_NOT_STORE(UGENNORM_STORE_COMPAT)) { @@ -826,6 +855,7 @@ storeNorm(uint32_t code, Norm *norm) { /* decompose this one decomposition further, may generate two decompositions */ decompStoreNewNF(code, norm); +#if GENNORM_OBSOLETE /* has this code point been used in previous decompositions? 
*/ if(HAVE_SEEN(code)) { /* use this decomposition to decompose other decompositions further */ @@ -833,6 +863,7 @@ storeNorm(uint32_t code, Norm *norm) { decompSingle.norm=norm; enumTrie(decompWithSingleFn, &decompSingle); } +#endif } /* store the data */ @@ -1815,6 +1846,144 @@ getFoldingAuxOffset(uint32_t data) { #endif /* #if !UCONFIG_NO_NORMALIZATION */ +static void +writeAllCC(FILE *f) { + uint32_t i; + UChar32 prevCode, code; + uint8_t prevCC, cc; + UBool isInBlockZero; + + fprintf(f, "# Canonical_Combining_Class (ccc) values\n"); + prevCode=0; + prevCC=0; + for(code=0; code<=0x110000;) { + if(code==0x110000) { + cc=0; + } else { + i=utrie_get32(normTrie, code, &isInBlockZero); + if(i==0 || isInBlockZero) { + cc=0; + } else { + cc=norms[i].udataCC; + } + } + if(prevCC!=cc) { + if(prevCC!=0) { + uint32_t lastCode=code-1; + if(prevCode==lastCode) { + fprintf(f, "%04lX:%d\n", (long)lastCode, prevCC); + } else { + fprintf(f, "%04lX..%04lX:%d\n", + (long)prevCode, (long)lastCode, prevCC); + } + } + prevCode=code; + prevCC=cc; + } + if(isInBlockZero) { + code+=UTRIE_DATA_BLOCK_LENGTH; + } else { + ++code; + } + } +} + +static UBool +hasMapping(uint32_t code) { + Norm *norm=norms+utrie_get32(normTrie, code, NULL); + return norm->lenNFD!=0 || norm->lenNFKD!=0; +} + +static UBool +hasOneWayMapping(uint32_t code, UBool withCompat) { + for(;;) { + Norm *norm=norms+utrie_get32(normTrie, code, NULL); + uint8_t length; + if((length=norm->lenNFD)!=0) { + /* + * The canonical decomposition is a one-way mapping if + * - it does not map to exactly two code points + * - the code has ccc!=0 + * - the code has the Composition_Exclusion property + * - its starter has a one-way mapping (loop for this) + * - its non-starter decomposes + */ + if( length!=2 || + norm->udataCC!=0 || + norm->combiningFlags&0x80 || + hasMapping(norm->nfd[1]) + ) { + return TRUE; + } + code=norm->nfd[0]; /* continue */ + } else if(withCompat && norm->lenNFKD!=0) { + return TRUE; + } else { + return 
FALSE; + } + } +} + +static void +writeAllMappings(FILE *f, UBool withCompat) { + uint32_t i, code; + UBool isInBlockZero; + + if(withCompat) { + fprintf(f, "\n# Canonical and compatibility decomposition mappings\n"); + } else { + fprintf(f, "\n# Canonical decomposition mappings\n"); + } + for(code=0; code<=0x10ffff;) { + i=utrie_get32(normTrie, code, &isInBlockZero); + if(isInBlockZero) { + code+=UTRIE_DATA_BLOCK_LENGTH; + } else { + if(i!=0) { + uint32_t *s32; + uint8_t length; + char separator; + if((length=norms[i].lenNFD)!=0) { + s32=norms[i].nfd; + separator= hasOneWayMapping(code, withCompat) ? '>' : '='; + } else if(withCompat && (length=norms[i].lenNFKD)!=0) { + s32=norms[i].nfkd; + separator='>'; + } + if(length!=0) { + uint8_t j; + fprintf(f, "%04lX%c", (long)code, separator); + for(j=0; j +#include +#include +#include +#include "unicode/utypes.h" +#include "unicode/errorcode.h" +#include "unicode/localpointer.h" +#include "unicode/putil.h" +#include "unicode/uchar.h" +#include "unicode/unistr.h" +#include "n2builder.h" +#include "normalizer2impl.h" +#include "toolutil.h" +#include "uoptions.h" +#include "uparse.h" + +#if UCONFIG_NO_NORMALIZATION +#include "unewdata.h" +#endif + +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) + +U_NAMESPACE_BEGIN + +UBool beVerbose=FALSE, haveCopyright=TRUE; + +U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); + +#if !UCONFIG_NO_NORMALIZATION +void parseFile(FILE *f, Normalizer2DataBuilder &builder); +#endif + +/* -------------------------------------------------------------------------- */ + +enum { + HELP_H, + HELP_QUESTION_MARK, + VERBOSE, + COPYRIGHT, + SOURCEDIR, + OUTPUT_FILENAME, + UNICODE_VERSION +}; + +static UOption options[]={ + UOPTION_HELP_H, + UOPTION_HELP_QUESTION_MARK, + UOPTION_VERBOSE, + UOPTION_COPYRIGHT, + UOPTION_SOURCEDIR, + UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG), + UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG) +}; + +extern "C" int +main(int argc, char* 
argv[]) { + U_MAIN_INIT_ARGS(argc, argv); + + /* preset then read command line options */ + options[SOURCEDIR].value=""; + options[UNICODE_VERSION].value=U_UNICODE_VERSION; + argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[HELP_H]), options); + + /* error handling, printing usage message */ + if(argc<0) { + fprintf(stderr, + "error in command line argument \"%s\"\n", + argv[-argc]); + } + if(!options[OUTPUT_FILENAME].doesOccur) { + argc=-1; + } + if( argc<2 || + options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur + ) { + /* + * Broken into chunks because the C89 standard says the minimum + * required supported string length is 509 bytes. + */ + fprintf(stderr, + "Usage: %s [-options] infiles+ -o outputfilename\n" + "\n" + "Reads the infiles with normalization data and\n" + "creates a binary file (outputfilename) with the data.\n" + "\n", + argv[0]); + fprintf(stderr, + "Options:\n" + "\t-h or -? or --help this usage text\n" + "\t-v or --verbose verbose output\n" + "\t-c or --copyright include a copyright notice\n" + "\t-u or --unicode Unicode version, followed by the version like 5.2.0\n"); + fprintf(stderr, + "\t-s or --sourcedir source directory, followed by the path\n" + "\t-o or --output output filename\n"); + return argc<0 ? 
U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; + } + + beVerbose=options[VERBOSE].doesOccur; + haveCopyright=options[COPYRIGHT].doesOccur; + + IcuToolErrorCode errorCode("gennorm2/main()"); + +#if UCONFIG_NO_NORMALIZATION + + fprintf(stderr, + "gennorm2 writes a dummy binary data file " + "because UCONFIG_NO_NORMALIZATION is set, \n" + "see icu/source/common/unicode/uconfig.h\n"); + udata_createDummy(NULL, NULL, options[OUTPUT_FILENAME].value, errorCode); + return U_UNSUPPORTED_ERROR; + +#else + + LocalPointer builder(new Normalizer2DataBuilder(errorCode)); + errorCode.assertSuccess(); + + builder->setUnicodeVersion(options[UNICODE_VERSION].value); + + // prepare the filename beginning with the source dir + std::string filename(options[SOURCEDIR].value); + int32_t pathLength=filename.length(); + if( pathLength>0 && + filename[pathLength-1]!=U_FILE_SEP_CHAR && + filename[pathLength-1]!=U_FILE_ALT_SEP_CHAR + ) { + filename.push_back(U_FILE_SEP_CHAR); + pathLength=filename.length(); + } + + for(int i=1; isetOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS); + parseFile(f.getAlias(), *builder); + filename.erase(pathLength); + } + + builder->writeBinaryFile(options[OUTPUT_FILENAME].value); + + return errorCode.get(); + +#endif +} + +#if !UCONFIG_NO_NORMALIZATION + +void parseFile(FILE *f, Normalizer2DataBuilder &builder) { + IcuToolErrorCode errorCode("gennorm2/parseFile()"); + char line[300]; + uint32_t startCP, endCP; + while(NULL!=fgets(line, (int)sizeof(line), f)) { + char *comment=(char *)strchr(line, '#'); + if(comment!=NULL) { + *comment=0; + } + u_rtrim(line); + if(line[0]==0) { + continue; // skip empty and comment-only lines + } + if(line[0]=='*') { + continue; // reserved syntax + } + const char *delimiter; + int32_t rangeLength= + u_parseCodePointRangeAnyTerminator(line, &startCP, &endCP, &delimiter, errorCode); + if(errorCode.isFailure()) { + fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line); + exit(errorCode.reset()); + } + 
delimiter=u_skipWhitespace(delimiter); + if(*delimiter==':') { + const char *s=u_skipWhitespace(delimiter+1); + char *end; + unsigned long value=strtoul(s, &end, 10); + if(end<=s || *u_skipWhitespace(end)!=0 || value>=0xff) { + fprintf(stderr, "gennorm2 error: parsing ccc from %s\n", line); + exit(U_PARSE_ERROR); + } + for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) { + builder.setCC(c, (uint8_t)value); + } + continue; + } + if(*delimiter=='-') { + if(*u_skipWhitespace(delimiter+1)!=0) { + fprintf(stderr, "gennorm2 error: parsing remove-mapping %s\n", line); + exit(U_PARSE_ERROR); + } + for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) { + builder.removeMapping(c); + } + continue; + } + if(*delimiter=='=' || *delimiter=='>') { + UChar uchars[Normalizer2Impl::MAPPING_LENGTH_MASK]; + int32_t length=u_parseString(delimiter+1, uchars, LENGTHOF(uchars), NULL, errorCode); + if(errorCode.isFailure()) { + fprintf(stderr, "gennorm2 error: parsing mapping string from %s\n", line); + exit(errorCode.reset()); + } + UnicodeString mapping(FALSE, uchars, length); + if(*delimiter=='=') { + if(rangeLength!=1) { + fprintf(stderr, + "gennorm2 error: round-trip mapping for more than 1 code point on %s\n", + line); + exit(U_PARSE_ERROR); + } + builder.setRoundTripMapping((UChar32)startCP, mapping); + } else { + for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) { + builder.setOneWayMapping(c, mapping); + } + } + continue; + } + fprintf(stderr, "gennorm2 error: unrecognized data line %s\n", line); + exit(U_PARSE_ERROR); + } +} + +#endif // !UCONFIG_NO_NORMALIZATION + +U_NAMESPACE_END + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/icu4c/source/tools/gennorm2/gennorm2.vcproj b/icu4c/source/tools/gennorm2/gennorm2.vcproj new file mode 100644 index 00000000000..f061a5ef6c5 --- /dev/null +++ b/icu4c/source/tools/gennorm2/gennorm2.vcproj @@ -0,0 +1,409 @@ + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/icu4c/source/tools/gennorm2/n2builder.cpp b/icu4c/source/tools/gennorm2/n2builder.cpp new file mode 100644 index 00000000000..a67ace412cd --- /dev/null +++ b/icu4c/source/tools/gennorm2/n2builder.cpp @@ -0,0 +1,1094 @@ +/* +******************************************************************************* +* +* Copyright (C) 2009-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: n2builder.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov25 +* created by: Markus W. Scherer +* +* Builds Normalizer2 data and writes a binary .nrm file. +* For the file format see source/common/normalizer2impl.h. +*/ + +#include "n2builder.h" + +#include +#include +#include +#include +#include "unicode/utypes.h" +#include "unicode/errorcode.h" +#include "unicode/localpointer.h" +#include "unicode/putil.h" +#include "unicode/udata.h" +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "unicode/ustring.h" +#include "hash.h" +#include "normalizer2impl.h" +#include "toolutil.h" +#include "unewdata.h" +#include "unormimp.h" +#include "utrie2.h" + +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) + +#if !UCONFIG_NO_NORMALIZATION + +/* UDataInfo cf. 
udata.h */ +static UDataInfo dataInfo={ + sizeof(UDataInfo), + 0, + + U_IS_BIG_ENDIAN, + U_CHARSET_FAMILY, + U_SIZEOF_UCHAR, + 0, + + { 0x4e, 0x72, 0x6d, 0x32 }, /* dataFormat="Nrm2" */ + { 1, 0, 0, 0 }, /* formatVersion */ + { 5, 2, 0, 0 } /* dataVersion (Unicode version) */ +}; + +U_NAMESPACE_BEGIN + +class HangulIterator { +public: + struct Range { + UChar32 start, limit; + uint16_t norm16; + }; + + HangulIterator() : rangeIndex(0) {} + const Range *nextRange() { + if(rangeIndexREMOVED; } + + // Requires hasMapping() and well-formed mapping. + void setMappingCP() { + UChar32 c; + if(!mapping->isEmpty() && mapping->length()==U16_LENGTH(c=mapping->char32At(0))) { + mappingCP=c; + } else { + mappingCP=U_SENTINEL; + } + } + + UnicodeString *mapping; + UChar32 mappingCP; // >=0 if mapping to 1 code point + int32_t mappingPhase; + MappingType mappingType; + + std::vector *compositions; + uint8_t cc; + UBool combinesBack; + UBool hasNoCompBoundaryAfter; + + enum OffsetType { + OFFSET_NONE, OFFSET_MAYBE_YES, + OFFSET_YES_YES, OFFSET_YES_NO, OFFSET_NO_NO, + OFFSET_DELTA + }; + enum { OFFSET_SHIFT=4, OFFSET_MASK=(1<rangeHandler(start, end, value); +} + +U_CDECL_END + +Normalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) : + phase(0), overrideHandling(OVERRIDE_PREVIOUS) { + memset(unicodeVersion, 0, sizeof(unicodeVersion)); + normTrie=utrie2_open(0, 0, &errorCode); + normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm)); + norms=allocNorm(); // unused Norm struct at index 0 + memset(indexes, 0, sizeof(indexes)); +} + +Normalizer2DataBuilder::~Normalizer2DataBuilder() { + utrie2_close(normTrie); + int32_t normsLength=utm_countItems(normMem); + for(int32_t i=1; imappingType!=Norm::NONE) { + if( overrideHandling==OVERRIDE_NONE || + (overrideHandling==OVERRIDE_PREVIOUS && p->mappingPhase==phase) + ) { + fprintf(stderr, + "error in gennorm2 phase %d: " + "not permitted to override mapping for U+%04lX from phase %d\n", + (int)phase, 
(long)c, (int)p->mappingPhase); + exit(U_INVALID_FORMAT_ERROR); + } + delete p->mapping; + p->mapping=NULL; + } + p->mappingPhase=phase; + } + return p; +} + +void Normalizer2DataBuilder::setOverrideHandling(OverrideHandling oh) { + overrideHandling=oh; + ++phase; +} + +void Normalizer2DataBuilder::setCC(UChar32 c, uint8_t cc) { + createNorm(c)->cc=cc; +} + +uint8_t Normalizer2DataBuilder::getCC(UChar32 c) const { + return getNormRef(c).cc; +} + +static UBool isWellFormed(const UnicodeString &s) { + UErrorCode errorCode=U_ZERO_ERROR; + u_strToUTF8(NULL, 0, NULL, s.getBuffer(), s.length(), &errorCode); + return U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR; +} + +void Normalizer2DataBuilder::setOneWayMapping(UChar32 c, const UnicodeString &m) { + if(!isWellFormed(m)) { + fprintf(stderr, + "error in gennorm2 phase %d: " + "illegal one-way mapping from U+%04lX to malformed string\n", + (int)phase, (long)c); + exit(U_INVALID_FORMAT_ERROR); + } + Norm *p=checkNormForMapping(createNorm(c), c); + p->mapping=new UnicodeString(m); + p->mappingType=Norm::ONE_WAY; + p->setMappingCP(); +} + +void Normalizer2DataBuilder::setRoundTripMapping(UChar32 c, const UnicodeString &m) { + if(U_IS_SURROGATE(c)) { + fprintf(stderr, + "error in gennorm2 phase %d: " + "illegal round-trip mapping from surrogate code point U+%04lX\n", + (int)phase, (long)c); + exit(U_INVALID_FORMAT_ERROR); + } + if(!isWellFormed(m)) { + fprintf(stderr, + "error in gennorm2 phase %d: " + "illegal round-trip mapping from U+%04lX to malformed string\n", + (int)phase, (long)c); + exit(U_INVALID_FORMAT_ERROR); + } + int32_t numCP=u_countChar32(m.getBuffer(), m.length()); + if(numCP!=2) { + fprintf(stderr, + "error in gennorm2 phase %d: " + "illegal round-trip mapping from U+%04lX to %d!=2 code points\n", + (int)phase, (long)c, (int)numCP); + exit(U_INVALID_FORMAT_ERROR); + } + Norm *p=checkNormForMapping(createNorm(c), c); + p->mapping=new UnicodeString(m); + p->mappingType=Norm::ROUND_TRIP; + 
p->mappingCP=U_SENTINEL; +} + +void Normalizer2DataBuilder::removeMapping(UChar32 c) { + Norm *p=checkNormForMapping(getNorm(c), c); + if(p!=NULL) { + p->mappingType=Norm::REMOVED; + } +} + +class CompositionBuilder : public Normalizer2DBEnumerator { +public: + CompositionBuilder(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b) {} + virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) { + builder.addComposition(start, end, value); + return TRUE; + } +}; + +void +Normalizer2DataBuilder::addComposition(UChar32 start, UChar32 end, uint32_t value) { + if(norms[value].mappingType==Norm::ROUND_TRIP) { + if(start!=end) { + fprintf(stderr, + "gennorm2 error: same round-trip mapping for " + "more than 1 code point U+%04lX..U+%04lX\n", + (long)start, (long)end); + exit(U_INVALID_FORMAT_ERROR); + } + if(norms[value].cc!=0) { + fprintf(stderr, + "gennorm2 error: " + "U+%04lX has a round-trip mapping and ccc!=0, " + "not possible in Unicode normalization\n", + (long)start); + exit(U_INVALID_FORMAT_ERROR); + } + // setRoundTripMapping() ensured that there are exactly two code points. + const UnicodeString &m=*norms[value].mapping; + UChar32 lead=m.char32At(0); + UChar32 trail=m.char32At(m.length()-1); + if(getCC(lead)!=0) { + fprintf(stderr, + "gennorm2 error: " + "U+%04lX's round-trip mapping's starter U+%04lX has ccc!=0, " + "not possible in Unicode normalization\n", + (long)start, (long)lead); + exit(U_INVALID_FORMAT_ERROR); + } + // Flag for trailing character. + createNorm(trail)->combinesBack=TRUE; + // Insert (trail, composite) pair into compositions list for the lead character. + CompositionPair pair(trail, start); + Norm *leadNorm=createNorm(lead); + std::vector *compositions=leadNorm->compositions; + if(compositions==NULL) { + compositions=leadNorm->compositions=new std::vector; + compositions->push_back(pair); + } else { + // Insertion sort, and check for duplicate trail characters. 
+ std::vector::iterator it; + for(it=compositions->begin(); it!=compositions->end(); ++it) { + if(trail==it->trail) { + fprintf(stderr, + "gennorm2 error: same round-trip mapping for " + "more than 1 code point (e.g., U+%04lX) to U+%04lX + U+%04lX\n", + (long)start, (long)lead, (long)trail); + exit(U_INVALID_FORMAT_ERROR); + } + if(trailtrail) { + break; + } + } + compositions->insert(it, pair); + } + } +} + +UBool Normalizer2DataBuilder::combinesWithCCBetween(const Norm &norm, + uint8_t lowCC, uint8_t highCC) const { + const std::vector *compositions=norm.compositions; + if(compositions!=NULL && (highCC-lowCC)>=2) { + std::vector::const_iterator it; + for(it=compositions->begin(); it!=compositions->end(); ++it) { + uint8_t trailCC=getCC(it->trail); + if(lowCC *compositions=norm.compositions; + if(compositions!=NULL) { + std::vector::const_iterator it; + for(it=compositions->begin(); it!=compositions->end(); ++it) { + if(trail==it->trail) { + return it->composite; + } + if(trailtrail) { + break; + } + } + } + return U_SENTINEL; +} + +class Decomposer : public Normalizer2DBEnumerator { +public: + Decomposer(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b), didDecompose(FALSE) {} + virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) { + didDecompose|=builder.decompose(start, end, value); + return TRUE; + } + UBool didDecompose; +}; + +UBool +Normalizer2DataBuilder::decompose(UChar32 start, UChar32 end, uint32_t value) { + if(norms[value].hasMapping()) { + const UnicodeString &m=*norms[value].mapping; + UnicodeString *decomposed=NULL; + const UChar *s=m.getBuffer(); + int32_t length=m.length(); + int32_t prev, i=0; + UChar32 c; + while(ichar32At(cNorm.mapping->length()-1); + uint8_t cTrailCC=getCC(cTrailChar); + if(cTrailCC>myTrailCC) { + fprintf(stderr, + "gennorm2 error: " + "U+%04lX's round-trip mapping's starter " + "U+%04lX decomposes and the " + "inner/earlier tccc=%hu > outer/following tccc=%hu, " + "not possible in Unicode 
normalization\n", + (long)start, (long)c, + (short)cTrailCC, (short)myTrailCC); + exit(U_INVALID_FORMAT_ERROR); + } + } else { + fprintf(stderr, + "gennorm2 error: " + "U+%04lX's round-trip mapping's non-starter " + "U+%04lX decomposes, " + "not possible in Unicode normalization\n", + (long)start, (long)c); + exit(U_INVALID_FORMAT_ERROR); + } + } + if(decomposed==NULL) { + decomposed=new UnicodeString(m, 0, prev); + } + decomposed->append(*cNorm.mapping); + } else if(Hangul::isHangul(c)) { + UChar buffer[3]; + int32_t hangulLength=Hangul::decompose(c, buffer); + if(norms[value].mappingType==Norm::ROUND_TRIP && prev!=0) { + fprintf(stderr, + "gennorm2 error: " + "U+%04lX's round-trip mapping's non-starter " + "U+%04lX decomposes, " + "not possible in Unicode normalization\n", + (long)start, (long)c); + exit(U_INVALID_FORMAT_ERROR); + } + if(decomposed==NULL) { + decomposed=new UnicodeString(m, 0, prev); + } + decomposed->append(buffer, hangulLength); + } else if(decomposed!=NULL) { + decomposed->append(m, prev, i-prev); + } + } + if(decomposed!=NULL) { + delete norms[value].mapping; + norms[value].mapping=decomposed; + // Not norms[value].setMappingCP(); because the original mapping + // is most likely to be encodable as a delta. 
+ return TRUE; + } + } + return FALSE; +} + +class BuilderReorderingBuffer { +public: + BuilderReorderingBuffer() : fLength(0), fLastStarterIndex(-1), fDidReorder(FALSE) {} + void reset() { + fLength=0; + fLastStarterIndex=-1; + fDidReorder=FALSE; + } + int32_t length() const { return fLength; } + UBool isEmpty() const { return fLength==0; } + int32_t lastStarterIndex() const { return fLastStarterIndex; } + UChar32 charAt(int32_t i) const { return fArray[i]>>8; } + uint8_t ccAt(int32_t i) const { return (uint8_t)fArray[i]; } + UBool didReorder() const { return fDidReorder; } + void append(UChar32 c, uint8_t cc) { + if(cc==0 || fLength==0 || ccAt(fLength-1)<=cc) { + if(cc==0) { + fLastStarterIndex=fLength; + } + fArray[fLength++]=(c<<8)|cc; + return; + } + // Let this character bubble back to its canonical order. + int32_t i=fLength-1; + while(i>fLastStarterIndex && ccAt(i)>cc) { + --i; + } + ++i; // after the last starter or prevCC<=cc + // Move this and the following characters forward one to make space. + for(int32_t j=fLength; imapping; + int32_t length=m.length(); + if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) { + return; // writeMapping() will complain about it and print the code point. + } + const UChar *s=m.getBuffer(); + int32_t i=0; + UChar32 c; + while(icompositions==NULL) { + return FALSE; // the last starter does not combine forward + } + // Compose as far as possible, and see if further compositions are possible. + uint8_t prevCC=0; + for(int32_t combMarkIndex=lastStarterIndex+1; combMarkIndex=0 + ) { + buffer.setComposite(starter, combMarkIndex); + starterNorm=&getNormRef(starter); + if(starterNorm->compositions==NULL) { + return FALSE; // the composite does not combine further + } + } else { + prevCC=cc; + ++combMarkIndex; + } + } + // TRUE if the final, forward-combining starter is at the end. + return prevCC==0; +} + +// Requires p->hasMapping(). 
+void Normalizer2DataBuilder::writeMapping(UChar32 c, const Norm *p, UnicodeString &dataString) { + UnicodeString &m=*p->mapping; + int32_t length=m.length(); + if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) { + fprintf(stderr, + "gennorm2 error: " + "mapping for U+%04lX longer than maximum of %d\n", + (long)c, Normalizer2Impl::MAPPING_LENGTH_MASK); + exit(U_INVALID_FORMAT_ERROR); + } + int32_t leadCC, trailCC; + if(length==0) { + leadCC=trailCC=0; + } else { + leadCC=getCC(m.char32At(0)); + trailCC=getCC(m.char32At(length-1)); + } + if(ccc!=0 || leadCC!=0)) { + fprintf(stderr, + "gennorm2 error: " + "U+%04lX below U+0300 has ccc!=0 or lccc!=0, not supported by ICU\n", + (long)c); + exit(U_INVALID_FORMAT_ERROR); + } + int32_t firstUnit=length|(trailCC<<8); + int32_t secondUnit=p->cc|(leadCC<<8); + if(secondUnit!=0) { + firstUnit|=Normalizer2Impl::MAPPING_HAS_CCC_LCCC_WORD; + } + if(p->compositions!=NULL) { + firstUnit|=Normalizer2Impl::MAPPING_PLUS_COMPOSITION_LIST; + } + if(p->hasNoCompBoundaryAfter) { + firstUnit|=Normalizer2Impl::MAPPING_NO_COMP_BOUNDARY_AFTER; + } + dataString.append((UChar)firstUnit); + if(secondUnit!=0) { + dataString.append((UChar)secondUnit); + } + dataString.append(m); +} + +// Requires p->compositions!=NULL. +void Normalizer2DataBuilder::writeCompositions(UChar32 c, const Norm *p, UnicodeString &dataString) { + if(p->cc!=0) { + fprintf(stderr, + "gennorm2 error: " + "U+%04lX combines-forward and has ccc!=0, not possible in Unicode normalization\n", + (long)c); + exit(U_INVALID_FORMAT_ERROR); + } + int32_t length=p->compositions->size(); + for(int32_t i=0; icompositions->at(i); + // 22 bits for the composite character and whether it combines forward. + UChar32 compositeAndFwd=pair.composite<<1; + if(getNormRef(pair.composite).compositions!=NULL) { + compositeAndFwd|=1; // The composite character also combines-forward. + } + // Encode most pairs in two units and some in three. 
+ int32_t firstUnit, secondUnit, thirdUnit; + if(pair.trail>16; + thirdUnit=compositeAndFwd; + } + } else { + firstUnit=(Normalizer2Impl::COMP_1_TRAIL_LIMIT+ + (pair.trail>>Normalizer2Impl::COMP_1_TRAIL_SHIFT))| + Normalizer2Impl::COMP_1_TRIPLE; + secondUnit=(pair.trail<>16); + thirdUnit=compositeAndFwd; + } + // Set the high bit of the first unit if this is the last composition pair. + if(i==(length-1)) { + firstUnit|=Normalizer2Impl::COMP_1_LAST_TUPLE; + } + dataString.append((UChar)firstUnit).append((UChar)secondUnit); + if(thirdUnit>=0) { + dataString.append((UChar)thirdUnit); + } + } +} + +class ExtraDataWriter : public Normalizer2DBEnumerator { +public: + ExtraDataWriter(Normalizer2DataBuilder &b) : + Normalizer2DBEnumerator(b), + yesYesCompositions(1000, (UChar32)0xffff, 2), // 0=inert, 1=Jamo L, 2=start of compositions + yesNoData(1000, (UChar32)0, 1) {} // 0=Hangul, 1=start of normal data + virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) { + if(value!=0) { + if(start!=end) { + fprintf(stderr, + "gennorm2 error: unexpected shared data for " + "multiple code points U+%04lX..U+%04lX\n", + (long)start, (long)end); + exit(U_INTERNAL_PROGRAM_ERROR); + } + builder.writeExtraData(start, value, *this); + } + return TRUE; + } + UnicodeString maybeYesCompositions; + UnicodeString yesYesCompositions; + UnicodeString yesNoData; + UnicodeString noNoMappings; + Hashtable previousNoNoMappings; // If constructed in runtime code, pass in UErrorCode. 
+}; + +void Normalizer2DataBuilder::writeExtraData(UChar32 c, uint32_t value, ExtraDataWriter &writer) { + Norm *p=norms+value; + if(p->combinesBack) { + if(p->hasMapping()) { + fprintf(stderr, + "gennorm2 error: " + "U+%04lX combines-back and decomposes, not possible in Unicode normalization\n", + (long)c); + exit(U_INVALID_FORMAT_ERROR); + } + if(p->compositions!=NULL) { + p->offset= + (writer.maybeYesCompositions.length()<hasMapping()) { + if(p->compositions!=NULL) { + p->offset= + (writer.yesYesCompositions.length()<mappingType==Norm::ROUND_TRIP) { + p->offset= + (writer.yesNoData.length()<compositions!=NULL) { + writeCompositions(c, p, writer.yesNoData); + } + } else /* one-way */ { + if(p->compositions!=NULL) { + fprintf(stderr, + "gennorm2 error: " + "U+%04lX combines-forward and has a one-way mapping, " + "not possible in Unicode normalization\n", + (long)c); + exit(U_INVALID_FORMAT_ERROR); + } + if(p->cc==0) { + // Try a compact, algorithmic encoding. + // Only for ccc=0. + if(p->mappingCP>=0) { + int32_t delta=p->mappingCP-c; + if(-Normalizer2Impl::MAX_DELTA<=delta && delta<=Normalizer2Impl::MAX_DELTA) { + p->offset=(delta<offset==0) { + int32_t oldNoNoLength=writer.noNoMappings.length(); + writeMapping(c, p, writer.noNoMappings); + UnicodeString newMapping(FALSE, + writer.noNoMappings.getBuffer()+oldNoNoLength, + writer.noNoMappings.length()-oldNoNoLength); + int32_t previousOffset=writer.previousNoNoMappings.geti(newMapping); + if(previousOffset!=0) { + // Duplicate, remove the new units and point to the old ones. + writer.noNoMappings.truncate(oldNoNoLength); + p->offset= + ((previousOffset-1)<offset= + (oldNoNoLength<offset>>Norm::OFFSET_SHIFT; + int32_t norm16=0; + UBool isDecompNo=FALSE; + UBool isCompNoMaybe=FALSE; + switch(p->offset&Norm::OFFSET_MASK) { + case Norm::OFFSET_NONE: + // No mapping, no compositions list. 
+ if(p->combinesBack) { + norm16=Normalizer2Impl::MIN_NORMAL_MAYBE_YES+p->cc; + isDecompNo=(UBool)(p->cc!=0); + isCompNoMaybe=TRUE; + } else if(p->cc!=0) { + norm16=Normalizer2Impl::MIN_YES_YES_WITH_CC-1+p->cc; + isDecompNo=isCompNoMaybe=TRUE; + } + break; + case Norm::OFFSET_MAYBE_YES: + norm16=indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]+offset; + isCompNoMaybe=TRUE; + break; + case Norm::OFFSET_YES_YES: + norm16=offset; + break; + case Norm::OFFSET_YES_NO: + norm16=indexes[Normalizer2Impl::IX_MIN_YES_NO]+offset; + isDecompNo=TRUE; + break; + case Norm::OFFSET_NO_NO: + norm16=indexes[Normalizer2Impl::IX_MIN_NO_NO]+offset; + isDecompNo=isCompNoMaybe=TRUE; + break; + case Norm::OFFSET_DELTA: + norm16=getCenterNoNoDelta()+offset; + isDecompNo=isCompNoMaybe=TRUE; + break; + default: // Should not occur. + exit(U_INTERNAL_PROGRAM_ERROR); + } + IcuToolErrorCode errorCode("gennorm2/writeNorm16()"); + utrie2_setRange32(norm16Trie, start, end, (uint32_t)norm16, TRUE, errorCode); + if(isDecompNo && startstart; climit; ++c) { + if(utrie2_get32(norm16Trie, c)!=0) { + fprintf(stderr, + "gennorm2 error: " + "illegal mapping/composition/ccc data for Hangul or Jamo U+%04lX\n", + (long)c); + exit(U_INVALID_FORMAT_ERROR); + } + } + } + // Set data for algorithmic runtime handling. 
+ IcuToolErrorCode errorCode("gennorm2/setHangulData()"); + hi.reset(); + while((range=hi.nextRange())!=NULL) { + uint16_t norm16=range->norm16; + if(norm16==0) { + norm16=(uint16_t)indexes[Normalizer2Impl::IX_MIN_YES_NO]; // Hangul LV/LVT encoded as minYesNo + if(range->startstart; + } + } else { + if(range->startstart; + } + } + utrie2_setRange32(norm16Trie, range->start, range->limit-1, norm16, TRUE, errorCode); + errorCode.assertSuccess(); + } +} + +U_CDECL_BEGIN + +static UBool U_CALLCONV +enumRangeMaxValue(const void *context, UChar32 /*start*/, UChar32 /*end*/, uint32_t value) { + uint32_t *pMaxValue=(uint32_t *)context; + if(value>*pMaxValue) { + *pMaxValue=value; + } + return TRUE; +} + +U_CDECL_END + +void Normalizer2DataBuilder::processData() { + IcuToolErrorCode errorCode("gennorm2/processData()"); + norm16Trie=utrie2_open(0, 0, errorCode); + errorCode.assertSuccess(); + + utrie2_enum(normTrie, NULL, enumRangeHandler, CompositionBuilder(*this).ptr()); + + Decomposer decomposer(*this); + do { + decomposer.didDecompose=FALSE; + utrie2_enum(normTrie, NULL, enumRangeHandler, &decomposer); + } while(decomposer.didDecompose); + + BuilderReorderingBuffer buffer; + int32_t normsLength=utm_countItems(normMem); + for(int32_t i=1; iminNoNoDelta) { + fprintf(stderr, + "gennorm2 error: " + "data structure overflow, too much mapping composition data\n"); + exit(U_BUFFER_OVERFLOW_ERROR); + } + + utrie2_enum(normTrie, NULL, enumRangeHandler, Norm16Writer(*this).ptr()); + + setHangulData(); + + // Look for the "worst" norm16 value of any supplementary code point + // corresponding to a lead surrogate, and set it as that surrogate's value. + // Enables quick check inner loops to look at only code units. + // + // We could be more sophisticated: + // We could collect a bit set for whether there are values in the different + // norm16 ranges (yesNo, maybeYes, yesYesWithCC etc.) 
+ // and select the best value that only breaks the composition and/or decomposition + // inner loops if necessary. + // However, that seems like overkill for an optimization for supplementary characters. + for(UChar lead=0xd800; lead<0xdc00; ++lead) { + uint32_t maxValue=utrie2_get32(norm16Trie, lead); + utrie2_enumForLeadSurrogate(norm16Trie, lead, NULL, enumRangeMaxValue, &maxValue); + if( maxValue>=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO] && + maxValue>(uint32_t)indexes[Normalizer2Impl::IX_MIN_NO_NO] + ) { + // Set noNo ("worst" value) if it got into "less-bad" maybeYes or ccc!=0. + // Otherwise it might end up at something like JAMO_VT which stays in + // the inner decomposition quick check loop. + maxValue=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]-1; + } + utrie2_set32ForLeadSurrogateCodeUnit(norm16Trie, lead, maxValue, errorCode); + } + + // Adjust supplementary minimum code points to break quick check loops at their lead surrogates. + // For an empty data file, minCP=0x110000 turns into 0xdc00 (first trail surrogate) + // which is harmless. + // As a result, the minimum code points are always BMP code points. 
+ int32_t minCP=indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]; + if(minCP>=0x10000) { + indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=U16_LEAD(minCP); + } + minCP=indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]; + if(minCP>=0x10000) { + indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=U16_LEAD(minCP); + } +} + +void Normalizer2DataBuilder::writeBinaryFile(const char *filename) { + processData(); + + IcuToolErrorCode errorCode("gennorm2/writeBinaryFile()"); + utrie2_freeze(norm16Trie, UTRIE2_16_VALUE_BITS, errorCode); + int32_t norm16TrieLength=utrie2_serialize(norm16Trie, NULL, 0, errorCode); + if(errorCode.get()!=U_BUFFER_OVERFLOW_ERROR) { + fprintf(stderr, "gennorm2 error: unable to freeze/serialize the normalization trie - %s\n", + errorCode.errorName()); + exit(errorCode.reset()); + } + errorCode.reset(); + LocalArray norm16TrieBytes(new uint8_t[norm16TrieLength]); + utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode); + errorCode.assertSuccess(); + + int32_t offset=(int32_t)sizeof(indexes); + indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]=offset; + offset+=norm16TrieLength; + indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]=offset; + int32_t totalSize=offset+=extraData.length()*2; + for(int32_t i=Normalizer2Impl::IX_RESERVED2_OFFSET; i<=Normalizer2Impl::IX_TOTAL_SIZE; ++i) { + indexes[i]=totalSize; + } + + if(beVerbose) { + printf("size of normalization trie: %5ld bytes\n", (long)norm16TrieLength); + printf("size of 16-bit extra data: %5ld uint16_t\n", (long)extraData.length()); + printf("size of binary data file contents: %5ld bytes\n", (long)totalSize); + printf("minDecompNoCodePoint: U+%04lX\n", (long)indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]); + printf("minCompNoMaybeCodePoint: U+%04lX\n", (long)indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]); + printf("minYesNo: 0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_YES_NO]); + printf("minNoNo: 0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_NO_NO]); + printf("limitNoNo: 
0x%04x\n", (int)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]); + printf("minMaybeYes: 0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]); + } + + memcpy(dataInfo.dataVersion, unicodeVersion, 4); + UNewDataMemory *pData= + udata_create(NULL, NULL, filename, &dataInfo, + haveCopyright ? U_COPYRIGHT_STRING : NULL, errorCode); + if(errorCode.isFailure()) { + fprintf(stderr, "gennorm2 error: unable to create the output file %s - %s\n", + filename, errorCode.errorName()); + exit(errorCode.reset()); + } + udata_writeBlock(pData, indexes, sizeof(indexes)); + udata_writeBlock(pData, norm16TrieBytes.getAlias(), norm16TrieLength); + udata_writeUString(pData, extraData.getBuffer(), extraData.length()); + + int32_t writtenSize=udata_finish(pData, errorCode); + if(errorCode.isFailure()) { + fprintf(stderr, "gennorm2: error %s writing the output file\n", errorCode.errorName()); + exit(errorCode.reset()); + } + if(writtenSize!=totalSize) { + fprintf(stderr, "gennorm2 error: written size %ld != calculated size %ld\n", + (long)writtenSize, (long)totalSize); + exit(U_INTERNAL_PROGRAM_ERROR); + } +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + */ diff --git a/icu4c/source/tools/gennorm2/n2builder.h b/icu4c/source/tools/gennorm2/n2builder.h new file mode 100644 index 00000000000..2a61289f75f --- /dev/null +++ b/icu4c/source/tools/gennorm2/n2builder.h @@ -0,0 +1,113 @@ +/* +******************************************************************************* +* +* Copyright (C) 2009-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: n2builder.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov25 +* created by: Markus W. 
Scherer +*/ + +#ifndef __N2BUILDER_H__ +#define __N2BUILDER_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/errorcode.h" +#include "unicode/unistr.h" +#include "normalizer2impl.h" // for IX_COUNT +#include "toolutil.h" +#include "utrie2.h" + +U_NAMESPACE_BEGIN + +extern UBool beVerbose, haveCopyright; + +struct Norm; + +class BuilderReorderingBuffer; +class ExtraDataWriter; + +class Normalizer2DataBuilder { +public: + Normalizer2DataBuilder(UErrorCode &errorCode); + ~Normalizer2DataBuilder(); + + enum OverrideHandling { + OVERRIDE_NONE, + OVERRIDE_ANY, + OVERRIDE_PREVIOUS + }; + + void setOverrideHandling(OverrideHandling oh); + + void setCC(UChar32 c, uint8_t cc); + void setOneWayMapping(UChar32 c, const UnicodeString &m); + void setRoundTripMapping(UChar32 c, const UnicodeString &m); + void removeMapping(UChar32 c); + + void setUnicodeVersion(const char *v); + + void writeBinaryFile(const char *filename); + +private: + friend class CompositionBuilder; + friend class Decomposer; + friend class ExtraDataWriter; + friend class Norm16Writer; + + // No copy constructor nor assignment operator. 
+ Normalizer2DataBuilder(const Normalizer2DataBuilder &other); + Normalizer2DataBuilder &operator=(const Normalizer2DataBuilder &other); + + Norm *allocNorm(); + Norm *getNorm(UChar32 c); + Norm *createNorm(UChar32 c); + Norm *checkNormForMapping(Norm *p, UChar32 c); // check for permitted overrides + + const Norm &getNormRef(UChar32 c) const; + uint8_t getCC(UChar32 c) const; + UBool combinesWithCCBetween(const Norm &norm, uint8_t lowCC, uint8_t highCC) const; + UChar32 combine(const Norm &norm, UChar32 trail) const; + + void addComposition(UChar32 start, UChar32 end, uint32_t value); + UBool decompose(UChar32 start, UChar32 end, uint32_t value); + void reorder(Norm *p, BuilderReorderingBuffer &buffer); + UBool hasNoCompBoundaryAfter(BuilderReorderingBuffer &buffer); + void setHangulData(); + void writeMapping(UChar32 c, const Norm *p, UnicodeString &dataString); + void writeCompositions(UChar32 c, const Norm *p, UnicodeString &dataString); + void writeExtraData(UChar32 c, uint32_t value, ExtraDataWriter &writer); + int32_t getCenterNoNoDelta() { + return indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]-Normalizer2Impl::MAX_DELTA-1; + } + void writeNorm16(UChar32 start, UChar32 end, uint32_t value); + void processData(); + + UTrie2 *normTrie; + UToolMemory *normMem; + Norm *norms; + + int32_t phase; + OverrideHandling overrideHandling; + + int32_t indexes[Normalizer2Impl::IX_COUNT]; + UTrie2 *norm16Trie; + UnicodeString extraData; + + UVersionInfo unicodeVersion; +}; + +U_NAMESPACE_END + +#endif // #if !UCONFIG_NO_NORMALIZATION + +#endif // __N2BUILDER_H__ diff --git a/icu4c/source/tools/genpname/data.h b/icu4c/source/tools/genpname/data.h index a682ae693d8..0b47eb9a014 100644 --- a/icu4c/source/tools/genpname/data.h +++ b/icu4c/source/tools/genpname/data.h @@ -1,5 +1,5 @@ /** - * Copyright (C) 2002-2009, International Business Machines Corporation and + * Copyright (C) 2002-2010, International Business Machines Corporation and * others. All Rights Reserved. 
* * MACHINE GENERATED FILE. !!! Do not edit manually !!! @@ -11,7 +11,7 @@ * PropertyAliases.txt * PropertyValueAliases.txt * - * Date: Tue Sep 29 14:00:53 2009 + * Date: Fri Dec 11 22:16:44 2009 * Unicode version: 5.2.0 * Script: preparse.pl */ @@ -22,7 +22,7 @@ const uint8_t VERSION_1 = 2; const uint8_t VERSION_2 = 0; const uint8_t VERSION_3 = 0; -const int32_t STRING_COUNT = 908; +const int32_t STRING_COUNT = 910; /* to be sorted */ const AliasName STRING_TABLE[] = { @@ -146,1472 +146,1475 @@ const AliasName STRING_TABLE[] = { AliasName("CS", 117), AliasName("CWCF", 118), AliasName("CWCM", 119), - AliasName("CWL", 120), - AliasName("CWT", 121), - AliasName("CWU", 122), - AliasName("Cakm", 123), - AliasName("Can", 124), - AliasName("Canadian_Aboriginal", 125), - AliasName("Canadian_Syllabics", 126), - AliasName("Canonical", 127), - AliasName("Canonical_Combining_Class", 128), - AliasName("Cans", 129), - AliasName("Cari", 130), - AliasName("Carian", 131), - AliasName("Carriage_Return", 132), - AliasName("Case_Folding", 133), - AliasName("Case_Ignorable", 134), - AliasName("Case_Sensitive", 135), - AliasName("Cased", 136), - AliasName("Cased_Letter", 137), - AliasName("Cc", 138), - AliasName("Cf", 139), - AliasName("Cham", 140), - AliasName("Changes_When_Casefolded", 141), - AliasName("Changes_When_Casemapped", 142), - AliasName("Changes_When_Lowercased", 143), - AliasName("Changes_When_Titlecased", 144), - AliasName("Changes_When_Uppercased", 145), - AliasName("Cher", 146), - AliasName("Cherokee", 147), - AliasName("Circle", 148), - AliasName("Cirt", 149), - AliasName("Close", 150), - AliasName("Close_Parenthesis", 151), - AliasName("Close_Punctuation", 152), - AliasName("Cn", 153), - AliasName("Co", 154), - AliasName("Com", 155), - AliasName("Combining_Diacritical_Marks", 156), - AliasName("Combining_Diacritical_Marks_For_Symbols", 157), - AliasName("Combining_Diacritical_Marks_Supplement", 158), - AliasName("Combining_Half_Marks", 159), - 
AliasName("Combining_Mark", 160), - AliasName("Combining_Marks_For_Symbols", 161), - AliasName("Common", 162), - AliasName("Common_Indic_Number_Forms", 163), - AliasName("Common_Separator", 164), - AliasName("Comp_Ex", 165), - AliasName("Compat", 166), - AliasName("Complex_Context", 167), - AliasName("Connector_Punctuation", 168), - AliasName("Contingent_Break", 169), - AliasName("Control", 170), - AliasName("Control_Pictures", 171), - AliasName("Copt", 172), - AliasName("Coptic", 173), - AliasName("Counting_Rod_Numerals", 174), - AliasName("Cprt", 175), - AliasName("Cs", 176), - AliasName("Cuneiform", 177), - AliasName("Cuneiform_Numbers_And_Punctuation", 178), - AliasName("Currency_Symbol", 179), - AliasName("Currency_Symbols", 180), - AliasName("Cypriot", 181), - AliasName("Cypriot_Syllabary", 182), - AliasName("Cyrillic", 183), - AliasName("Cyrillic_Extended_A", 184), - AliasName("Cyrillic_Extended_B", 185), - AliasName("Cyrillic_Supplement", 186), - AliasName("Cyrillic_Supplementary", 187), - AliasName("Cyrl", 188), - AliasName("Cyrs", 189), - AliasName("D", 190), - AliasName("DA", 191), - AliasName("DB", 192), - AliasName("DI", 193), - AliasName("Dal", 194), - AliasName("Dalath_Rish", 195), - AliasName("Dash", 196), - AliasName("Dash_Punctuation", 197), - AliasName("De", 198), - AliasName("Decimal", 199), - AliasName("Decimal_Number", 200), - AliasName("Decomposition_Type", 201), - AliasName("Default_Ignorable_Code_Point", 202), - AliasName("Dep", 203), - AliasName("Deprecated", 204), - AliasName("Deseret", 205), - AliasName("Deva", 206), - AliasName("Devanagari", 207), - AliasName("Devanagari_Extended", 208), - AliasName("Di", 209), - AliasName("Dia", 210), - AliasName("Diacritic", 211), - AliasName("Digit", 212), - AliasName("Dingbats", 213), - AliasName("Domino_Tiles", 214), - AliasName("Double_Above", 215), - AliasName("Double_Below", 216), - AliasName("Dsrt", 217), - AliasName("Dual_Joining", 218), - AliasName("E", 219), - AliasName("EN", 220), - 
AliasName("ES", 221), - AliasName("ET", 222), - AliasName("EX", 223), - AliasName("East_Asian_Width", 224), - AliasName("Egyd", 225), - AliasName("Egyh", 226), - AliasName("Egyp", 227), - AliasName("Egyptian_Hieroglyphs", 228), - AliasName("Enc", 229), - AliasName("Enclosed_Alphanumeric_Supplement", 230), - AliasName("Enclosed_Alphanumerics", 231), - AliasName("Enclosed_CJK_Letters_And_Months", 232), - AliasName("Enclosed_Ideographic_Supplement", 233), - AliasName("Enclosing_Mark", 234), - AliasName("Ethi", 235), - AliasName("Ethiopic", 236), - AliasName("Ethiopic_Extended", 237), - AliasName("Ethiopic_Supplement", 238), - AliasName("European_Number", 239), - AliasName("European_Separator", 240), - AliasName("European_Terminator", 241), - AliasName("Exclamation", 242), - AliasName("Ext", 243), - AliasName("Extend", 244), - AliasName("ExtendNumLet", 245), - AliasName("Extender", 246), - AliasName("F", 247), - AliasName("FO", 248), - AliasName("False", 249), - AliasName("Farsi_Yeh", 250), - AliasName("Fe", 251), - AliasName("Feh", 252), - AliasName("Fin", 253), - AliasName("Final", 254), - AliasName("Final_Punctuation", 255), - AliasName("Final_Semkath", 256), - AliasName("Font", 257), - AliasName("Format", 258), - AliasName("Fra", 259), - AliasName("Fraction", 260), - AliasName("Full_Composition_Exclusion", 261), - AliasName("Fullwidth", 262), - AliasName("GCB", 263), - AliasName("GL", 264), - AliasName("Gaf", 265), - AliasName("Gamal", 266), - AliasName("General_Category", 267), - AliasName("General_Category_Mask", 268), - AliasName("General_Punctuation", 269), - AliasName("Geok", 270), - AliasName("Geometric_Shapes", 271), - AliasName("Geor", 272), - AliasName("Georgian", 273), - AliasName("Georgian_Supplement", 274), - AliasName("Glag", 275), - AliasName("Glagolitic", 276), - AliasName("Glue", 277), - AliasName("Goth", 278), - AliasName("Gothic", 279), - AliasName("Gr_Base", 280), - AliasName("Gr_Ext", 281), - AliasName("Gr_Link", 282), - 
AliasName("Grapheme_Base", 283), - AliasName("Grapheme_Cluster_Break", 284), - AliasName("Grapheme_Extend", 285), - AliasName("Grapheme_Link", 286), - AliasName("Greek", 287), - AliasName("Greek_And_Coptic", 288), - AliasName("Greek_Extended", 289), - AliasName("Grek", 290), - AliasName("Gujarati", 291), - AliasName("Gujr", 292), - AliasName("Gurmukhi", 293), - AliasName("Guru", 294), - AliasName("H", 295), - AliasName("H2", 296), - AliasName("H3", 297), - AliasName("HY", 298), - AliasName("Hah", 299), - AliasName("Halfwidth", 300), - AliasName("Halfwidth_And_Fullwidth_Forms", 301), - AliasName("Hamza_On_Heh_Goal", 302), - AliasName("Han", 303), - AliasName("Hang", 304), - AliasName("Hangul", 305), - AliasName("Hangul_Compatibility_Jamo", 306), - AliasName("Hangul_Jamo", 307), - AliasName("Hangul_Jamo_Extended_A", 308), - AliasName("Hangul_Jamo_Extended_B", 309), - AliasName("Hangul_Syllable_Type", 310), - AliasName("Hangul_Syllables", 311), - AliasName("Hani", 312), - AliasName("Hano", 313), - AliasName("Hans", 314), - AliasName("Hant", 315), - AliasName("Hanunoo", 316), - AliasName("He", 317), - AliasName("Hebr", 318), - AliasName("Hebrew", 319), - AliasName("Heh", 320), - AliasName("Heh_Goal", 321), - AliasName("Heth", 322), - AliasName("Hex", 323), - AliasName("Hex_Digit", 324), - AliasName("High_Private_Use_Surrogates", 325), - AliasName("High_Surrogates", 326), - AliasName("Hira", 327), - AliasName("Hiragana", 328), - AliasName("Hmng", 329), - AliasName("Hrkt", 330), - AliasName("Hung", 331), - AliasName("Hyphen", 332), - AliasName("ID", 333), - AliasName("IDC", 334), - AliasName("IDS", 335), - AliasName("IDSB", 336), - AliasName("IDST", 337), - AliasName("IDS_Binary_Operator", 338), - AliasName("IDS_Trinary_Operator", 339), - AliasName("ID_Continue", 340), - AliasName("ID_Start", 341), - AliasName("IN", 342), - AliasName("IPA_Extensions", 343), - AliasName("IS", 344), - AliasName("ISO_Comment", 345), - AliasName("Ideo", 346), - AliasName("Ideographic", 347), 
- AliasName("Ideographic_Description_Characters", 348), - AliasName("Imperial_Aramaic", 349), - AliasName("Inds", 350), - AliasName("Infix_Numeric", 351), - AliasName("Inherited", 352), - AliasName("Init", 353), - AliasName("Initial", 354), - AliasName("Initial_Punctuation", 355), - AliasName("Inscriptional_Pahlavi", 356), - AliasName("Inscriptional_Parthian", 357), - AliasName("Inseparable", 358), - AliasName("Inseperable", 359), - AliasName("Iota_Subscript", 360), - AliasName("Iso", 361), - AliasName("Isolated", 362), - AliasName("Ital", 363), - AliasName("JL", 364), - AliasName("JT", 365), - AliasName("JV", 366), - AliasName("Java", 367), - AliasName("Javanese", 368), - AliasName("Join_C", 369), - AliasName("Join_Causing", 370), - AliasName("Join_Control", 371), - AliasName("Joining_Group", 372), - AliasName("Joining_Type", 373), - AliasName("Jpan", 374), - AliasName("KA", 375), - AliasName("KV", 376), - AliasName("Kaf", 377), - AliasName("Kaithi", 378), - AliasName("Kali", 379), - AliasName("Kana", 380), - AliasName("Kana_Voicing", 381), - AliasName("Kanbun", 382), - AliasName("Kangxi_Radicals", 383), - AliasName("Kannada", 384), - AliasName("Kaph", 385), - AliasName("Katakana", 386), - AliasName("Katakana_Or_Hiragana", 387), - AliasName("Katakana_Phonetic_Extensions", 388), - AliasName("Kayah_Li", 389), - AliasName("Khaph", 390), - AliasName("Khar", 391), - AliasName("Kharoshthi", 392), - AliasName("Khmer", 393), - AliasName("Khmer_Symbols", 394), - AliasName("Khmr", 395), - AliasName("Knda", 396), - AliasName("Knotted_Heh", 397), - AliasName("Kore", 398), - AliasName("Kthi", 399), - AliasName("L", 400), - AliasName("LC", 401), - AliasName("LE", 402), - AliasName("LF", 403), - AliasName("LO", 404), - AliasName("LOE", 405), - AliasName("LRE", 406), - AliasName("LRO", 407), - AliasName("LV", 408), - AliasName("LVT", 409), - AliasName("LVT_Syllable", 410), - AliasName("LV_Syllable", 411), - AliasName("Lam", 412), - AliasName("Lamadh", 413), - AliasName("Lana", 
414), - AliasName("Lao", 415), - AliasName("Laoo", 416), - AliasName("Latf", 417), - AliasName("Latg", 418), - AliasName("Latin", 419), - AliasName("Latin_1", 420), - AliasName("Latin_1_Supplement", 421), - AliasName("Latin_Extended_A", 422), - AliasName("Latin_Extended_Additional", 423), - AliasName("Latin_Extended_B", 424), - AliasName("Latin_Extended_C", 425), - AliasName("Latin_Extended_D", 426), - AliasName("Latn", 427), - AliasName("Lead_Canonical_Combining_Class", 428), - AliasName("Leading_Jamo", 429), - AliasName("Left", 430), - AliasName("Left_Joining", 431), - AliasName("Left_To_Right", 432), - AliasName("Left_To_Right_Embedding", 433), - AliasName("Left_To_Right_Override", 434), - AliasName("Lepc", 435), - AliasName("Lepcha", 436), - AliasName("Letter", 437), - AliasName("Letter_Number", 438), - AliasName("Letterlike_Symbols", 439), - AliasName("Limb", 440), - AliasName("Limbu", 441), - AliasName("Lina", 442), - AliasName("Linb", 443), - AliasName("Line_Break", 444), - AliasName("Line_Feed", 445), - AliasName("Line_Separator", 446), - AliasName("Linear_B", 447), - AliasName("Linear_B_Ideograms", 448), - AliasName("Linear_B_Syllabary", 449), - AliasName("Lisu", 450), - AliasName("Ll", 451), - AliasName("Lm", 452), - AliasName("Lo", 453), - AliasName("Logical_Order_Exception", 454), - AliasName("Low_Surrogates", 455), - AliasName("Lower", 456), - AliasName("Lowercase", 457), - AliasName("Lowercase_Letter", 458), - AliasName("Lowercase_Mapping", 459), - AliasName("Lt", 460), - AliasName("Lu", 461), - AliasName("Lyci", 462), - AliasName("Lycian", 463), - AliasName("Lydi", 464), - AliasName("Lydian", 465), - AliasName("M", 466), - AliasName("MB", 467), - AliasName("ML", 468), - AliasName("MN", 469), - AliasName("Mahjong_Tiles", 470), - AliasName("Malayalam", 471), - AliasName("Mand", 472), - AliasName("Mandatory_Break", 473), - AliasName("Mani", 474), - AliasName("Mark", 475), - AliasName("Math", 476), - AliasName("Math_Symbol", 477), - 
AliasName("Mathematical_Alphanumeric_Symbols", 478), - AliasName("Mathematical_Operators", 479), - AliasName("Maya", 480), - AliasName("Maybe", 481), - AliasName("Mc", 482), - AliasName("Me", 483), - AliasName("Med", 484), - AliasName("Medial", 485), - AliasName("Meem", 486), - AliasName("Meetei_Mayek", 487), - AliasName("Mero", 488), - AliasName("MidLetter", 489), - AliasName("MidNum", 490), - AliasName("MidNumLet", 491), - AliasName("Mim", 492), - AliasName("Miscellaneous_Mathematical_Symbols_A", 493), - AliasName("Miscellaneous_Mathematical_Symbols_B", 494), - AliasName("Miscellaneous_Symbols", 495), - AliasName("Miscellaneous_Symbols_And_Arrows", 496), - AliasName("Miscellaneous_Technical", 497), - AliasName("Mlym", 498), - AliasName("Mn", 499), - AliasName("Modifier_Letter", 500), - AliasName("Modifier_Symbol", 501), - AliasName("Modifier_Tone_Letters", 502), - AliasName("Mong", 503), - AliasName("Mongolian", 504), - AliasName("Moon", 505), - AliasName("Mtei", 506), - AliasName("Musical_Symbols", 507), - AliasName("Myanmar", 508), - AliasName("Myanmar_Extended_A", 509), - AliasName("Mymr", 510), - AliasName("N", 511), - AliasName("NA", 512), - AliasName("NChar", 513), - AliasName("NFC_Inert", 514), - AliasName("NFC_QC", 515), - AliasName("NFC_Quick_Check", 516), - AliasName("NFD_Inert", 517), - AliasName("NFD_QC", 518), - AliasName("NFD_Quick_Check", 519), - AliasName("NFKC_Inert", 520), - AliasName("NFKC_QC", 521), - AliasName("NFKC_Quick_Check", 522), - AliasName("NFKD_Inert", 523), - AliasName("NFKD_QC", 524), - AliasName("NFKD_Quick_Check", 525), - AliasName("NK", 526), - AliasName("NKo", 527), - AliasName("NL", 528), - AliasName("NR", 529), - AliasName("NS", 530), - AliasName("NSM", 531), - AliasName("NU", 532), - AliasName("Na", 533), - AliasName("Name", 534), - AliasName("Nar", 535), - AliasName("Narrow", 536), - AliasName("Nb", 537), - AliasName("Nd", 538), - AliasName("Neutral", 539), - AliasName("New_Tai_Lue", 540), - AliasName("Newline", 541), - 
AliasName("Next_Line", 542), - AliasName("Nkgb", 543), - AliasName("Nko", 544), - AliasName("Nkoo", 545), - AliasName("Nl", 546), - AliasName("No", 547), - AliasName("No_Block", 548), - AliasName("No_Joining_Group", 549), - AliasName("Nobreak", 550), - AliasName("Non_Joining", 551), - AliasName("Noncharacter_Code_Point", 552), - AliasName("None", 553), - AliasName("Nonspacing_Mark", 554), - AliasName("Nonstarter", 555), - AliasName("Noon", 556), - AliasName("Not_Applicable", 557), - AliasName("Not_Reordered", 558), - AliasName("Nu", 559), - AliasName("Nukta", 560), - AliasName("Number", 561), - AliasName("Number_Forms", 562), - AliasName("Numeric", 563), - AliasName("Numeric_Type", 564), - AliasName("Numeric_Value", 565), - AliasName("Nun", 566), - AliasName("Nya", 567), - AliasName("OLetter", 568), - AliasName("ON", 569), - AliasName("OP", 570), - AliasName("OV", 571), - AliasName("Ogam", 572), - AliasName("Ogham", 573), - AliasName("Ol_Chiki", 574), - AliasName("Olck", 575), - AliasName("Old_Italic", 576), - AliasName("Old_Persian", 577), - AliasName("Old_South_Arabian", 578), - AliasName("Old_Turkic", 579), - AliasName("Open_Punctuation", 580), - AliasName("Optical_Character_Recognition", 581), - AliasName("Oriya", 582), - AliasName("Orkh", 583), - AliasName("Orya", 584), - AliasName("Osma", 585), - AliasName("Osmanya", 586), - AliasName("Other", 587), - AliasName("Other_Letter", 588), - AliasName("Other_Neutral", 589), - AliasName("Other_Number", 590), - AliasName("Other_Punctuation", 591), - AliasName("Other_Symbol", 592), - AliasName("Overlay", 593), - AliasName("P", 594), - AliasName("PDF", 595), - AliasName("PO", 596), - AliasName("PP", 597), - AliasName("PR", 598), - AliasName("Paragraph_Separator", 599), - AliasName("Pat_Syn", 600), - AliasName("Pat_WS", 601), - AliasName("Pattern_Syntax", 602), - AliasName("Pattern_White_Space", 603), - AliasName("Pc", 604), - AliasName("Pd", 605), - AliasName("Pe", 606), - AliasName("Perm", 607), - AliasName("Pf", 608), 
- AliasName("Phag", 609), - AliasName("Phags_Pa", 610), - AliasName("Phaistos_Disc", 611), - AliasName("Phli", 612), - AliasName("Phlp", 613), - AliasName("Phlv", 614), - AliasName("Phnx", 615), - AliasName("Phoenician", 616), - AliasName("Phonetic_Extensions", 617), - AliasName("Phonetic_Extensions_Supplement", 618), - AliasName("Pi", 619), - AliasName("Plrd", 620), - AliasName("Po", 621), - AliasName("Pop_Directional_Format", 622), - AliasName("Postfix_Numeric", 623), - AliasName("Prefix_Numeric", 624), - AliasName("Prepend", 625), - AliasName("Private_Use", 626), - AliasName("Private_Use_Area", 627), - AliasName("Prti", 628), - AliasName("Ps", 629), - AliasName("Punctuation", 630), - AliasName("QMark", 631), - AliasName("QU", 632), - AliasName("Qaac", 633), - AliasName("Qaai", 634), - AliasName("Qaf", 635), - AliasName("Qaph", 636), - AliasName("Quotation", 637), - AliasName("Quotation_Mark", 638), - AliasName("R", 639), - AliasName("RLE", 640), - AliasName("RLO", 641), - AliasName("Radical", 642), - AliasName("Reh", 643), - AliasName("Rejang", 644), - AliasName("Reversed_Pe", 645), - AliasName("Right", 646), - AliasName("Right_Joining", 647), - AliasName("Right_To_Left", 648), - AliasName("Right_To_Left_Embedding", 649), - AliasName("Right_To_Left_Override", 650), - AliasName("Rjng", 651), - AliasName("Roro", 652), - AliasName("Rumi_Numeral_Symbols", 653), - AliasName("Runic", 654), - AliasName("Runr", 655), - AliasName("S", 656), - AliasName("SA", 657), - AliasName("SB", 658), - AliasName("SC", 659), - AliasName("SContinue", 660), - AliasName("SD", 661), - AliasName("SE", 662), - AliasName("SG", 663), - AliasName("SM", 664), - AliasName("SP", 665), - AliasName("ST", 666), - AliasName("STerm", 667), - AliasName("SY", 668), - AliasName("Sad", 669), - AliasName("Sadhe", 670), - AliasName("Samaritan", 671), - AliasName("Samr", 672), - AliasName("Sara", 673), - AliasName("Sarb", 674), - AliasName("Saur", 675), - AliasName("Saurashtra", 676), - AliasName("Sc", 677), 
- AliasName("Script", 678), - AliasName("Seen", 679), - AliasName("Segment_Separator", 680), - AliasName("Segment_Starter", 681), - AliasName("Semkath", 682), - AliasName("Sensitive", 683), - AliasName("Sentence_Break", 684), - AliasName("Sep", 685), - AliasName("Separator", 686), - AliasName("Sgnw", 687), - AliasName("Shavian", 688), - AliasName("Shaw", 689), - AliasName("Shin", 690), - AliasName("Simple_Case_Folding", 691), - AliasName("Simple_Lowercase_Mapping", 692), - AliasName("Simple_Titlecase_Mapping", 693), - AliasName("Simple_Uppercase_Mapping", 694), - AliasName("Sinh", 695), - AliasName("Sinhala", 696), - AliasName("Sk", 697), - AliasName("Sm", 698), - AliasName("Small", 699), - AliasName("Small_Form_Variants", 700), - AliasName("Sml", 701), - AliasName("So", 702), - AliasName("Soft_Dotted", 703), - AliasName("Sp", 704), - AliasName("Space", 705), - AliasName("Space_Separator", 706), - AliasName("SpacingMark", 707), - AliasName("Spacing_Mark", 708), - AliasName("Spacing_Modifier_Letters", 709), - AliasName("Specials", 710), - AliasName("Sqr", 711), - AliasName("Square", 712), - AliasName("Sub", 713), - AliasName("Sund", 714), - AliasName("Sundanese", 715), - AliasName("Sup", 716), - AliasName("Super", 717), - AliasName("Superscripts_And_Subscripts", 718), - AliasName("Supplemental_Arrows_A", 719), - AliasName("Supplemental_Arrows_B", 720), - AliasName("Supplemental_Mathematical_Operators", 721), - AliasName("Supplemental_Punctuation", 722), - AliasName("Supplementary_Private_Use_Area_A", 723), - AliasName("Supplementary_Private_Use_Area_B", 724), - AliasName("Surrogate", 725), - AliasName("Swash_Kaf", 726), - AliasName("Sylo", 727), - AliasName("Syloti_Nagri", 728), - AliasName("Symbol", 729), - AliasName("Syrc", 730), - AliasName("Syre", 731), - AliasName("Syriac", 732), - AliasName("Syriac_Waw", 733), - AliasName("Syrj", 734), - AliasName("Syrn", 735), - AliasName("T", 736), - AliasName("Tagalog", 737), - AliasName("Tagb", 738), - 
AliasName("Tagbanwa", 739), - AliasName("Tags", 740), - AliasName("Tah", 741), - AliasName("Tai_Le", 742), - AliasName("Tai_Tham", 743), - AliasName("Tai_Viet", 744), - AliasName("Tai_Xuan_Jing_Symbols", 745), - AliasName("Tale", 746), - AliasName("Talu", 747), - AliasName("Tamil", 748), - AliasName("Taml", 749), - AliasName("Tavt", 750), - AliasName("Taw", 751), - AliasName("Teh_Marbuta", 752), - AliasName("Telu", 753), - AliasName("Telugu", 754), - AliasName("Teng", 755), - AliasName("Term", 756), - AliasName("Terminal_Punctuation", 757), - AliasName("Teth", 758), - AliasName("Tfng", 759), - AliasName("Tglg", 760), - AliasName("Thaa", 761), - AliasName("Thaana", 762), - AliasName("Thai", 763), - AliasName("Tibetan", 764), - AliasName("Tibt", 765), - AliasName("Tifinagh", 766), - AliasName("Titlecase_Letter", 767), - AliasName("Titlecase_Mapping", 768), - AliasName("Trail_Canonical_Combining_Class", 769), - AliasName("Trailing_Jamo", 770), - AliasName("Transparent", 771), - AliasName("True", 772), - AliasName("U", 773), - AliasName("UIdeo", 774), - AliasName("UP", 775), - AliasName("Ugar", 776), - AliasName("Ugaritic", 777), - AliasName("Unassigned", 778), - AliasName("Unicode_1_Name", 779), - AliasName("Unified_Canadian_Aboriginal_Syllabics", 780), - AliasName("Unified_Canadian_Aboriginal_Syllabics_Extended", 781), - AliasName("Unified_Ideograph", 782), - AliasName("Unknown", 783), - AliasName("Upper", 784), - AliasName("Uppercase", 785), - AliasName("Uppercase_Letter", 786), - AliasName("Uppercase_Mapping", 787), - AliasName("V", 788), - AliasName("VR", 789), - AliasName("VS", 790), - AliasName("Vai", 791), - AliasName("Vaii", 792), - AliasName("Variation_Selector", 793), - AliasName("Variation_Selectors", 794), - AliasName("Variation_Selectors_Supplement", 795), - AliasName("Vedic_Extensions", 796), - AliasName("Vert", 797), - AliasName("Vertical", 798), - AliasName("Vertical_Forms", 799), - AliasName("Virama", 800), - AliasName("Visp", 801), - 
AliasName("Vowel_Jamo", 802), - AliasName("W", 803), - AliasName("WB", 804), - AliasName("WJ", 805), - AliasName("WS", 806), - AliasName("WSpace", 807), - AliasName("Waw", 808), - AliasName("White_Space", 809), - AliasName("Wide", 810), - AliasName("Word_Break", 811), - AliasName("Word_Joiner", 812), - AliasName("XIDC", 813), - AliasName("XIDS", 814), - AliasName("XID_Continue", 815), - AliasName("XID_Start", 816), - AliasName("XX", 817), - AliasName("Xpeo", 818), - AliasName("Xsux", 819), - AliasName("Y", 820), - AliasName("Yeh", 821), - AliasName("Yeh_Barree", 822), - AliasName("Yeh_With_Tail", 823), - AliasName("Yes", 824), - AliasName("Yi", 825), - AliasName("Yi_Radicals", 826), - AliasName("Yi_Syllables", 827), - AliasName("Yiii", 828), - AliasName("Yijing_Hexagram_Symbols", 829), - AliasName("Yudh", 830), - AliasName("Yudh_He", 831), - AliasName("Z", 832), - AliasName("ZW", 833), - AliasName("ZWSpace", 834), - AliasName("Zain", 835), - AliasName("Zhain", 836), - AliasName("Zinh", 837), - AliasName("Zl", 838), - AliasName("Zmth", 839), - AliasName("Zp", 840), - AliasName("Zs", 841), - AliasName("Zsym", 842), - AliasName("Zxxx", 843), - AliasName("Zyyy", 844), - AliasName("Zzzz", 845), - AliasName("age", 846), - AliasName("alnum", 847), - AliasName("bc", 848), - AliasName("blank", 849), - AliasName("blk", 850), - AliasName("bmg", 851), - AliasName("can", 852), - AliasName("ccc", 853), - AliasName("cf", 854), - AliasName("cntrl", 855), - AliasName("com", 856), - AliasName("digit", 857), - AliasName("dt", 858), - AliasName("ea", 859), - AliasName("enc", 860), - AliasName("fin", 861), - AliasName("font", 862), - AliasName("fra", 863), - AliasName("gc", 864), - AliasName("gcm", 865), - AliasName("graph", 866), - AliasName("hst", 867), - AliasName("init", 868), - AliasName("isc", 869), - AliasName("iso", 870), - AliasName("jg", 871), - AliasName("jt", 872), - AliasName("lb", 873), - AliasName("lc", 874), - AliasName("lccc", 875), - AliasName("med", 876), - 
AliasName("na", 877), - AliasName("na1", 878), - AliasName("nar", 879), - AliasName("nb", 880), - AliasName("nfcinert", 881), - AliasName("nfdinert", 882), - AliasName("nfkcinert", 883), - AliasName("nfkdinert", 884), - AliasName("none", 885), - AliasName("nt", 886), - AliasName("nv", 887), - AliasName("print", 888), - AliasName("punct", 889), - AliasName("sc", 890), - AliasName("scf", 891), - AliasName("segstart", 892), - AliasName("sfc", 893), - AliasName("slc", 894), - AliasName("sml", 895), - AliasName("space", 896), - AliasName("sqr", 897), - AliasName("stc", 898), - AliasName("sub", 899), - AliasName("suc", 900), - AliasName("sup", 901), - AliasName("tc", 902), - AliasName("tccc", 903), - AliasName("uc", 904), - AliasName("vert", 905), - AliasName("wide", 906), - AliasName("xdigit", 907), + AliasName("CWKCF", 120), + AliasName("CWL", 121), + AliasName("CWT", 122), + AliasName("CWU", 123), + AliasName("Cakm", 124), + AliasName("Can", 125), + AliasName("Canadian_Aboriginal", 126), + AliasName("Canadian_Syllabics", 127), + AliasName("Canonical", 128), + AliasName("Canonical_Combining_Class", 129), + AliasName("Cans", 130), + AliasName("Cari", 131), + AliasName("Carian", 132), + AliasName("Carriage_Return", 133), + AliasName("Case_Folding", 134), + AliasName("Case_Ignorable", 135), + AliasName("Case_Sensitive", 136), + AliasName("Cased", 137), + AliasName("Cased_Letter", 138), + AliasName("Cc", 139), + AliasName("Cf", 140), + AliasName("Cham", 141), + AliasName("Changes_When_Casefolded", 142), + AliasName("Changes_When_Casemapped", 143), + AliasName("Changes_When_Lowercased", 144), + AliasName("Changes_When_NFKC_Casefolded", 145), + AliasName("Changes_When_Titlecased", 146), + AliasName("Changes_When_Uppercased", 147), + AliasName("Cher", 148), + AliasName("Cherokee", 149), + AliasName("Circle", 150), + AliasName("Cirt", 151), + AliasName("Close", 152), + AliasName("Close_Parenthesis", 153), + AliasName("Close_Punctuation", 154), + AliasName("Cn", 155), + 
AliasName("Co", 156), + AliasName("Com", 157), + AliasName("Combining_Diacritical_Marks", 158), + AliasName("Combining_Diacritical_Marks_For_Symbols", 159), + AliasName("Combining_Diacritical_Marks_Supplement", 160), + AliasName("Combining_Half_Marks", 161), + AliasName("Combining_Mark", 162), + AliasName("Combining_Marks_For_Symbols", 163), + AliasName("Common", 164), + AliasName("Common_Indic_Number_Forms", 165), + AliasName("Common_Separator", 166), + AliasName("Comp_Ex", 167), + AliasName("Compat", 168), + AliasName("Complex_Context", 169), + AliasName("Connector_Punctuation", 170), + AliasName("Contingent_Break", 171), + AliasName("Control", 172), + AliasName("Control_Pictures", 173), + AliasName("Copt", 174), + AliasName("Coptic", 175), + AliasName("Counting_Rod_Numerals", 176), + AliasName("Cprt", 177), + AliasName("Cs", 178), + AliasName("Cuneiform", 179), + AliasName("Cuneiform_Numbers_And_Punctuation", 180), + AliasName("Currency_Symbol", 181), + AliasName("Currency_Symbols", 182), + AliasName("Cypriot", 183), + AliasName("Cypriot_Syllabary", 184), + AliasName("Cyrillic", 185), + AliasName("Cyrillic_Extended_A", 186), + AliasName("Cyrillic_Extended_B", 187), + AliasName("Cyrillic_Supplement", 188), + AliasName("Cyrillic_Supplementary", 189), + AliasName("Cyrl", 190), + AliasName("Cyrs", 191), + AliasName("D", 192), + AliasName("DA", 193), + AliasName("DB", 194), + AliasName("DI", 195), + AliasName("Dal", 196), + AliasName("Dalath_Rish", 197), + AliasName("Dash", 198), + AliasName("Dash_Punctuation", 199), + AliasName("De", 200), + AliasName("Decimal", 201), + AliasName("Decimal_Number", 202), + AliasName("Decomposition_Type", 203), + AliasName("Default_Ignorable_Code_Point", 204), + AliasName("Dep", 205), + AliasName("Deprecated", 206), + AliasName("Deseret", 207), + AliasName("Deva", 208), + AliasName("Devanagari", 209), + AliasName("Devanagari_Extended", 210), + AliasName("Di", 211), + AliasName("Dia", 212), + AliasName("Diacritic", 213), + 
AliasName("Digit", 214), + AliasName("Dingbats", 215), + AliasName("Domino_Tiles", 216), + AliasName("Double_Above", 217), + AliasName("Double_Below", 218), + AliasName("Dsrt", 219), + AliasName("Dual_Joining", 220), + AliasName("E", 221), + AliasName("EN", 222), + AliasName("ES", 223), + AliasName("ET", 224), + AliasName("EX", 225), + AliasName("East_Asian_Width", 226), + AliasName("Egyd", 227), + AliasName("Egyh", 228), + AliasName("Egyp", 229), + AliasName("Egyptian_Hieroglyphs", 230), + AliasName("Enc", 231), + AliasName("Enclosed_Alphanumeric_Supplement", 232), + AliasName("Enclosed_Alphanumerics", 233), + AliasName("Enclosed_CJK_Letters_And_Months", 234), + AliasName("Enclosed_Ideographic_Supplement", 235), + AliasName("Enclosing_Mark", 236), + AliasName("Ethi", 237), + AliasName("Ethiopic", 238), + AliasName("Ethiopic_Extended", 239), + AliasName("Ethiopic_Supplement", 240), + AliasName("European_Number", 241), + AliasName("European_Separator", 242), + AliasName("European_Terminator", 243), + AliasName("Exclamation", 244), + AliasName("Ext", 245), + AliasName("Extend", 246), + AliasName("ExtendNumLet", 247), + AliasName("Extender", 248), + AliasName("F", 249), + AliasName("FO", 250), + AliasName("False", 251), + AliasName("Farsi_Yeh", 252), + AliasName("Fe", 253), + AliasName("Feh", 254), + AliasName("Fin", 255), + AliasName("Final", 256), + AliasName("Final_Punctuation", 257), + AliasName("Final_Semkath", 258), + AliasName("Font", 259), + AliasName("Format", 260), + AliasName("Fra", 261), + AliasName("Fraction", 262), + AliasName("Full_Composition_Exclusion", 263), + AliasName("Fullwidth", 264), + AliasName("GCB", 265), + AliasName("GL", 266), + AliasName("Gaf", 267), + AliasName("Gamal", 268), + AliasName("General_Category", 269), + AliasName("General_Category_Mask", 270), + AliasName("General_Punctuation", 271), + AliasName("Geok", 272), + AliasName("Geometric_Shapes", 273), + AliasName("Geor", 274), + AliasName("Georgian", 275), + 
AliasName("Georgian_Supplement", 276), + AliasName("Glag", 277), + AliasName("Glagolitic", 278), + AliasName("Glue", 279), + AliasName("Goth", 280), + AliasName("Gothic", 281), + AliasName("Gr_Base", 282), + AliasName("Gr_Ext", 283), + AliasName("Gr_Link", 284), + AliasName("Grapheme_Base", 285), + AliasName("Grapheme_Cluster_Break", 286), + AliasName("Grapheme_Extend", 287), + AliasName("Grapheme_Link", 288), + AliasName("Greek", 289), + AliasName("Greek_And_Coptic", 290), + AliasName("Greek_Extended", 291), + AliasName("Grek", 292), + AliasName("Gujarati", 293), + AliasName("Gujr", 294), + AliasName("Gurmukhi", 295), + AliasName("Guru", 296), + AliasName("H", 297), + AliasName("H2", 298), + AliasName("H3", 299), + AliasName("HY", 300), + AliasName("Hah", 301), + AliasName("Halfwidth", 302), + AliasName("Halfwidth_And_Fullwidth_Forms", 303), + AliasName("Hamza_On_Heh_Goal", 304), + AliasName("Han", 305), + AliasName("Hang", 306), + AliasName("Hangul", 307), + AliasName("Hangul_Compatibility_Jamo", 308), + AliasName("Hangul_Jamo", 309), + AliasName("Hangul_Jamo_Extended_A", 310), + AliasName("Hangul_Jamo_Extended_B", 311), + AliasName("Hangul_Syllable_Type", 312), + AliasName("Hangul_Syllables", 313), + AliasName("Hani", 314), + AliasName("Hano", 315), + AliasName("Hans", 316), + AliasName("Hant", 317), + AliasName("Hanunoo", 318), + AliasName("He", 319), + AliasName("Hebr", 320), + AliasName("Hebrew", 321), + AliasName("Heh", 322), + AliasName("Heh_Goal", 323), + AliasName("Heth", 324), + AliasName("Hex", 325), + AliasName("Hex_Digit", 326), + AliasName("High_Private_Use_Surrogates", 327), + AliasName("High_Surrogates", 328), + AliasName("Hira", 329), + AliasName("Hiragana", 330), + AliasName("Hmng", 331), + AliasName("Hrkt", 332), + AliasName("Hung", 333), + AliasName("Hyphen", 334), + AliasName("ID", 335), + AliasName("IDC", 336), + AliasName("IDS", 337), + AliasName("IDSB", 338), + AliasName("IDST", 339), + AliasName("IDS_Binary_Operator", 340), + 
AliasName("IDS_Trinary_Operator", 341), + AliasName("ID_Continue", 342), + AliasName("ID_Start", 343), + AliasName("IN", 344), + AliasName("IPA_Extensions", 345), + AliasName("IS", 346), + AliasName("ISO_Comment", 347), + AliasName("Ideo", 348), + AliasName("Ideographic", 349), + AliasName("Ideographic_Description_Characters", 350), + AliasName("Imperial_Aramaic", 351), + AliasName("Inds", 352), + AliasName("Infix_Numeric", 353), + AliasName("Inherited", 354), + AliasName("Init", 355), + AliasName("Initial", 356), + AliasName("Initial_Punctuation", 357), + AliasName("Inscriptional_Pahlavi", 358), + AliasName("Inscriptional_Parthian", 359), + AliasName("Inseparable", 360), + AliasName("Inseperable", 361), + AliasName("Iota_Subscript", 362), + AliasName("Iso", 363), + AliasName("Isolated", 364), + AliasName("Ital", 365), + AliasName("JL", 366), + AliasName("JT", 367), + AliasName("JV", 368), + AliasName("Java", 369), + AliasName("Javanese", 370), + AliasName("Join_C", 371), + AliasName("Join_Causing", 372), + AliasName("Join_Control", 373), + AliasName("Joining_Group", 374), + AliasName("Joining_Type", 375), + AliasName("Jpan", 376), + AliasName("KA", 377), + AliasName("KV", 378), + AliasName("Kaf", 379), + AliasName("Kaithi", 380), + AliasName("Kali", 381), + AliasName("Kana", 382), + AliasName("Kana_Voicing", 383), + AliasName("Kanbun", 384), + AliasName("Kangxi_Radicals", 385), + AliasName("Kannada", 386), + AliasName("Kaph", 387), + AliasName("Katakana", 388), + AliasName("Katakana_Or_Hiragana", 389), + AliasName("Katakana_Phonetic_Extensions", 390), + AliasName("Kayah_Li", 391), + AliasName("Khaph", 392), + AliasName("Khar", 393), + AliasName("Kharoshthi", 394), + AliasName("Khmer", 395), + AliasName("Khmer_Symbols", 396), + AliasName("Khmr", 397), + AliasName("Knda", 398), + AliasName("Knotted_Heh", 399), + AliasName("Kore", 400), + AliasName("Kthi", 401), + AliasName("L", 402), + AliasName("LC", 403), + AliasName("LE", 404), + AliasName("LF", 405), + 
AliasName("LO", 406), + AliasName("LOE", 407), + AliasName("LRE", 408), + AliasName("LRO", 409), + AliasName("LV", 410), + AliasName("LVT", 411), + AliasName("LVT_Syllable", 412), + AliasName("LV_Syllable", 413), + AliasName("Lam", 414), + AliasName("Lamadh", 415), + AliasName("Lana", 416), + AliasName("Lao", 417), + AliasName("Laoo", 418), + AliasName("Latf", 419), + AliasName("Latg", 420), + AliasName("Latin", 421), + AliasName("Latin_1", 422), + AliasName("Latin_1_Supplement", 423), + AliasName("Latin_Extended_A", 424), + AliasName("Latin_Extended_Additional", 425), + AliasName("Latin_Extended_B", 426), + AliasName("Latin_Extended_C", 427), + AliasName("Latin_Extended_D", 428), + AliasName("Latn", 429), + AliasName("Lead_Canonical_Combining_Class", 430), + AliasName("Leading_Jamo", 431), + AliasName("Left", 432), + AliasName("Left_Joining", 433), + AliasName("Left_To_Right", 434), + AliasName("Left_To_Right_Embedding", 435), + AliasName("Left_To_Right_Override", 436), + AliasName("Lepc", 437), + AliasName("Lepcha", 438), + AliasName("Letter", 439), + AliasName("Letter_Number", 440), + AliasName("Letterlike_Symbols", 441), + AliasName("Limb", 442), + AliasName("Limbu", 443), + AliasName("Lina", 444), + AliasName("Linb", 445), + AliasName("Line_Break", 446), + AliasName("Line_Feed", 447), + AliasName("Line_Separator", 448), + AliasName("Linear_B", 449), + AliasName("Linear_B_Ideograms", 450), + AliasName("Linear_B_Syllabary", 451), + AliasName("Lisu", 452), + AliasName("Ll", 453), + AliasName("Lm", 454), + AliasName("Lo", 455), + AliasName("Logical_Order_Exception", 456), + AliasName("Low_Surrogates", 457), + AliasName("Lower", 458), + AliasName("Lowercase", 459), + AliasName("Lowercase_Letter", 460), + AliasName("Lowercase_Mapping", 461), + AliasName("Lt", 462), + AliasName("Lu", 463), + AliasName("Lyci", 464), + AliasName("Lycian", 465), + AliasName("Lydi", 466), + AliasName("Lydian", 467), + AliasName("M", 468), + AliasName("MB", 469), + AliasName("ML", 470), + 
AliasName("MN", 471), + AliasName("Mahjong_Tiles", 472), + AliasName("Malayalam", 473), + AliasName("Mand", 474), + AliasName("Mandatory_Break", 475), + AliasName("Mani", 476), + AliasName("Mark", 477), + AliasName("Math", 478), + AliasName("Math_Symbol", 479), + AliasName("Mathematical_Alphanumeric_Symbols", 480), + AliasName("Mathematical_Operators", 481), + AliasName("Maya", 482), + AliasName("Maybe", 483), + AliasName("Mc", 484), + AliasName("Me", 485), + AliasName("Med", 486), + AliasName("Medial", 487), + AliasName("Meem", 488), + AliasName("Meetei_Mayek", 489), + AliasName("Mero", 490), + AliasName("MidLetter", 491), + AliasName("MidNum", 492), + AliasName("MidNumLet", 493), + AliasName("Mim", 494), + AliasName("Miscellaneous_Mathematical_Symbols_A", 495), + AliasName("Miscellaneous_Mathematical_Symbols_B", 496), + AliasName("Miscellaneous_Symbols", 497), + AliasName("Miscellaneous_Symbols_And_Arrows", 498), + AliasName("Miscellaneous_Technical", 499), + AliasName("Mlym", 500), + AliasName("Mn", 501), + AliasName("Modifier_Letter", 502), + AliasName("Modifier_Symbol", 503), + AliasName("Modifier_Tone_Letters", 504), + AliasName("Mong", 505), + AliasName("Mongolian", 506), + AliasName("Moon", 507), + AliasName("Mtei", 508), + AliasName("Musical_Symbols", 509), + AliasName("Myanmar", 510), + AliasName("Myanmar_Extended_A", 511), + AliasName("Mymr", 512), + AliasName("N", 513), + AliasName("NA", 514), + AliasName("NChar", 515), + AliasName("NFC_Inert", 516), + AliasName("NFC_QC", 517), + AliasName("NFC_Quick_Check", 518), + AliasName("NFD_Inert", 519), + AliasName("NFD_QC", 520), + AliasName("NFD_Quick_Check", 521), + AliasName("NFKC_Inert", 522), + AliasName("NFKC_QC", 523), + AliasName("NFKC_Quick_Check", 524), + AliasName("NFKD_Inert", 525), + AliasName("NFKD_QC", 526), + AliasName("NFKD_Quick_Check", 527), + AliasName("NK", 528), + AliasName("NKo", 529), + AliasName("NL", 530), + AliasName("NR", 531), + AliasName("NS", 532), + AliasName("NSM", 533), + 
AliasName("NU", 534), + AliasName("Na", 535), + AliasName("Name", 536), + AliasName("Nar", 537), + AliasName("Narrow", 538), + AliasName("Nb", 539), + AliasName("Nd", 540), + AliasName("Neutral", 541), + AliasName("New_Tai_Lue", 542), + AliasName("Newline", 543), + AliasName("Next_Line", 544), + AliasName("Nkgb", 545), + AliasName("Nko", 546), + AliasName("Nkoo", 547), + AliasName("Nl", 548), + AliasName("No", 549), + AliasName("No_Block", 550), + AliasName("No_Joining_Group", 551), + AliasName("Nobreak", 552), + AliasName("Non_Joining", 553), + AliasName("Noncharacter_Code_Point", 554), + AliasName("None", 555), + AliasName("Nonspacing_Mark", 556), + AliasName("Nonstarter", 557), + AliasName("Noon", 558), + AliasName("Not_Applicable", 559), + AliasName("Not_Reordered", 560), + AliasName("Nu", 561), + AliasName("Nukta", 562), + AliasName("Number", 563), + AliasName("Number_Forms", 564), + AliasName("Numeric", 565), + AliasName("Numeric_Type", 566), + AliasName("Numeric_Value", 567), + AliasName("Nun", 568), + AliasName("Nya", 569), + AliasName("OLetter", 570), + AliasName("ON", 571), + AliasName("OP", 572), + AliasName("OV", 573), + AliasName("Ogam", 574), + AliasName("Ogham", 575), + AliasName("Ol_Chiki", 576), + AliasName("Olck", 577), + AliasName("Old_Italic", 578), + AliasName("Old_Persian", 579), + AliasName("Old_South_Arabian", 580), + AliasName("Old_Turkic", 581), + AliasName("Open_Punctuation", 582), + AliasName("Optical_Character_Recognition", 583), + AliasName("Oriya", 584), + AliasName("Orkh", 585), + AliasName("Orya", 586), + AliasName("Osma", 587), + AliasName("Osmanya", 588), + AliasName("Other", 589), + AliasName("Other_Letter", 590), + AliasName("Other_Neutral", 591), + AliasName("Other_Number", 592), + AliasName("Other_Punctuation", 593), + AliasName("Other_Symbol", 594), + AliasName("Overlay", 595), + AliasName("P", 596), + AliasName("PDF", 597), + AliasName("PO", 598), + AliasName("PP", 599), + AliasName("PR", 600), + 
AliasName("Paragraph_Separator", 601), + AliasName("Pat_Syn", 602), + AliasName("Pat_WS", 603), + AliasName("Pattern_Syntax", 604), + AliasName("Pattern_White_Space", 605), + AliasName("Pc", 606), + AliasName("Pd", 607), + AliasName("Pe", 608), + AliasName("Perm", 609), + AliasName("Pf", 610), + AliasName("Phag", 611), + AliasName("Phags_Pa", 612), + AliasName("Phaistos_Disc", 613), + AliasName("Phli", 614), + AliasName("Phlp", 615), + AliasName("Phlv", 616), + AliasName("Phnx", 617), + AliasName("Phoenician", 618), + AliasName("Phonetic_Extensions", 619), + AliasName("Phonetic_Extensions_Supplement", 620), + AliasName("Pi", 621), + AliasName("Plrd", 622), + AliasName("Po", 623), + AliasName("Pop_Directional_Format", 624), + AliasName("Postfix_Numeric", 625), + AliasName("Prefix_Numeric", 626), + AliasName("Prepend", 627), + AliasName("Private_Use", 628), + AliasName("Private_Use_Area", 629), + AliasName("Prti", 630), + AliasName("Ps", 631), + AliasName("Punctuation", 632), + AliasName("QMark", 633), + AliasName("QU", 634), + AliasName("Qaac", 635), + AliasName("Qaai", 636), + AliasName("Qaf", 637), + AliasName("Qaph", 638), + AliasName("Quotation", 639), + AliasName("Quotation_Mark", 640), + AliasName("R", 641), + AliasName("RLE", 642), + AliasName("RLO", 643), + AliasName("Radical", 644), + AliasName("Reh", 645), + AliasName("Rejang", 646), + AliasName("Reversed_Pe", 647), + AliasName("Right", 648), + AliasName("Right_Joining", 649), + AliasName("Right_To_Left", 650), + AliasName("Right_To_Left_Embedding", 651), + AliasName("Right_To_Left_Override", 652), + AliasName("Rjng", 653), + AliasName("Roro", 654), + AliasName("Rumi_Numeral_Symbols", 655), + AliasName("Runic", 656), + AliasName("Runr", 657), + AliasName("S", 658), + AliasName("SA", 659), + AliasName("SB", 660), + AliasName("SC", 661), + AliasName("SContinue", 662), + AliasName("SD", 663), + AliasName("SE", 664), + AliasName("SG", 665), + AliasName("SM", 666), + AliasName("SP", 667), + AliasName("ST", 
668), + AliasName("STerm", 669), + AliasName("SY", 670), + AliasName("Sad", 671), + AliasName("Sadhe", 672), + AliasName("Samaritan", 673), + AliasName("Samr", 674), + AliasName("Sara", 675), + AliasName("Sarb", 676), + AliasName("Saur", 677), + AliasName("Saurashtra", 678), + AliasName("Sc", 679), + AliasName("Script", 680), + AliasName("Seen", 681), + AliasName("Segment_Separator", 682), + AliasName("Segment_Starter", 683), + AliasName("Semkath", 684), + AliasName("Sensitive", 685), + AliasName("Sentence_Break", 686), + AliasName("Sep", 687), + AliasName("Separator", 688), + AliasName("Sgnw", 689), + AliasName("Shavian", 690), + AliasName("Shaw", 691), + AliasName("Shin", 692), + AliasName("Simple_Case_Folding", 693), + AliasName("Simple_Lowercase_Mapping", 694), + AliasName("Simple_Titlecase_Mapping", 695), + AliasName("Simple_Uppercase_Mapping", 696), + AliasName("Sinh", 697), + AliasName("Sinhala", 698), + AliasName("Sk", 699), + AliasName("Sm", 700), + AliasName("Small", 701), + AliasName("Small_Form_Variants", 702), + AliasName("Sml", 703), + AliasName("So", 704), + AliasName("Soft_Dotted", 705), + AliasName("Sp", 706), + AliasName("Space", 707), + AliasName("Space_Separator", 708), + AliasName("SpacingMark", 709), + AliasName("Spacing_Mark", 710), + AliasName("Spacing_Modifier_Letters", 711), + AliasName("Specials", 712), + AliasName("Sqr", 713), + AliasName("Square", 714), + AliasName("Sub", 715), + AliasName("Sund", 716), + AliasName("Sundanese", 717), + AliasName("Sup", 718), + AliasName("Super", 719), + AliasName("Superscripts_And_Subscripts", 720), + AliasName("Supplemental_Arrows_A", 721), + AliasName("Supplemental_Arrows_B", 722), + AliasName("Supplemental_Mathematical_Operators", 723), + AliasName("Supplemental_Punctuation", 724), + AliasName("Supplementary_Private_Use_Area_A", 725), + AliasName("Supplementary_Private_Use_Area_B", 726), + AliasName("Surrogate", 727), + AliasName("Swash_Kaf", 728), + AliasName("Sylo", 729), + 
AliasName("Syloti_Nagri", 730), + AliasName("Symbol", 731), + AliasName("Syrc", 732), + AliasName("Syre", 733), + AliasName("Syriac", 734), + AliasName("Syriac_Waw", 735), + AliasName("Syrj", 736), + AliasName("Syrn", 737), + AliasName("T", 738), + AliasName("Tagalog", 739), + AliasName("Tagb", 740), + AliasName("Tagbanwa", 741), + AliasName("Tags", 742), + AliasName("Tah", 743), + AliasName("Tai_Le", 744), + AliasName("Tai_Tham", 745), + AliasName("Tai_Viet", 746), + AliasName("Tai_Xuan_Jing_Symbols", 747), + AliasName("Tale", 748), + AliasName("Talu", 749), + AliasName("Tamil", 750), + AliasName("Taml", 751), + AliasName("Tavt", 752), + AliasName("Taw", 753), + AliasName("Teh_Marbuta", 754), + AliasName("Telu", 755), + AliasName("Telugu", 756), + AliasName("Teng", 757), + AliasName("Term", 758), + AliasName("Terminal_Punctuation", 759), + AliasName("Teth", 760), + AliasName("Tfng", 761), + AliasName("Tglg", 762), + AliasName("Thaa", 763), + AliasName("Thaana", 764), + AliasName("Thai", 765), + AliasName("Tibetan", 766), + AliasName("Tibt", 767), + AliasName("Tifinagh", 768), + AliasName("Titlecase_Letter", 769), + AliasName("Titlecase_Mapping", 770), + AliasName("Trail_Canonical_Combining_Class", 771), + AliasName("Trailing_Jamo", 772), + AliasName("Transparent", 773), + AliasName("True", 774), + AliasName("U", 775), + AliasName("UIdeo", 776), + AliasName("UP", 777), + AliasName("Ugar", 778), + AliasName("Ugaritic", 779), + AliasName("Unassigned", 780), + AliasName("Unicode_1_Name", 781), + AliasName("Unified_Canadian_Aboriginal_Syllabics", 782), + AliasName("Unified_Canadian_Aboriginal_Syllabics_Extended", 783), + AliasName("Unified_Ideograph", 784), + AliasName("Unknown", 785), + AliasName("Upper", 786), + AliasName("Uppercase", 787), + AliasName("Uppercase_Letter", 788), + AliasName("Uppercase_Mapping", 789), + AliasName("V", 790), + AliasName("VR", 791), + AliasName("VS", 792), + AliasName("Vai", 793), + AliasName("Vaii", 794), + 
AliasName("Variation_Selector", 795), + AliasName("Variation_Selectors", 796), + AliasName("Variation_Selectors_Supplement", 797), + AliasName("Vedic_Extensions", 798), + AliasName("Vert", 799), + AliasName("Vertical", 800), + AliasName("Vertical_Forms", 801), + AliasName("Virama", 802), + AliasName("Visp", 803), + AliasName("Vowel_Jamo", 804), + AliasName("W", 805), + AliasName("WB", 806), + AliasName("WJ", 807), + AliasName("WS", 808), + AliasName("WSpace", 809), + AliasName("Waw", 810), + AliasName("White_Space", 811), + AliasName("Wide", 812), + AliasName("Word_Break", 813), + AliasName("Word_Joiner", 814), + AliasName("XIDC", 815), + AliasName("XIDS", 816), + AliasName("XID_Continue", 817), + AliasName("XID_Start", 818), + AliasName("XX", 819), + AliasName("Xpeo", 820), + AliasName("Xsux", 821), + AliasName("Y", 822), + AliasName("Yeh", 823), + AliasName("Yeh_Barree", 824), + AliasName("Yeh_With_Tail", 825), + AliasName("Yes", 826), + AliasName("Yi", 827), + AliasName("Yi_Radicals", 828), + AliasName("Yi_Syllables", 829), + AliasName("Yiii", 830), + AliasName("Yijing_Hexagram_Symbols", 831), + AliasName("Yudh", 832), + AliasName("Yudh_He", 833), + AliasName("Z", 834), + AliasName("ZW", 835), + AliasName("ZWSpace", 836), + AliasName("Zain", 837), + AliasName("Zhain", 838), + AliasName("Zinh", 839), + AliasName("Zl", 840), + AliasName("Zmth", 841), + AliasName("Zp", 842), + AliasName("Zs", 843), + AliasName("Zsym", 844), + AliasName("Zxxx", 845), + AliasName("Zyyy", 846), + AliasName("Zzzz", 847), + AliasName("age", 848), + AliasName("alnum", 849), + AliasName("bc", 850), + AliasName("blank", 851), + AliasName("blk", 852), + AliasName("bmg", 853), + AliasName("can", 854), + AliasName("ccc", 855), + AliasName("cf", 856), + AliasName("cntrl", 857), + AliasName("com", 858), + AliasName("digit", 859), + AliasName("dt", 860), + AliasName("ea", 861), + AliasName("enc", 862), + AliasName("fin", 863), + AliasName("font", 864), + AliasName("fra", 865), + AliasName("gc", 
866), + AliasName("gcm", 867), + AliasName("graph", 868), + AliasName("hst", 869), + AliasName("init", 870), + AliasName("isc", 871), + AliasName("iso", 872), + AliasName("jg", 873), + AliasName("jt", 874), + AliasName("lb", 875), + AliasName("lc", 876), + AliasName("lccc", 877), + AliasName("med", 878), + AliasName("na", 879), + AliasName("na1", 880), + AliasName("nar", 881), + AliasName("nb", 882), + AliasName("nfcinert", 883), + AliasName("nfdinert", 884), + AliasName("nfkcinert", 885), + AliasName("nfkdinert", 886), + AliasName("none", 887), + AliasName("nt", 888), + AliasName("nv", 889), + AliasName("print", 890), + AliasName("punct", 891), + AliasName("sc", 892), + AliasName("scf", 893), + AliasName("segstart", 894), + AliasName("sfc", 895), + AliasName("slc", 896), + AliasName("sml", 897), + AliasName("space", 898), + AliasName("sqr", 899), + AliasName("stc", 900), + AliasName("sub", 901), + AliasName("suc", 902), + AliasName("sup", 903), + AliasName("tc", 904), + AliasName("tccc", 905), + AliasName("uc", 906), + AliasName("vert", 907), + AliasName("wide", 908), + AliasName("xdigit", 909), }; /* to be filled in */ -int32_t REMAP[908]; +int32_t REMAP[910]; -const int32_t NAME_GROUP_COUNT = 1374; +const int32_t NAME_GROUP_COUNT = 1376; int32_t NAME_GROUP[] = { - 114, -170, /* 0: "CN", "Control" */ + 114, -172, /* 0: "CN", "Control" */ 116, -116, /* 2: "CR", "CR" */ - 223, -244, /* 4: "EX", "Extend" */ - 400, -400, /* 6: "L", "L" */ - 403, -403, /* 8: "LF", "LF" */ - 408, -408, /* 10: "LV", "LV" */ - 409, -409, /* 12: "LVT", "LVT" */ - 817, -587, /* 14: "XX", "Other" */ - 597, -625, /* 16: "PP", "Prepend" */ - 664, -707, /* 18: "SM", "SpacingMark" */ - 736, -736, /* 20: "T", "T" */ - 788, -788, /* 22: "V", "V" */ - 466, -481, /* 24: "M", "Maybe" */ - 511, -547, /* 26: "N", "No" */ - 820, -824, /* 28: "Y", "Yes" */ + 225, -246, /* 4: "EX", "Extend" */ + 402, -402, /* 6: "L", "L" */ + 405, -405, /* 8: "LF", "LF" */ + 410, -410, /* 10: "LV", "LV" */ + 411, -411, 
/* 12: "LVT", "LVT" */ + 819, -589, /* 14: "XX", "Other" */ + 599, -627, /* 16: "PP", "Prepend" */ + 666, -709, /* 18: "SM", "SpacingMark" */ + 738, -738, /* 20: "T", "T" */ + 790, -790, /* 22: "V", "V" */ + 468, -483, /* 24: "M", "Maybe" */ + 513, -549, /* 26: "N", "No" */ + 822, -826, /* 28: "Y", "Yes" */ 10, -15, /* 30: "AT", "ATerm" */ - 112, -150, /* 32: "CL", "Close" */ - 248, -258, /* 34: "FO", "Format" */ - 404, -456, /* 36: "LO", "Lower" */ - 532, -563, /* 38: "NU", "Numeric" */ - 402, -568, /* 40: "LE", "OLetter" */ - 659, -660, /* 42: "SC", "SContinue" */ - 662, -685, /* 44: "SE", "Sep" */ - 665, -704, /* 46: "SP", "Sp" */ - 666, -667, /* 48: "ST", "STerm" */ - 775, -784, /* 50: "UP", "Upper" */ - 402, -5, /* 52: "LE", "ALetter" */ - 244, -244, /* 54: "Extend", "Extend" */ - 223, -245, /* 56: "EX", "ExtendNumLet" */ - 375, -386, /* 58: "KA", "Katakana" */ - 468, -489, /* 60: "ML", "MidLetter" */ - 469, -490, /* 62: "MN", "MidNum" */ - 467, -491, /* 64: "MB", "MidNumLet" */ - 528, -541, /* 66: "NL", "Newline" */ + 112, -152, /* 32: "CL", "Close" */ + 250, -260, /* 34: "FO", "Format" */ + 406, -458, /* 36: "LO", "Lower" */ + 534, -565, /* 38: "NU", "Numeric" */ + 404, -570, /* 40: "LE", "OLetter" */ + 661, -662, /* 42: "SC", "SContinue" */ + 664, -687, /* 44: "SE", "Sep" */ + 667, -706, /* 46: "SP", "Sp" */ + 668, -669, /* 48: "ST", "STerm" */ + 777, -786, /* 50: "UP", "Upper" */ + 404, -5, /* 52: "LE", "ALetter" */ + 246, -246, /* 54: "Extend", "Extend" */ + 225, -247, /* 56: "EX", "ExtendNumLet" */ + 377, -388, /* 58: "KA", "Katakana" */ + 470, -491, /* 60: "ML", "MidLetter" */ + 471, -492, /* 62: "MN", "MidNum" */ + 469, -493, /* 64: "MB", "MidNumLet" */ + 530, -543, /* 66: "NL", "Newline" */ 24, -25, /* 68: "Alpha", "Alphabetic" */ 2, -9, /* 70: "AHex", "ASCII_Hex_Digit" */ 70, -72, /* 72: "Bidi_C", "Bidi_Control" */ 73, -74, /* 74: "Bidi_M", "Bidi_Mirrored" */ - 136, -136, /* 76: "Cased", "Cased" */ - 100, -134, /* 78: "CI", "Case_Ignorable" */ - 683, 
-135, /* 80: "Sensitive", "Case_Sensitive" */ - 118, -141, /* 82: "CWCF", "Changes_When_Casefolded" */ - 119, -142, /* 84: "CWCM", "Changes_When_Casemapped" */ - 120, -143, /* 86: "CWL", "Changes_When_Lowercased" */ - 121, -144, /* 88: "CWT", "Changes_When_Titlecased" */ - 122, -145, /* 90: "CWU", "Changes_When_Uppercased" */ - 196, -196, /* 92: "Dash", "Dash" */ - 193, -202, /* 94: "DI", "Default_Ignorable_Code_Point" */ - 203, -204, /* 96: "Dep", "Deprecated" */ - 210, -211, /* 98: "Dia", "Diacritic" */ - 243, -246, /* 100: "Ext", "Extender" */ - 165, -261, /* 102: "Comp_Ex", "Full_Composition_Exclusion" */ - 280, -283, /* 104: "Gr_Base", "Grapheme_Base" */ - 281, -285, /* 106: "Gr_Ext", "Grapheme_Extend" */ - 282, -286, /* 108: "Gr_Link", "Grapheme_Link" */ - 323, -324, /* 110: "Hex", "Hex_Digit" */ - 332, -332, /* 112: "Hyphen", "Hyphen" */ - 346, -347, /* 114: "Ideo", "Ideographic" */ - 336, -338, /* 116: "IDSB", "IDS_Binary_Operator" */ - 337, -339, /* 118: "IDST", "IDS_Trinary_Operator" */ - 334, -340, /* 120: "IDC", "ID_Continue" */ - 335, -341, /* 122: "IDS", "ID_Start" */ - 369, -371, /* 124: "Join_C", "Join_Control" */ - 405, -454, /* 126: "LOE", "Logical_Order_Exception" */ - 456, -457, /* 128: "Lower", "Lowercase" */ - 476, -476, /* 130: "Math", "Math" */ - 881, -514, /* 132: "nfcinert", "NFC_Inert" */ - 882, -517, /* 134: "nfdinert", "NFD_Inert" */ - 883, -520, /* 136: "nfkcinert", "NFKC_Inert" */ - 884, -523, /* 138: "nfkdinert", "NFKD_Inert" */ - 513, -552, /* 140: "NChar", "Noncharacter_Code_Point" */ - 600, -602, /* 142: "Pat_Syn", "Pattern_Syntax" */ - 601, -603, /* 144: "Pat_WS", "Pattern_White_Space" */ - 0, -847, /* 146: "", "alnum" */ - 0, -849, /* 148: "", "blank" */ - 0, -866, /* 150: "", "graph" */ - 0, -888, /* 152: "", "print" */ - 0, -907, /* 154: "", "xdigit" */ - 631, -638, /* 156: "QMark", "Quotation_Mark" */ - 642, -642, /* 158: "Radical", "Radical" */ - 892, -681, /* 160: "segstart", "Segment_Starter" */ - 661, -703, /* 162: "SD", 
"Soft_Dotted" */ - 667, -667, /* 164: "STerm", "STerm" */ - 756, -757, /* 166: "Term", "Terminal_Punctuation" */ - 774, -782, /* 168: "UIdeo", "Unified_Ideograph" */ - 784, -785, /* 170: "Upper", "Uppercase" */ - 790, -793, /* 172: "VS", "Variation_Selector" */ - 807, 809, -896, /* 174: "WSpace", "White_Space", "space" */ - 813, -815, /* 177: "XIDC", "XID_Continue" */ - 814, -816, /* 179: "XIDS", "XID_Start" */ - 887, -565, /* 181: "nv", "Numeric_Value" */ - 848, -71, /* 183: "bc", "Bidi_Class" */ - 850, -77, /* 185: "blk", "Block" */ - 853, -128, /* 187: "ccc", "Canonical_Combining_Class" */ - 858, -201, /* 189: "dt", "Decomposition_Type" */ - 859, -224, /* 191: "ea", "East_Asian_Width" */ - 864, -267, /* 193: "gc", "General_Category" */ - 263, -284, /* 195: "GCB", "Grapheme_Cluster_Break" */ - 867, -310, /* 197: "hst", "Hangul_Syllable_Type" */ - 871, -372, /* 199: "jg", "Joining_Group" */ - 872, -373, /* 201: "jt", "Joining_Type" */ - 875, -428, /* 203: "lccc", "Lead_Canonical_Combining_Class" */ - 873, -444, /* 205: "lb", "Line_Break" */ - 515, -516, /* 207: "NFC_QC", "NFC_Quick_Check" */ - 518, -519, /* 209: "NFD_QC", "NFD_Quick_Check" */ - 521, -522, /* 211: "NFKC_QC", "NFKC_Quick_Check" */ - 524, -525, /* 213: "NFKD_QC", "NFKD_Quick_Check" */ - 886, -564, /* 215: "nt", "Numeric_Type" */ - 890, -678, /* 217: "sc", "Script" */ - 658, -684, /* 219: "SB", "Sentence_Break" */ - 903, -769, /* 221: "tccc", "Trail_Canonical_Combining_Class" */ - 804, -811, /* 223: "WB", "Word_Break" */ - 865, -268, /* 225: "gcm", "General_Category_Mask" */ - 846, -20, /* 227: "age", "Age" */ - 851, -75, /* 229: "bmg", "Bidi_Mirroring_Glyph" */ - 854, -133, /* 231: "cf", "Case_Folding" */ - 869, -345, /* 233: "isc", "ISO_Comment" */ - 874, -459, /* 235: "lc", "Lowercase_Mapping" */ - 877, -534, /* 237: "na", "Name" */ - 891, 691, -893, /* 239: "scf", "Simple_Case_Folding", "sfc" */ - 894, -692, /* 242: "slc", "Simple_Lowercase_Mapping" */ - 898, -693, /* 244: "stc", 
"Simple_Titlecase_Mapping" */ - 900, -694, /* 246: "suc", "Simple_Uppercase_Mapping" */ - 902, -768, /* 248: "tc", "Titlecase_Mapping" */ - 878, -779, /* 250: "na1", "Unicode_1_Name" */ - 904, -787, /* 252: "uc", "Uppercase_Mapping" */ - 6, -34, /* 254: "AN", "Arabic_Number" */ - 49, -599, /* 256: "B", "Paragraph_Separator" */ - 55, -82, /* 258: "BN", "Boundary_Neutral" */ - 117, -164, /* 260: "CS", "Common_Separator" */ - 531, -554, /* 262: "NSM", "Nonspacing_Mark" */ - 220, -239, /* 264: "EN", "European_Number" */ - 221, -240, /* 266: "ES", "European_Separator" */ - 222, -241, /* 268: "ET", "European_Terminator" */ - 400, -432, /* 270: "L", "Left_To_Right" */ - 406, -433, /* 272: "LRE", "Left_To_Right_Embedding" */ - 407, -434, /* 274: "LRO", "Left_To_Right_Override" */ - 569, -589, /* 276: "ON", "Other_Neutral" */ - 595, -622, /* 278: "PDF", "Pop_Directional_Format" */ - 639, -648, /* 280: "R", "Right_To_Left" */ - 4, -33, /* 282: "AL", "Arabic_Letter" */ - 640, -649, /* 284: "RLE", "Right_To_Left_Embedding" */ - 641, -650, /* 286: "RLO", "Right_To_Left_Override" */ - 656, -680, /* 288: "S", "Segment_Separator" */ - 806, -809, /* 290: "WS", "White_Space" */ - 511, 547, 247, -249, /* 292: "N", "No", "F", "False" */ - 820, 824, 736, -772, /* 296: "Y", "Yes", "T", "True" */ - 0, -19, /* 300: "", "Aegean_Numbers" */ - 0, -26, /* 302: "", "Alphabetic_Presentation_Forms" */ - 0, -28, /* 304: "", "Ancient_Greek_Musical_Notation" */ - 0, -29, /* 306: "", "Ancient_Greek_Numbers" */ - 0, -30, /* 308: "", "Ancient_Symbols" */ - 0, -32, /* 310: "", "Arabic" */ - 0, 36, -35, /* 312: "", "Arabic_Presentation_Forms_A", "Arabic_Presentation_Forms-A" */ - 0, -37, /* 315: "", "Arabic_Presentation_Forms_B" */ - 0, -38, /* 317: "", "Arabic_Supplement" */ - 0, -39, /* 319: "", "Armenian" */ - 0, -42, /* 321: "", "Arrows" */ - 0, -47, /* 323: "", "Avestan" */ - 0, -58, /* 325: "", "Balinese" */ - 0, -60, /* 327: "", "Bamum" */ - 0, 61, -8, /* 329: "", "Basic_Latin", "ASCII" */ - 0, 
-68, /* 332: "", "Bengali" */ - 0, -78, /* 334: "", "Block_Elements" */ - 0, -80, /* 336: "", "Bopomofo" */ - 0, -81, /* 338: "", "Bopomofo_Extended" */ - 0, -83, /* 340: "", "Box_Drawing" */ - 0, -87, /* 342: "", "Braille_Patterns" */ - 0, -93, /* 344: "", "Buginese" */ - 0, -95, /* 346: "", "Buhid" */ - 0, -97, /* 348: "", "Byzantine_Musical_Symbols" */ - 0, -131, /* 350: "", "Carian" */ - 0, -140, /* 352: "", "Cham" */ - 0, -147, /* 354: "", "Cherokee" */ - 0, -101, /* 356: "", "CJK_Compatibility" */ - 0, -102, /* 358: "", "CJK_Compatibility_Forms" */ - 0, -103, /* 360: "", "CJK_Compatibility_Ideographs" */ - 0, -104, /* 362: "", "CJK_Compatibility_Ideographs_Supplement" */ - 0, -105, /* 364: "", "CJK_Radicals_Supplement" */ - 0, -106, /* 366: "", "CJK_Strokes" */ - 0, -107, /* 368: "", "CJK_Symbols_And_Punctuation" */ - 0, -108, /* 370: "", "CJK_Unified_Ideographs" */ - 0, -109, /* 372: "", "CJK_Unified_Ideographs_Extension_A" */ - 0, -110, /* 374: "", "CJK_Unified_Ideographs_Extension_B" */ - 0, -111, /* 376: "", "CJK_Unified_Ideographs_Extension_C" */ - 0, -156, /* 378: "", "Combining_Diacritical_Marks" */ - 0, -158, /* 380: "", "Combining_Diacritical_Marks_Supplement" */ - 0, -159, /* 382: "", "Combining_Half_Marks" */ - 0, 157, -161, /* 384: "", "Combining_Diacritical_Marks_For_Symbols", "Combining_Marks_For_Symbols" */ - 0, -163, /* 387: "", "Common_Indic_Number_Forms" */ - 0, -171, /* 389: "", "Control_Pictures" */ - 0, -173, /* 391: "", "Coptic" */ - 0, -174, /* 393: "", "Counting_Rod_Numerals" */ - 0, -177, /* 395: "", "Cuneiform" */ - 0, -178, /* 397: "", "Cuneiform_Numbers_And_Punctuation" */ - 0, -180, /* 399: "", "Currency_Symbols" */ - 0, -182, /* 401: "", "Cypriot_Syllabary" */ - 0, -183, /* 403: "", "Cyrillic" */ - 0, -184, /* 405: "", "Cyrillic_Extended_A" */ - 0, -185, /* 407: "", "Cyrillic_Extended_B" */ - 0, 186, -187, /* 409: "", "Cyrillic_Supplement", "Cyrillic_Supplementary" */ - 0, -205, /* 412: "", "Deseret" */ - 0, -207, /* 414: "", 
"Devanagari" */ - 0, -208, /* 416: "", "Devanagari_Extended" */ - 0, -213, /* 418: "", "Dingbats" */ - 0, -214, /* 420: "", "Domino_Tiles" */ - 0, -228, /* 422: "", "Egyptian_Hieroglyphs" */ - 0, -231, /* 424: "", "Enclosed_Alphanumerics" */ - 0, -230, /* 426: "", "Enclosed_Alphanumeric_Supplement" */ - 0, -232, /* 428: "", "Enclosed_CJK_Letters_And_Months" */ - 0, -233, /* 430: "", "Enclosed_Ideographic_Supplement" */ - 0, -236, /* 432: "", "Ethiopic" */ - 0, -237, /* 434: "", "Ethiopic_Extended" */ - 0, -238, /* 436: "", "Ethiopic_Supplement" */ - 0, -269, /* 438: "", "General_Punctuation" */ - 0, -271, /* 440: "", "Geometric_Shapes" */ - 0, -273, /* 442: "", "Georgian" */ - 0, -274, /* 444: "", "Georgian_Supplement" */ - 0, -276, /* 446: "", "Glagolitic" */ - 0, -279, /* 448: "", "Gothic" */ - 0, 288, -287, /* 450: "", "Greek_And_Coptic", "Greek" */ - 0, -289, /* 453: "", "Greek_Extended" */ - 0, -291, /* 455: "", "Gujarati" */ - 0, -293, /* 457: "", "Gurmukhi" */ - 0, -301, /* 459: "", "Halfwidth_And_Fullwidth_Forms" */ - 0, -306, /* 461: "", "Hangul_Compatibility_Jamo" */ - 0, -307, /* 463: "", "Hangul_Jamo" */ - 0, -308, /* 465: "", "Hangul_Jamo_Extended_A" */ - 0, -309, /* 467: "", "Hangul_Jamo_Extended_B" */ - 0, -311, /* 469: "", "Hangul_Syllables" */ - 0, -316, /* 471: "", "Hanunoo" */ - 0, -319, /* 473: "", "Hebrew" */ - 0, -325, /* 475: "", "High_Private_Use_Surrogates" */ - 0, -326, /* 477: "", "High_Surrogates" */ - 0, -328, /* 479: "", "Hiragana" */ - 0, -348, /* 481: "", "Ideographic_Description_Characters" */ - 0, -349, /* 483: "", "Imperial_Aramaic" */ - 0, -356, /* 485: "", "Inscriptional_Pahlavi" */ - 0, -357, /* 487: "", "Inscriptional_Parthian" */ - 0, -343, /* 489: "", "IPA_Extensions" */ - 0, -368, /* 491: "", "Javanese" */ - 0, -378, /* 493: "", "Kaithi" */ - 0, -382, /* 495: "", "Kanbun" */ - 0, -383, /* 497: "", "Kangxi_Radicals" */ - 0, -384, /* 499: "", "Kannada" */ - 0, -386, /* 501: "", "Katakana" */ - 0, -388, /* 503: "", 
"Katakana_Phonetic_Extensions" */ - 0, -389, /* 505: "", "Kayah_Li" */ - 0, -392, /* 507: "", "Kharoshthi" */ - 0, -393, /* 509: "", "Khmer" */ - 0, -394, /* 511: "", "Khmer_Symbols" */ - 0, -415, /* 513: "", "Lao" */ - 0, 421, -420, /* 515: "", "Latin_1_Supplement", "Latin_1" */ - 0, -422, /* 518: "", "Latin_Extended_A" */ - 0, -423, /* 520: "", "Latin_Extended_Additional" */ - 0, -424, /* 522: "", "Latin_Extended_B" */ - 0, -425, /* 524: "", "Latin_Extended_C" */ - 0, -426, /* 526: "", "Latin_Extended_D" */ - 0, -436, /* 528: "", "Lepcha" */ - 0, -439, /* 530: "", "Letterlike_Symbols" */ - 0, -441, /* 532: "", "Limbu" */ - 0, -448, /* 534: "", "Linear_B_Ideograms" */ - 0, -449, /* 536: "", "Linear_B_Syllabary" */ - 0, -450, /* 538: "", "Lisu" */ - 0, -455, /* 540: "", "Low_Surrogates" */ - 0, -463, /* 542: "", "Lycian" */ - 0, -465, /* 544: "", "Lydian" */ - 0, -470, /* 546: "", "Mahjong_Tiles" */ - 0, -471, /* 548: "", "Malayalam" */ - 0, -478, /* 550: "", "Mathematical_Alphanumeric_Symbols" */ - 0, -479, /* 552: "", "Mathematical_Operators" */ - 0, -487, /* 554: "", "Meetei_Mayek" */ - 0, -493, /* 556: "", "Miscellaneous_Mathematical_Symbols_A" */ - 0, -494, /* 558: "", "Miscellaneous_Mathematical_Symbols_B" */ - 0, -495, /* 560: "", "Miscellaneous_Symbols" */ - 0, -496, /* 562: "", "Miscellaneous_Symbols_And_Arrows" */ - 0, -497, /* 564: "", "Miscellaneous_Technical" */ - 0, -502, /* 566: "", "Modifier_Tone_Letters" */ - 0, -504, /* 568: "", "Mongolian" */ - 0, -507, /* 570: "", "Musical_Symbols" */ - 0, -508, /* 572: "", "Myanmar" */ - 0, -509, /* 574: "", "Myanmar_Extended_A" */ - 0, -540, /* 576: "", "New_Tai_Lue" */ - 0, -527, /* 578: "", "NKo" */ - 0, -548, /* 580: "", "No_Block" */ - 0, -562, /* 582: "", "Number_Forms" */ - 0, -573, /* 584: "", "Ogham" */ - 0, -576, /* 586: "", "Old_Italic" */ - 0, -577, /* 588: "", "Old_Persian" */ - 0, -578, /* 590: "", "Old_South_Arabian" */ - 0, -579, /* 592: "", "Old_Turkic" */ - 0, -574, /* 594: "", "Ol_Chiki" */ - 
0, -581, /* 596: "", "Optical_Character_Recognition" */ - 0, -582, /* 598: "", "Oriya" */ - 0, -586, /* 600: "", "Osmanya" */ - 0, -610, /* 602: "", "Phags_Pa" */ - 0, -611, /* 604: "", "Phaistos_Disc" */ - 0, -616, /* 606: "", "Phoenician" */ - 0, -617, /* 608: "", "Phonetic_Extensions" */ - 0, -618, /* 610: "", "Phonetic_Extensions_Supplement" */ - 0, 627, -626, /* 612: "", "Private_Use_Area", "Private_Use" */ - 0, -644, /* 615: "", "Rejang" */ - 0, -653, /* 617: "", "Rumi_Numeral_Symbols" */ - 0, -654, /* 619: "", "Runic" */ - 0, -671, /* 621: "", "Samaritan" */ - 0, -676, /* 623: "", "Saurashtra" */ - 0, -688, /* 625: "", "Shavian" */ - 0, -696, /* 627: "", "Sinhala" */ - 0, -700, /* 629: "", "Small_Form_Variants" */ - 0, -709, /* 631: "", "Spacing_Modifier_Letters" */ - 0, -710, /* 633: "", "Specials" */ - 0, -715, /* 635: "", "Sundanese" */ - 0, -718, /* 637: "", "Superscripts_And_Subscripts" */ - 0, -719, /* 639: "", "Supplemental_Arrows_A" */ - 0, -720, /* 641: "", "Supplemental_Arrows_B" */ - 0, -721, /* 643: "", "Supplemental_Mathematical_Operators" */ - 0, -722, /* 645: "", "Supplemental_Punctuation" */ - 0, -723, /* 647: "", "Supplementary_Private_Use_Area_A" */ - 0, -724, /* 649: "", "Supplementary_Private_Use_Area_B" */ - 0, -728, /* 651: "", "Syloti_Nagri" */ - 0, -732, /* 653: "", "Syriac" */ - 0, -737, /* 655: "", "Tagalog" */ - 0, -739, /* 657: "", "Tagbanwa" */ - 0, -740, /* 659: "", "Tags" */ - 0, -742, /* 661: "", "Tai_Le" */ - 0, -743, /* 663: "", "Tai_Tham" */ - 0, -744, /* 665: "", "Tai_Viet" */ - 0, -745, /* 667: "", "Tai_Xuan_Jing_Symbols" */ - 0, -748, /* 669: "", "Tamil" */ - 0, -754, /* 671: "", "Telugu" */ - 0, -762, /* 673: "", "Thaana" */ - 0, -763, /* 675: "", "Thai" */ - 0, -764, /* 677: "", "Tibetan" */ - 0, -766, /* 679: "", "Tifinagh" */ - 0, -777, /* 681: "", "Ugaritic" */ - 0, 780, -126, /* 683: "", "Unified_Canadian_Aboriginal_Syllabics", "Canadian_Syllabics" */ - 0, -781, /* 686: "", 
"Unified_Canadian_Aboriginal_Syllabics_Extended" */ - 0, -791, /* 688: "", "Vai" */ - 0, -794, /* 690: "", "Variation_Selectors" */ - 0, -795, /* 692: "", "Variation_Selectors_Supplement" */ - 0, -796, /* 694: "", "Vedic_Extensions" */ - 0, -799, /* 696: "", "Vertical_Forms" */ - 0, -829, /* 698: "", "Yijing_Hexagram_Symbols" */ - 0, -826, /* 700: "", "Yi_Radicals" */ - 0, -827, /* 702: "", "Yi_Syllables" */ - 529, -558, /* 704: "NR", "Not_Reordered" */ - 571, -593, /* 706: "OV", "Overlay" */ - 14, -46, /* 708: "ATBL", "Attached_Below_Left" */ - 13, -45, /* 710: "ATB", "Attached_Below" */ - 11, -43, /* 712: "ATA", "Attached_Above" */ - 12, -44, /* 714: "ATAR", "Attached_Above_Right" */ - 54, -65, /* 716: "BL", "Below_Left" */ - 49, -64, /* 718: "B", "Below" */ - 56, -66, /* 720: "BR", "Below_Right" */ - 400, -430, /* 722: "L", "Left" */ - 639, -646, /* 724: "R", "Right" */ - 4, -17, /* 726: "AL", "Above_Left" */ - 1, -16, /* 728: "A", "Above" */ - 7, -18, /* 730: "AR", "Above_Right" */ - 192, -216, /* 732: "DB", "Double_Below" */ - 191, -215, /* 734: "DA", "Double_Above" */ - 344, -360, /* 736: "IS", "Iota_Subscript" */ - 526, -560, /* 738: "NK", "Nukta" */ - 376, -381, /* 740: "KV", "Kana_Voicing" */ - 789, -800, /* 742: "VR", "Virama" */ - 124, 127, -852, /* 744: "Can", "Canonical", "can" */ - 229, 148, -860, /* 747: "Enc", "Circle", "enc" */ - 155, 166, -856, /* 750: "Com", "Compat", "com" */ - 253, 254, -861, /* 753: "Fin", "Final", "fin" */ - 257, -862, /* 756: "Font", "font" */ - 259, 260, -863, /* 758: "Fra", "Fraction", "fra" */ - 353, 354, -868, /* 761: "Init", "Initial", "init" */ - 361, 362, -870, /* 764: "Iso", "Isolated", "iso" */ - 484, 485, -876, /* 767: "Med", "Medial", "med" */ - 535, 536, -879, /* 770: "Nar", "Narrow", "nar" */ - 537, 550, -880, /* 773: "Nb", "Nobreak", "nb" */ - 553, -885, /* 776: "None", "none" */ - 701, 699, -895, /* 778: "Sml", "Small", "sml" */ - 711, 712, -897, /* 781: "Sqr", "Square", "sqr" */ - 713, -899, /* 784: "Sub", 
"sub" */ - 716, 717, -901, /* 786: "Sup", "Super", "sup" */ - 797, 798, -905, /* 789: "Vert", "Vertical", "vert" */ - 810, -906, /* 792: "Wide", "wide" */ - 1, -27, /* 794: "A", "Ambiguous" */ - 247, -262, /* 796: "F", "Fullwidth" */ - 295, -300, /* 798: "H", "Halfwidth" */ - 533, -536, /* 800: "Na", "Narrow" */ - 511, -539, /* 802: "N", "Neutral" */ - 803, -810, /* 804: "W", "Wide" */ - 482, -708, /* 806: "Mc", "Spacing_Mark" */ - 604, -168, /* 808: "Pc", "Connector_Punctuation" */ - 138, 170, -855, /* 810: "Cc", "Control", "cntrl" */ - 677, -179, /* 813: "Sc", "Currency_Symbol" */ - 605, -197, /* 815: "Pd", "Dash_Punctuation" */ - 538, 200, -857, /* 817: "Nd", "Decimal_Number", "digit" */ - 483, -234, /* 820: "Me", "Enclosing_Mark" */ - 606, -152, /* 822: "Pe", "Close_Punctuation" */ - 608, -255, /* 824: "Pf", "Final_Punctuation" */ - 139, -258, /* 826: "Cf", "Format" */ - 153, -778, /* 828: "Cn", "Unassigned" */ - 619, -355, /* 830: "Pi", "Initial_Punctuation" */ - 546, -438, /* 832: "Nl", "Letter_Number" */ - 838, -446, /* 834: "Zl", "Line_Separator" */ - 451, -458, /* 836: "Ll", "Lowercase_Letter" */ - 698, -477, /* 838: "Sm", "Math_Symbol" */ - 452, -500, /* 840: "Lm", "Modifier_Letter" */ - 697, -501, /* 842: "Sk", "Modifier_Symbol" */ - 499, -554, /* 844: "Mn", "Nonspacing_Mark" */ - 453, -588, /* 846: "Lo", "Other_Letter" */ - 547, -590, /* 848: "No", "Other_Number" */ - 621, -591, /* 850: "Po", "Other_Punctuation" */ - 702, -592, /* 852: "So", "Other_Symbol" */ - 840, -599, /* 854: "Zp", "Paragraph_Separator" */ - 154, -626, /* 856: "Co", "Private_Use" */ - 841, -706, /* 858: "Zs", "Space_Separator" */ - 629, -580, /* 860: "Ps", "Open_Punctuation" */ - 176, -725, /* 862: "Cs", "Surrogate" */ - 460, -767, /* 864: "Lt", "Titlecase_Letter" */ - 461, -786, /* 866: "Lu", "Uppercase_Letter" */ - 98, -587, /* 868: "C", "Other" */ - 401, -137, /* 870: "LC", "Cased_Letter" */ - 400, -437, /* 872: "L", "Letter" */ - 466, -475, /* 874: "M", "Mark" */ - 511, -561, /* 
876: "N", "Number" */ - 594, 630, -889, /* 878: "P", "Punctuation", "punct" */ - 656, -729, /* 881: "S", "Symbol" */ - 832, -686, /* 883: "Z", "Separator" */ - 400, -429, /* 885: "L", "Leading_Jamo" */ - 409, -410, /* 887: "LVT", "LVT_Syllable" */ - 408, -411, /* 889: "LV", "LV_Syllable" */ - 512, -557, /* 891: "NA", "Not_Applicable" */ - 736, -770, /* 893: "T", "Trailing_Jamo" */ - 788, -802, /* 895: "V", "Vowel_Jamo" */ - 0, -21, /* 897: "", "Ain" */ - 0, -22, /* 899: "", "Alaph" */ - 0, -23, /* 901: "", "Alef" */ - 0, -63, /* 903: "", "Beh" */ - 0, -69, /* 905: "", "Beth" */ - 0, -96, /* 907: "", "Burushaski_Yeh_Barree" */ - 0, -194, /* 909: "", "Dal" */ - 0, -195, /* 911: "", "Dalath_Rish" */ - 0, -219, /* 913: "", "E" */ - 0, -250, /* 915: "", "Farsi_Yeh" */ - 0, -251, /* 917: "", "Fe" */ - 0, -252, /* 919: "", "Feh" */ - 0, -256, /* 921: "", "Final_Semkath" */ - 0, -265, /* 923: "", "Gaf" */ - 0, -266, /* 925: "", "Gamal" */ - 0, -299, /* 927: "", "Hah" */ - 0, -302, /* 929: "", "Hamza_On_Heh_Goal" */ - 0, -317, /* 931: "", "He" */ - 0, -320, /* 933: "", "Heh" */ - 0, -321, /* 935: "", "Heh_Goal" */ - 0, -322, /* 937: "", "Heth" */ - 0, -377, /* 939: "", "Kaf" */ - 0, -385, /* 941: "", "Kaph" */ - 0, -390, /* 943: "", "Khaph" */ - 0, -397, /* 945: "", "Knotted_Heh" */ - 0, -412, /* 947: "", "Lam" */ - 0, -413, /* 949: "", "Lamadh" */ - 0, -486, /* 951: "", "Meem" */ - 0, -492, /* 953: "", "Mim" */ - 0, -556, /* 955: "", "Noon" */ - 0, -549, /* 957: "", "No_Joining_Group" */ - 0, -566, /* 959: "", "Nun" */ - 0, -567, /* 961: "", "Nya" */ - 0, -606, /* 963: "", "Pe" */ - 0, -635, /* 965: "", "Qaf" */ - 0, -636, /* 967: "", "Qaph" */ - 0, -643, /* 969: "", "Reh" */ - 0, -645, /* 971: "", "Reversed_Pe" */ - 0, -669, /* 973: "", "Sad" */ - 0, -670, /* 975: "", "Sadhe" */ - 0, -679, /* 977: "", "Seen" */ - 0, -682, /* 979: "", "Semkath" */ - 0, -690, /* 981: "", "Shin" */ - 0, -726, /* 983: "", "Swash_Kaf" */ - 0, -733, /* 985: "", "Syriac_Waw" */ - 0, -741, /* 
987: "", "Tah" */ - 0, -751, /* 989: "", "Taw" */ - 0, -752, /* 991: "", "Teh_Marbuta" */ - 0, -758, /* 993: "", "Teth" */ - 0, -808, /* 995: "", "Waw" */ - 0, -821, /* 997: "", "Yeh" */ - 0, -822, /* 999: "", "Yeh_Barree" */ - 0, -823, /* 1001: "", "Yeh_With_Tail" */ - 0, -830, /* 1003: "", "Yudh" */ - 0, -831, /* 1005: "", "Yudh_He" */ - 0, -835, /* 1007: "", "Zain" */ - 0, -836, /* 1009: "", "Zhain" */ - 190, -218, /* 1011: "D", "Dual_Joining" */ - 98, -370, /* 1013: "C", "Join_Causing" */ - 400, -431, /* 1015: "L", "Left_Joining" */ - 773, -551, /* 1017: "U", "Non_Joining" */ - 639, -647, /* 1019: "R", "Right_Joining" */ - 736, -771, /* 1021: "T", "Transparent" */ - 4, -25, /* 1023: "AL", "Alphabetic" */ - 3, -27, /* 1025: "AI", "Ambiguous" */ - 51, -88, /* 1027: "BA", "Break_After" */ - 52, -89, /* 1029: "BB", "Break_Before" */ - 50, -90, /* 1031: "B2", "Break_Both" */ - 668, -91, /* 1033: "SY", "Break_Symbols" */ - 116, -132, /* 1035: "CR", "Carriage_Return" */ - 115, -151, /* 1037: "CP", "Close_Parenthesis" */ - 112, -152, /* 1039: "CL", "Close_Punctuation" */ - 113, -160, /* 1041: "CM", "Combining_Mark" */ - 657, -167, /* 1043: "SA", "Complex_Context" */ - 99, -169, /* 1045: "CB", "Contingent_Break" */ - 223, -242, /* 1047: "EX", "Exclamation" */ - 264, -277, /* 1049: "GL", "Glue" */ - 296, -296, /* 1051: "H2", "H2" */ - 297, -297, /* 1053: "H3", "H3" */ - 298, -332, /* 1055: "HY", "Hyphen" */ - 333, -347, /* 1057: "ID", "Ideographic" */ - 344, -351, /* 1059: "IS", "Infix_Numeric" */ - 342, 358, -359, /* 1061: "IN", "Inseparable", "Inseperable" */ - 364, -364, /* 1064: "JL", "JL" */ - 365, -365, /* 1066: "JT", "JT" */ - 366, -366, /* 1068: "JV", "JV" */ - 403, -445, /* 1070: "LF", "Line_Feed" */ - 53, -473, /* 1072: "BK", "Mandatory_Break" */ - 528, -542, /* 1074: "NL", "Next_Line" */ - 530, -555, /* 1076: "NS", "Nonstarter" */ - 570, -580, /* 1078: "OP", "Open_Punctuation" */ - 596, -623, /* 1080: "PO", "Postfix_Numeric" */ - 598, -624, /* 1082: "PR", 
"Prefix_Numeric" */ - 632, -637, /* 1084: "QU", "Quotation" */ - 665, -705, /* 1086: "SP", "Space" */ - 663, -725, /* 1088: "SG", "Surrogate" */ - 817, -783, /* 1090: "XX", "Unknown" */ - 805, -812, /* 1092: "WJ", "Word_Joiner" */ - 833, -834, /* 1094: "ZW", "ZWSpace" */ - 198, -199, /* 1096: "De", "Decimal" */ - 209, -212, /* 1098: "Di", "Digit" */ - 553, -553, /* 1100: "None", "None" */ - 559, -563, /* 1102: "Nu", "Numeric" */ - 31, -32, /* 1104: "Arab", "Arabic" */ - 41, -39, /* 1106: "Armn", "Armenian" */ - 48, -47, /* 1108: "Avst", "Avestan" */ - 57, -58, /* 1110: "Bali", "Balinese" */ - 59, -60, /* 1112: "Bamu", "Bamum" */ - 62, -62, /* 1114: "Batk", "Batk" */ - 67, -68, /* 1116: "Beng", "Bengali" */ - 76, -76, /* 1118: "Blis", "Blis" */ - 614, -614, /* 1120: "Phlv", "Phlv" */ - 79, -80, /* 1122: "Bopo", "Bopomofo" */ - 84, -84, /* 1124: "Brah", "Brah" */ - 85, -86, /* 1126: "Brai", "Braille" */ - 92, -93, /* 1128: "Bugi", "Buginese" */ - 94, -95, /* 1130: "Buhd", "Buhid" */ - 129, -125, /* 1132: "Cans", "Canadian_Aboriginal" */ - 130, -131, /* 1134: "Cari", "Carian" */ - 123, -123, /* 1136: "Cakm", "Cakm" */ - 140, -140, /* 1138: "Cham", "Cham" */ - 146, -147, /* 1140: "Cher", "Cherokee" */ - 149, -149, /* 1142: "Cirt", "Cirt" */ - 844, -162, /* 1144: "Zyyy", "Common" */ - 172, 173, -633, /* 1146: "Copt", "Coptic", "Qaac" */ - 819, -177, /* 1149: "Xsux", "Cuneiform" */ - 175, -181, /* 1151: "Cprt", "Cypriot" */ - 188, -183, /* 1153: "Cyrl", "Cyrillic" */ - 225, -225, /* 1155: "Egyd", "Egyd" */ - 217, -205, /* 1157: "Dsrt", "Deseret" */ - 206, -207, /* 1159: "Deva", "Devanagari" */ - 735, -735, /* 1161: "Syrn", "Syrn" */ - 227, -228, /* 1163: "Egyp", "Egyptian_Hieroglyphs" */ - 731, -731, /* 1165: "Syre", "Syre" */ - 235, -236, /* 1167: "Ethi", "Ethiopic" */ - 272, -273, /* 1169: "Geor", "Georgian" */ - 275, -276, /* 1171: "Glag", "Glagolitic" */ - 278, -279, /* 1173: "Goth", "Gothic" */ - 290, -287, /* 1175: "Grek", "Greek" */ - 292, -291, /* 1177: "Gujr", 
"Gujarati" */ - 294, -293, /* 1179: "Guru", "Gurmukhi" */ - 312, -303, /* 1181: "Hani", "Han" */ - 304, -305, /* 1183: "Hang", "Hangul" */ - 313, -316, /* 1185: "Hano", "Hanunoo" */ - 350, -350, /* 1187: "Inds", "Inds" */ - 318, -319, /* 1189: "Hebr", "Hebrew" */ - 226, -226, /* 1191: "Egyh", "Egyh" */ - 327, -328, /* 1193: "Hira", "Hiragana" */ - 40, -349, /* 1195: "Armi", "Imperial_Aramaic" */ - 837, 352, -634, /* 1197: "Zinh", "Inherited", "Qaai" */ - 612, -356, /* 1200: "Phli", "Inscriptional_Pahlavi" */ - 628, -357, /* 1202: "Prti", "Inscriptional_Parthian" */ - 374, -374, /* 1204: "Jpan", "Jpan" */ - 367, -368, /* 1206: "Java", "Javanese" */ - 399, -378, /* 1208: "Kthi", "Kaithi" */ - 396, -384, /* 1210: "Knda", "Kannada" */ - 380, -386, /* 1212: "Kana", "Katakana" */ - 330, -387, /* 1214: "Hrkt", "Katakana_Or_Hiragana" */ - 379, -389, /* 1216: "Kali", "Kayah_Li" */ - 391, -392, /* 1218: "Khar", "Kharoshthi" */ - 395, -393, /* 1220: "Khmr", "Khmer" */ - 270, -270, /* 1222: "Geok", "Geok" */ - 398, -398, /* 1224: "Kore", "Kore" */ - 414, -743, /* 1226: "Lana", "Tai_Tham" */ - 416, -415, /* 1228: "Laoo", "Lao" */ - 427, -419, /* 1230: "Latn", "Latin" */ - 417, -417, /* 1232: "Latf", "Latf" */ - 418, -418, /* 1234: "Latg", "Latg" */ - 435, -436, /* 1236: "Lepc", "Lepcha" */ - 440, -441, /* 1238: "Limb", "Limbu" */ - 442, -442, /* 1240: "Lina", "Lina" */ - 443, -447, /* 1242: "Linb", "Linear_B" */ - 450, -450, /* 1244: "Lisu", "Lisu" */ - 462, -463, /* 1246: "Lyci", "Lycian" */ - 464, -465, /* 1248: "Lydi", "Lydian" */ - 498, -471, /* 1250: "Mlym", "Malayalam" */ - 472, -472, /* 1252: "Mand", "Mand" */ - 474, -474, /* 1254: "Mani", "Mani" */ - 839, -839, /* 1256: "Zmth", "Zmth" */ - 480, -480, /* 1258: "Maya", "Maya" */ - 506, -487, /* 1260: "Mtei", "Meetei_Mayek" */ - 488, -488, /* 1262: "Mero", "Mero" */ - 503, -504, /* 1264: "Mong", "Mongolian" */ - 505, -505, /* 1266: "Moon", "Moon" */ - 510, -508, /* 1268: "Mymr", "Myanmar" */ - 543, -543, /* 1270: "Nkgb", 
"Nkgb" */ - 747, -540, /* 1272: "Talu", "New_Tai_Lue" */ - 545, -544, /* 1274: "Nkoo", "Nko" */ - 572, -573, /* 1276: "Ogam", "Ogham" */ - 189, -189, /* 1278: "Cyrs", "Cyrs" */ - 331, -331, /* 1280: "Hung", "Hung" */ - 363, -576, /* 1282: "Ital", "Old_Italic" */ - 607, -607, /* 1284: "Perm", "Perm" */ - 818, -577, /* 1286: "Xpeo", "Old_Persian" */ - 674, -578, /* 1288: "Sarb", "Old_South_Arabian" */ - 575, -574, /* 1290: "Olck", "Ol_Chiki" */ - 584, -582, /* 1292: "Orya", "Oriya" */ - 583, -579, /* 1294: "Orkh", "Old_Turkic" */ - 585, -586, /* 1296: "Osma", "Osmanya" */ - 329, -329, /* 1298: "Hmng", "Hmng" */ - 609, -610, /* 1300: "Phag", "Phags_Pa" */ - 615, -616, /* 1302: "Phnx", "Phoenician" */ - 620, -620, /* 1304: "Plrd", "Plrd" */ - 613, -613, /* 1306: "Phlp", "Phlp" */ - 651, -644, /* 1308: "Rjng", "Rejang" */ - 652, -652, /* 1310: "Roro", "Roro" */ - 655, -654, /* 1312: "Runr", "Runic" */ - 672, -671, /* 1314: "Samr", "Samaritan" */ - 673, -673, /* 1316: "Sara", "Sara" */ - 675, -676, /* 1318: "Saur", "Saurashtra" */ - 689, -688, /* 1320: "Shaw", "Shavian" */ - 687, -687, /* 1322: "Sgnw", "Sgnw" */ - 314, -314, /* 1324: "Hans", "Hans" */ - 695, -696, /* 1326: "Sinh", "Sinhala" */ - 714, -715, /* 1328: "Sund", "Sundanese" */ - 727, -728, /* 1330: "Sylo", "Syloti_Nagri" */ - 842, -842, /* 1332: "Zsym", "Zsym" */ - 730, -732, /* 1334: "Syrc", "Syriac" */ - 760, -737, /* 1336: "Tglg", "Tagalog" */ - 738, -739, /* 1338: "Tagb", "Tagbanwa" */ - 746, -742, /* 1340: "Tale", "Tai_Le" */ - 750, -744, /* 1342: "Tavt", "Tai_Viet" */ - 749, -748, /* 1344: "Taml", "Tamil" */ - 753, -754, /* 1346: "Telu", "Telugu" */ - 755, -755, /* 1348: "Teng", "Teng" */ - 761, -762, /* 1350: "Thaa", "Thaana" */ - 763, -763, /* 1352: "Thai", "Thai" */ - 765, -764, /* 1354: "Tibt", "Tibetan" */ - 759, -766, /* 1356: "Tfng", "Tifinagh" */ - 315, -315, /* 1358: "Hant", "Hant" */ - 776, -777, /* 1360: "Ugar", "Ugaritic" */ - 845, -783, /* 1362: "Zzzz", "Unknown" */ - 843, -843, /* 1364: 
"Zxxx", "Zxxx" */ - 792, -791, /* 1366: "Vaii", "Vai" */ - 801, -801, /* 1368: "Visp", "Visp" */ - 734, -734, /* 1370: "Syrj", "Syrj" */ - 828, -825, /* 1372: "Yiii", "Yi" */ + 137, -137, /* 76: "Cased", "Cased" */ + 100, -135, /* 78: "CI", "Case_Ignorable" */ + 685, -136, /* 80: "Sensitive", "Case_Sensitive" */ + 118, -142, /* 82: "CWCF", "Changes_When_Casefolded" */ + 119, -143, /* 84: "CWCM", "Changes_When_Casemapped" */ + 121, -144, /* 86: "CWL", "Changes_When_Lowercased" */ + 120, -145, /* 88: "CWKCF", "Changes_When_NFKC_Casefolded" */ + 122, -146, /* 90: "CWT", "Changes_When_Titlecased" */ + 123, -147, /* 92: "CWU", "Changes_When_Uppercased" */ + 198, -198, /* 94: "Dash", "Dash" */ + 195, -204, /* 96: "DI", "Default_Ignorable_Code_Point" */ + 205, -206, /* 98: "Dep", "Deprecated" */ + 212, -213, /* 100: "Dia", "Diacritic" */ + 245, -248, /* 102: "Ext", "Extender" */ + 167, -263, /* 104: "Comp_Ex", "Full_Composition_Exclusion" */ + 282, -285, /* 106: "Gr_Base", "Grapheme_Base" */ + 283, -287, /* 108: "Gr_Ext", "Grapheme_Extend" */ + 284, -288, /* 110: "Gr_Link", "Grapheme_Link" */ + 325, -326, /* 112: "Hex", "Hex_Digit" */ + 334, -334, /* 114: "Hyphen", "Hyphen" */ + 348, -349, /* 116: "Ideo", "Ideographic" */ + 338, -340, /* 118: "IDSB", "IDS_Binary_Operator" */ + 339, -341, /* 120: "IDST", "IDS_Trinary_Operator" */ + 336, -342, /* 122: "IDC", "ID_Continue" */ + 337, -343, /* 124: "IDS", "ID_Start" */ + 371, -373, /* 126: "Join_C", "Join_Control" */ + 407, -456, /* 128: "LOE", "Logical_Order_Exception" */ + 458, -459, /* 130: "Lower", "Lowercase" */ + 478, -478, /* 132: "Math", "Math" */ + 883, -516, /* 134: "nfcinert", "NFC_Inert" */ + 884, -519, /* 136: "nfdinert", "NFD_Inert" */ + 885, -522, /* 138: "nfkcinert", "NFKC_Inert" */ + 886, -525, /* 140: "nfkdinert", "NFKD_Inert" */ + 515, -554, /* 142: "NChar", "Noncharacter_Code_Point" */ + 602, -604, /* 144: "Pat_Syn", "Pattern_Syntax" */ + 603, -605, /* 146: "Pat_WS", "Pattern_White_Space" */ + 0, -849, /* 
148: "", "alnum" */ + 0, -851, /* 150: "", "blank" */ + 0, -868, /* 152: "", "graph" */ + 0, -890, /* 154: "", "print" */ + 0, -909, /* 156: "", "xdigit" */ + 633, -640, /* 158: "QMark", "Quotation_Mark" */ + 644, -644, /* 160: "Radical", "Radical" */ + 894, -683, /* 162: "segstart", "Segment_Starter" */ + 663, -705, /* 164: "SD", "Soft_Dotted" */ + 669, -669, /* 166: "STerm", "STerm" */ + 758, -759, /* 168: "Term", "Terminal_Punctuation" */ + 776, -784, /* 170: "UIdeo", "Unified_Ideograph" */ + 786, -787, /* 172: "Upper", "Uppercase" */ + 792, -795, /* 174: "VS", "Variation_Selector" */ + 809, 811, -898, /* 176: "WSpace", "White_Space", "space" */ + 815, -817, /* 179: "XIDC", "XID_Continue" */ + 816, -818, /* 181: "XIDS", "XID_Start" */ + 889, -567, /* 183: "nv", "Numeric_Value" */ + 850, -71, /* 185: "bc", "Bidi_Class" */ + 852, -77, /* 187: "blk", "Block" */ + 855, -129, /* 189: "ccc", "Canonical_Combining_Class" */ + 860, -203, /* 191: "dt", "Decomposition_Type" */ + 861, -226, /* 193: "ea", "East_Asian_Width" */ + 866, -269, /* 195: "gc", "General_Category" */ + 265, -286, /* 197: "GCB", "Grapheme_Cluster_Break" */ + 869, -312, /* 199: "hst", "Hangul_Syllable_Type" */ + 873, -374, /* 201: "jg", "Joining_Group" */ + 874, -375, /* 203: "jt", "Joining_Type" */ + 877, -430, /* 205: "lccc", "Lead_Canonical_Combining_Class" */ + 875, -446, /* 207: "lb", "Line_Break" */ + 517, -518, /* 209: "NFC_QC", "NFC_Quick_Check" */ + 520, -521, /* 211: "NFD_QC", "NFD_Quick_Check" */ + 523, -524, /* 213: "NFKC_QC", "NFKC_Quick_Check" */ + 526, -527, /* 215: "NFKD_QC", "NFKD_Quick_Check" */ + 888, -566, /* 217: "nt", "Numeric_Type" */ + 892, -680, /* 219: "sc", "Script" */ + 660, -686, /* 221: "SB", "Sentence_Break" */ + 905, -771, /* 223: "tccc", "Trail_Canonical_Combining_Class" */ + 806, -813, /* 225: "WB", "Word_Break" */ + 867, -270, /* 227: "gcm", "General_Category_Mask" */ + 848, -20, /* 229: "age", "Age" */ + 853, -75, /* 231: "bmg", "Bidi_Mirroring_Glyph" */ + 856, -134, 
/* 233: "cf", "Case_Folding" */ + 871, -347, /* 235: "isc", "ISO_Comment" */ + 876, -461, /* 237: "lc", "Lowercase_Mapping" */ + 879, -536, /* 239: "na", "Name" */ + 893, 693, -895, /* 241: "scf", "Simple_Case_Folding", "sfc" */ + 896, -694, /* 244: "slc", "Simple_Lowercase_Mapping" */ + 900, -695, /* 246: "stc", "Simple_Titlecase_Mapping" */ + 902, -696, /* 248: "suc", "Simple_Uppercase_Mapping" */ + 904, -770, /* 250: "tc", "Titlecase_Mapping" */ + 880, -781, /* 252: "na1", "Unicode_1_Name" */ + 906, -789, /* 254: "uc", "Uppercase_Mapping" */ + 6, -34, /* 256: "AN", "Arabic_Number" */ + 49, -601, /* 258: "B", "Paragraph_Separator" */ + 55, -82, /* 260: "BN", "Boundary_Neutral" */ + 117, -166, /* 262: "CS", "Common_Separator" */ + 533, -556, /* 264: "NSM", "Nonspacing_Mark" */ + 222, -241, /* 266: "EN", "European_Number" */ + 223, -242, /* 268: "ES", "European_Separator" */ + 224, -243, /* 270: "ET", "European_Terminator" */ + 402, -434, /* 272: "L", "Left_To_Right" */ + 408, -435, /* 274: "LRE", "Left_To_Right_Embedding" */ + 409, -436, /* 276: "LRO", "Left_To_Right_Override" */ + 571, -591, /* 278: "ON", "Other_Neutral" */ + 597, -624, /* 280: "PDF", "Pop_Directional_Format" */ + 641, -650, /* 282: "R", "Right_To_Left" */ + 4, -33, /* 284: "AL", "Arabic_Letter" */ + 642, -651, /* 286: "RLE", "Right_To_Left_Embedding" */ + 643, -652, /* 288: "RLO", "Right_To_Left_Override" */ + 658, -682, /* 290: "S", "Segment_Separator" */ + 808, -811, /* 292: "WS", "White_Space" */ + 513, 549, 249, -251, /* 294: "N", "No", "F", "False" */ + 822, 826, 738, -774, /* 298: "Y", "Yes", "T", "True" */ + 0, -19, /* 302: "", "Aegean_Numbers" */ + 0, -26, /* 304: "", "Alphabetic_Presentation_Forms" */ + 0, -28, /* 306: "", "Ancient_Greek_Musical_Notation" */ + 0, -29, /* 308: "", "Ancient_Greek_Numbers" */ + 0, -30, /* 310: "", "Ancient_Symbols" */ + 0, -32, /* 312: "", "Arabic" */ + 0, 36, -35, /* 314: "", "Arabic_Presentation_Forms_A", "Arabic_Presentation_Forms-A" */ + 0, -37, /* 
317: "", "Arabic_Presentation_Forms_B" */ + 0, -38, /* 319: "", "Arabic_Supplement" */ + 0, -39, /* 321: "", "Armenian" */ + 0, -42, /* 323: "", "Arrows" */ + 0, -47, /* 325: "", "Avestan" */ + 0, -58, /* 327: "", "Balinese" */ + 0, -60, /* 329: "", "Bamum" */ + 0, 61, -8, /* 331: "", "Basic_Latin", "ASCII" */ + 0, -68, /* 334: "", "Bengali" */ + 0, -78, /* 336: "", "Block_Elements" */ + 0, -80, /* 338: "", "Bopomofo" */ + 0, -81, /* 340: "", "Bopomofo_Extended" */ + 0, -83, /* 342: "", "Box_Drawing" */ + 0, -87, /* 344: "", "Braille_Patterns" */ + 0, -93, /* 346: "", "Buginese" */ + 0, -95, /* 348: "", "Buhid" */ + 0, -97, /* 350: "", "Byzantine_Musical_Symbols" */ + 0, -132, /* 352: "", "Carian" */ + 0, -141, /* 354: "", "Cham" */ + 0, -149, /* 356: "", "Cherokee" */ + 0, -101, /* 358: "", "CJK_Compatibility" */ + 0, -102, /* 360: "", "CJK_Compatibility_Forms" */ + 0, -103, /* 362: "", "CJK_Compatibility_Ideographs" */ + 0, -104, /* 364: "", "CJK_Compatibility_Ideographs_Supplement" */ + 0, -105, /* 366: "", "CJK_Radicals_Supplement" */ + 0, -106, /* 368: "", "CJK_Strokes" */ + 0, -107, /* 370: "", "CJK_Symbols_And_Punctuation" */ + 0, -108, /* 372: "", "CJK_Unified_Ideographs" */ + 0, -109, /* 374: "", "CJK_Unified_Ideographs_Extension_A" */ + 0, -110, /* 376: "", "CJK_Unified_Ideographs_Extension_B" */ + 0, -111, /* 378: "", "CJK_Unified_Ideographs_Extension_C" */ + 0, -158, /* 380: "", "Combining_Diacritical_Marks" */ + 0, -160, /* 382: "", "Combining_Diacritical_Marks_Supplement" */ + 0, -161, /* 384: "", "Combining_Half_Marks" */ + 0, 159, -163, /* 386: "", "Combining_Diacritical_Marks_For_Symbols", "Combining_Marks_For_Symbols" */ + 0, -165, /* 389: "", "Common_Indic_Number_Forms" */ + 0, -173, /* 391: "", "Control_Pictures" */ + 0, -175, /* 393: "", "Coptic" */ + 0, -176, /* 395: "", "Counting_Rod_Numerals" */ + 0, -179, /* 397: "", "Cuneiform" */ + 0, -180, /* 399: "", "Cuneiform_Numbers_And_Punctuation" */ + 0, -182, /* 401: "", "Currency_Symbols" */ + 
0, -184, /* 403: "", "Cypriot_Syllabary" */ + 0, -185, /* 405: "", "Cyrillic" */ + 0, -186, /* 407: "", "Cyrillic_Extended_A" */ + 0, -187, /* 409: "", "Cyrillic_Extended_B" */ + 0, 188, -189, /* 411: "", "Cyrillic_Supplement", "Cyrillic_Supplementary" */ + 0, -207, /* 414: "", "Deseret" */ + 0, -209, /* 416: "", "Devanagari" */ + 0, -210, /* 418: "", "Devanagari_Extended" */ + 0, -215, /* 420: "", "Dingbats" */ + 0, -216, /* 422: "", "Domino_Tiles" */ + 0, -230, /* 424: "", "Egyptian_Hieroglyphs" */ + 0, -233, /* 426: "", "Enclosed_Alphanumerics" */ + 0, -232, /* 428: "", "Enclosed_Alphanumeric_Supplement" */ + 0, -234, /* 430: "", "Enclosed_CJK_Letters_And_Months" */ + 0, -235, /* 432: "", "Enclosed_Ideographic_Supplement" */ + 0, -238, /* 434: "", "Ethiopic" */ + 0, -239, /* 436: "", "Ethiopic_Extended" */ + 0, -240, /* 438: "", "Ethiopic_Supplement" */ + 0, -271, /* 440: "", "General_Punctuation" */ + 0, -273, /* 442: "", "Geometric_Shapes" */ + 0, -275, /* 444: "", "Georgian" */ + 0, -276, /* 446: "", "Georgian_Supplement" */ + 0, -278, /* 448: "", "Glagolitic" */ + 0, -281, /* 450: "", "Gothic" */ + 0, 290, -289, /* 452: "", "Greek_And_Coptic", "Greek" */ + 0, -291, /* 455: "", "Greek_Extended" */ + 0, -293, /* 457: "", "Gujarati" */ + 0, -295, /* 459: "", "Gurmukhi" */ + 0, -303, /* 461: "", "Halfwidth_And_Fullwidth_Forms" */ + 0, -308, /* 463: "", "Hangul_Compatibility_Jamo" */ + 0, -309, /* 465: "", "Hangul_Jamo" */ + 0, -310, /* 467: "", "Hangul_Jamo_Extended_A" */ + 0, -311, /* 469: "", "Hangul_Jamo_Extended_B" */ + 0, -313, /* 471: "", "Hangul_Syllables" */ + 0, -318, /* 473: "", "Hanunoo" */ + 0, -321, /* 475: "", "Hebrew" */ + 0, -327, /* 477: "", "High_Private_Use_Surrogates" */ + 0, -328, /* 479: "", "High_Surrogates" */ + 0, -330, /* 481: "", "Hiragana" */ + 0, -350, /* 483: "", "Ideographic_Description_Characters" */ + 0, -351, /* 485: "", "Imperial_Aramaic" */ + 0, -358, /* 487: "", "Inscriptional_Pahlavi" */ + 0, -359, /* 489: "", 
"Inscriptional_Parthian" */ + 0, -345, /* 491: "", "IPA_Extensions" */ + 0, -370, /* 493: "", "Javanese" */ + 0, -380, /* 495: "", "Kaithi" */ + 0, -384, /* 497: "", "Kanbun" */ + 0, -385, /* 499: "", "Kangxi_Radicals" */ + 0, -386, /* 501: "", "Kannada" */ + 0, -388, /* 503: "", "Katakana" */ + 0, -390, /* 505: "", "Katakana_Phonetic_Extensions" */ + 0, -391, /* 507: "", "Kayah_Li" */ + 0, -394, /* 509: "", "Kharoshthi" */ + 0, -395, /* 511: "", "Khmer" */ + 0, -396, /* 513: "", "Khmer_Symbols" */ + 0, -417, /* 515: "", "Lao" */ + 0, 423, -422, /* 517: "", "Latin_1_Supplement", "Latin_1" */ + 0, -424, /* 520: "", "Latin_Extended_A" */ + 0, -425, /* 522: "", "Latin_Extended_Additional" */ + 0, -426, /* 524: "", "Latin_Extended_B" */ + 0, -427, /* 526: "", "Latin_Extended_C" */ + 0, -428, /* 528: "", "Latin_Extended_D" */ + 0, -438, /* 530: "", "Lepcha" */ + 0, -441, /* 532: "", "Letterlike_Symbols" */ + 0, -443, /* 534: "", "Limbu" */ + 0, -450, /* 536: "", "Linear_B_Ideograms" */ + 0, -451, /* 538: "", "Linear_B_Syllabary" */ + 0, -452, /* 540: "", "Lisu" */ + 0, -457, /* 542: "", "Low_Surrogates" */ + 0, -465, /* 544: "", "Lycian" */ + 0, -467, /* 546: "", "Lydian" */ + 0, -472, /* 548: "", "Mahjong_Tiles" */ + 0, -473, /* 550: "", "Malayalam" */ + 0, -480, /* 552: "", "Mathematical_Alphanumeric_Symbols" */ + 0, -481, /* 554: "", "Mathematical_Operators" */ + 0, -489, /* 556: "", "Meetei_Mayek" */ + 0, -495, /* 558: "", "Miscellaneous_Mathematical_Symbols_A" */ + 0, -496, /* 560: "", "Miscellaneous_Mathematical_Symbols_B" */ + 0, -497, /* 562: "", "Miscellaneous_Symbols" */ + 0, -498, /* 564: "", "Miscellaneous_Symbols_And_Arrows" */ + 0, -499, /* 566: "", "Miscellaneous_Technical" */ + 0, -504, /* 568: "", "Modifier_Tone_Letters" */ + 0, -506, /* 570: "", "Mongolian" */ + 0, -509, /* 572: "", "Musical_Symbols" */ + 0, -510, /* 574: "", "Myanmar" */ + 0, -511, /* 576: "", "Myanmar_Extended_A" */ + 0, -542, /* 578: "", "New_Tai_Lue" */ + 0, -529, /* 580: "", "NKo" 
*/ + 0, -550, /* 582: "", "No_Block" */ + 0, -564, /* 584: "", "Number_Forms" */ + 0, -575, /* 586: "", "Ogham" */ + 0, -578, /* 588: "", "Old_Italic" */ + 0, -579, /* 590: "", "Old_Persian" */ + 0, -580, /* 592: "", "Old_South_Arabian" */ + 0, -581, /* 594: "", "Old_Turkic" */ + 0, -576, /* 596: "", "Ol_Chiki" */ + 0, -583, /* 598: "", "Optical_Character_Recognition" */ + 0, -584, /* 600: "", "Oriya" */ + 0, -588, /* 602: "", "Osmanya" */ + 0, -612, /* 604: "", "Phags_Pa" */ + 0, -613, /* 606: "", "Phaistos_Disc" */ + 0, -618, /* 608: "", "Phoenician" */ + 0, -619, /* 610: "", "Phonetic_Extensions" */ + 0, -620, /* 612: "", "Phonetic_Extensions_Supplement" */ + 0, 629, -628, /* 614: "", "Private_Use_Area", "Private_Use" */ + 0, -646, /* 617: "", "Rejang" */ + 0, -655, /* 619: "", "Rumi_Numeral_Symbols" */ + 0, -656, /* 621: "", "Runic" */ + 0, -673, /* 623: "", "Samaritan" */ + 0, -678, /* 625: "", "Saurashtra" */ + 0, -690, /* 627: "", "Shavian" */ + 0, -698, /* 629: "", "Sinhala" */ + 0, -702, /* 631: "", "Small_Form_Variants" */ + 0, -711, /* 633: "", "Spacing_Modifier_Letters" */ + 0, -712, /* 635: "", "Specials" */ + 0, -717, /* 637: "", "Sundanese" */ + 0, -720, /* 639: "", "Superscripts_And_Subscripts" */ + 0, -721, /* 641: "", "Supplemental_Arrows_A" */ + 0, -722, /* 643: "", "Supplemental_Arrows_B" */ + 0, -723, /* 645: "", "Supplemental_Mathematical_Operators" */ + 0, -724, /* 647: "", "Supplemental_Punctuation" */ + 0, -725, /* 649: "", "Supplementary_Private_Use_Area_A" */ + 0, -726, /* 651: "", "Supplementary_Private_Use_Area_B" */ + 0, -730, /* 653: "", "Syloti_Nagri" */ + 0, -734, /* 655: "", "Syriac" */ + 0, -739, /* 657: "", "Tagalog" */ + 0, -741, /* 659: "", "Tagbanwa" */ + 0, -742, /* 661: "", "Tags" */ + 0, -744, /* 663: "", "Tai_Le" */ + 0, -745, /* 665: "", "Tai_Tham" */ + 0, -746, /* 667: "", "Tai_Viet" */ + 0, -747, /* 669: "", "Tai_Xuan_Jing_Symbols" */ + 0, -750, /* 671: "", "Tamil" */ + 0, -756, /* 673: "", "Telugu" */ + 0, -764, /* 
675: "", "Thaana" */ + 0, -765, /* 677: "", "Thai" */ + 0, -766, /* 679: "", "Tibetan" */ + 0, -768, /* 681: "", "Tifinagh" */ + 0, -779, /* 683: "", "Ugaritic" */ + 0, 782, -127, /* 685: "", "Unified_Canadian_Aboriginal_Syllabics", "Canadian_Syllabics" */ + 0, -783, /* 688: "", "Unified_Canadian_Aboriginal_Syllabics_Extended" */ + 0, -793, /* 690: "", "Vai" */ + 0, -796, /* 692: "", "Variation_Selectors" */ + 0, -797, /* 694: "", "Variation_Selectors_Supplement" */ + 0, -798, /* 696: "", "Vedic_Extensions" */ + 0, -801, /* 698: "", "Vertical_Forms" */ + 0, -831, /* 700: "", "Yijing_Hexagram_Symbols" */ + 0, -828, /* 702: "", "Yi_Radicals" */ + 0, -829, /* 704: "", "Yi_Syllables" */ + 531, -560, /* 706: "NR", "Not_Reordered" */ + 573, -595, /* 708: "OV", "Overlay" */ + 14, -46, /* 710: "ATBL", "Attached_Below_Left" */ + 13, -45, /* 712: "ATB", "Attached_Below" */ + 11, -43, /* 714: "ATA", "Attached_Above" */ + 12, -44, /* 716: "ATAR", "Attached_Above_Right" */ + 54, -65, /* 718: "BL", "Below_Left" */ + 49, -64, /* 720: "B", "Below" */ + 56, -66, /* 722: "BR", "Below_Right" */ + 402, -432, /* 724: "L", "Left" */ + 641, -648, /* 726: "R", "Right" */ + 4, -17, /* 728: "AL", "Above_Left" */ + 1, -16, /* 730: "A", "Above" */ + 7, -18, /* 732: "AR", "Above_Right" */ + 194, -218, /* 734: "DB", "Double_Below" */ + 193, -217, /* 736: "DA", "Double_Above" */ + 346, -362, /* 738: "IS", "Iota_Subscript" */ + 528, -562, /* 740: "NK", "Nukta" */ + 378, -383, /* 742: "KV", "Kana_Voicing" */ + 791, -802, /* 744: "VR", "Virama" */ + 125, 128, -854, /* 746: "Can", "Canonical", "can" */ + 231, 150, -862, /* 749: "Enc", "Circle", "enc" */ + 157, 168, -858, /* 752: "Com", "Compat", "com" */ + 255, 256, -863, /* 755: "Fin", "Final", "fin" */ + 259, -864, /* 758: "Font", "font" */ + 261, 262, -865, /* 760: "Fra", "Fraction", "fra" */ + 355, 356, -870, /* 763: "Init", "Initial", "init" */ + 363, 364, -872, /* 766: "Iso", "Isolated", "iso" */ + 486, 487, -878, /* 769: "Med", "Medial", 
"med" */ + 537, 538, -881, /* 772: "Nar", "Narrow", "nar" */ + 539, 552, -882, /* 775: "Nb", "Nobreak", "nb" */ + 555, -887, /* 778: "None", "none" */ + 703, 701, -897, /* 780: "Sml", "Small", "sml" */ + 713, 714, -899, /* 783: "Sqr", "Square", "sqr" */ + 715, -901, /* 786: "Sub", "sub" */ + 718, 719, -903, /* 788: "Sup", "Super", "sup" */ + 799, 800, -907, /* 791: "Vert", "Vertical", "vert" */ + 812, -908, /* 794: "Wide", "wide" */ + 1, -27, /* 796: "A", "Ambiguous" */ + 249, -264, /* 798: "F", "Fullwidth" */ + 297, -302, /* 800: "H", "Halfwidth" */ + 535, -538, /* 802: "Na", "Narrow" */ + 513, -541, /* 804: "N", "Neutral" */ + 805, -812, /* 806: "W", "Wide" */ + 484, -710, /* 808: "Mc", "Spacing_Mark" */ + 606, -170, /* 810: "Pc", "Connector_Punctuation" */ + 139, 172, -857, /* 812: "Cc", "Control", "cntrl" */ + 679, -181, /* 815: "Sc", "Currency_Symbol" */ + 607, -199, /* 817: "Pd", "Dash_Punctuation" */ + 540, 202, -859, /* 819: "Nd", "Decimal_Number", "digit" */ + 485, -236, /* 822: "Me", "Enclosing_Mark" */ + 608, -154, /* 824: "Pe", "Close_Punctuation" */ + 610, -257, /* 826: "Pf", "Final_Punctuation" */ + 140, -260, /* 828: "Cf", "Format" */ + 155, -780, /* 830: "Cn", "Unassigned" */ + 621, -357, /* 832: "Pi", "Initial_Punctuation" */ + 548, -440, /* 834: "Nl", "Letter_Number" */ + 840, -448, /* 836: "Zl", "Line_Separator" */ + 453, -460, /* 838: "Ll", "Lowercase_Letter" */ + 700, -479, /* 840: "Sm", "Math_Symbol" */ + 454, -502, /* 842: "Lm", "Modifier_Letter" */ + 699, -503, /* 844: "Sk", "Modifier_Symbol" */ + 501, -556, /* 846: "Mn", "Nonspacing_Mark" */ + 455, -590, /* 848: "Lo", "Other_Letter" */ + 549, -592, /* 850: "No", "Other_Number" */ + 623, -593, /* 852: "Po", "Other_Punctuation" */ + 704, -594, /* 854: "So", "Other_Symbol" */ + 842, -601, /* 856: "Zp", "Paragraph_Separator" */ + 156, -628, /* 858: "Co", "Private_Use" */ + 843, -708, /* 860: "Zs", "Space_Separator" */ + 631, -582, /* 862: "Ps", "Open_Punctuation" */ + 178, -727, /* 864: "Cs", 
"Surrogate" */ + 462, -769, /* 866: "Lt", "Titlecase_Letter" */ + 463, -788, /* 868: "Lu", "Uppercase_Letter" */ + 98, -589, /* 870: "C", "Other" */ + 403, -138, /* 872: "LC", "Cased_Letter" */ + 402, -439, /* 874: "L", "Letter" */ + 468, -477, /* 876: "M", "Mark" */ + 513, -563, /* 878: "N", "Number" */ + 596, 632, -891, /* 880: "P", "Punctuation", "punct" */ + 658, -731, /* 883: "S", "Symbol" */ + 834, -688, /* 885: "Z", "Separator" */ + 402, -431, /* 887: "L", "Leading_Jamo" */ + 411, -412, /* 889: "LVT", "LVT_Syllable" */ + 410, -413, /* 891: "LV", "LV_Syllable" */ + 514, -559, /* 893: "NA", "Not_Applicable" */ + 738, -772, /* 895: "T", "Trailing_Jamo" */ + 790, -804, /* 897: "V", "Vowel_Jamo" */ + 0, -21, /* 899: "", "Ain" */ + 0, -22, /* 901: "", "Alaph" */ + 0, -23, /* 903: "", "Alef" */ + 0, -63, /* 905: "", "Beh" */ + 0, -69, /* 907: "", "Beth" */ + 0, -96, /* 909: "", "Burushaski_Yeh_Barree" */ + 0, -196, /* 911: "", "Dal" */ + 0, -197, /* 913: "", "Dalath_Rish" */ + 0, -221, /* 915: "", "E" */ + 0, -252, /* 917: "", "Farsi_Yeh" */ + 0, -253, /* 919: "", "Fe" */ + 0, -254, /* 921: "", "Feh" */ + 0, -258, /* 923: "", "Final_Semkath" */ + 0, -267, /* 925: "", "Gaf" */ + 0, -268, /* 927: "", "Gamal" */ + 0, -301, /* 929: "", "Hah" */ + 0, -304, /* 931: "", "Hamza_On_Heh_Goal" */ + 0, -319, /* 933: "", "He" */ + 0, -322, /* 935: "", "Heh" */ + 0, -323, /* 937: "", "Heh_Goal" */ + 0, -324, /* 939: "", "Heth" */ + 0, -379, /* 941: "", "Kaf" */ + 0, -387, /* 943: "", "Kaph" */ + 0, -392, /* 945: "", "Khaph" */ + 0, -399, /* 947: "", "Knotted_Heh" */ + 0, -414, /* 949: "", "Lam" */ + 0, -415, /* 951: "", "Lamadh" */ + 0, -488, /* 953: "", "Meem" */ + 0, -494, /* 955: "", "Mim" */ + 0, -558, /* 957: "", "Noon" */ + 0, -551, /* 959: "", "No_Joining_Group" */ + 0, -568, /* 961: "", "Nun" */ + 0, -569, /* 963: "", "Nya" */ + 0, -608, /* 965: "", "Pe" */ + 0, -637, /* 967: "", "Qaf" */ + 0, -638, /* 969: "", "Qaph" */ + 0, -645, /* 971: "", "Reh" */ + 0, -647, /* 973: 
"", "Reversed_Pe" */ + 0, -671, /* 975: "", "Sad" */ + 0, -672, /* 977: "", "Sadhe" */ + 0, -681, /* 979: "", "Seen" */ + 0, -684, /* 981: "", "Semkath" */ + 0, -692, /* 983: "", "Shin" */ + 0, -728, /* 985: "", "Swash_Kaf" */ + 0, -735, /* 987: "", "Syriac_Waw" */ + 0, -743, /* 989: "", "Tah" */ + 0, -753, /* 991: "", "Taw" */ + 0, -754, /* 993: "", "Teh_Marbuta" */ + 0, -760, /* 995: "", "Teth" */ + 0, -810, /* 997: "", "Waw" */ + 0, -823, /* 999: "", "Yeh" */ + 0, -824, /* 1001: "", "Yeh_Barree" */ + 0, -825, /* 1003: "", "Yeh_With_Tail" */ + 0, -832, /* 1005: "", "Yudh" */ + 0, -833, /* 1007: "", "Yudh_He" */ + 0, -837, /* 1009: "", "Zain" */ + 0, -838, /* 1011: "", "Zhain" */ + 192, -220, /* 1013: "D", "Dual_Joining" */ + 98, -372, /* 1015: "C", "Join_Causing" */ + 402, -433, /* 1017: "L", "Left_Joining" */ + 775, -553, /* 1019: "U", "Non_Joining" */ + 641, -649, /* 1021: "R", "Right_Joining" */ + 738, -773, /* 1023: "T", "Transparent" */ + 4, -25, /* 1025: "AL", "Alphabetic" */ + 3, -27, /* 1027: "AI", "Ambiguous" */ + 51, -88, /* 1029: "BA", "Break_After" */ + 52, -89, /* 1031: "BB", "Break_Before" */ + 50, -90, /* 1033: "B2", "Break_Both" */ + 670, -91, /* 1035: "SY", "Break_Symbols" */ + 116, -133, /* 1037: "CR", "Carriage_Return" */ + 115, -153, /* 1039: "CP", "Close_Parenthesis" */ + 112, -154, /* 1041: "CL", "Close_Punctuation" */ + 113, -162, /* 1043: "CM", "Combining_Mark" */ + 659, -169, /* 1045: "SA", "Complex_Context" */ + 99, -171, /* 1047: "CB", "Contingent_Break" */ + 225, -244, /* 1049: "EX", "Exclamation" */ + 266, -279, /* 1051: "GL", "Glue" */ + 298, -298, /* 1053: "H2", "H2" */ + 299, -299, /* 1055: "H3", "H3" */ + 300, -334, /* 1057: "HY", "Hyphen" */ + 335, -349, /* 1059: "ID", "Ideographic" */ + 346, -353, /* 1061: "IS", "Infix_Numeric" */ + 344, 360, -361, /* 1063: "IN", "Inseparable", "Inseperable" */ + 366, -366, /* 1066: "JL", "JL" */ + 367, -367, /* 1068: "JT", "JT" */ + 368, -368, /* 1070: "JV", "JV" */ + 405, -447, /* 1072: "LF", 
"Line_Feed" */ + 53, -475, /* 1074: "BK", "Mandatory_Break" */ + 530, -544, /* 1076: "NL", "Next_Line" */ + 532, -557, /* 1078: "NS", "Nonstarter" */ + 572, -582, /* 1080: "OP", "Open_Punctuation" */ + 598, -625, /* 1082: "PO", "Postfix_Numeric" */ + 600, -626, /* 1084: "PR", "Prefix_Numeric" */ + 634, -639, /* 1086: "QU", "Quotation" */ + 667, -707, /* 1088: "SP", "Space" */ + 665, -727, /* 1090: "SG", "Surrogate" */ + 819, -785, /* 1092: "XX", "Unknown" */ + 807, -814, /* 1094: "WJ", "Word_Joiner" */ + 835, -836, /* 1096: "ZW", "ZWSpace" */ + 200, -201, /* 1098: "De", "Decimal" */ + 211, -214, /* 1100: "Di", "Digit" */ + 555, -555, /* 1102: "None", "None" */ + 561, -565, /* 1104: "Nu", "Numeric" */ + 31, -32, /* 1106: "Arab", "Arabic" */ + 41, -39, /* 1108: "Armn", "Armenian" */ + 48, -47, /* 1110: "Avst", "Avestan" */ + 57, -58, /* 1112: "Bali", "Balinese" */ + 59, -60, /* 1114: "Bamu", "Bamum" */ + 62, -62, /* 1116: "Batk", "Batk" */ + 67, -68, /* 1118: "Beng", "Bengali" */ + 76, -76, /* 1120: "Blis", "Blis" */ + 616, -616, /* 1122: "Phlv", "Phlv" */ + 79, -80, /* 1124: "Bopo", "Bopomofo" */ + 84, -84, /* 1126: "Brah", "Brah" */ + 85, -86, /* 1128: "Brai", "Braille" */ + 92, -93, /* 1130: "Bugi", "Buginese" */ + 94, -95, /* 1132: "Buhd", "Buhid" */ + 130, -126, /* 1134: "Cans", "Canadian_Aboriginal" */ + 131, -132, /* 1136: "Cari", "Carian" */ + 124, -124, /* 1138: "Cakm", "Cakm" */ + 141, -141, /* 1140: "Cham", "Cham" */ + 148, -149, /* 1142: "Cher", "Cherokee" */ + 151, -151, /* 1144: "Cirt", "Cirt" */ + 846, -164, /* 1146: "Zyyy", "Common" */ + 174, 175, -635, /* 1148: "Copt", "Coptic", "Qaac" */ + 821, -179, /* 1151: "Xsux", "Cuneiform" */ + 177, -183, /* 1153: "Cprt", "Cypriot" */ + 190, -185, /* 1155: "Cyrl", "Cyrillic" */ + 227, -227, /* 1157: "Egyd", "Egyd" */ + 219, -207, /* 1159: "Dsrt", "Deseret" */ + 208, -209, /* 1161: "Deva", "Devanagari" */ + 737, -737, /* 1163: "Syrn", "Syrn" */ + 229, -230, /* 1165: "Egyp", "Egyptian_Hieroglyphs" */ + 733, 
-733, /* 1167: "Syre", "Syre" */ + 237, -238, /* 1169: "Ethi", "Ethiopic" */ + 274, -275, /* 1171: "Geor", "Georgian" */ + 277, -278, /* 1173: "Glag", "Glagolitic" */ + 280, -281, /* 1175: "Goth", "Gothic" */ + 292, -289, /* 1177: "Grek", "Greek" */ + 294, -293, /* 1179: "Gujr", "Gujarati" */ + 296, -295, /* 1181: "Guru", "Gurmukhi" */ + 314, -305, /* 1183: "Hani", "Han" */ + 306, -307, /* 1185: "Hang", "Hangul" */ + 315, -318, /* 1187: "Hano", "Hanunoo" */ + 352, -352, /* 1189: "Inds", "Inds" */ + 320, -321, /* 1191: "Hebr", "Hebrew" */ + 228, -228, /* 1193: "Egyh", "Egyh" */ + 329, -330, /* 1195: "Hira", "Hiragana" */ + 40, -351, /* 1197: "Armi", "Imperial_Aramaic" */ + 839, 354, -636, /* 1199: "Zinh", "Inherited", "Qaai" */ + 614, -358, /* 1202: "Phli", "Inscriptional_Pahlavi" */ + 630, -359, /* 1204: "Prti", "Inscriptional_Parthian" */ + 376, -376, /* 1206: "Jpan", "Jpan" */ + 369, -370, /* 1208: "Java", "Javanese" */ + 401, -380, /* 1210: "Kthi", "Kaithi" */ + 398, -386, /* 1212: "Knda", "Kannada" */ + 382, -388, /* 1214: "Kana", "Katakana" */ + 332, -389, /* 1216: "Hrkt", "Katakana_Or_Hiragana" */ + 381, -391, /* 1218: "Kali", "Kayah_Li" */ + 393, -394, /* 1220: "Khar", "Kharoshthi" */ + 397, -395, /* 1222: "Khmr", "Khmer" */ + 272, -272, /* 1224: "Geok", "Geok" */ + 400, -400, /* 1226: "Kore", "Kore" */ + 416, -745, /* 1228: "Lana", "Tai_Tham" */ + 418, -417, /* 1230: "Laoo", "Lao" */ + 429, -421, /* 1232: "Latn", "Latin" */ + 419, -419, /* 1234: "Latf", "Latf" */ + 420, -420, /* 1236: "Latg", "Latg" */ + 437, -438, /* 1238: "Lepc", "Lepcha" */ + 442, -443, /* 1240: "Limb", "Limbu" */ + 444, -444, /* 1242: "Lina", "Lina" */ + 445, -449, /* 1244: "Linb", "Linear_B" */ + 452, -452, /* 1246: "Lisu", "Lisu" */ + 464, -465, /* 1248: "Lyci", "Lycian" */ + 466, -467, /* 1250: "Lydi", "Lydian" */ + 500, -473, /* 1252: "Mlym", "Malayalam" */ + 474, -474, /* 1254: "Mand", "Mand" */ + 476, -476, /* 1256: "Mani", "Mani" */ + 841, -841, /* 1258: "Zmth", "Zmth" */ + 482, 
-482, /* 1260: "Maya", "Maya" */ + 508, -489, /* 1262: "Mtei", "Meetei_Mayek" */ + 490, -490, /* 1264: "Mero", "Mero" */ + 505, -506, /* 1266: "Mong", "Mongolian" */ + 507, -507, /* 1268: "Moon", "Moon" */ + 512, -510, /* 1270: "Mymr", "Myanmar" */ + 545, -545, /* 1272: "Nkgb", "Nkgb" */ + 749, -542, /* 1274: "Talu", "New_Tai_Lue" */ + 547, -546, /* 1276: "Nkoo", "Nko" */ + 574, -575, /* 1278: "Ogam", "Ogham" */ + 191, -191, /* 1280: "Cyrs", "Cyrs" */ + 333, -333, /* 1282: "Hung", "Hung" */ + 365, -578, /* 1284: "Ital", "Old_Italic" */ + 609, -609, /* 1286: "Perm", "Perm" */ + 820, -579, /* 1288: "Xpeo", "Old_Persian" */ + 676, -580, /* 1290: "Sarb", "Old_South_Arabian" */ + 577, -576, /* 1292: "Olck", "Ol_Chiki" */ + 586, -584, /* 1294: "Orya", "Oriya" */ + 585, -581, /* 1296: "Orkh", "Old_Turkic" */ + 587, -588, /* 1298: "Osma", "Osmanya" */ + 331, -331, /* 1300: "Hmng", "Hmng" */ + 611, -612, /* 1302: "Phag", "Phags_Pa" */ + 617, -618, /* 1304: "Phnx", "Phoenician" */ + 622, -622, /* 1306: "Plrd", "Plrd" */ + 615, -615, /* 1308: "Phlp", "Phlp" */ + 653, -646, /* 1310: "Rjng", "Rejang" */ + 654, -654, /* 1312: "Roro", "Roro" */ + 657, -656, /* 1314: "Runr", "Runic" */ + 674, -673, /* 1316: "Samr", "Samaritan" */ + 675, -675, /* 1318: "Sara", "Sara" */ + 677, -678, /* 1320: "Saur", "Saurashtra" */ + 691, -690, /* 1322: "Shaw", "Shavian" */ + 689, -689, /* 1324: "Sgnw", "Sgnw" */ + 316, -316, /* 1326: "Hans", "Hans" */ + 697, -698, /* 1328: "Sinh", "Sinhala" */ + 716, -717, /* 1330: "Sund", "Sundanese" */ + 729, -730, /* 1332: "Sylo", "Syloti_Nagri" */ + 844, -844, /* 1334: "Zsym", "Zsym" */ + 732, -734, /* 1336: "Syrc", "Syriac" */ + 762, -739, /* 1338: "Tglg", "Tagalog" */ + 740, -741, /* 1340: "Tagb", "Tagbanwa" */ + 748, -744, /* 1342: "Tale", "Tai_Le" */ + 752, -746, /* 1344: "Tavt", "Tai_Viet" */ + 751, -750, /* 1346: "Taml", "Tamil" */ + 755, -756, /* 1348: "Telu", "Telugu" */ + 757, -757, /* 1350: "Teng", "Teng" */ + 763, -764, /* 1352: "Thaa", "Thaana" */ 
+ 765, -765, /* 1354: "Thai", "Thai" */ + 767, -766, /* 1356: "Tibt", "Tibetan" */ + 761, -768, /* 1358: "Tfng", "Tifinagh" */ + 317, -317, /* 1360: "Hant", "Hant" */ + 778, -779, /* 1362: "Ugar", "Ugaritic" */ + 847, -785, /* 1364: "Zzzz", "Unknown" */ + 845, -845, /* 1366: "Zxxx", "Zxxx" */ + 794, -793, /* 1368: "Vaii", "Vai" */ + 803, -803, /* 1370: "Visp", "Visp" */ + 736, -736, /* 1372: "Syrj", "Syrj" */ + 830, -827, /* 1374: "Yiii", "Yi" */ }; #define MAX_NAMES_PER_GROUP 4 @@ -1704,699 +1707,699 @@ const Alias VALUES_WB[] = { const int32_t VALUES_bc_COUNT = 19; const Alias VALUES_bc[] = { - Alias((int32_t) U_ARABIC_NUMBER, 254), - Alias((int32_t) U_BLOCK_SEPARATOR, 256), - Alias((int32_t) U_BOUNDARY_NEUTRAL, 258), - Alias((int32_t) U_COMMON_NUMBER_SEPARATOR, 260), - Alias((int32_t) U_DIR_NON_SPACING_MARK, 262), - Alias((int32_t) U_EUROPEAN_NUMBER, 264), - Alias((int32_t) U_EUROPEAN_NUMBER_SEPARATOR, 266), - Alias((int32_t) U_EUROPEAN_NUMBER_TERMINATOR, 268), - Alias((int32_t) U_LEFT_TO_RIGHT, 270), - Alias((int32_t) U_LEFT_TO_RIGHT_EMBEDDING, 272), - Alias((int32_t) U_LEFT_TO_RIGHT_OVERRIDE, 274), - Alias((int32_t) U_OTHER_NEUTRAL, 276), - Alias((int32_t) U_POP_DIRECTIONAL_FORMAT, 278), - Alias((int32_t) U_RIGHT_TO_LEFT, 280), - Alias((int32_t) U_RIGHT_TO_LEFT_ARABIC, 282), - Alias((int32_t) U_RIGHT_TO_LEFT_EMBEDDING, 284), - Alias((int32_t) U_RIGHT_TO_LEFT_OVERRIDE, 286), - Alias((int32_t) U_SEGMENT_SEPARATOR, 288), - Alias((int32_t) U_WHITE_SPACE_NEUTRAL, 290), + Alias((int32_t) U_ARABIC_NUMBER, 256), + Alias((int32_t) U_BLOCK_SEPARATOR, 258), + Alias((int32_t) U_BOUNDARY_NEUTRAL, 260), + Alias((int32_t) U_COMMON_NUMBER_SEPARATOR, 262), + Alias((int32_t) U_DIR_NON_SPACING_MARK, 264), + Alias((int32_t) U_EUROPEAN_NUMBER, 266), + Alias((int32_t) U_EUROPEAN_NUMBER_SEPARATOR, 268), + Alias((int32_t) U_EUROPEAN_NUMBER_TERMINATOR, 270), + Alias((int32_t) U_LEFT_TO_RIGHT, 272), + Alias((int32_t) U_LEFT_TO_RIGHT_EMBEDDING, 274), + Alias((int32_t) 
U_LEFT_TO_RIGHT_OVERRIDE, 276), + Alias((int32_t) U_OTHER_NEUTRAL, 278), + Alias((int32_t) U_POP_DIRECTIONAL_FORMAT, 280), + Alias((int32_t) U_RIGHT_TO_LEFT, 282), + Alias((int32_t) U_RIGHT_TO_LEFT_ARABIC, 284), + Alias((int32_t) U_RIGHT_TO_LEFT_EMBEDDING, 286), + Alias((int32_t) U_RIGHT_TO_LEFT_OVERRIDE, 288), + Alias((int32_t) U_SEGMENT_SEPARATOR, 290), + Alias((int32_t) U_WHITE_SPACE_NEUTRAL, 292), }; const int32_t VALUES_binprop_COUNT = 2; const Alias VALUES_binprop[] = { - Alias((int32_t) 0, 292), - Alias((int32_t) 1, 296), + Alias((int32_t) 0, 294), + Alias((int32_t) 1, 298), }; const int32_t VALUES_blk_COUNT = 198; const Alias VALUES_blk[] = { - Alias((int32_t) UBLOCK_AEGEAN_NUMBERS, 300), - Alias((int32_t) UBLOCK_ALPHABETIC_PRESENTATION_FORMS, 302), - Alias((int32_t) UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION, 304), - Alias((int32_t) UBLOCK_ANCIENT_GREEK_NUMBERS, 306), - Alias((int32_t) UBLOCK_ANCIENT_SYMBOLS, 308), - Alias((int32_t) UBLOCK_ARABIC, 310), - Alias((int32_t) UBLOCK_ARABIC_PRESENTATION_FORMS_A, 312), - Alias((int32_t) UBLOCK_ARABIC_PRESENTATION_FORMS_B, 315), - Alias((int32_t) UBLOCK_ARABIC_SUPPLEMENT, 317), - Alias((int32_t) UBLOCK_ARMENIAN, 319), - Alias((int32_t) UBLOCK_ARROWS, 321), - Alias((int32_t) UBLOCK_AVESTAN, 323), - Alias((int32_t) UBLOCK_BALINESE, 325), - Alias((int32_t) UBLOCK_BAMUM, 327), - Alias((int32_t) UBLOCK_BASIC_LATIN, 329), - Alias((int32_t) UBLOCK_BENGALI, 332), - Alias((int32_t) UBLOCK_BLOCK_ELEMENTS, 334), - Alias((int32_t) UBLOCK_BOPOMOFO, 336), - Alias((int32_t) UBLOCK_BOPOMOFO_EXTENDED, 338), - Alias((int32_t) UBLOCK_BOX_DRAWING, 340), - Alias((int32_t) UBLOCK_BRAILLE_PATTERNS, 342), - Alias((int32_t) UBLOCK_BUGINESE, 344), - Alias((int32_t) UBLOCK_BUHID, 346), - Alias((int32_t) UBLOCK_BYZANTINE_MUSICAL_SYMBOLS, 348), - Alias((int32_t) UBLOCK_CARIAN, 350), - Alias((int32_t) UBLOCK_CHAM, 352), - Alias((int32_t) UBLOCK_CHEROKEE, 354), - Alias((int32_t) UBLOCK_CJK_COMPATIBILITY, 356), - Alias((int32_t) 
UBLOCK_CJK_COMPATIBILITY_FORMS, 358), - Alias((int32_t) UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, 360), - Alias((int32_t) UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 362), - Alias((int32_t) UBLOCK_CJK_RADICALS_SUPPLEMENT, 364), - Alias((int32_t) UBLOCK_CJK_STROKES, 366), - Alias((int32_t) UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION, 368), - Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS, 370), - Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 372), - Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 374), - Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 376), - Alias((int32_t) UBLOCK_COMBINING_DIACRITICAL_MARKS, 378), - Alias((int32_t) UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 380), - Alias((int32_t) UBLOCK_COMBINING_HALF_MARKS, 382), - Alias((int32_t) UBLOCK_COMBINING_MARKS_FOR_SYMBOLS, 384), - Alias((int32_t) UBLOCK_COMMON_INDIC_NUMBER_FORMS, 387), - Alias((int32_t) UBLOCK_CONTROL_PICTURES, 389), - Alias((int32_t) UBLOCK_COPTIC, 391), - Alias((int32_t) UBLOCK_COUNTING_ROD_NUMERALS, 393), - Alias((int32_t) UBLOCK_CUNEIFORM, 395), - Alias((int32_t) UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION, 397), - Alias((int32_t) UBLOCK_CURRENCY_SYMBOLS, 399), - Alias((int32_t) UBLOCK_CYPRIOT_SYLLABARY, 401), - Alias((int32_t) UBLOCK_CYRILLIC, 403), - Alias((int32_t) UBLOCK_CYRILLIC_EXTENDED_A, 405), - Alias((int32_t) UBLOCK_CYRILLIC_EXTENDED_B, 407), - Alias((int32_t) UBLOCK_CYRILLIC_SUPPLEMENT, 409), - Alias((int32_t) UBLOCK_DESERET, 412), - Alias((int32_t) UBLOCK_DEVANAGARI, 414), - Alias((int32_t) UBLOCK_DEVANAGARI_EXTENDED, 416), - Alias((int32_t) UBLOCK_DINGBATS, 418), - Alias((int32_t) UBLOCK_DOMINO_TILES, 420), - Alias((int32_t) UBLOCK_EGYPTIAN_HIEROGLYPHS, 422), - Alias((int32_t) UBLOCK_ENCLOSED_ALPHANUMERICS, 424), - Alias((int32_t) UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 426), - Alias((int32_t) UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS, 428), - Alias((int32_t) UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 430), - Alias((int32_t) UBLOCK_ETHIOPIC, 432), - 
Alias((int32_t) UBLOCK_ETHIOPIC_EXTENDED, 434), - Alias((int32_t) UBLOCK_ETHIOPIC_SUPPLEMENT, 436), - Alias((int32_t) UBLOCK_GENERAL_PUNCTUATION, 438), - Alias((int32_t) UBLOCK_GEOMETRIC_SHAPES, 440), - Alias((int32_t) UBLOCK_GEORGIAN, 442), - Alias((int32_t) UBLOCK_GEORGIAN_SUPPLEMENT, 444), - Alias((int32_t) UBLOCK_GLAGOLITIC, 446), - Alias((int32_t) UBLOCK_GOTHIC, 448), - Alias((int32_t) UBLOCK_GREEK, 450), - Alias((int32_t) UBLOCK_GREEK_EXTENDED, 453), - Alias((int32_t) UBLOCK_GUJARATI, 455), - Alias((int32_t) UBLOCK_GURMUKHI, 457), - Alias((int32_t) UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, 459), - Alias((int32_t) UBLOCK_HANGUL_COMPATIBILITY_JAMO, 461), - Alias((int32_t) UBLOCK_HANGUL_JAMO, 463), - Alias((int32_t) UBLOCK_HANGUL_JAMO_EXTENDED_A, 465), - Alias((int32_t) UBLOCK_HANGUL_JAMO_EXTENDED_B, 467), - Alias((int32_t) UBLOCK_HANGUL_SYLLABLES, 469), - Alias((int32_t) UBLOCK_HANUNOO, 471), - Alias((int32_t) UBLOCK_HEBREW, 473), - Alias((int32_t) UBLOCK_HIGH_PRIVATE_USE_SURROGATES, 475), - Alias((int32_t) UBLOCK_HIGH_SURROGATES, 477), - Alias((int32_t) UBLOCK_HIRAGANA, 479), - Alias((int32_t) UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 481), - Alias((int32_t) UBLOCK_IMPERIAL_ARAMAIC, 483), - Alias((int32_t) UBLOCK_INSCRIPTIONAL_PAHLAVI, 485), - Alias((int32_t) UBLOCK_INSCRIPTIONAL_PARTHIAN, 487), - Alias((int32_t) UBLOCK_IPA_EXTENSIONS, 489), - Alias((int32_t) UBLOCK_JAVANESE, 491), - Alias((int32_t) UBLOCK_KAITHI, 493), - Alias((int32_t) UBLOCK_KANBUN, 495), - Alias((int32_t) UBLOCK_KANGXI_RADICALS, 497), - Alias((int32_t) UBLOCK_KANNADA, 499), - Alias((int32_t) UBLOCK_KATAKANA, 501), - Alias((int32_t) UBLOCK_KATAKANA_PHONETIC_EXTENSIONS, 503), - Alias((int32_t) UBLOCK_KAYAH_LI, 505), - Alias((int32_t) UBLOCK_KHAROSHTHI, 507), - Alias((int32_t) UBLOCK_KHMER, 509), - Alias((int32_t) UBLOCK_KHMER_SYMBOLS, 511), - Alias((int32_t) UBLOCK_LAO, 513), - Alias((int32_t) UBLOCK_LATIN_1_SUPPLEMENT, 515), - Alias((int32_t) UBLOCK_LATIN_EXTENDED_A, 518), - Alias((int32_t) 
UBLOCK_LATIN_EXTENDED_ADDITIONAL, 520), - Alias((int32_t) UBLOCK_LATIN_EXTENDED_B, 522), - Alias((int32_t) UBLOCK_LATIN_EXTENDED_C, 524), - Alias((int32_t) UBLOCK_LATIN_EXTENDED_D, 526), - Alias((int32_t) UBLOCK_LEPCHA, 528), - Alias((int32_t) UBLOCK_LETTERLIKE_SYMBOLS, 530), - Alias((int32_t) UBLOCK_LIMBU, 532), - Alias((int32_t) UBLOCK_LINEAR_B_IDEOGRAMS, 534), - Alias((int32_t) UBLOCK_LINEAR_B_SYLLABARY, 536), - Alias((int32_t) UBLOCK_LISU, 538), - Alias((int32_t) UBLOCK_LOW_SURROGATES, 540), - Alias((int32_t) UBLOCK_LYCIAN, 542), - Alias((int32_t) UBLOCK_LYDIAN, 544), - Alias((int32_t) UBLOCK_MAHJONG_TILES, 546), - Alias((int32_t) UBLOCK_MALAYALAM, 548), - Alias((int32_t) UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 550), - Alias((int32_t) UBLOCK_MATHEMATICAL_OPERATORS, 552), - Alias((int32_t) UBLOCK_MEETEI_MAYEK, 554), - Alias((int32_t) UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 556), - Alias((int32_t) UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 558), - Alias((int32_t) UBLOCK_MISCELLANEOUS_SYMBOLS, 560), - Alias((int32_t) UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS, 562), - Alias((int32_t) UBLOCK_MISCELLANEOUS_TECHNICAL, 564), - Alias((int32_t) UBLOCK_MODIFIER_TONE_LETTERS, 566), - Alias((int32_t) UBLOCK_MONGOLIAN, 568), - Alias((int32_t) UBLOCK_MUSICAL_SYMBOLS, 570), - Alias((int32_t) UBLOCK_MYANMAR, 572), - Alias((int32_t) UBLOCK_MYANMAR_EXTENDED_A, 574), - Alias((int32_t) UBLOCK_NEW_TAI_LUE, 576), - Alias((int32_t) UBLOCK_NKO, 578), - Alias((int32_t) UBLOCK_NO_BLOCK, 580), - Alias((int32_t) UBLOCK_NUMBER_FORMS, 582), - Alias((int32_t) UBLOCK_OGHAM, 584), - Alias((int32_t) UBLOCK_OLD_ITALIC, 586), - Alias((int32_t) UBLOCK_OLD_PERSIAN, 588), - Alias((int32_t) UBLOCK_OLD_SOUTH_ARABIAN, 590), - Alias((int32_t) UBLOCK_OLD_TURKIC, 592), - Alias((int32_t) UBLOCK_OL_CHIKI, 594), - Alias((int32_t) UBLOCK_OPTICAL_CHARACTER_RECOGNITION, 596), - Alias((int32_t) UBLOCK_ORIYA, 598), - Alias((int32_t) UBLOCK_OSMANYA, 600), - Alias((int32_t) UBLOCK_PHAGS_PA, 602), - 
Alias((int32_t) UBLOCK_PHAISTOS_DISC, 604), - Alias((int32_t) UBLOCK_PHOENICIAN, 606), - Alias((int32_t) UBLOCK_PHONETIC_EXTENSIONS, 608), - Alias((int32_t) UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT, 610), - Alias((int32_t) UBLOCK_PRIVATE_USE_AREA, 612), - Alias((int32_t) UBLOCK_REJANG, 615), - Alias((int32_t) UBLOCK_RUMI_NUMERAL_SYMBOLS, 617), - Alias((int32_t) UBLOCK_RUNIC, 619), - Alias((int32_t) UBLOCK_SAMARITAN, 621), - Alias((int32_t) UBLOCK_SAURASHTRA, 623), - Alias((int32_t) UBLOCK_SHAVIAN, 625), - Alias((int32_t) UBLOCK_SINHALA, 627), - Alias((int32_t) UBLOCK_SMALL_FORM_VARIANTS, 629), - Alias((int32_t) UBLOCK_SPACING_MODIFIER_LETTERS, 631), - Alias((int32_t) UBLOCK_SPECIALS, 633), - Alias((int32_t) UBLOCK_SUNDANESE, 635), - Alias((int32_t) UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS, 637), - Alias((int32_t) UBLOCK_SUPPLEMENTAL_ARROWS_A, 639), - Alias((int32_t) UBLOCK_SUPPLEMENTAL_ARROWS_B, 641), - Alias((int32_t) UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 643), - Alias((int32_t) UBLOCK_SUPPLEMENTAL_PUNCTUATION, 645), - Alias((int32_t) UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A, 647), - Alias((int32_t) UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B, 649), - Alias((int32_t) UBLOCK_SYLOTI_NAGRI, 651), - Alias((int32_t) UBLOCK_SYRIAC, 653), - Alias((int32_t) UBLOCK_TAGALOG, 655), - Alias((int32_t) UBLOCK_TAGBANWA, 657), - Alias((int32_t) UBLOCK_TAGS, 659), - Alias((int32_t) UBLOCK_TAI_LE, 661), - Alias((int32_t) UBLOCK_TAI_THAM, 663), - Alias((int32_t) UBLOCK_TAI_VIET, 665), - Alias((int32_t) UBLOCK_TAI_XUAN_JING_SYMBOLS, 667), - Alias((int32_t) UBLOCK_TAMIL, 669), - Alias((int32_t) UBLOCK_TELUGU, 671), - Alias((int32_t) UBLOCK_THAANA, 673), - Alias((int32_t) UBLOCK_THAI, 675), - Alias((int32_t) UBLOCK_TIBETAN, 677), - Alias((int32_t) UBLOCK_TIFINAGH, 679), - Alias((int32_t) UBLOCK_UGARITIC, 681), - Alias((int32_t) UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 683), - Alias((int32_t) UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 686), - Alias((int32_t) UBLOCK_VAI, 688), - 
Alias((int32_t) UBLOCK_VARIATION_SELECTORS, 690), - Alias((int32_t) UBLOCK_VARIATION_SELECTORS_SUPPLEMENT, 692), - Alias((int32_t) UBLOCK_VEDIC_EXTENSIONS, 694), - Alias((int32_t) UBLOCK_VERTICAL_FORMS, 696), - Alias((int32_t) UBLOCK_YIJING_HEXAGRAM_SYMBOLS, 698), - Alias((int32_t) UBLOCK_YI_RADICALS, 700), - Alias((int32_t) UBLOCK_YI_SYLLABLES, 702), + Alias((int32_t) UBLOCK_AEGEAN_NUMBERS, 302), + Alias((int32_t) UBLOCK_ALPHABETIC_PRESENTATION_FORMS, 304), + Alias((int32_t) UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION, 306), + Alias((int32_t) UBLOCK_ANCIENT_GREEK_NUMBERS, 308), + Alias((int32_t) UBLOCK_ANCIENT_SYMBOLS, 310), + Alias((int32_t) UBLOCK_ARABIC, 312), + Alias((int32_t) UBLOCK_ARABIC_PRESENTATION_FORMS_A, 314), + Alias((int32_t) UBLOCK_ARABIC_PRESENTATION_FORMS_B, 317), + Alias((int32_t) UBLOCK_ARABIC_SUPPLEMENT, 319), + Alias((int32_t) UBLOCK_ARMENIAN, 321), + Alias((int32_t) UBLOCK_ARROWS, 323), + Alias((int32_t) UBLOCK_AVESTAN, 325), + Alias((int32_t) UBLOCK_BALINESE, 327), + Alias((int32_t) UBLOCK_BAMUM, 329), + Alias((int32_t) UBLOCK_BASIC_LATIN, 331), + Alias((int32_t) UBLOCK_BENGALI, 334), + Alias((int32_t) UBLOCK_BLOCK_ELEMENTS, 336), + Alias((int32_t) UBLOCK_BOPOMOFO, 338), + Alias((int32_t) UBLOCK_BOPOMOFO_EXTENDED, 340), + Alias((int32_t) UBLOCK_BOX_DRAWING, 342), + Alias((int32_t) UBLOCK_BRAILLE_PATTERNS, 344), + Alias((int32_t) UBLOCK_BUGINESE, 346), + Alias((int32_t) UBLOCK_BUHID, 348), + Alias((int32_t) UBLOCK_BYZANTINE_MUSICAL_SYMBOLS, 350), + Alias((int32_t) UBLOCK_CARIAN, 352), + Alias((int32_t) UBLOCK_CHAM, 354), + Alias((int32_t) UBLOCK_CHEROKEE, 356), + Alias((int32_t) UBLOCK_CJK_COMPATIBILITY, 358), + Alias((int32_t) UBLOCK_CJK_COMPATIBILITY_FORMS, 360), + Alias((int32_t) UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, 362), + Alias((int32_t) UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 364), + Alias((int32_t) UBLOCK_CJK_RADICALS_SUPPLEMENT, 366), + Alias((int32_t) UBLOCK_CJK_STROKES, 368), + Alias((int32_t) UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION, 
370), + Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS, 372), + Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 374), + Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 376), + Alias((int32_t) UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 378), + Alias((int32_t) UBLOCK_COMBINING_DIACRITICAL_MARKS, 380), + Alias((int32_t) UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 382), + Alias((int32_t) UBLOCK_COMBINING_HALF_MARKS, 384), + Alias((int32_t) UBLOCK_COMBINING_MARKS_FOR_SYMBOLS, 386), + Alias((int32_t) UBLOCK_COMMON_INDIC_NUMBER_FORMS, 389), + Alias((int32_t) UBLOCK_CONTROL_PICTURES, 391), + Alias((int32_t) UBLOCK_COPTIC, 393), + Alias((int32_t) UBLOCK_COUNTING_ROD_NUMERALS, 395), + Alias((int32_t) UBLOCK_CUNEIFORM, 397), + Alias((int32_t) UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION, 399), + Alias((int32_t) UBLOCK_CURRENCY_SYMBOLS, 401), + Alias((int32_t) UBLOCK_CYPRIOT_SYLLABARY, 403), + Alias((int32_t) UBLOCK_CYRILLIC, 405), + Alias((int32_t) UBLOCK_CYRILLIC_EXTENDED_A, 407), + Alias((int32_t) UBLOCK_CYRILLIC_EXTENDED_B, 409), + Alias((int32_t) UBLOCK_CYRILLIC_SUPPLEMENT, 411), + Alias((int32_t) UBLOCK_DESERET, 414), + Alias((int32_t) UBLOCK_DEVANAGARI, 416), + Alias((int32_t) UBLOCK_DEVANAGARI_EXTENDED, 418), + Alias((int32_t) UBLOCK_DINGBATS, 420), + Alias((int32_t) UBLOCK_DOMINO_TILES, 422), + Alias((int32_t) UBLOCK_EGYPTIAN_HIEROGLYPHS, 424), + Alias((int32_t) UBLOCK_ENCLOSED_ALPHANUMERICS, 426), + Alias((int32_t) UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 428), + Alias((int32_t) UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS, 430), + Alias((int32_t) UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 432), + Alias((int32_t) UBLOCK_ETHIOPIC, 434), + Alias((int32_t) UBLOCK_ETHIOPIC_EXTENDED, 436), + Alias((int32_t) UBLOCK_ETHIOPIC_SUPPLEMENT, 438), + Alias((int32_t) UBLOCK_GENERAL_PUNCTUATION, 440), + Alias((int32_t) UBLOCK_GEOMETRIC_SHAPES, 442), + Alias((int32_t) UBLOCK_GEORGIAN, 444), + Alias((int32_t) UBLOCK_GEORGIAN_SUPPLEMENT, 446), + Alias((int32_t) 
UBLOCK_GLAGOLITIC, 448), + Alias((int32_t) UBLOCK_GOTHIC, 450), + Alias((int32_t) UBLOCK_GREEK, 452), + Alias((int32_t) UBLOCK_GREEK_EXTENDED, 455), + Alias((int32_t) UBLOCK_GUJARATI, 457), + Alias((int32_t) UBLOCK_GURMUKHI, 459), + Alias((int32_t) UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, 461), + Alias((int32_t) UBLOCK_HANGUL_COMPATIBILITY_JAMO, 463), + Alias((int32_t) UBLOCK_HANGUL_JAMO, 465), + Alias((int32_t) UBLOCK_HANGUL_JAMO_EXTENDED_A, 467), + Alias((int32_t) UBLOCK_HANGUL_JAMO_EXTENDED_B, 469), + Alias((int32_t) UBLOCK_HANGUL_SYLLABLES, 471), + Alias((int32_t) UBLOCK_HANUNOO, 473), + Alias((int32_t) UBLOCK_HEBREW, 475), + Alias((int32_t) UBLOCK_HIGH_PRIVATE_USE_SURROGATES, 477), + Alias((int32_t) UBLOCK_HIGH_SURROGATES, 479), + Alias((int32_t) UBLOCK_HIRAGANA, 481), + Alias((int32_t) UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 483), + Alias((int32_t) UBLOCK_IMPERIAL_ARAMAIC, 485), + Alias((int32_t) UBLOCK_INSCRIPTIONAL_PAHLAVI, 487), + Alias((int32_t) UBLOCK_INSCRIPTIONAL_PARTHIAN, 489), + Alias((int32_t) UBLOCK_IPA_EXTENSIONS, 491), + Alias((int32_t) UBLOCK_JAVANESE, 493), + Alias((int32_t) UBLOCK_KAITHI, 495), + Alias((int32_t) UBLOCK_KANBUN, 497), + Alias((int32_t) UBLOCK_KANGXI_RADICALS, 499), + Alias((int32_t) UBLOCK_KANNADA, 501), + Alias((int32_t) UBLOCK_KATAKANA, 503), + Alias((int32_t) UBLOCK_KATAKANA_PHONETIC_EXTENSIONS, 505), + Alias((int32_t) UBLOCK_KAYAH_LI, 507), + Alias((int32_t) UBLOCK_KHAROSHTHI, 509), + Alias((int32_t) UBLOCK_KHMER, 511), + Alias((int32_t) UBLOCK_KHMER_SYMBOLS, 513), + Alias((int32_t) UBLOCK_LAO, 515), + Alias((int32_t) UBLOCK_LATIN_1_SUPPLEMENT, 517), + Alias((int32_t) UBLOCK_LATIN_EXTENDED_A, 520), + Alias((int32_t) UBLOCK_LATIN_EXTENDED_ADDITIONAL, 522), + Alias((int32_t) UBLOCK_LATIN_EXTENDED_B, 524), + Alias((int32_t) UBLOCK_LATIN_EXTENDED_C, 526), + Alias((int32_t) UBLOCK_LATIN_EXTENDED_D, 528), + Alias((int32_t) UBLOCK_LEPCHA, 530), + Alias((int32_t) UBLOCK_LETTERLIKE_SYMBOLS, 532), + Alias((int32_t) UBLOCK_LIMBU, 534), 
+ Alias((int32_t) UBLOCK_LINEAR_B_IDEOGRAMS, 536), + Alias((int32_t) UBLOCK_LINEAR_B_SYLLABARY, 538), + Alias((int32_t) UBLOCK_LISU, 540), + Alias((int32_t) UBLOCK_LOW_SURROGATES, 542), + Alias((int32_t) UBLOCK_LYCIAN, 544), + Alias((int32_t) UBLOCK_LYDIAN, 546), + Alias((int32_t) UBLOCK_MAHJONG_TILES, 548), + Alias((int32_t) UBLOCK_MALAYALAM, 550), + Alias((int32_t) UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 552), + Alias((int32_t) UBLOCK_MATHEMATICAL_OPERATORS, 554), + Alias((int32_t) UBLOCK_MEETEI_MAYEK, 556), + Alias((int32_t) UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 558), + Alias((int32_t) UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 560), + Alias((int32_t) UBLOCK_MISCELLANEOUS_SYMBOLS, 562), + Alias((int32_t) UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS, 564), + Alias((int32_t) UBLOCK_MISCELLANEOUS_TECHNICAL, 566), + Alias((int32_t) UBLOCK_MODIFIER_TONE_LETTERS, 568), + Alias((int32_t) UBLOCK_MONGOLIAN, 570), + Alias((int32_t) UBLOCK_MUSICAL_SYMBOLS, 572), + Alias((int32_t) UBLOCK_MYANMAR, 574), + Alias((int32_t) UBLOCK_MYANMAR_EXTENDED_A, 576), + Alias((int32_t) UBLOCK_NEW_TAI_LUE, 578), + Alias((int32_t) UBLOCK_NKO, 580), + Alias((int32_t) UBLOCK_NO_BLOCK, 582), + Alias((int32_t) UBLOCK_NUMBER_FORMS, 584), + Alias((int32_t) UBLOCK_OGHAM, 586), + Alias((int32_t) UBLOCK_OLD_ITALIC, 588), + Alias((int32_t) UBLOCK_OLD_PERSIAN, 590), + Alias((int32_t) UBLOCK_OLD_SOUTH_ARABIAN, 592), + Alias((int32_t) UBLOCK_OLD_TURKIC, 594), + Alias((int32_t) UBLOCK_OL_CHIKI, 596), + Alias((int32_t) UBLOCK_OPTICAL_CHARACTER_RECOGNITION, 598), + Alias((int32_t) UBLOCK_ORIYA, 600), + Alias((int32_t) UBLOCK_OSMANYA, 602), + Alias((int32_t) UBLOCK_PHAGS_PA, 604), + Alias((int32_t) UBLOCK_PHAISTOS_DISC, 606), + Alias((int32_t) UBLOCK_PHOENICIAN, 608), + Alias((int32_t) UBLOCK_PHONETIC_EXTENSIONS, 610), + Alias((int32_t) UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT, 612), + Alias((int32_t) UBLOCK_PRIVATE_USE_AREA, 614), + Alias((int32_t) UBLOCK_REJANG, 617), + Alias((int32_t) 
UBLOCK_RUMI_NUMERAL_SYMBOLS, 619), + Alias((int32_t) UBLOCK_RUNIC, 621), + Alias((int32_t) UBLOCK_SAMARITAN, 623), + Alias((int32_t) UBLOCK_SAURASHTRA, 625), + Alias((int32_t) UBLOCK_SHAVIAN, 627), + Alias((int32_t) UBLOCK_SINHALA, 629), + Alias((int32_t) UBLOCK_SMALL_FORM_VARIANTS, 631), + Alias((int32_t) UBLOCK_SPACING_MODIFIER_LETTERS, 633), + Alias((int32_t) UBLOCK_SPECIALS, 635), + Alias((int32_t) UBLOCK_SUNDANESE, 637), + Alias((int32_t) UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS, 639), + Alias((int32_t) UBLOCK_SUPPLEMENTAL_ARROWS_A, 641), + Alias((int32_t) UBLOCK_SUPPLEMENTAL_ARROWS_B, 643), + Alias((int32_t) UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 645), + Alias((int32_t) UBLOCK_SUPPLEMENTAL_PUNCTUATION, 647), + Alias((int32_t) UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A, 649), + Alias((int32_t) UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B, 651), + Alias((int32_t) UBLOCK_SYLOTI_NAGRI, 653), + Alias((int32_t) UBLOCK_SYRIAC, 655), + Alias((int32_t) UBLOCK_TAGALOG, 657), + Alias((int32_t) UBLOCK_TAGBANWA, 659), + Alias((int32_t) UBLOCK_TAGS, 661), + Alias((int32_t) UBLOCK_TAI_LE, 663), + Alias((int32_t) UBLOCK_TAI_THAM, 665), + Alias((int32_t) UBLOCK_TAI_VIET, 667), + Alias((int32_t) UBLOCK_TAI_XUAN_JING_SYMBOLS, 669), + Alias((int32_t) UBLOCK_TAMIL, 671), + Alias((int32_t) UBLOCK_TELUGU, 673), + Alias((int32_t) UBLOCK_THAANA, 675), + Alias((int32_t) UBLOCK_THAI, 677), + Alias((int32_t) UBLOCK_TIBETAN, 679), + Alias((int32_t) UBLOCK_TIFINAGH, 681), + Alias((int32_t) UBLOCK_UGARITIC, 683), + Alias((int32_t) UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 685), + Alias((int32_t) UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 688), + Alias((int32_t) UBLOCK_VAI, 690), + Alias((int32_t) UBLOCK_VARIATION_SELECTORS, 692), + Alias((int32_t) UBLOCK_VARIATION_SELECTORS_SUPPLEMENT, 694), + Alias((int32_t) UBLOCK_VEDIC_EXTENSIONS, 696), + Alias((int32_t) UBLOCK_VERTICAL_FORMS, 698), + Alias((int32_t) UBLOCK_YIJING_HEXAGRAM_SYMBOLS, 700), + Alias((int32_t) UBLOCK_YI_RADICALS, 
702), + Alias((int32_t) UBLOCK_YI_SYLLABLES, 704), }; const int32_t VALUES_ccc_COUNT = 20; const Alias VALUES_ccc[] = { - Alias((int32_t) 0, 704), - Alias((int32_t) 1, 706), - Alias((int32_t) 200, 708), - Alias((int32_t) 202, 710), - Alias((int32_t) 214, 712), - Alias((int32_t) 216, 714), - Alias((int32_t) 218, 716), - Alias((int32_t) 220, 718), - Alias((int32_t) 222, 720), - Alias((int32_t) 224, 722), - Alias((int32_t) 226, 724), - Alias((int32_t) 228, 726), - Alias((int32_t) 230, 728), - Alias((int32_t) 232, 730), - Alias((int32_t) 233, 732), - Alias((int32_t) 234, 734), - Alias((int32_t) 240, 736), - Alias((int32_t) 7, 738), - Alias((int32_t) 8, 740), - Alias((int32_t) 9, 742), + Alias((int32_t) 0, 706), + Alias((int32_t) 1, 708), + Alias((int32_t) 200, 710), + Alias((int32_t) 202, 712), + Alias((int32_t) 214, 714), + Alias((int32_t) 216, 716), + Alias((int32_t) 218, 718), + Alias((int32_t) 220, 720), + Alias((int32_t) 222, 722), + Alias((int32_t) 224, 724), + Alias((int32_t) 226, 726), + Alias((int32_t) 228, 728), + Alias((int32_t) 230, 730), + Alias((int32_t) 232, 732), + Alias((int32_t) 233, 734), + Alias((int32_t) 234, 736), + Alias((int32_t) 240, 738), + Alias((int32_t) 7, 740), + Alias((int32_t) 8, 742), + Alias((int32_t) 9, 744), }; const int32_t VALUES_dt_COUNT = 18; const Alias VALUES_dt[] = { - Alias((int32_t) U_DT_CANONICAL, 744), - Alias((int32_t) U_DT_CIRCLE, 747), - Alias((int32_t) U_DT_COMPAT, 750), - Alias((int32_t) U_DT_FINAL, 753), - Alias((int32_t) U_DT_FONT, 756), - Alias((int32_t) U_DT_FRACTION, 758), - Alias((int32_t) U_DT_INITIAL, 761), - Alias((int32_t) U_DT_ISOLATED, 764), - Alias((int32_t) U_DT_MEDIAL, 767), - Alias((int32_t) U_DT_NARROW, 770), - Alias((int32_t) U_DT_NOBREAK, 773), - Alias((int32_t) U_DT_NONE, 776), - Alias((int32_t) U_DT_SMALL, 778), - Alias((int32_t) U_DT_SQUARE, 781), - Alias((int32_t) U_DT_SUB, 784), - Alias((int32_t) U_DT_SUPER, 786), - Alias((int32_t) U_DT_VERTICAL, 789), - Alias((int32_t) U_DT_WIDE, 792), + 
Alias((int32_t) U_DT_CANONICAL, 746), + Alias((int32_t) U_DT_CIRCLE, 749), + Alias((int32_t) U_DT_COMPAT, 752), + Alias((int32_t) U_DT_FINAL, 755), + Alias((int32_t) U_DT_FONT, 758), + Alias((int32_t) U_DT_FRACTION, 760), + Alias((int32_t) U_DT_INITIAL, 763), + Alias((int32_t) U_DT_ISOLATED, 766), + Alias((int32_t) U_DT_MEDIAL, 769), + Alias((int32_t) U_DT_NARROW, 772), + Alias((int32_t) U_DT_NOBREAK, 775), + Alias((int32_t) U_DT_NONE, 778), + Alias((int32_t) U_DT_SMALL, 780), + Alias((int32_t) U_DT_SQUARE, 783), + Alias((int32_t) U_DT_SUB, 786), + Alias((int32_t) U_DT_SUPER, 788), + Alias((int32_t) U_DT_VERTICAL, 791), + Alias((int32_t) U_DT_WIDE, 794), }; const int32_t VALUES_ea_COUNT = 6; const Alias VALUES_ea[] = { - Alias((int32_t) U_EA_AMBIGUOUS, 794), - Alias((int32_t) U_EA_FULLWIDTH, 796), - Alias((int32_t) U_EA_HALFWIDTH, 798), - Alias((int32_t) U_EA_NARROW, 800), - Alias((int32_t) U_EA_NEUTRAL, 802), - Alias((int32_t) U_EA_WIDE, 804), + Alias((int32_t) U_EA_AMBIGUOUS, 796), + Alias((int32_t) U_EA_FULLWIDTH, 798), + Alias((int32_t) U_EA_HALFWIDTH, 800), + Alias((int32_t) U_EA_NARROW, 802), + Alias((int32_t) U_EA_NEUTRAL, 804), + Alias((int32_t) U_EA_WIDE, 806), }; const int32_t VALUES_gc_COUNT = 30; const Alias VALUES_gc[] = { - Alias((int32_t) U_COMBINING_SPACING_MARK, 806), - Alias((int32_t) U_CONNECTOR_PUNCTUATION, 808), - Alias((int32_t) U_CONTROL_CHAR, 810), - Alias((int32_t) U_CURRENCY_SYMBOL, 813), - Alias((int32_t) U_DASH_PUNCTUATION, 815), - Alias((int32_t) U_DECIMAL_DIGIT_NUMBER, 817), - Alias((int32_t) U_ENCLOSING_MARK, 820), - Alias((int32_t) U_END_PUNCTUATION, 822), - Alias((int32_t) U_FINAL_PUNCTUATION, 824), - Alias((int32_t) U_FORMAT_CHAR, 826), - Alias((int32_t) U_GENERAL_OTHER_TYPES, 828), - Alias((int32_t) U_INITIAL_PUNCTUATION, 830), - Alias((int32_t) U_LETTER_NUMBER, 832), - Alias((int32_t) U_LINE_SEPARATOR, 834), - Alias((int32_t) U_LOWERCASE_LETTER, 836), - Alias((int32_t) U_MATH_SYMBOL, 838), - Alias((int32_t) U_MODIFIER_LETTER, 
840), - Alias((int32_t) U_MODIFIER_SYMBOL, 842), - Alias((int32_t) U_NON_SPACING_MARK, 844), - Alias((int32_t) U_OTHER_LETTER, 846), - Alias((int32_t) U_OTHER_NUMBER, 848), - Alias((int32_t) U_OTHER_PUNCTUATION, 850), - Alias((int32_t) U_OTHER_SYMBOL, 852), - Alias((int32_t) U_PARAGRAPH_SEPARATOR, 854), - Alias((int32_t) U_PRIVATE_USE_CHAR, 856), - Alias((int32_t) U_SPACE_SEPARATOR, 858), - Alias((int32_t) U_START_PUNCTUATION, 860), - Alias((int32_t) U_SURROGATE, 862), - Alias((int32_t) U_TITLECASE_LETTER, 864), - Alias((int32_t) U_UPPERCASE_LETTER, 866), + Alias((int32_t) U_COMBINING_SPACING_MARK, 808), + Alias((int32_t) U_CONNECTOR_PUNCTUATION, 810), + Alias((int32_t) U_CONTROL_CHAR, 812), + Alias((int32_t) U_CURRENCY_SYMBOL, 815), + Alias((int32_t) U_DASH_PUNCTUATION, 817), + Alias((int32_t) U_DECIMAL_DIGIT_NUMBER, 819), + Alias((int32_t) U_ENCLOSING_MARK, 822), + Alias((int32_t) U_END_PUNCTUATION, 824), + Alias((int32_t) U_FINAL_PUNCTUATION, 826), + Alias((int32_t) U_FORMAT_CHAR, 828), + Alias((int32_t) U_GENERAL_OTHER_TYPES, 830), + Alias((int32_t) U_INITIAL_PUNCTUATION, 832), + Alias((int32_t) U_LETTER_NUMBER, 834), + Alias((int32_t) U_LINE_SEPARATOR, 836), + Alias((int32_t) U_LOWERCASE_LETTER, 838), + Alias((int32_t) U_MATH_SYMBOL, 840), + Alias((int32_t) U_MODIFIER_LETTER, 842), + Alias((int32_t) U_MODIFIER_SYMBOL, 844), + Alias((int32_t) U_NON_SPACING_MARK, 846), + Alias((int32_t) U_OTHER_LETTER, 848), + Alias((int32_t) U_OTHER_NUMBER, 850), + Alias((int32_t) U_OTHER_PUNCTUATION, 852), + Alias((int32_t) U_OTHER_SYMBOL, 854), + Alias((int32_t) U_PARAGRAPH_SEPARATOR, 856), + Alias((int32_t) U_PRIVATE_USE_CHAR, 858), + Alias((int32_t) U_SPACE_SEPARATOR, 860), + Alias((int32_t) U_START_PUNCTUATION, 862), + Alias((int32_t) U_SURROGATE, 864), + Alias((int32_t) U_TITLECASE_LETTER, 866), + Alias((int32_t) U_UPPERCASE_LETTER, 868), }; const int32_t VALUES_gcm_COUNT = 38; const Alias VALUES_gcm[] = { - Alias((int32_t) U_GC_CC_MASK, 810), - Alias((int32_t) 
U_GC_CF_MASK, 826), - Alias((int32_t) U_GC_CN_MASK, 828), - Alias((int32_t) U_GC_CO_MASK, 856), - Alias((int32_t) U_GC_CS_MASK, 862), - Alias((int32_t) U_GC_C_MASK, 868), - Alias((int32_t) U_GC_LC_MASK, 870), - Alias((int32_t) U_GC_LL_MASK, 836), - Alias((int32_t) U_GC_LM_MASK, 840), - Alias((int32_t) U_GC_LO_MASK, 846), - Alias((int32_t) U_GC_LT_MASK, 864), - Alias((int32_t) U_GC_LU_MASK, 866), - Alias((int32_t) U_GC_L_MASK, 872), - Alias((int32_t) U_GC_MC_MASK, 806), - Alias((int32_t) U_GC_ME_MASK, 820), - Alias((int32_t) U_GC_MN_MASK, 844), - Alias((int32_t) U_GC_M_MASK, 874), - Alias((int32_t) U_GC_ND_MASK, 817), - Alias((int32_t) U_GC_NL_MASK, 832), - Alias((int32_t) U_GC_NO_MASK, 848), - Alias((int32_t) U_GC_N_MASK, 876), - Alias((int32_t) U_GC_PC_MASK, 808), - Alias((int32_t) U_GC_PD_MASK, 815), - Alias((int32_t) U_GC_PE_MASK, 822), - Alias((int32_t) U_GC_PF_MASK, 824), - Alias((int32_t) U_GC_PI_MASK, 830), - Alias((int32_t) U_GC_PO_MASK, 850), - Alias((int32_t) U_GC_PS_MASK, 860), - Alias((int32_t) U_GC_P_MASK, 878), - Alias((int32_t) U_GC_SC_MASK, 813), - Alias((int32_t) U_GC_SK_MASK, 842), - Alias((int32_t) U_GC_SM_MASK, 838), - Alias((int32_t) U_GC_SO_MASK, 852), - Alias((int32_t) U_GC_S_MASK, 881), - Alias((int32_t) U_GC_ZL_MASK, 834), - Alias((int32_t) U_GC_ZP_MASK, 854), - Alias((int32_t) U_GC_ZS_MASK, 858), - Alias((int32_t) U_GC_Z_MASK, 883), + Alias((int32_t) U_GC_CC_MASK, 812), + Alias((int32_t) U_GC_CF_MASK, 828), + Alias((int32_t) U_GC_CN_MASK, 830), + Alias((int32_t) U_GC_CO_MASK, 858), + Alias((int32_t) U_GC_CS_MASK, 864), + Alias((int32_t) U_GC_C_MASK, 870), + Alias((int32_t) U_GC_LC_MASK, 872), + Alias((int32_t) U_GC_LL_MASK, 838), + Alias((int32_t) U_GC_LM_MASK, 842), + Alias((int32_t) U_GC_LO_MASK, 848), + Alias((int32_t) U_GC_LT_MASK, 866), + Alias((int32_t) U_GC_LU_MASK, 868), + Alias((int32_t) U_GC_L_MASK, 874), + Alias((int32_t) U_GC_MC_MASK, 808), + Alias((int32_t) U_GC_ME_MASK, 822), + Alias((int32_t) U_GC_MN_MASK, 846), + 
Alias((int32_t) U_GC_M_MASK, 876), + Alias((int32_t) U_GC_ND_MASK, 819), + Alias((int32_t) U_GC_NL_MASK, 834), + Alias((int32_t) U_GC_NO_MASK, 850), + Alias((int32_t) U_GC_N_MASK, 878), + Alias((int32_t) U_GC_PC_MASK, 810), + Alias((int32_t) U_GC_PD_MASK, 817), + Alias((int32_t) U_GC_PE_MASK, 824), + Alias((int32_t) U_GC_PF_MASK, 826), + Alias((int32_t) U_GC_PI_MASK, 832), + Alias((int32_t) U_GC_PO_MASK, 852), + Alias((int32_t) U_GC_PS_MASK, 862), + Alias((int32_t) U_GC_P_MASK, 880), + Alias((int32_t) U_GC_SC_MASK, 815), + Alias((int32_t) U_GC_SK_MASK, 844), + Alias((int32_t) U_GC_SM_MASK, 840), + Alias((int32_t) U_GC_SO_MASK, 854), + Alias((int32_t) U_GC_S_MASK, 883), + Alias((int32_t) U_GC_ZL_MASK, 836), + Alias((int32_t) U_GC_ZP_MASK, 856), + Alias((int32_t) U_GC_ZS_MASK, 860), + Alias((int32_t) U_GC_Z_MASK, 885), }; const int32_t VALUES_hst_COUNT = 6; const Alias VALUES_hst[] = { - Alias((int32_t) U_HST_LEADING_JAMO, 885), - Alias((int32_t) U_HST_LVT_SYLLABLE, 887), - Alias((int32_t) U_HST_LV_SYLLABLE, 889), - Alias((int32_t) U_HST_NOT_APPLICABLE, 891), - Alias((int32_t) U_HST_TRAILING_JAMO, 893), - Alias((int32_t) U_HST_VOWEL_JAMO, 895), + Alias((int32_t) U_HST_LEADING_JAMO, 887), + Alias((int32_t) U_HST_LVT_SYLLABLE, 889), + Alias((int32_t) U_HST_LV_SYLLABLE, 891), + Alias((int32_t) U_HST_NOT_APPLICABLE, 893), + Alias((int32_t) U_HST_TRAILING_JAMO, 895), + Alias((int32_t) U_HST_VOWEL_JAMO, 897), }; const int32_t VALUES_jg_COUNT = 57; const Alias VALUES_jg[] = { - Alias((int32_t) U_JG_AIN, 897), - Alias((int32_t) U_JG_ALAPH, 899), - Alias((int32_t) U_JG_ALEF, 901), - Alias((int32_t) U_JG_BEH, 903), - Alias((int32_t) U_JG_BETH, 905), - Alias((int32_t) U_JG_BURUSHASKI_YEH_BARREE, 907), - Alias((int32_t) U_JG_DAL, 909), - Alias((int32_t) U_JG_DALATH_RISH, 911), - Alias((int32_t) U_JG_E, 913), - Alias((int32_t) U_JG_FARSI_YEH, 915), - Alias((int32_t) U_JG_FE, 917), - Alias((int32_t) U_JG_FEH, 919), - Alias((int32_t) U_JG_FINAL_SEMKATH, 921), - Alias((int32_t) 
U_JG_GAF, 923), - Alias((int32_t) U_JG_GAMAL, 925), - Alias((int32_t) U_JG_HAH, 927), - Alias((int32_t) U_JG_HAMZA_ON_HEH_GOAL, 929), - Alias((int32_t) U_JG_HE, 931), - Alias((int32_t) U_JG_HEH, 933), - Alias((int32_t) U_JG_HEH_GOAL, 935), - Alias((int32_t) U_JG_HETH, 937), - Alias((int32_t) U_JG_KAF, 939), - Alias((int32_t) U_JG_KAPH, 941), - Alias((int32_t) U_JG_KHAPH, 943), - Alias((int32_t) U_JG_KNOTTED_HEH, 945), - Alias((int32_t) U_JG_LAM, 947), - Alias((int32_t) U_JG_LAMADH, 949), - Alias((int32_t) U_JG_MEEM, 951), - Alias((int32_t) U_JG_MIM, 953), - Alias((int32_t) U_JG_NOON, 955), - Alias((int32_t) U_JG_NO_JOINING_GROUP, 957), - Alias((int32_t) U_JG_NUN, 959), - Alias((int32_t) U_JG_NYA, 961), - Alias((int32_t) U_JG_PE, 963), - Alias((int32_t) U_JG_QAF, 965), - Alias((int32_t) U_JG_QAPH, 967), - Alias((int32_t) U_JG_REH, 969), - Alias((int32_t) U_JG_REVERSED_PE, 971), - Alias((int32_t) U_JG_SAD, 973), - Alias((int32_t) U_JG_SADHE, 975), - Alias((int32_t) U_JG_SEEN, 977), - Alias((int32_t) U_JG_SEMKATH, 979), - Alias((int32_t) U_JG_SHIN, 981), - Alias((int32_t) U_JG_SWASH_KAF, 983), - Alias((int32_t) U_JG_SYRIAC_WAW, 985), - Alias((int32_t) U_JG_TAH, 987), - Alias((int32_t) U_JG_TAW, 989), - Alias((int32_t) U_JG_TEH_MARBUTA, 991), - Alias((int32_t) U_JG_TETH, 993), - Alias((int32_t) U_JG_WAW, 995), - Alias((int32_t) U_JG_YEH, 997), - Alias((int32_t) U_JG_YEH_BARREE, 999), - Alias((int32_t) U_JG_YEH_WITH_TAIL, 1001), - Alias((int32_t) U_JG_YUDH, 1003), - Alias((int32_t) U_JG_YUDH_HE, 1005), - Alias((int32_t) U_JG_ZAIN, 1007), - Alias((int32_t) U_JG_ZHAIN, 1009), + Alias((int32_t) U_JG_AIN, 899), + Alias((int32_t) U_JG_ALAPH, 901), + Alias((int32_t) U_JG_ALEF, 903), + Alias((int32_t) U_JG_BEH, 905), + Alias((int32_t) U_JG_BETH, 907), + Alias((int32_t) U_JG_BURUSHASKI_YEH_BARREE, 909), + Alias((int32_t) U_JG_DAL, 911), + Alias((int32_t) U_JG_DALATH_RISH, 913), + Alias((int32_t) U_JG_E, 915), + Alias((int32_t) U_JG_FARSI_YEH, 917), + Alias((int32_t) U_JG_FE, 
919), + Alias((int32_t) U_JG_FEH, 921), + Alias((int32_t) U_JG_FINAL_SEMKATH, 923), + Alias((int32_t) U_JG_GAF, 925), + Alias((int32_t) U_JG_GAMAL, 927), + Alias((int32_t) U_JG_HAH, 929), + Alias((int32_t) U_JG_HAMZA_ON_HEH_GOAL, 931), + Alias((int32_t) U_JG_HE, 933), + Alias((int32_t) U_JG_HEH, 935), + Alias((int32_t) U_JG_HEH_GOAL, 937), + Alias((int32_t) U_JG_HETH, 939), + Alias((int32_t) U_JG_KAF, 941), + Alias((int32_t) U_JG_KAPH, 943), + Alias((int32_t) U_JG_KHAPH, 945), + Alias((int32_t) U_JG_KNOTTED_HEH, 947), + Alias((int32_t) U_JG_LAM, 949), + Alias((int32_t) U_JG_LAMADH, 951), + Alias((int32_t) U_JG_MEEM, 953), + Alias((int32_t) U_JG_MIM, 955), + Alias((int32_t) U_JG_NOON, 957), + Alias((int32_t) U_JG_NO_JOINING_GROUP, 959), + Alias((int32_t) U_JG_NUN, 961), + Alias((int32_t) U_JG_NYA, 963), + Alias((int32_t) U_JG_PE, 965), + Alias((int32_t) U_JG_QAF, 967), + Alias((int32_t) U_JG_QAPH, 969), + Alias((int32_t) U_JG_REH, 971), + Alias((int32_t) U_JG_REVERSED_PE, 973), + Alias((int32_t) U_JG_SAD, 975), + Alias((int32_t) U_JG_SADHE, 977), + Alias((int32_t) U_JG_SEEN, 979), + Alias((int32_t) U_JG_SEMKATH, 981), + Alias((int32_t) U_JG_SHIN, 983), + Alias((int32_t) U_JG_SWASH_KAF, 985), + Alias((int32_t) U_JG_SYRIAC_WAW, 987), + Alias((int32_t) U_JG_TAH, 989), + Alias((int32_t) U_JG_TAW, 991), + Alias((int32_t) U_JG_TEH_MARBUTA, 993), + Alias((int32_t) U_JG_TETH, 995), + Alias((int32_t) U_JG_WAW, 997), + Alias((int32_t) U_JG_YEH, 999), + Alias((int32_t) U_JG_YEH_BARREE, 1001), + Alias((int32_t) U_JG_YEH_WITH_TAIL, 1003), + Alias((int32_t) U_JG_YUDH, 1005), + Alias((int32_t) U_JG_YUDH_HE, 1007), + Alias((int32_t) U_JG_ZAIN, 1009), + Alias((int32_t) U_JG_ZHAIN, 1011), }; const int32_t VALUES_jt_COUNT = 6; const Alias VALUES_jt[] = { - Alias((int32_t) U_JT_DUAL_JOINING, 1011), - Alias((int32_t) U_JT_JOIN_CAUSING, 1013), - Alias((int32_t) U_JT_LEFT_JOINING, 1015), - Alias((int32_t) U_JT_NON_JOINING, 1017), - Alias((int32_t) U_JT_RIGHT_JOINING, 1019), - 
Alias((int32_t) U_JT_TRANSPARENT, 1021), + Alias((int32_t) U_JT_DUAL_JOINING, 1013), + Alias((int32_t) U_JT_JOIN_CAUSING, 1015), + Alias((int32_t) U_JT_LEFT_JOINING, 1017), + Alias((int32_t) U_JT_NON_JOINING, 1019), + Alias((int32_t) U_JT_RIGHT_JOINING, 1021), + Alias((int32_t) U_JT_TRANSPARENT, 1023), }; const int32_t VALUES_lb_COUNT = 37; const Alias VALUES_lb[] = { - Alias((int32_t) U_LB_ALPHABETIC, 1023), - Alias((int32_t) U_LB_AMBIGUOUS, 1025), - Alias((int32_t) U_LB_BREAK_AFTER, 1027), - Alias((int32_t) U_LB_BREAK_BEFORE, 1029), - Alias((int32_t) U_LB_BREAK_BOTH, 1031), - Alias((int32_t) U_LB_BREAK_SYMBOLS, 1033), - Alias((int32_t) U_LB_CARRIAGE_RETURN, 1035), - Alias((int32_t) U_LB_CLOSE_PARENTHESIS, 1037), - Alias((int32_t) U_LB_CLOSE_PUNCTUATION, 1039), - Alias((int32_t) U_LB_COMBINING_MARK, 1041), - Alias((int32_t) U_LB_COMPLEX_CONTEXT, 1043), - Alias((int32_t) U_LB_CONTINGENT_BREAK, 1045), - Alias((int32_t) U_LB_EXCLAMATION, 1047), - Alias((int32_t) U_LB_GLUE, 1049), - Alias((int32_t) U_LB_H2, 1051), - Alias((int32_t) U_LB_H3, 1053), - Alias((int32_t) U_LB_HYPHEN, 1055), - Alias((int32_t) U_LB_IDEOGRAPHIC, 1057), - Alias((int32_t) U_LB_INFIX_NUMERIC, 1059), - Alias((int32_t) U_LB_INSEPARABLE, 1061), - Alias((int32_t) U_LB_JL, 1064), - Alias((int32_t) U_LB_JT, 1066), - Alias((int32_t) U_LB_JV, 1068), - Alias((int32_t) U_LB_LINE_FEED, 1070), - Alias((int32_t) U_LB_MANDATORY_BREAK, 1072), - Alias((int32_t) U_LB_NEXT_LINE, 1074), - Alias((int32_t) U_LB_NONSTARTER, 1076), + Alias((int32_t) U_LB_ALPHABETIC, 1025), + Alias((int32_t) U_LB_AMBIGUOUS, 1027), + Alias((int32_t) U_LB_BREAK_AFTER, 1029), + Alias((int32_t) U_LB_BREAK_BEFORE, 1031), + Alias((int32_t) U_LB_BREAK_BOTH, 1033), + Alias((int32_t) U_LB_BREAK_SYMBOLS, 1035), + Alias((int32_t) U_LB_CARRIAGE_RETURN, 1037), + Alias((int32_t) U_LB_CLOSE_PARENTHESIS, 1039), + Alias((int32_t) U_LB_CLOSE_PUNCTUATION, 1041), + Alias((int32_t) U_LB_COMBINING_MARK, 1043), + Alias((int32_t) U_LB_COMPLEX_CONTEXT, 1045), + 
Alias((int32_t) U_LB_CONTINGENT_BREAK, 1047), + Alias((int32_t) U_LB_EXCLAMATION, 1049), + Alias((int32_t) U_LB_GLUE, 1051), + Alias((int32_t) U_LB_H2, 1053), + Alias((int32_t) U_LB_H3, 1055), + Alias((int32_t) U_LB_HYPHEN, 1057), + Alias((int32_t) U_LB_IDEOGRAPHIC, 1059), + Alias((int32_t) U_LB_INFIX_NUMERIC, 1061), + Alias((int32_t) U_LB_INSEPARABLE, 1063), + Alias((int32_t) U_LB_JL, 1066), + Alias((int32_t) U_LB_JT, 1068), + Alias((int32_t) U_LB_JV, 1070), + Alias((int32_t) U_LB_LINE_FEED, 1072), + Alias((int32_t) U_LB_MANDATORY_BREAK, 1074), + Alias((int32_t) U_LB_NEXT_LINE, 1076), + Alias((int32_t) U_LB_NONSTARTER, 1078), Alias((int32_t) U_LB_NUMERIC, 38), - Alias((int32_t) U_LB_OPEN_PUNCTUATION, 1078), - Alias((int32_t) U_LB_POSTFIX_NUMERIC, 1080), - Alias((int32_t) U_LB_PREFIX_NUMERIC, 1082), - Alias((int32_t) U_LB_QUOTATION, 1084), - Alias((int32_t) U_LB_SPACE, 1086), - Alias((int32_t) U_LB_SURROGATE, 1088), - Alias((int32_t) U_LB_UNKNOWN, 1090), - Alias((int32_t) U_LB_WORD_JOINER, 1092), - Alias((int32_t) U_LB_ZWSPACE, 1094), + Alias((int32_t) U_LB_OPEN_PUNCTUATION, 1080), + Alias((int32_t) U_LB_POSTFIX_NUMERIC, 1082), + Alias((int32_t) U_LB_PREFIX_NUMERIC, 1084), + Alias((int32_t) U_LB_QUOTATION, 1086), + Alias((int32_t) U_LB_SPACE, 1088), + Alias((int32_t) U_LB_SURROGATE, 1090), + Alias((int32_t) U_LB_UNKNOWN, 1092), + Alias((int32_t) U_LB_WORD_JOINER, 1094), + Alias((int32_t) U_LB_ZWSPACE, 1096), }; const int32_t VALUES_lccc_COUNT = 20; const Alias VALUES_lccc[] = { - Alias((int32_t) 0, 704), - Alias((int32_t) 1, 706), - Alias((int32_t) 200, 708), - Alias((int32_t) 202, 710), - Alias((int32_t) 214, 712), - Alias((int32_t) 216, 714), - Alias((int32_t) 218, 716), - Alias((int32_t) 220, 718), - Alias((int32_t) 222, 720), - Alias((int32_t) 224, 722), - Alias((int32_t) 226, 724), - Alias((int32_t) 228, 726), - Alias((int32_t) 230, 728), - Alias((int32_t) 232, 730), - Alias((int32_t) 233, 732), - Alias((int32_t) 234, 734), - Alias((int32_t) 240, 736), - 
Alias((int32_t) 7, 738), - Alias((int32_t) 8, 740), - Alias((int32_t) 9, 742), + Alias((int32_t) 0, 706), + Alias((int32_t) 1, 708), + Alias((int32_t) 200, 710), + Alias((int32_t) 202, 712), + Alias((int32_t) 214, 714), + Alias((int32_t) 216, 716), + Alias((int32_t) 218, 718), + Alias((int32_t) 220, 720), + Alias((int32_t) 222, 722), + Alias((int32_t) 224, 724), + Alias((int32_t) 226, 726), + Alias((int32_t) 228, 728), + Alias((int32_t) 230, 730), + Alias((int32_t) 232, 732), + Alias((int32_t) 233, 734), + Alias((int32_t) 234, 736), + Alias((int32_t) 240, 738), + Alias((int32_t) 7, 740), + Alias((int32_t) 8, 742), + Alias((int32_t) 9, 744), }; const int32_t VALUES_nt_COUNT = 4; const Alias VALUES_nt[] = { - Alias((int32_t) U_NT_DECIMAL, 1096), - Alias((int32_t) U_NT_DIGIT, 1098), - Alias((int32_t) U_NT_NONE, 1100), - Alias((int32_t) U_NT_NUMERIC, 1102), + Alias((int32_t) U_NT_DECIMAL, 1098), + Alias((int32_t) U_NT_DIGIT, 1100), + Alias((int32_t) U_NT_NONE, 1102), + Alias((int32_t) U_NT_NUMERIC, 1104), }; const int32_t VALUES_sc_COUNT = 134; const Alias VALUES_sc[] = { - Alias((int32_t) USCRIPT_ARABIC, 1104), - Alias((int32_t) USCRIPT_ARMENIAN, 1106), - Alias((int32_t) USCRIPT_AVESTAN, 1108), - Alias((int32_t) USCRIPT_BALINESE, 1110), - Alias((int32_t) USCRIPT_BAMUM, 1112), - Alias((int32_t) USCRIPT_BATAK, 1114), - Alias((int32_t) USCRIPT_BENGALI, 1116), - Alias((int32_t) USCRIPT_BLISSYMBOLS, 1118), - Alias((int32_t) USCRIPT_BOOK_PAHLAVI, 1120), - Alias((int32_t) USCRIPT_BOPOMOFO, 1122), - Alias((int32_t) USCRIPT_BRAHMI, 1124), - Alias((int32_t) USCRIPT_BRAILLE, 1126), - Alias((int32_t) USCRIPT_BUGINESE, 1128), - Alias((int32_t) USCRIPT_BUHID, 1130), - Alias((int32_t) USCRIPT_CANADIAN_ABORIGINAL, 1132), - Alias((int32_t) USCRIPT_CARIAN, 1134), - Alias((int32_t) USCRIPT_CHAKMA, 1136), - Alias((int32_t) USCRIPT_CHAM, 1138), - Alias((int32_t) USCRIPT_CHEROKEE, 1140), - Alias((int32_t) USCRIPT_CIRTH, 1142), - Alias((int32_t) USCRIPT_COMMON, 1144), - Alias((int32_t) 
USCRIPT_COPTIC, 1146), - Alias((int32_t) USCRIPT_CUNEIFORM, 1149), - Alias((int32_t) USCRIPT_CYPRIOT, 1151), - Alias((int32_t) USCRIPT_CYRILLIC, 1153), - Alias((int32_t) USCRIPT_DEMOTIC_EGYPTIAN, 1155), - Alias((int32_t) USCRIPT_DESERET, 1157), - Alias((int32_t) USCRIPT_DEVANAGARI, 1159), - Alias((int32_t) USCRIPT_EASTERN_SYRIAC, 1161), - Alias((int32_t) USCRIPT_EGYPTIAN_HIEROGLYPHS, 1163), - Alias((int32_t) USCRIPT_ESTRANGELO_SYRIAC, 1165), - Alias((int32_t) USCRIPT_ETHIOPIC, 1167), - Alias((int32_t) USCRIPT_GEORGIAN, 1169), - Alias((int32_t) USCRIPT_GLAGOLITIC, 1171), - Alias((int32_t) USCRIPT_GOTHIC, 1173), - Alias((int32_t) USCRIPT_GREEK, 1175), - Alias((int32_t) USCRIPT_GUJARATI, 1177), - Alias((int32_t) USCRIPT_GURMUKHI, 1179), - Alias((int32_t) USCRIPT_HAN, 1181), - Alias((int32_t) USCRIPT_HANGUL, 1183), - Alias((int32_t) USCRIPT_HANUNOO, 1185), - Alias((int32_t) USCRIPT_HARAPPAN_INDUS, 1187), - Alias((int32_t) USCRIPT_HEBREW, 1189), - Alias((int32_t) USCRIPT_HIERATIC_EGYPTIAN, 1191), - Alias((int32_t) USCRIPT_HIRAGANA, 1193), - Alias((int32_t) USCRIPT_IMPERIAL_ARAMAIC, 1195), - Alias((int32_t) USCRIPT_INHERITED, 1197), - Alias((int32_t) USCRIPT_INSCRIPTIONAL_PAHLAVI, 1200), - Alias((int32_t) USCRIPT_INSCRIPTIONAL_PARTHIAN, 1202), - Alias((int32_t) USCRIPT_JAPANESE, 1204), - Alias((int32_t) USCRIPT_JAVANESE, 1206), - Alias((int32_t) USCRIPT_KAITHI, 1208), - Alias((int32_t) USCRIPT_KANNADA, 1210), - Alias((int32_t) USCRIPT_KATAKANA, 1212), - Alias((int32_t) USCRIPT_KATAKANA_OR_HIRAGANA, 1214), - Alias((int32_t) USCRIPT_KAYAH_LI, 1216), - Alias((int32_t) USCRIPT_KHAROSHTHI, 1218), - Alias((int32_t) USCRIPT_KHMER, 1220), - Alias((int32_t) USCRIPT_KHUTSURI, 1222), - Alias((int32_t) USCRIPT_KOREAN, 1224), - Alias((int32_t) USCRIPT_LANNA, 1226), - Alias((int32_t) USCRIPT_LAO, 1228), - Alias((int32_t) USCRIPT_LATIN, 1230), - Alias((int32_t) USCRIPT_LATIN_FRAKTUR, 1232), - Alias((int32_t) USCRIPT_LATIN_GAELIC, 1234), - Alias((int32_t) USCRIPT_LEPCHA, 1236), - 
Alias((int32_t) USCRIPT_LIMBU, 1238), - Alias((int32_t) USCRIPT_LINEAR_A, 1240), - Alias((int32_t) USCRIPT_LINEAR_B, 1242), - Alias((int32_t) USCRIPT_LISU, 1244), - Alias((int32_t) USCRIPT_LYCIAN, 1246), - Alias((int32_t) USCRIPT_LYDIAN, 1248), - Alias((int32_t) USCRIPT_MALAYALAM, 1250), - Alias((int32_t) USCRIPT_MANDAEAN, 1252), - Alias((int32_t) USCRIPT_MANICHAEAN, 1254), - Alias((int32_t) USCRIPT_MATHEMATICAL_NOTATION, 1256), - Alias((int32_t) USCRIPT_MAYAN_HIEROGLYPHS, 1258), - Alias((int32_t) USCRIPT_MEITEI_MAYEK, 1260), - Alias((int32_t) USCRIPT_MEROITIC, 1262), - Alias((int32_t) USCRIPT_MONGOLIAN, 1264), - Alias((int32_t) USCRIPT_MOON, 1266), - Alias((int32_t) USCRIPT_MYANMAR, 1268), - Alias((int32_t) USCRIPT_NAKHI_GEBA, 1270), - Alias((int32_t) USCRIPT_NEW_TAI_LUE, 1272), - Alias((int32_t) USCRIPT_NKO, 1274), - Alias((int32_t) USCRIPT_OGHAM, 1276), - Alias((int32_t) USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC, 1278), - Alias((int32_t) USCRIPT_OLD_HUNGARIAN, 1280), - Alias((int32_t) USCRIPT_OLD_ITALIC, 1282), - Alias((int32_t) USCRIPT_OLD_PERMIC, 1284), - Alias((int32_t) USCRIPT_OLD_PERSIAN, 1286), - Alias((int32_t) USCRIPT_OLD_SOUTH_ARABIAN, 1288), - Alias((int32_t) USCRIPT_OL_CHIKI, 1290), - Alias((int32_t) USCRIPT_ORIYA, 1292), - Alias((int32_t) USCRIPT_ORKHON, 1294), - Alias((int32_t) USCRIPT_OSMANYA, 1296), - Alias((int32_t) USCRIPT_PAHAWH_HMONG, 1298), - Alias((int32_t) USCRIPT_PHAGS_PA, 1300), - Alias((int32_t) USCRIPT_PHOENICIAN, 1302), - Alias((int32_t) USCRIPT_PHONETIC_POLLARD, 1304), - Alias((int32_t) USCRIPT_PSALTER_PAHLAVI, 1306), - Alias((int32_t) USCRIPT_REJANG, 1308), - Alias((int32_t) USCRIPT_RONGORONGO, 1310), - Alias((int32_t) USCRIPT_RUNIC, 1312), - Alias((int32_t) USCRIPT_SAMARITAN, 1314), - Alias((int32_t) USCRIPT_SARATI, 1316), - Alias((int32_t) USCRIPT_SAURASHTRA, 1318), - Alias((int32_t) USCRIPT_SHAVIAN, 1320), - Alias((int32_t) USCRIPT_SIGN_WRITING, 1322), - Alias((int32_t) USCRIPT_SIMPLIFIED_HAN, 1324), - Alias((int32_t) USCRIPT_SINHALA, 
1326), - Alias((int32_t) USCRIPT_SUNDANESE, 1328), - Alias((int32_t) USCRIPT_SYLOTI_NAGRI, 1330), - Alias((int32_t) USCRIPT_SYMBOLS, 1332), - Alias((int32_t) USCRIPT_SYRIAC, 1334), - Alias((int32_t) USCRIPT_TAGALOG, 1336), - Alias((int32_t) USCRIPT_TAGBANWA, 1338), - Alias((int32_t) USCRIPT_TAI_LE, 1340), - Alias((int32_t) USCRIPT_TAI_VIET, 1342), - Alias((int32_t) USCRIPT_TAMIL, 1344), - Alias((int32_t) USCRIPT_TELUGU, 1346), - Alias((int32_t) USCRIPT_TENGWAR, 1348), - Alias((int32_t) USCRIPT_THAANA, 1350), - Alias((int32_t) USCRIPT_THAI, 1352), - Alias((int32_t) USCRIPT_TIBETAN, 1354), - Alias((int32_t) USCRIPT_TIFINAGH, 1356), - Alias((int32_t) USCRIPT_TRADITIONAL_HAN, 1358), - Alias((int32_t) USCRIPT_UGARITIC, 1360), - Alias((int32_t) USCRIPT_UNKNOWN, 1362), - Alias((int32_t) USCRIPT_UNWRITTEN_LANGUAGES, 1364), - Alias((int32_t) USCRIPT_VAI, 1366), - Alias((int32_t) USCRIPT_VISIBLE_SPEECH, 1368), - Alias((int32_t) USCRIPT_WESTERN_SYRIAC, 1370), - Alias((int32_t) USCRIPT_YI, 1372), + Alias((int32_t) USCRIPT_ARABIC, 1106), + Alias((int32_t) USCRIPT_ARMENIAN, 1108), + Alias((int32_t) USCRIPT_AVESTAN, 1110), + Alias((int32_t) USCRIPT_BALINESE, 1112), + Alias((int32_t) USCRIPT_BAMUM, 1114), + Alias((int32_t) USCRIPT_BATAK, 1116), + Alias((int32_t) USCRIPT_BENGALI, 1118), + Alias((int32_t) USCRIPT_BLISSYMBOLS, 1120), + Alias((int32_t) USCRIPT_BOOK_PAHLAVI, 1122), + Alias((int32_t) USCRIPT_BOPOMOFO, 1124), + Alias((int32_t) USCRIPT_BRAHMI, 1126), + Alias((int32_t) USCRIPT_BRAILLE, 1128), + Alias((int32_t) USCRIPT_BUGINESE, 1130), + Alias((int32_t) USCRIPT_BUHID, 1132), + Alias((int32_t) USCRIPT_CANADIAN_ABORIGINAL, 1134), + Alias((int32_t) USCRIPT_CARIAN, 1136), + Alias((int32_t) USCRIPT_CHAKMA, 1138), + Alias((int32_t) USCRIPT_CHAM, 1140), + Alias((int32_t) USCRIPT_CHEROKEE, 1142), + Alias((int32_t) USCRIPT_CIRTH, 1144), + Alias((int32_t) USCRIPT_COMMON, 1146), + Alias((int32_t) USCRIPT_COPTIC, 1148), + Alias((int32_t) USCRIPT_CUNEIFORM, 1151), + Alias((int32_t) 
USCRIPT_CYPRIOT, 1153), + Alias((int32_t) USCRIPT_CYRILLIC, 1155), + Alias((int32_t) USCRIPT_DEMOTIC_EGYPTIAN, 1157), + Alias((int32_t) USCRIPT_DESERET, 1159), + Alias((int32_t) USCRIPT_DEVANAGARI, 1161), + Alias((int32_t) USCRIPT_EASTERN_SYRIAC, 1163), + Alias((int32_t) USCRIPT_EGYPTIAN_HIEROGLYPHS, 1165), + Alias((int32_t) USCRIPT_ESTRANGELO_SYRIAC, 1167), + Alias((int32_t) USCRIPT_ETHIOPIC, 1169), + Alias((int32_t) USCRIPT_GEORGIAN, 1171), + Alias((int32_t) USCRIPT_GLAGOLITIC, 1173), + Alias((int32_t) USCRIPT_GOTHIC, 1175), + Alias((int32_t) USCRIPT_GREEK, 1177), + Alias((int32_t) USCRIPT_GUJARATI, 1179), + Alias((int32_t) USCRIPT_GURMUKHI, 1181), + Alias((int32_t) USCRIPT_HAN, 1183), + Alias((int32_t) USCRIPT_HANGUL, 1185), + Alias((int32_t) USCRIPT_HANUNOO, 1187), + Alias((int32_t) USCRIPT_HARAPPAN_INDUS, 1189), + Alias((int32_t) USCRIPT_HEBREW, 1191), + Alias((int32_t) USCRIPT_HIERATIC_EGYPTIAN, 1193), + Alias((int32_t) USCRIPT_HIRAGANA, 1195), + Alias((int32_t) USCRIPT_IMPERIAL_ARAMAIC, 1197), + Alias((int32_t) USCRIPT_INHERITED, 1199), + Alias((int32_t) USCRIPT_INSCRIPTIONAL_PAHLAVI, 1202), + Alias((int32_t) USCRIPT_INSCRIPTIONAL_PARTHIAN, 1204), + Alias((int32_t) USCRIPT_JAPANESE, 1206), + Alias((int32_t) USCRIPT_JAVANESE, 1208), + Alias((int32_t) USCRIPT_KAITHI, 1210), + Alias((int32_t) USCRIPT_KANNADA, 1212), + Alias((int32_t) USCRIPT_KATAKANA, 1214), + Alias((int32_t) USCRIPT_KATAKANA_OR_HIRAGANA, 1216), + Alias((int32_t) USCRIPT_KAYAH_LI, 1218), + Alias((int32_t) USCRIPT_KHAROSHTHI, 1220), + Alias((int32_t) USCRIPT_KHMER, 1222), + Alias((int32_t) USCRIPT_KHUTSURI, 1224), + Alias((int32_t) USCRIPT_KOREAN, 1226), + Alias((int32_t) USCRIPT_LANNA, 1228), + Alias((int32_t) USCRIPT_LAO, 1230), + Alias((int32_t) USCRIPT_LATIN, 1232), + Alias((int32_t) USCRIPT_LATIN_FRAKTUR, 1234), + Alias((int32_t) USCRIPT_LATIN_GAELIC, 1236), + Alias((int32_t) USCRIPT_LEPCHA, 1238), + Alias((int32_t) USCRIPT_LIMBU, 1240), + Alias((int32_t) USCRIPT_LINEAR_A, 1242), + 
Alias((int32_t) USCRIPT_LINEAR_B, 1244), + Alias((int32_t) USCRIPT_LISU, 1246), + Alias((int32_t) USCRIPT_LYCIAN, 1248), + Alias((int32_t) USCRIPT_LYDIAN, 1250), + Alias((int32_t) USCRIPT_MALAYALAM, 1252), + Alias((int32_t) USCRIPT_MANDAEAN, 1254), + Alias((int32_t) USCRIPT_MANICHAEAN, 1256), + Alias((int32_t) USCRIPT_MATHEMATICAL_NOTATION, 1258), + Alias((int32_t) USCRIPT_MAYAN_HIEROGLYPHS, 1260), + Alias((int32_t) USCRIPT_MEITEI_MAYEK, 1262), + Alias((int32_t) USCRIPT_MEROITIC, 1264), + Alias((int32_t) USCRIPT_MONGOLIAN, 1266), + Alias((int32_t) USCRIPT_MOON, 1268), + Alias((int32_t) USCRIPT_MYANMAR, 1270), + Alias((int32_t) USCRIPT_NAKHI_GEBA, 1272), + Alias((int32_t) USCRIPT_NEW_TAI_LUE, 1274), + Alias((int32_t) USCRIPT_NKO, 1276), + Alias((int32_t) USCRIPT_OGHAM, 1278), + Alias((int32_t) USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC, 1280), + Alias((int32_t) USCRIPT_OLD_HUNGARIAN, 1282), + Alias((int32_t) USCRIPT_OLD_ITALIC, 1284), + Alias((int32_t) USCRIPT_OLD_PERMIC, 1286), + Alias((int32_t) USCRIPT_OLD_PERSIAN, 1288), + Alias((int32_t) USCRIPT_OLD_SOUTH_ARABIAN, 1290), + Alias((int32_t) USCRIPT_OL_CHIKI, 1292), + Alias((int32_t) USCRIPT_ORIYA, 1294), + Alias((int32_t) USCRIPT_ORKHON, 1296), + Alias((int32_t) USCRIPT_OSMANYA, 1298), + Alias((int32_t) USCRIPT_PAHAWH_HMONG, 1300), + Alias((int32_t) USCRIPT_PHAGS_PA, 1302), + Alias((int32_t) USCRIPT_PHOENICIAN, 1304), + Alias((int32_t) USCRIPT_PHONETIC_POLLARD, 1306), + Alias((int32_t) USCRIPT_PSALTER_PAHLAVI, 1308), + Alias((int32_t) USCRIPT_REJANG, 1310), + Alias((int32_t) USCRIPT_RONGORONGO, 1312), + Alias((int32_t) USCRIPT_RUNIC, 1314), + Alias((int32_t) USCRIPT_SAMARITAN, 1316), + Alias((int32_t) USCRIPT_SARATI, 1318), + Alias((int32_t) USCRIPT_SAURASHTRA, 1320), + Alias((int32_t) USCRIPT_SHAVIAN, 1322), + Alias((int32_t) USCRIPT_SIGN_WRITING, 1324), + Alias((int32_t) USCRIPT_SIMPLIFIED_HAN, 1326), + Alias((int32_t) USCRIPT_SINHALA, 1328), + Alias((int32_t) USCRIPT_SUNDANESE, 1330), + Alias((int32_t) 
USCRIPT_SYLOTI_NAGRI, 1332), + Alias((int32_t) USCRIPT_SYMBOLS, 1334), + Alias((int32_t) USCRIPT_SYRIAC, 1336), + Alias((int32_t) USCRIPT_TAGALOG, 1338), + Alias((int32_t) USCRIPT_TAGBANWA, 1340), + Alias((int32_t) USCRIPT_TAI_LE, 1342), + Alias((int32_t) USCRIPT_TAI_VIET, 1344), + Alias((int32_t) USCRIPT_TAMIL, 1346), + Alias((int32_t) USCRIPT_TELUGU, 1348), + Alias((int32_t) USCRIPT_TENGWAR, 1350), + Alias((int32_t) USCRIPT_THAANA, 1352), + Alias((int32_t) USCRIPT_THAI, 1354), + Alias((int32_t) USCRIPT_TIBETAN, 1356), + Alias((int32_t) USCRIPT_TIFINAGH, 1358), + Alias((int32_t) USCRIPT_TRADITIONAL_HAN, 1360), + Alias((int32_t) USCRIPT_UGARITIC, 1362), + Alias((int32_t) USCRIPT_UNKNOWN, 1364), + Alias((int32_t) USCRIPT_UNWRITTEN_LANGUAGES, 1366), + Alias((int32_t) USCRIPT_VAI, 1368), + Alias((int32_t) USCRIPT_VISIBLE_SPEECH, 1370), + Alias((int32_t) USCRIPT_WESTERN_SYRIAC, 1372), + Alias((int32_t) USCRIPT_YI, 1374), }; const int32_t VALUES_tccc_COUNT = 20; const Alias VALUES_tccc[] = { - Alias((int32_t) 0, 704), - Alias((int32_t) 1, 706), - Alias((int32_t) 200, 708), - Alias((int32_t) 202, 710), - Alias((int32_t) 214, 712), - Alias((int32_t) 216, 714), - Alias((int32_t) 218, 716), - Alias((int32_t) 220, 718), - Alias((int32_t) 222, 720), - Alias((int32_t) 224, 722), - Alias((int32_t) 226, 724), - Alias((int32_t) 228, 726), - Alias((int32_t) 230, 728), - Alias((int32_t) 232, 730), - Alias((int32_t) 233, 732), - Alias((int32_t) 234, 734), - Alias((int32_t) 240, 736), - Alias((int32_t) 7, 738), - Alias((int32_t) 8, 740), - Alias((int32_t) 9, 742), + Alias((int32_t) 0, 706), + Alias((int32_t) 1, 708), + Alias((int32_t) 200, 710), + Alias((int32_t) 202, 712), + Alias((int32_t) 214, 714), + Alias((int32_t) 216, 716), + Alias((int32_t) 218, 718), + Alias((int32_t) 220, 720), + Alias((int32_t) 222, 722), + Alias((int32_t) 224, 724), + Alias((int32_t) 226, 726), + Alias((int32_t) 228, 728), + Alias((int32_t) 230, 730), + Alias((int32_t) 232, 732), + Alias((int32_t) 233, 
734), + Alias((int32_t) 234, 736), + Alias((int32_t) 240, 738), + Alias((int32_t) 7, 740), + Alias((int32_t) 8, 742), + Alias((int32_t) 9, 744), }; -const int32_t PROPERTY_COUNT = 92; +const int32_t PROPERTY_COUNT = 93; const Property PROPERTY[] = { Property((int32_t) UCHAR_ALPHABETIC, 68, VALUES_binprop_COUNT, VALUES_binprop), @@ -2409,88 +2412,89 @@ const Property PROPERTY[] = { Property((int32_t) UCHAR_CHANGES_WHEN_CASEFOLDED, 82, VALUES_binprop_COUNT, VALUES_binprop), Property((int32_t) UCHAR_CHANGES_WHEN_CASEMAPPED, 84, VALUES_binprop_COUNT, VALUES_binprop), Property((int32_t) UCHAR_CHANGES_WHEN_LOWERCASED, 86, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_CHANGES_WHEN_TITLECASED, 88, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_CHANGES_WHEN_UPPERCASED, 90, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_DASH, 92, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_DEFAULT_IGNORABLE_CODE_POINT, 94, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_DEPRECATED, 96, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_DIACRITIC, 98, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_EXTENDER, 100, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_FULL_COMPOSITION_EXCLUSION, 102, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_GRAPHEME_BASE, 104, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_GRAPHEME_EXTEND, 106, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_GRAPHEME_LINK, 108, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_HEX_DIGIT, 110, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_HYPHEN, 112, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_IDEOGRAPHIC, 114, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_IDS_BINARY_OPERATOR, 116, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) 
UCHAR_IDS_TRINARY_OPERATOR, 118, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_ID_CONTINUE, 120, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_ID_START, 122, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_JOIN_CONTROL, 124, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_LOGICAL_ORDER_EXCEPTION, 126, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_LOWERCASE, 128, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_MATH, 130, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_NFC_INERT, 132, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_NFD_INERT, 134, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_NFKC_INERT, 136, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_NFKD_INERT, 138, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_NONCHARACTER_CODE_POINT, 140, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_PATTERN_SYNTAX, 142, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_PATTERN_WHITE_SPACE, 144, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_POSIX_ALNUM, 146, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_POSIX_BLANK, 148, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_POSIX_GRAPH, 150, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_POSIX_PRINT, 152, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_POSIX_XDIGIT, 154, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_QUOTATION_MARK, 156, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_RADICAL, 158, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_SEGMENT_STARTER, 160, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_SOFT_DOTTED, 162, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_S_TERM, 164, 
VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_TERMINAL_PUNCTUATION, 166, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_UNIFIED_IDEOGRAPH, 168, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_UPPERCASE, 170, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_VARIATION_SELECTOR, 172, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_WHITE_SPACE, 174, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_XID_CONTINUE, 177, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_XID_START, 179, VALUES_binprop_COUNT, VALUES_binprop), - Property((int32_t) UCHAR_BIDI_CLASS, 183, VALUES_bc_COUNT, VALUES_bc), - Property((int32_t) UCHAR_BLOCK, 185, VALUES_blk_COUNT, VALUES_blk), - Property((int32_t) UCHAR_CANONICAL_COMBINING_CLASS, 187, VALUES_ccc_COUNT, VALUES_ccc), - Property((int32_t) UCHAR_DECOMPOSITION_TYPE, 189, VALUES_dt_COUNT, VALUES_dt), - Property((int32_t) UCHAR_EAST_ASIAN_WIDTH, 191, VALUES_ea_COUNT, VALUES_ea), - Property((int32_t) UCHAR_GENERAL_CATEGORY, 193, VALUES_gc_COUNT, VALUES_gc), - Property((int32_t) UCHAR_GRAPHEME_CLUSTER_BREAK, 195, VALUES_GCB_COUNT, VALUES_GCB), - Property((int32_t) UCHAR_HANGUL_SYLLABLE_TYPE, 197, VALUES_hst_COUNT, VALUES_hst), - Property((int32_t) UCHAR_JOINING_GROUP, 199, VALUES_jg_COUNT, VALUES_jg), - Property((int32_t) UCHAR_JOINING_TYPE, 201, VALUES_jt_COUNT, VALUES_jt), - Property((int32_t) UCHAR_LEAD_CANONICAL_COMBINING_CLASS, 203, VALUES_lccc_COUNT, VALUES_lccc), - Property((int32_t) UCHAR_LINE_BREAK, 205, VALUES_lb_COUNT, VALUES_lb), - Property((int32_t) UCHAR_NFC_QUICK_CHECK, 207, VALUES_NFC_QC_COUNT, VALUES_NFC_QC), - Property((int32_t) UCHAR_NFD_QUICK_CHECK, 209, VALUES_NFD_QC_COUNT, VALUES_NFD_QC), - Property((int32_t) UCHAR_NFKC_QUICK_CHECK, 211, VALUES_NFKC_QC_COUNT, VALUES_NFKC_QC), - Property((int32_t) UCHAR_NFKD_QUICK_CHECK, 213, VALUES_NFKD_QC_COUNT, VALUES_NFKD_QC), - Property((int32_t) 
UCHAR_NUMERIC_TYPE, 215, VALUES_nt_COUNT, VALUES_nt), - Property((int32_t) UCHAR_SCRIPT, 217, VALUES_sc_COUNT, VALUES_sc), - Property((int32_t) UCHAR_SENTENCE_BREAK, 219, VALUES_SB_COUNT, VALUES_SB), - Property((int32_t) UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, 221, VALUES_tccc_COUNT, VALUES_tccc), - Property((int32_t) UCHAR_WORD_BREAK, 223, VALUES_WB_COUNT, VALUES_WB), - Property((int32_t) UCHAR_AGE, 227, 0, NULL), - Property((int32_t) UCHAR_BIDI_MIRRORING_GLYPH, 229, 0, NULL), - Property((int32_t) UCHAR_CASE_FOLDING, 231, 0, NULL), - Property((int32_t) UCHAR_ISO_COMMENT, 233, 0, NULL), - Property((int32_t) UCHAR_LOWERCASE_MAPPING, 235, 0, NULL), - Property((int32_t) UCHAR_NAME, 237, 0, NULL), - Property((int32_t) UCHAR_SIMPLE_CASE_FOLDING, 239, 0, NULL), - Property((int32_t) UCHAR_SIMPLE_LOWERCASE_MAPPING, 242, 0, NULL), - Property((int32_t) UCHAR_SIMPLE_TITLECASE_MAPPING, 244, 0, NULL), - Property((int32_t) UCHAR_SIMPLE_UPPERCASE_MAPPING, 246, 0, NULL), - Property((int32_t) UCHAR_TITLECASE_MAPPING, 248, 0, NULL), - Property((int32_t) UCHAR_UNICODE_1_NAME, 250, 0, NULL), - Property((int32_t) UCHAR_UPPERCASE_MAPPING, 252, 0, NULL), - Property((int32_t) UCHAR_NUMERIC_VALUE, 181, 0, NULL), - Property((int32_t) UCHAR_GENERAL_CATEGORY_MASK, 225, VALUES_gcm_COUNT, VALUES_gcm), + Property((int32_t) UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, 88, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_CHANGES_WHEN_TITLECASED, 90, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_CHANGES_WHEN_UPPERCASED, 92, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_DASH, 94, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_DEFAULT_IGNORABLE_CODE_POINT, 96, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_DEPRECATED, 98, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_DIACRITIC, 100, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_EXTENDER, 102, VALUES_binprop_COUNT, 
VALUES_binprop), + Property((int32_t) UCHAR_FULL_COMPOSITION_EXCLUSION, 104, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_GRAPHEME_BASE, 106, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_GRAPHEME_EXTEND, 108, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_GRAPHEME_LINK, 110, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_HEX_DIGIT, 112, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_HYPHEN, 114, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_IDEOGRAPHIC, 116, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_IDS_BINARY_OPERATOR, 118, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_IDS_TRINARY_OPERATOR, 120, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_ID_CONTINUE, 122, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_ID_START, 124, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_JOIN_CONTROL, 126, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_LOGICAL_ORDER_EXCEPTION, 128, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_LOWERCASE, 130, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_MATH, 132, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_NFC_INERT, 134, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_NFD_INERT, 136, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_NFKC_INERT, 138, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_NFKD_INERT, 140, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_NONCHARACTER_CODE_POINT, 142, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_PATTERN_SYNTAX, 144, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_PATTERN_WHITE_SPACE, 146, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_POSIX_ALNUM, 148, VALUES_binprop_COUNT, VALUES_binprop), 
+ Property((int32_t) UCHAR_POSIX_BLANK, 150, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_POSIX_GRAPH, 152, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_POSIX_PRINT, 154, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_POSIX_XDIGIT, 156, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_QUOTATION_MARK, 158, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_RADICAL, 160, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_SEGMENT_STARTER, 162, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_SOFT_DOTTED, 164, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_S_TERM, 166, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_TERMINAL_PUNCTUATION, 168, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_UNIFIED_IDEOGRAPH, 170, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_UPPERCASE, 172, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_VARIATION_SELECTOR, 174, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_WHITE_SPACE, 176, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_XID_CONTINUE, 179, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_XID_START, 181, VALUES_binprop_COUNT, VALUES_binprop), + Property((int32_t) UCHAR_BIDI_CLASS, 185, VALUES_bc_COUNT, VALUES_bc), + Property((int32_t) UCHAR_BLOCK, 187, VALUES_blk_COUNT, VALUES_blk), + Property((int32_t) UCHAR_CANONICAL_COMBINING_CLASS, 189, VALUES_ccc_COUNT, VALUES_ccc), + Property((int32_t) UCHAR_DECOMPOSITION_TYPE, 191, VALUES_dt_COUNT, VALUES_dt), + Property((int32_t) UCHAR_EAST_ASIAN_WIDTH, 193, VALUES_ea_COUNT, VALUES_ea), + Property((int32_t) UCHAR_GENERAL_CATEGORY, 195, VALUES_gc_COUNT, VALUES_gc), + Property((int32_t) UCHAR_GRAPHEME_CLUSTER_BREAK, 197, VALUES_GCB_COUNT, VALUES_GCB), + Property((int32_t) UCHAR_HANGUL_SYLLABLE_TYPE, 199, VALUES_hst_COUNT, 
VALUES_hst), + Property((int32_t) UCHAR_JOINING_GROUP, 201, VALUES_jg_COUNT, VALUES_jg), + Property((int32_t) UCHAR_JOINING_TYPE, 203, VALUES_jt_COUNT, VALUES_jt), + Property((int32_t) UCHAR_LEAD_CANONICAL_COMBINING_CLASS, 205, VALUES_lccc_COUNT, VALUES_lccc), + Property((int32_t) UCHAR_LINE_BREAK, 207, VALUES_lb_COUNT, VALUES_lb), + Property((int32_t) UCHAR_NFC_QUICK_CHECK, 209, VALUES_NFC_QC_COUNT, VALUES_NFC_QC), + Property((int32_t) UCHAR_NFD_QUICK_CHECK, 211, VALUES_NFD_QC_COUNT, VALUES_NFD_QC), + Property((int32_t) UCHAR_NFKC_QUICK_CHECK, 213, VALUES_NFKC_QC_COUNT, VALUES_NFKC_QC), + Property((int32_t) UCHAR_NFKD_QUICK_CHECK, 215, VALUES_NFKD_QC_COUNT, VALUES_NFKD_QC), + Property((int32_t) UCHAR_NUMERIC_TYPE, 217, VALUES_nt_COUNT, VALUES_nt), + Property((int32_t) UCHAR_SCRIPT, 219, VALUES_sc_COUNT, VALUES_sc), + Property((int32_t) UCHAR_SENTENCE_BREAK, 221, VALUES_SB_COUNT, VALUES_SB), + Property((int32_t) UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, 223, VALUES_tccc_COUNT, VALUES_tccc), + Property((int32_t) UCHAR_WORD_BREAK, 225, VALUES_WB_COUNT, VALUES_WB), + Property((int32_t) UCHAR_AGE, 229, 0, NULL), + Property((int32_t) UCHAR_BIDI_MIRRORING_GLYPH, 231, 0, NULL), + Property((int32_t) UCHAR_CASE_FOLDING, 233, 0, NULL), + Property((int32_t) UCHAR_ISO_COMMENT, 235, 0, NULL), + Property((int32_t) UCHAR_LOWERCASE_MAPPING, 237, 0, NULL), + Property((int32_t) UCHAR_NAME, 239, 0, NULL), + Property((int32_t) UCHAR_SIMPLE_CASE_FOLDING, 241, 0, NULL), + Property((int32_t) UCHAR_SIMPLE_LOWERCASE_MAPPING, 244, 0, NULL), + Property((int32_t) UCHAR_SIMPLE_TITLECASE_MAPPING, 246, 0, NULL), + Property((int32_t) UCHAR_SIMPLE_UPPERCASE_MAPPING, 248, 0, NULL), + Property((int32_t) UCHAR_TITLECASE_MAPPING, 250, 0, NULL), + Property((int32_t) UCHAR_UNICODE_1_NAME, 252, 0, NULL), + Property((int32_t) UCHAR_UPPERCASE_MAPPING, 254, 0, NULL), + Property((int32_t) UCHAR_NUMERIC_VALUE, 183, 0, NULL), + Property((int32_t) UCHAR_GENERAL_CATEGORY_MASK, 227, VALUES_gcm_COUNT, VALUES_gcm), }; 
/*eof*/ diff --git a/icu4c/source/tools/genpname/genpname.vcproj b/icu4c/source/tools/genpname/genpname.vcproj index c7439fde325..b733d6c47ec 100644 --- a/icu4c/source/tools/genpname/genpname.vcproj +++ b/icu4c/source/tools/genpname/genpname.vcproj @@ -389,29 +389,14 @@ - - - - - + - - - - - + diff --git a/icu4c/source/tools/toolutil/swapimpl.cpp b/icu4c/source/tools/toolutil/swapimpl.cpp index e835bb04088..011cae5eecb 100644 --- a/icu4c/source/tools/toolutil/swapimpl.cpp +++ b/icu4c/source/tools/toolutil/swapimpl.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2005-2009, International Business Machines +* Copyright (C) 2005-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -50,6 +50,7 @@ #include "ucol_swp.h" #include "ucnv_bld.h" #include "unormimp.h" +#include "normalizer2impl.h" #include "sprpimpl.h" #include "propname.h" #include "rbbidata.h" @@ -619,6 +620,7 @@ static const struct { #if !UCONFIG_NO_NORMALIZATION { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ + { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ #endif #if !UCONFIG_NO_COLLATION { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ diff --git a/icu4c/source/tools/toolutil/toolutil.c b/icu4c/source/tools/toolutil/toolutil.cpp similarity index 90% rename from icu4c/source/tools/toolutil/toolutil.c rename to icu4c/source/tools/toolutil/toolutil.cpp index d91ce647b33..6952e85c71a 100644 --- a/icu4c/source/tools/toolutil/toolutil.c +++ b/icu4c/source/tools/toolutil/toolutil.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2009, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -21,11 +21,6 @@ #include #include #include "unicode/utypes.h" -#include "unicode/putil.h" -#include "cmemory.h" -#include "cstring.h" -#include "toolutil.h" -#include "unicode/ucal.h" #ifdef U_WINDOWS # define VC_EXTRALEAN @@ -42,6 +37,27 @@ #endif #include +#include "unicode/errorcode.h" +#include "unicode/putil.h" +#include "cmemory.h" +#include "cstring.h" +#include "toolutil.h" +#include "unicode/ucal.h" + +U_NAMESPACE_BEGIN + +IcuToolErrorCode::~IcuToolErrorCode() { + // Safe because our handleFailure() does not throw exceptions. + if(isFailure()) { handleFailure(); } +} + +void IcuToolErrorCode::handleFailure() const { + fprintf(stderr, "error at %s: %s\n", location, errorName()); + exit(errorCode); +} + +U_NAMESPACE_END + static int32_t currentYear = -1; U_CAPI int32_t U_EXPORT2 getCurrentYear() { @@ -235,6 +251,7 @@ utm_hasCapacity(UToolMemory *mem, int32_t capacity) { fprintf(stderr, "error: %s - out of memory\n", mem->name); exit(U_MEMORY_ALLOCATION_ERROR); } + mem->capacity=newCapacity; } return TRUE; @@ -242,9 +259,11 @@ utm_hasCapacity(UToolMemory *mem, int32_t capacity) { U_CAPI void * U_EXPORT2 utm_alloc(UToolMemory *mem) { - char *p=(char *)mem->array+mem->idx*mem->size; - int32_t newIndex=mem->idx+1; + char *p=NULL; + int32_t oldIndex=mem->idx; + int32_t newIndex=oldIndex+1; if(utm_hasCapacity(mem, newIndex)) { + p=(char *)mem->array+oldIndex*mem->size; mem->idx=newIndex; uprv_memset(p, 0, mem->size); } @@ -253,9 +272,11 @@ utm_alloc(UToolMemory *mem) { U_CAPI void * U_EXPORT2 utm_allocN(UToolMemory *mem, int32_t n) { - char *p=(char *)mem->array+mem->idx*mem->size; - int32_t newIndex=mem->idx+n; + char *p=NULL; + int32_t oldIndex=mem->idx; + int32_t newIndex=oldIndex+n; if(utm_hasCapacity(mem, newIndex)) { + p=(char *)mem->array+oldIndex*mem->size; mem->idx=newIndex; uprv_memset(p, 0, n*mem->size); } diff --git a/icu4c/source/tools/toolutil/toolutil.h 
b/icu4c/source/tools/toolutil/toolutil.h index 1817d2f67b4..d56dc92dff4 100644 --- a/icu4c/source/tools/toolutil/toolutil.h +++ b/icu4c/source/tools/toolutil/toolutil.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2009, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -21,6 +21,33 @@ #include "unicode/utypes.h" +#ifdef XP_CPLUSPLUS + +#include "unicode/errorcode.h" + +U_NAMESPACE_BEGIN + +/** + * ErrorCode subclass for use in ICU command-line tools. + * The destructor calls handleFailure() which calls exit(errorCode) when isFailure(). + */ +class U_TOOLUTIL_API IcuToolErrorCode : public ErrorCode { +public: + /** + * @param loc A short string describing where the IcuToolErrorCode is used. + */ + IcuToolErrorCode(const char *loc) : location(loc) {} + virtual ~IcuToolErrorCode(); +protected: + virtual void handleFailure() const; +private: + const char *location; +}; + +U_NAMESPACE_END + +#endif + /* * For Windows, a path/filename may be the short (8.3) version * of the "real", long one. 
In this case, the short one diff --git a/icu4c/source/tools/toolutil/toolutil.vcproj b/icu4c/source/tools/toolutil/toolutil.vcproj index 2be23ef8040..4922adf4872 100644 --- a/icu4c/source/tools/toolutil/toolutil.vcproj +++ b/icu4c/source/tools/toolutil/toolutil.vcproj @@ -407,261 +407,246 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/icu4c/source/tools/toolutil/unewdata.c b/icu4c/source/tools/toolutil/unewdata.c index b483a194fb7..9ea60d56683 100644 --- a/icu4c/source/tools/toolutil/unewdata.c +++ b/icu4c/source/tools/toolutil/unewdata.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999,2008, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -14,6 +14,7 @@ * created by: Markus W. Scherer */ +#include #include "unicode/utypes.h" #include "unicode/putil.h" #include "unicode/ustring.h" @@ -162,6 +163,33 @@ udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode) { return fileLength; } +/* dummy UDataInfo cf. 
udata.h */ +static const UDataInfo dummyDataInfo = { + sizeof(UDataInfo), + 0, + + U_IS_BIG_ENDIAN, + U_CHARSET_FAMILY, + U_SIZEOF_UCHAR, + 0, + + { 0, 0, 0, 0 }, /* dummy dataFormat */ + { 0, 0, 0, 0 }, /* dummy formatVersion */ + { 0, 0, 0, 0 } /* dummy dataVersion */ +}; + +U_CAPI void U_EXPORT2 +udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode) { + if(U_SUCCESS(*pErrorCode)) { + udata_finish(udata_create(dir, type, name, &dummyDataInfo, NULL, pErrorCode), pErrorCode); + if(U_FAILURE(*pErrorCode)) { + fprintf(stderr, "error %s writing dummy data file %s" U_FILE_SEP_STRING "%s.%s\n", + u_errorName(*pErrorCode), dir, name, type); + exit(*pErrorCode); + } + } +} + U_CAPI void U_EXPORT2 udata_write8(UNewDataMemory *pData, uint8_t byte) { if(pData!=NULL && pData->file!=NULL) { diff --git a/icu4c/source/tools/toolutil/unewdata.h b/icu4c/source/tools/toolutil/unewdata.h index fb190e6f89d..d25b8e9f749 100644 --- a/icu4c/source/tools/toolutil/unewdata.h +++ b/icu4c/source/tools/toolutil/unewdata.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2000, International Business Machines +* Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -66,6 +66,10 @@ udata_create(const char *dir, const char *type, const char *name, U_CAPI uint32_t U_EXPORT2 udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode); +/** @memo Write a dummy data file. */ +U_CAPI void U_EXPORT2 +udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode); + /** @memo Write an 8-bit byte to the file. 
*/ U_CAPI void U_EXPORT2 udata_write8(UNewDataMemory *pData, uint8_t byte); diff --git a/icu4c/source/tools/toolutil/uparse.c b/icu4c/source/tools/toolutil/uparse.c index e3cc79330e2..c988e3da4b5 100644 --- a/icu4c/source/tools/toolutil/uparse.c +++ b/icu4c/source/tools/toolutil/uparse.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2000-2009, International Business Machines +* Copyright (C) 2000-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -81,7 +81,7 @@ u_parseDelimitedFile(const char *filename, char delimiter, char *start, *limit; int32_t i, length; - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + if(U_FAILURE(*pErrorCode)) { return; } @@ -193,7 +193,7 @@ u_parseCodePoints(const char *s, uint32_t value; int32_t count; - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + if(U_FAILURE(*pErrorCode)) { return 0; } if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) { @@ -242,7 +242,7 @@ u_parseString(const char *s, uint32_t value; int32_t destLength; - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + if(U_FAILURE(*pErrorCode)) { return 0; } if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) { @@ -275,15 +275,16 @@ u_parseString(const char *s, } /* store the first code point */ - if(destLength==0 && pFirst!=NULL) { + if(pFirst!=NULL) { *pFirst=value; + pFirst=NULL; } /* append it to the destination array */ - if((destLength+UTF_CHAR_LENGTH(value))<=destCapacity) { - UTF_APPEND_CHAR_UNSAFE(dest, destLength, value); + if((destLength+U16_LENGTH(value))<=destCapacity) { + U16_APPEND_UNSAFE(dest, destLength, value); } else { - destLength+=UTF_CHAR_LENGTH(value); + destLength+=U16_LENGTH(value); } /* go to the following characters */ @@ -293,13 +294,14 @@ u_parseString(const char *s, /* read a range like start or start..end */ U_CAPI int32_t 
U_EXPORT2 -u_parseCodePointRange(const char *s, - uint32_t *pStart, uint32_t *pEnd, - UErrorCode *pErrorCode) { +u_parseCodePointRangeAnyTerminator(const char *s, + uint32_t *pStart, uint32_t *pEnd, + const char **terminator, + UErrorCode *pErrorCode) { char *end; uint32_t value; - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + if(U_FAILURE(*pErrorCode)) { return 0; } if(s==NULL || pStart==NULL || pEnd==NULL) { @@ -307,15 +309,10 @@ u_parseCodePointRange(const char *s, return 0; } - s=u_skipWhitespace(s); - if(*s==';' || *s==0) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - /* read the start code point */ + s=u_skipWhitespace(s); value=(uint32_t)uprv_strtoul(s, &end, 16); - if(end<=s || (!IS_INV_WHITESPACE(*end) && *end!='.' && *end!=';' && *end!=0) || value>=0x110000) { + if(end<=s || value>=0x110000) { *pErrorCode=U_PARSE_ERROR; return 0; } @@ -323,19 +320,15 @@ u_parseCodePointRange(const char *s, /* is there a "..end"? */ s=u_skipWhitespace(end); - if(*s==';' || *s==0) { + if(*s!='.' || s[1]!='.') { + *terminator=end; return 1; } - - if(*s!='.' || s[1]!='.') { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - s+=2; + s=u_skipWhitespace(s+2); /* read the end code point */ value=(uint32_t)uprv_strtoul(s, &end, 16); - if(end<=s || (!IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) { + if(end<=s || value>=0x110000) { *pErrorCode=U_PARSE_ERROR; return 0; } @@ -347,14 +340,25 @@ u_parseCodePointRange(const char *s, return 0; } - /* no garbage after that? 
*/ - s=u_skipWhitespace(end); - if(*s==';' || *s==0) { - return value-*pStart+1; - } else { - *pErrorCode=U_PARSE_ERROR; - return 0; + *terminator=end; + return value-*pStart+1; +} + +U_CAPI int32_t U_EXPORT2 +u_parseCodePointRange(const char *s, + uint32_t *pStart, uint32_t *pEnd, + UErrorCode *pErrorCode) { + const char *terminator; + int32_t rangeLength= + u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorCode); + if(U_SUCCESS(*pErrorCode)) { + terminator=u_skipWhitespace(terminator); + if(*terminator!=';' && *terminator!=0) { + *pErrorCode=U_PARSE_ERROR; + return 0; + } } + return rangeLength; } U_CAPI int32_t U_EXPORT2 diff --git a/icu4c/source/tools/toolutil/uparse.h b/icu4c/source/tools/toolutil/uparse.h index fb9faac46f3..96bd1ff2404 100644 --- a/icu4c/source/tools/toolutil/uparse.h +++ b/icu4c/source/tools/toolutil/uparse.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2000-2009, International Business Machines +* Copyright (C) 2000-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -126,6 +126,16 @@ u_parseCodePointRange(const char *s, uint32_t *pStart, uint32_t *pEnd, UErrorCode *pErrorCode); +/** + * Same as u_parseCodePointRange() but the range may be terminated by + * any character. The position of the terminating character is returned via + * the *terminator output parameter. 
+ */ +U_CAPI int32_t U_EXPORT2 +u_parseCodePointRangeAnyTerminator(const char *s, + uint32_t *pStart, uint32_t *pEnd, + const char **terminator, + UErrorCode *pErrorCode); U_CAPI int32_t U_EXPORT2 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status); diff --git a/icu4c/source/tools/toolutil/writesrc.c b/icu4c/source/tools/toolutil/writesrc.c index 631c3ccf0fa..59d6e057b78 100644 --- a/icu4c/source/tools/toolutil/writesrc.c +++ b/icu4c/source/tools/toolutil/writesrc.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2005-2008, International Business Machines +* Copyright (C) 2005-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -24,8 +24,8 @@ #include "cstring.h" #include "writesrc.h" -U_CAPI FILE * U_EXPORT2 -usrc_create(const char *path, const char *filename) { +static FILE * +usrc_createWithHeader(const char *path, const char *filename, const char *header) { char buffer[1024]; const char *p; char *q; @@ -55,19 +55,7 @@ usrc_create(const char *path, const char *filename) { lt=localtime(&t); strftime(year, sizeof(year), "%Y", lt); strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt); - fprintf( - f, - "/*\n" - " * Copyright (C) 1999-%s, International Business Machines\n" - " * Corporation and others. All Rights Reserved.\n" - " *\n" - " * file name: %s\n" - " *\n" - " * machine-generated on: %s\n" - " */\n\n", - year, - filename, - buffer); + fprintf(f, header, year, filename, buffer); } else { fprintf( stderr, @@ -77,6 +65,33 @@ usrc_create(const char *path, const char *filename) { return f; } +U_CAPI FILE * U_EXPORT2 +usrc_create(const char *path, const char *filename) { + const char *header= + "/*\n" + " * Copyright (C) 1999-%s, International Business Machines\n" + " * Corporation and others. 
All Rights Reserved.\n" + " *\n" + " * file name: %s\n" + " *\n" + " * machine-generated on: %s\n" + " */\n\n"; + return usrc_createWithHeader(path, filename, header); +} + +U_CAPI FILE * U_EXPORT2 +usrc_createTextData(const char *path, const char *filename) { + const char *header= + "# Copyright (C) 1999-%s, International Business Machines\n" + "# Corporation and others. All Rights Reserved.\n" + "#\n" + "# file name: %s\n" + "#\n" + "# machine-generated on: %s\n" + "#\n\n"; + return usrc_createWithHeader(path, filename, header); +} + U_CAPI void U_EXPORT2 usrc_writeArray(FILE *f, const char *prefix, diff --git a/icu4c/source/tools/toolutil/writesrc.h b/icu4c/source/tools/toolutil/writesrc.h index 3636dcae491..4519cf18006 100644 --- a/icu4c/source/tools/toolutil/writesrc.h +++ b/icu4c/source/tools/toolutil/writesrc.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2005-2008, International Business Machines +* Copyright (C) 2005-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -25,10 +25,18 @@ /** * Create a source text file and write a header comment with the ICU copyright. + * Writes a C/Java-style comment. */ U_CAPI FILE * U_EXPORT2 usrc_create(const char *path, const char *filename); +/** + * Create a source text file and write a header comment with the ICU copyright. + * Writes the comment with # lines, as used in scripts and text data. + */ +U_CAPI FILE * U_EXPORT2 +usrc_createTextData(const char *path, const char *filename); + /** * Write the contents of an array of 8/16/32-bit words. * The prefix and postfix are optional (can be NULL) and are written first/last.