mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-9644 re-hardcode some normalization data: nfc.nrm
X-SVN-Rev: 36384
This commit is contained in:
parent
823e3a9372
commit
0f78abc7ee
23 changed files with 2035 additions and 662 deletions
|
@ -94,7 +94,7 @@ dictionarydata.o \
|
|||
appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
|
||||
utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
|
||||
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
|
||||
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o \
|
||||
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o loadednormalizer2impl.o \
|
||||
chariter.o schriter.o uchriter.o uiter.o \
|
||||
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
||||
uscript.o uscript_props.o usc_impl.o unames.o \
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*****************************************************************************
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 1996-2014, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
|
@ -71,7 +71,7 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode
|
|||
pieces_lengths(NULL),
|
||||
current(NULL),
|
||||
current_length(0),
|
||||
nfd(*Normalizer2Factory::getNFDInstance(status)),
|
||||
nfd(*Normalizer2::getNFDInstance(status)),
|
||||
nfcImpl(*Normalizer2Factory::getNFCImpl(status))
|
||||
{
|
||||
if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
|
||||
|
|
|
@ -381,10 +381,9 @@
|
|||
<ClCompile Include="caniter.cpp">
|
||||
</ClCompile>
|
||||
<ClCompile Include="filterednormalizer2.cpp" />
|
||||
<ClCompile Include="normalizer2.cpp">
|
||||
</ClCompile>
|
||||
<ClCompile Include="normalizer2impl.cpp">
|
||||
</ClCompile>
|
||||
<ClCompile Include="loadednormalizer2impl.cpp" />
|
||||
<ClCompile Include="normalizer2.cpp" />
|
||||
<ClCompile Include="normalizer2impl.cpp" />
|
||||
<ClCompile Include="normlzr.cpp">
|
||||
</ClCompile>
|
||||
<ClCompile Include="unorm.cpp" />
|
||||
|
@ -1145,6 +1144,7 @@
|
|||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
<ClInclude Include="norm2allmodes.h" />
|
||||
<CustomBuild Include="unicode\normalizer2.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
|
|
|
@ -349,6 +349,9 @@
|
|||
<ClCompile Include="filterednormalizer2.cpp">
|
||||
<Filter>normalization</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="loadednormalizer2impl.cpp">
|
||||
<Filter>normalization</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="normalizer2.cpp">
|
||||
<Filter>normalization</Filter>
|
||||
</ClCompile>
|
||||
|
@ -760,6 +763,9 @@
|
|||
<ClInclude Include="ureslocs.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="norm2allmodes.h">
|
||||
<Filter>normalization</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="normalizer2impl.h">
|
||||
<Filter>normalization</Filter>
|
||||
</ClInclude>
|
||||
|
|
342
icu4c/source/common/loadednormalizer2impl.cpp
Normal file
342
icu4c/source/common/loadednormalizer2impl.cpp
Normal file
|
@ -0,0 +1,342 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* loadednormalizer2impl.cpp
|
||||
*
|
||||
* created on: 2014sep03
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "cstring.h"
|
||||
#include "mutex.h"
|
||||
#include "norm2allmodes.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "uassert.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uhash.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class LoadedNormalizer2Impl : public Normalizer2Impl {
|
||||
public:
|
||||
LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
|
||||
virtual ~LoadedNormalizer2Impl();
|
||||
|
||||
void load(const char *packageName, const char *name, UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
|
||||
|
||||
UDataMemory *memory;
|
||||
UTrie2 *ownedTrie;
|
||||
};
|
||||
|
||||
// Closes what load() opened. Either handle may still be NULL
// (never loaded, or load() failed part-way); both close calls are made unconditionally.
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
    // The trie was opened from bytes inside the data memory, so let go of it first.
    utrie2_close(ownedTrie);
    udata_close(memory);
}
|
||||
|
||||
// udata_openChoice() callback.
// Accepts a data item only if its UDataInfo matches this platform's endianness and
// charset family, and identifies itself as dataFormat "Nrm2" with formatVersion 2.
UBool U_CALLCONV
LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
                                    const char * /* type */, const char * /*name*/,
                                    const UDataInfo *pInfo) {
    if(pInfo->size<20) {
        return FALSE;  // UDataInfo smaller than the fields checked below
    }
    if(pInfo->isBigEndian!=U_IS_BIG_ENDIAN || pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;  // built for a different platform family
    }
    // dataFormat="Nrm2"
    if(pInfo->dataFormat[0]!=0x4e || pInfo->dataFormat[1]!=0x72 ||
            pInfo->dataFormat[2]!=0x6d || pInfo->dataFormat[3]!=0x32) {
        return FALSE;
    }
    // Note: the data version from the header is currently not stored anywhere.
    return pInfo->formatVersion[0]==2 ? TRUE : FALSE;
}
|
||||
|
||||
void
|
||||
LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
|
||||
const int32_t *inIndexes=(const int32_t *)inBytes;
|
||||
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
|
||||
if(indexesLength<=IX_MIN_MAYBE_YES) {
|
||||
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
|
||||
int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
|
||||
ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
|
||||
inBytes+offset, nextOffset-offset, NULL,
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
offset=nextOffset;
|
||||
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
|
||||
const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
|
||||
|
||||
// smallFCD: new in formatVersion 2
|
||||
offset=nextOffset;
|
||||
const uint8_t *inSmallFCD=inBytes+offset;
|
||||
|
||||
init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
|
||||
}
|
||||
|
||||
// instance cache ---------------------------------------------------------- ***
|
||||
|
||||
// Creates a LoadedNormalizer2Impl for (packageName, name), loads its data, and passes it
// together with the resulting error code to createInstance(Normalizer2Impl *, UErrorCode &).
// NOTE(review): that overload presumably takes ownership of the impl, including
// when errorCode already indicates a failure -- confirm against its definition.
Norm2AllModes *
Norm2AllModes::createInstance(const char *packageName,
                              const char *name,
                              UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        LoadedNormalizer2Impl *loaded=new LoadedNormalizer2Impl;
        if(loaded!=NULL) {
            loaded->load(packageName, name, errorCode);
            return createInstance(loaded, errorCode);
        }
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return NULL;
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
|
||||
U_CDECL_END
|
||||
|
||||
static Norm2AllModes *nfkcSingleton;
|
||||
static Norm2AllModes *nfkc_cfSingleton;
|
||||
static UHashtable *cache=NULL;
|
||||
|
||||
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
|
||||
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
// UInitOnce singleton initialization function
|
||||
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
|
||||
if (uprv_strcmp(what, "nfkc") == 0) {
|
||||
nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
|
||||
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
|
||||
nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
|
||||
} else {
|
||||
U_ASSERT(FALSE); // Unknown singleton
|
||||
}
|
||||
ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
|
||||
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
// Value deleter for the cache hashtable: its values are Norm2AllModes objects.
static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    Norm2AllModes *value=static_cast<Norm2AllModes *>(allModes);
    delete value;
}
|
||||
|
||||
// Library cleanup function: frees the by-name cache and both singletons,
// and resets the once-flags so that the singletons can be created again.
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
    // By-name cache; the hashtable's deleters free its keys and values.
    uhash_close(cache);
    cache=NULL;

    // NFKC
    delete nfkcSingleton;
    nfkcSingleton=NULL;
    nfkcInitOnce.reset();

    // NFKC_Casefold
    delete nfkc_cfSingleton;
    nfkc_cfSingleton=NULL;
    nfkc_cfInitOnce.reset();

    return TRUE;
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
// Returns the NFKC singleton, creating it on first use.
// Returns NULL if errorCode indicates a failure on entry or if creation failed.
const Norm2AllModes *
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
        return nfkcSingleton;
    }
    return NULL;
}
|
||||
|
||||
// Returns the NFKC_Casefold singleton, creating it on first use.
// Returns NULL if errorCode indicates a failure on entry or if creation failed.
const Norm2AllModes *
Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
        return nfkc_cfSingleton;
    }
    return NULL;
}
|
||||
|
||||
// Returns the composing normalizer of the NFKC data, or NULL on failure.
const Normalizer2 *
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
    if(nfkc==NULL) {
        return NULL;
    }
    return &nfkc->comp;
}
|
||||
|
||||
// Returns the decomposing normalizer of the NFKC data (NFKD), or NULL on failure.
const Normalizer2 *
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
    if(nfkc==NULL) {
        return NULL;
    }
    return &nfkc->decomp;
}
|
||||
|
||||
// Returns the composing normalizer of the NFKC_Casefold data, or NULL on failure.
const Normalizer2 *
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc_cf=Norm2AllModes::getNFKC_CFInstance(errorCode);
    if(nfkc_cf==NULL) {
        return NULL;
    }
    return &nfkc_cf->comp;
}
|
||||
|
||||
/**
 * Returns the Normalizer2 for the given data file and mode.
 *
 * With packageName==NULL, the names "nfc", "nfkc" and "nfkc_cf" map to the shared
 * singletons. Everything else is loaded via Norm2AllModes::createInstance() and kept
 * in a cache that lives until library cleanup.
 *
 * @param packageName data package, or NULL for the default ICU data
 * @param name data file name without the ".nrm" suffix; must not be NULL or empty
 * @param mode which normalizer of the data to return
 * @param errorCode in/out ICU error code; U_ILLEGAL_ARGUMENT_ERROR for a NULL or empty
 *        name, U_MEMORY_ALLOCATION_ERROR if the cache key cannot be allocated,
 *        or whatever loading the data reports
 * @return the requested instance, owned by the library; NULL on failure or
 *         if mode is not one of the four handled values
 */
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
                         const char *name,
                         UNormalization2Mode mode,
                         UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    if(name==NULL || *name==0) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    const Norm2AllModes *allModes=NULL;
    if(packageName==NULL) {
        // Standard data: use the shared singletons rather than the by-name cache.
        if(0==uprv_strcmp(name, "nfc")) {
            allModes=Norm2AllModes::getNFCInstance(errorCode);
        } else if(0==uprv_strcmp(name, "nfkc")) {
            allModes=Norm2AllModes::getNFKCInstance(errorCode);
        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
            allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
        }
    }
    if(allModes==NULL && U_SUCCESS(errorCode)) {
        // NOTE(review): the cache is keyed by name only; packageName is not part of
        // the key -- confirm that this is intended.
        {
            Mutex lock;
            if(cache!=NULL) {
                allModes=(Norm2AllModes *)uhash_get(cache, name);
            }
        }
        if(allModes==NULL) {
            // Register the cleanup function before the cache can be populated.
            // Otherwise, if the NFKC/NFKC_CF singletons are never requested,
            // nothing would free the cache at library cleanup.
            // Done outside of the Mutex scopes so that no lock is held during the call.
            ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
            // Load outside of the mutex; a concurrent loader of the same name is handled below.
            LocalPointer<Norm2AllModes> localAllModes(
                Norm2AllModes::createInstance(packageName, name, errorCode));
            if(U_SUCCESS(errorCode)) {
                Mutex lock;
                if(cache==NULL) {
                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
                    if(U_FAILURE(errorCode)) {
                        return NULL;
                    }
                    uhash_setKeyDeleter(cache, uprv_free);
                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
                }
                void *temp=uhash_get(cache, name);
                if(temp==NULL) {
                    // The cache owns its keys: store a heap copy of the name, including the NUL.
                    int32_t keyLength=(int32_t)(uprv_strlen(name)+1);
                    char *nameCopy=(char *)uprv_malloc(keyLength);
                    if(nameCopy==NULL) {
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }
                    uprv_memcpy(nameCopy, name, keyLength);
                    allModes=localAllModes.getAlias();
                    // If the put fails, errorCode is set and allModes is not used below.
                    uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
                } else {
                    // race condition: another thread cached this name first;
                    // use its instance, ours is deleted by localAllModes.
                    allModes=(Norm2AllModes *)temp;
                }
            }
        }
    }
    if(allModes!=NULL && U_SUCCESS(errorCode)) {
        switch(mode) {
        case UNORM2_COMPOSE:
            return &allModes->comp;
        case UNORM2_DECOMPOSE:
            return &allModes->decomp;
        case UNORM2_FCD:
            return &allModes->fcd;
        case UNORM2_COMPOSE_CONTIGUOUS:
            return &allModes->fcc;
        default:
            break;  // do nothing
        }
    }
    return NULL;
}
|
||||
|
||||
// Maps an old-style UNormalizationMode to the corresponding Normalizer2 instance.
// Any mode without its own case (such as UNORM_NONE) yields the no-op instance.
const Normalizer2 *
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    const Normalizer2 *norm2=NULL;
    if(U_SUCCESS(errorCode)) {
        switch(mode) {
        case UNORM_NFD:
            norm2=Normalizer2::getNFDInstance(errorCode);
            break;
        case UNORM_NFKD:
            norm2=Normalizer2::getNFKDInstance(errorCode);
            break;
        case UNORM_NFC:
            norm2=Normalizer2::getNFCInstance(errorCode);
            break;
        case UNORM_NFKC:
            norm2=Normalizer2::getNFKCInstance(errorCode);
            break;
        case UNORM_FCD:
            norm2=getFCDInstance(errorCode);
            break;
        default:  // UNORM_NONE
            norm2=getNoopInstance(errorCode);
            break;
        }
    }
    return norm2;
}
|
||||
|
||||
// Returns the Normalizer2Impl behind the NFKC singleton, or NULL on failure.
const Normalizer2Impl *
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
    if(nfkc==NULL) {
        return NULL;
    }
    return nfkc->impl;
}
|
||||
|
||||
// Returns the Normalizer2Impl behind the NFKC_Casefold singleton, or NULL on failure.
const Normalizer2Impl *
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc_cf=Norm2AllModes::getNFKC_CFInstance(errorCode);
    if(nfkc_cf==NULL) {
        return NULL;
    }
    return nfkc_cf->impl;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// C API ------------------------------------------------------------------- ***
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
// C wrapper for Normalizer2::getNFKCInstance(). pErrorCode is dereferenced unconditionally.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getNFKCInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(norm2);
}
|
||||
|
||||
// C wrapper for Normalizer2::getNFKDInstance(). pErrorCode is dereferenced unconditionally.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getNFKDInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(norm2);
}
|
||||
|
||||
// C wrapper for Normalizer2::getNFKCCasefoldInstance(). pErrorCode is dereferenced unconditionally.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(norm2);
}
|
||||
|
||||
// C wrapper for Normalizer2::getInstance(). pErrorCode is dereferenced unconditionally.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
                   const char *name,
                   UNormalization2Mode mode,
                   UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(norm2);
}
|
||||
|
||||
#endif // !UCONFIG_NO_NORMALIZATION
|
1121
icu4c/source/common/norm2_nfc_data.h
Normal file
1121
icu4c/source/common/norm2_nfc_data.h
Normal file
File diff suppressed because it is too large
Load diff
341
icu4c/source/common/norm2allmodes.h
Normal file
341
icu4c/source/common/norm2allmodes.h
Normal file
|
@ -0,0 +1,341 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* norm2allmodes.h
|
||||
*
|
||||
* created on: 2014sep07
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __NORM2ALLMODES_H__
|
||||
#define __NORM2ALLMODES_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "cpputils.h"
|
||||
#include "normalizer2impl.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Intermediate class:
|
||||
// Has Normalizer2Impl and does boilerplate argument checking and setup.
|
||||
class Normalizer2WithImpl : public Normalizer2 {
|
||||
public:
|
||||
Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
|
||||
virtual ~Normalizer2WithImpl();
|
||||
|
||||
// normalize
|
||||
virtual UnicodeString &
|
||||
normalize(const UnicodeString &src,
|
||||
UnicodeString &dest,
|
||||
UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
dest.setToBogus();
|
||||
return dest;
|
||||
}
|
||||
const UChar *sArray=src.getBuffer();
|
||||
if(&dest==&src || sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
dest.setToBogus();
|
||||
return dest;
|
||||
}
|
||||
dest.remove();
|
||||
ReorderingBuffer buffer(impl, dest);
|
||||
if(buffer.init(src.length(), errorCode)) {
|
||||
normalize(sArray, sArray+src.length(), buffer, errorCode);
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
|
||||
|
||||
// normalize and append
|
||||
virtual UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
return normalizeSecondAndAppend(first, second, TRUE, errorCode);
|
||||
}
|
||||
virtual UnicodeString &
|
||||
append(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
return normalizeSecondAndAppend(first, second, FALSE, errorCode);
|
||||
}
|
||||
UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UBool doNormalize,
|
||||
UErrorCode &errorCode) const {
|
||||
uprv_checkCanGetBuffer(first, errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return first;
|
||||
}
|
||||
const UChar *secondArray=second.getBuffer();
|
||||
if(&first==&second || secondArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return first;
|
||||
}
|
||||
int32_t firstLength=first.length();
|
||||
UnicodeString safeMiddle;
|
||||
{
|
||||
ReorderingBuffer buffer(impl, first);
|
||||
if(buffer.init(firstLength+second.length(), errorCode)) {
|
||||
normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
|
||||
safeMiddle, buffer, errorCode);
|
||||
}
|
||||
} // The ReorderingBuffer destructor finalizes the first string.
|
||||
if(U_FAILURE(errorCode)) {
|
||||
// Restore the modified suffix of the first string.
|
||||
first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
|
||||
}
|
||||
return first;
|
||||
}
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
|
||||
virtual UBool
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const {
|
||||
UChar buffer[4];
|
||||
int32_t length;
|
||||
const UChar *d=impl.getDecomposition(c, buffer, length);
|
||||
if(d==NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
if(d==buffer) {
|
||||
decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
|
||||
} else {
|
||||
decomposition.setTo(FALSE, d, length); // read-only alias
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
virtual UBool
|
||||
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
|
||||
UChar buffer[30];
|
||||
int32_t length;
|
||||
const UChar *d=impl.getRawDecomposition(c, buffer, length);
|
||||
if(d==NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
if(d==buffer) {
|
||||
decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
|
||||
} else {
|
||||
decomposition.setTo(FALSE, d, length); // read-only alias
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
virtual UChar32
|
||||
composePair(UChar32 a, UChar32 b) const {
|
||||
return impl.composePair(a, b);
|
||||
}
|
||||
|
||||
virtual uint8_t
|
||||
getCombiningClass(UChar32 c) const {
|
||||
return impl.getCC(impl.getNorm16(c));
|
||||
}
|
||||
|
||||
// quick checks
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
const UChar *sLimit=sArray+s.length();
|
||||
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
|
||||
}
|
||||
virtual UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
|
||||
}
|
||||
virtual int32_t
|
||||
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
|
||||
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
|
||||
return UNORM_YES;
|
||||
}
|
||||
|
||||
const Normalizer2Impl &impl;
|
||||
};
|
||||
|
||||
class DecomposeNormalizer2 : public Normalizer2WithImpl {
|
||||
public:
|
||||
DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
|
||||
virtual ~DecomposeNormalizer2();
|
||||
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.decompose(src, limit, &buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
|
||||
return impl.decompose(src, limit, NULL, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
|
||||
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
|
||||
}
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
|
||||
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
|
||||
};
|
||||
|
||||
class ComposeNormalizer2 : public Normalizer2WithImpl {
|
||||
public:
|
||||
ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
|
||||
Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
|
||||
virtual ~ComposeNormalizer2();
|
||||
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
UnicodeString temp;
|
||||
ReorderingBuffer buffer(impl, temp);
|
||||
if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
|
||||
return FALSE;
|
||||
}
|
||||
return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
|
||||
}
|
||||
virtual UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return UNORM_MAYBE;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return UNORM_MAYBE;
|
||||
}
|
||||
UNormalizationCheckResult qcResult=UNORM_YES;
|
||||
impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
|
||||
return qcResult;
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
|
||||
return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
|
||||
return impl.getCompQuickCheck(impl.getNorm16(c));
|
||||
}
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const {
|
||||
return impl.hasCompBoundaryBefore(c);
|
||||
}
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const {
|
||||
return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
|
||||
}
|
||||
virtual UBool isInert(UChar32 c) const {
|
||||
return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
|
||||
}
|
||||
|
||||
const UBool onlyContiguous;
|
||||
};
|
||||
|
||||
class FCDNormalizer2 : public Normalizer2WithImpl {
|
||||
public:
|
||||
FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
|
||||
virtual ~FCDNormalizer2();
|
||||
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.makeFCD(src, limit, &buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
|
||||
return impl.makeFCD(src, limit, NULL, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
|
||||
virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
|
||||
};
|
||||
|
||||
struct Norm2AllModes : public UMemory {
|
||||
Norm2AllModes(Normalizer2Impl *i)
|
||||
: impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
|
||||
~Norm2AllModes();
|
||||
|
||||
static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
|
||||
static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
|
||||
static Norm2AllModes *createInstance(const char *packageName,
|
||||
const char *name,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
|
||||
static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
|
||||
static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
|
||||
|
||||
Normalizer2Impl *impl;
|
||||
ComposeNormalizer2 comp;
|
||||
DecomposeNormalizer2 decomp;
|
||||
FCDNormalizer2 fcd;
|
||||
ComposeNormalizer2 fcc;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // !UCONFIG_NO_NORMALIZATION
|
||||
#endif // __NORM2ALLMODES_H__
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2013, International Business Machines
|
||||
* Copyright (C) 2009-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -18,17 +18,20 @@
|
|||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "cpputils.h"
|
||||
#include "cstring.h"
|
||||
#include "mutex.h"
|
||||
#include "norm2allmodes.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "uassert.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uhash.h"
|
||||
|
||||
using icu::Normalizer2Impl;
|
||||
|
||||
// NFC/NFD data machine-generated by gennorm2 --csource
|
||||
#include "norm2_nfc_data.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -118,470 +121,131 @@ class NoopNormalizer2 : public Normalizer2 {
|
|||
|
||||
NoopNormalizer2::~NoopNormalizer2() {}
|
||||
|
||||
// Intermediate class:
|
||||
// Has Normalizer2Impl and does boilerplate argument checking and setup.
|
||||
class Normalizer2WithImpl : public Normalizer2 {
|
||||
public:
|
||||
Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
|
||||
virtual ~Normalizer2WithImpl();
|
||||
|
||||
// normalize
|
||||
virtual UnicodeString &
|
||||
normalize(const UnicodeString &src,
|
||||
UnicodeString &dest,
|
||||
UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
dest.setToBogus();
|
||||
return dest;
|
||||
}
|
||||
const UChar *sArray=src.getBuffer();
|
||||
if(&dest==&src || sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
dest.setToBogus();
|
||||
return dest;
|
||||
}
|
||||
dest.remove();
|
||||
ReorderingBuffer buffer(impl, dest);
|
||||
if(buffer.init(src.length(), errorCode)) {
|
||||
normalize(sArray, sArray+src.length(), buffer, errorCode);
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
|
||||
|
||||
// normalize and append
|
||||
virtual UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
return normalizeSecondAndAppend(first, second, TRUE, errorCode);
|
||||
}
|
||||
virtual UnicodeString &
|
||||
append(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
return normalizeSecondAndAppend(first, second, FALSE, errorCode);
|
||||
}
|
||||
UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UBool doNormalize,
|
||||
UErrorCode &errorCode) const {
|
||||
uprv_checkCanGetBuffer(first, errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return first;
|
||||
}
|
||||
const UChar *secondArray=second.getBuffer();
|
||||
if(&first==&second || secondArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return first;
|
||||
}
|
||||
int32_t firstLength=first.length();
|
||||
UnicodeString safeMiddle;
|
||||
{
|
||||
ReorderingBuffer buffer(impl, first);
|
||||
if(buffer.init(firstLength+second.length(), errorCode)) {
|
||||
normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
|
||||
safeMiddle, buffer, errorCode);
|
||||
}
|
||||
} // The ReorderingBuffer destructor finalizes the first string.
|
||||
if(U_FAILURE(errorCode)) {
|
||||
// Restore the modified suffix of the first string.
|
||||
first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
|
||||
}
|
||||
return first;
|
||||
}
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
|
||||
virtual UBool
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const {
|
||||
UChar buffer[4];
|
||||
int32_t length;
|
||||
const UChar *d=impl.getDecomposition(c, buffer, length);
|
||||
if(d==NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
if(d==buffer) {
|
||||
decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
|
||||
} else {
|
||||
decomposition.setTo(FALSE, d, length); // read-only alias
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
virtual UBool
|
||||
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
|
||||
UChar buffer[30];
|
||||
int32_t length;
|
||||
const UChar *d=impl.getRawDecomposition(c, buffer, length);
|
||||
if(d==NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
if(d==buffer) {
|
||||
decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
|
||||
} else {
|
||||
decomposition.setTo(FALSE, d, length); // read-only alias
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
virtual UChar32
|
||||
composePair(UChar32 a, UChar32 b) const {
|
||||
return impl.composePair(a, b);
|
||||
}
|
||||
|
||||
virtual uint8_t
|
||||
getCombiningClass(UChar32 c) const {
|
||||
return impl.getCC(impl.getNorm16(c));
|
||||
}
|
||||
|
||||
// quick checks
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
const UChar *sLimit=sArray+s.length();
|
||||
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
|
||||
}
|
||||
virtual UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
|
||||
}
|
||||
virtual int32_t
|
||||
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
|
||||
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
|
||||
return UNORM_YES;
|
||||
}
|
||||
|
||||
const Normalizer2Impl &impl;
|
||||
};
|
||||
|
||||
// Out-of-line definition of the Normalizer2WithImpl destructor; the body is empty.
Normalizer2WithImpl::~Normalizer2WithImpl() {}
|
||||
|
||||
class DecomposeNormalizer2 : public Normalizer2WithImpl {
|
||||
public:
|
||||
DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
|
||||
virtual ~DecomposeNormalizer2();
|
||||
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.decompose(src, limit, &buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
|
||||
return impl.decompose(src, limit, NULL, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
|
||||
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
|
||||
}
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
|
||||
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
|
||||
};
|
||||
|
||||
// Out-of-line definition of the DecomposeNormalizer2 destructor; the body is empty.
DecomposeNormalizer2::~DecomposeNormalizer2() {}
|
||||
|
||||
class ComposeNormalizer2 : public Normalizer2WithImpl {
|
||||
public:
|
||||
ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
|
||||
Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
|
||||
virtual ~ComposeNormalizer2();
|
||||
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
UnicodeString temp;
|
||||
ReorderingBuffer buffer(impl, temp);
|
||||
if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
|
||||
return FALSE;
|
||||
}
|
||||
return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
|
||||
}
|
||||
virtual UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return UNORM_MAYBE;
|
||||
}
|
||||
const UChar *sArray=s.getBuffer();
|
||||
if(sArray==NULL) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return UNORM_MAYBE;
|
||||
}
|
||||
UNormalizationCheckResult qcResult=UNORM_YES;
|
||||
impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
|
||||
return qcResult;
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
|
||||
return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
|
||||
return impl.getCompQuickCheck(impl.getNorm16(c));
|
||||
}
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const {
|
||||
return impl.hasCompBoundaryBefore(c);
|
||||
}
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const {
|
||||
return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
|
||||
}
|
||||
virtual UBool isInert(UChar32 c) const {
|
||||
return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
|
||||
}
|
||||
|
||||
const UBool onlyContiguous;
|
||||
};
|
||||
|
||||
// Out-of-line definition of the ComposeNormalizer2 destructor; the body is empty.
ComposeNormalizer2::~ComposeNormalizer2() {}
|
||||
|
||||
class FCDNormalizer2 : public Normalizer2WithImpl {
|
||||
public:
|
||||
FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
|
||||
virtual ~FCDNormalizer2();
|
||||
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.makeFCD(src, limit, &buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
|
||||
return impl.makeFCD(src, limit, NULL, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
|
||||
virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
|
||||
};
|
||||
|
||||
// Out-of-line definition of the FCDNormalizer2 destructor; the body is empty.
FCDNormalizer2::~FCDNormalizer2() {}
|
||||
|
||||
// instance cache ---------------------------------------------------------- ***
|
||||
|
||||
struct Norm2AllModes : public UMemory {
|
||||
static Norm2AllModes *createInstance(const char *packageName,
|
||||
const char *name,
|
||||
UErrorCode &errorCode);
|
||||
Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
|
||||
|
||||
Normalizer2Impl impl;
|
||||
ComposeNormalizer2 comp;
|
||||
DecomposeNormalizer2 decomp;
|
||||
FCDNormalizer2 fcd;
|
||||
ComposeNormalizer2 fcc;
|
||||
};
|
||||
// Deletes the Normalizer2Impl that this object points to.
Norm2AllModes::~Norm2AllModes()
{
    delete impl;
}
|
||||
|
||||
Norm2AllModes *
|
||||
Norm2AllModes::createInstance(const char *packageName,
|
||||
const char *name,
|
||||
UErrorCode &errorCode) {
|
||||
Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
delete impl;
|
||||
return NULL;
|
||||
}
|
||||
Norm2AllModes *allModes=new Norm2AllModes(impl);
|
||||
if(allModes==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
delete impl;
|
||||
return NULL;
|
||||
}
|
||||
return allModes;
|
||||
}
|
||||
|
||||
Norm2AllModes *
|
||||
Norm2AllModes::createNFCInstance(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
|
||||
if(allModes.isNull()) {
|
||||
Normalizer2Impl *impl=new Normalizer2Impl;
|
||||
if(impl==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
allModes->impl.load(packageName, name, errorCode);
|
||||
return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
|
||||
impl->init(norm2_nfc_data_indexes, &norm2_nfc_data_trie,
|
||||
norm2_nfc_data_extraData, norm2_nfc_data_smallFCD);
|
||||
return createInstance(impl, errorCode);
|
||||
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV uprv_normalizer2_cleanup();
|
||||
U_CDECL_END
|
||||
|
||||
|
||||
// File-level state: the singleton pointers and the name-keyed cache are deleted
// or closed and set to NULL in uprv_normalizer2_cleanup(), which also resets
// each UInitOnce guard below.
static Norm2AllModes *nfcSingleton;
|
||||
static Norm2AllModes *nfkcSingleton;
|
||||
static Norm2AllModes *nfkc_cfSingleton;
|
||||
static Normalizer2 *noopSingleton;
|
||||
static UHashtable *cache=NULL;
|
||||
|
||||
static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
|
||||
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
|
||||
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
|
||||
static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
// UInitOnce singleton initialization function
|
||||
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
|
||||
if (uprv_strcmp(what, "nfc") == 0) {
|
||||
nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
|
||||
} else if (uprv_strcmp(what, "nfkc") == 0) {
|
||||
nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
|
||||
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
|
||||
nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
|
||||
} else if (uprv_strcmp(what, "noop") == 0) {
|
||||
noopSingleton = new NoopNormalizer2;
|
||||
} else {
|
||||
U_ASSERT(FALSE); // Unknown singleton
|
||||
// UInitOnce singleton initialization functions
|
||||
static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) {
|
||||
nfcSingleton=Norm2AllModes::createNFCInstance(errorCode);
|
||||
ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
|
||||
}
|
||||
|
||||
// UInitOnce initialization function for the no-op normalizer singleton.
// Does nothing if errorCode is already a failure; otherwise allocates the
// singleton and, on success, registers the cleanup function.
static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        noopSingleton=new NoopNormalizer2;
        if(noopSingleton!=NULL) {
            ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
        } else {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
        }
    }
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
// Deleter callback taking an untyped pointer: deletes it as a Norm2AllModes.
static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    Norm2AllModes *modes=static_cast<Norm2AllModes *>(allModes);
    delete modes;
}
|
||||
|
||||
// Library cleanup function: deletes every singleton, closes the instance cache,
// and resets the init-once guards. Always returns TRUE.
static UBool U_CALLCONV uprv_normalizer2_cleanup() {
    // nfc
    delete nfcSingleton;
    nfcSingleton = NULL;
    nfcInitOnce.reset();

    // nfkc
    delete nfkcSingleton;
    nfkcSingleton = NULL;
    nfkcInitOnce.reset();

    // nfkc_cf
    delete nfkc_cfSingleton;
    nfkc_cfSingleton = NULL;
    nfkc_cfInitOnce.reset();

    // noop
    delete noopSingleton;
    noopSingleton = NULL;
    noopInitOnce.reset();

    // name-keyed cache of loaded instances
    uhash_close(cache);
    cache=NULL;

    return TRUE;
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
// Returns the comp member of the "nfc" singleton, initializing the singleton
// on first use; NULL if the singleton is not available.
const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    if(nfcSingleton==NULL) {
        return NULL;
    }
    return &nfcSingleton->comp;
}
|
||||
|
||||
// Returns the decomp member of the "nfc" singleton, initializing the singleton
// on first use; NULL if the singleton is not available.
const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    if(nfcSingleton==NULL) {
        return NULL;
    }
    return &nfcSingleton->decomp;
}
|
||||
|
||||
// Returns the fcd member of the "nfc" singleton, initializing the singleton
// on first use; NULL if the singleton is not available.
const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    if(nfcSingleton==NULL) {
        return NULL;
    }
    return &nfcSingleton->fcd;
}
|
||||
|
||||
// Returns the fcc member of the "nfc" singleton, initializing the singleton
// on first use; NULL if the singleton is not available.
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    if(nfcSingleton==NULL) {
        return NULL;
    }
    return &nfcSingleton->fcc;
}
|
||||
|
||||
// Returns the comp member of the "nfkc" singleton, initializing the singleton
// on first use; NULL if the singleton is not available.
const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    if(nfkcSingleton==NULL) {
        return NULL;
    }
    return &nfkcSingleton->comp;
}
|
||||
|
||||
// Returns the decomp member of the "nfkc" singleton, initializing the singleton
// on first use; NULL if the singleton is not available.
const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    if(nfkcSingleton==NULL) {
        return NULL;
    }
    return &nfkcSingleton->decomp;
}
|
||||
|
||||
// Returns the comp member of the "nfkc_cf" singleton, initializing the singleton
// on first use; NULL if the singleton is not available.
const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
    umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
    if(nfkc_cfSingleton==NULL) {
        return NULL;
    }
    return &nfkc_cfSingleton->comp;
}
|
||||
|
||||
const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
|
||||
umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode);
|
||||
return noopSingleton;
|
||||
// Returns the NFC all-modes singleton, running its init-once function first.
// Returns NULL without initializing anything if errorCode is already a failure.
const Norm2AllModes *
Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfcInitOnce, &initNFCSingleton, errorCode);
        return nfcSingleton;
    }
    return NULL;
}
|
||||
|
||||
const Normalizer2 *
|
||||
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
switch(mode) {
|
||||
case UNORM_NFD:
|
||||
return getNFDInstance(errorCode);
|
||||
case UNORM_NFKD:
|
||||
return getNFKDInstance(errorCode);
|
||||
case UNORM_NFC:
|
||||
return getNFCInstance(errorCode);
|
||||
case UNORM_NFKC:
|
||||
return getNFKCInstance(errorCode);
|
||||
case UNORM_FCD:
|
||||
return getFCDInstance(errorCode);
|
||||
default: // UNORM_NONE
|
||||
return getNoopInstance(errorCode);
|
||||
}
|
||||
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
|
||||
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
||||
return allModes!=NULL ? &allModes->comp : NULL;
|
||||
}
|
||||
|
||||
// Returns the decomp member of the NFC all-modes singleton,
// or NULL if that singleton is not available.
const Normalizer2 *
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return &nfcModes->decomp;
}
|
||||
|
||||
// Returns the fcd member of the NFC all-modes singleton,
// or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return &nfcModes->fcd;
}
|
||||
|
||||
// Returns the fcc member of the NFC all-modes singleton,
// or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return &nfcModes->fcc;
}
|
||||
|
||||
// Returns the no-op normalizer singleton, running its init-once function first.
// Returns NULL without initializing anything if errorCode is already a failure.
const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode);
        return noopSingleton;
    }
    return NULL;
}
|
||||
|
||||
// Returns the address of the impl member of the "nfc" singleton, initializing
// the singleton on first use; NULL if the singleton is not available.
const Normalizer2Impl *
Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    if(nfcSingleton==NULL) {
        return NULL;
    }
    return &nfcSingleton->impl;
}
|
||||
|
||||
// Returns the address of the impl member of the "nfkc" singleton, initializing
// the singleton on first use; NULL if the singleton is not available.
const Normalizer2Impl *
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    if(nfkcSingleton==NULL) {
        return NULL;
    }
    return &nfkcSingleton->impl;
}
|
||||
|
||||
const Normalizer2Impl *
|
||||
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
|
||||
umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
|
||||
return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL;
|
||||
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
||||
return allModes!=NULL ? allModes->impl : NULL;
|
||||
}
|
||||
|
||||
const Normalizer2Impl *
|
||||
|
@ -589,110 +253,6 @@ Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
|
|||
return &((Normalizer2WithImpl *)norm2)->impl;
|
||||
}
|
||||
|
||||
// Public entry point; forwards to Normalizer2Factory::getNFCInstance().
const Normalizer2 *
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    const Normalizer2 *instance=Normalizer2Factory::getNFCInstance(errorCode);
    return instance;
}
|
||||
|
||||
// Public entry point; forwards to Normalizer2Factory::getNFDInstance().
const Normalizer2 *
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    const Normalizer2 *instance=Normalizer2Factory::getNFDInstance(errorCode);
    return instance;
}
|
||||
|
||||
// Public entry point; forwards to Normalizer2Factory::getNFKCInstance().
const Normalizer2 *
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    const Normalizer2 *instance=Normalizer2Factory::getNFKCInstance(errorCode);
    return instance;
}
|
||||
|
||||
// Public entry point; forwards to Normalizer2Factory::getNFKDInstance().
const Normalizer2 *
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    const Normalizer2 *instance=Normalizer2Factory::getNFKDInstance(errorCode);
    return instance;
}
|
||||
|
||||
// Public entry point; forwards to Normalizer2Factory::getNFKC_CFInstance().
const Normalizer2 *
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    const Normalizer2 *instance=Normalizer2Factory::getNFKC_CFInstance(errorCode);
    return instance;
}
|
||||
|
||||
// Returns the normalizer for the given data name and mode.
//
// With a NULL packageName, the names "nfc", "nfkc" and "nfkc_cf" resolve to the
// init-once singletons. Anything else is looked up in the name-keyed cache and,
// if absent, created and then added to the cache.
// Returns NULL on failure or for a mode that the switch below does not handle.
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
                         const char *name,
                         UNormalization2Mode mode,
                         UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    if(name==NULL || *name==0) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    Norm2AllModes *allModes=NULL;
    if(packageName==NULL) {
        if(0==uprv_strcmp(name, "nfc")) {
            umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
            allModes=nfcSingleton;
        } else if(0==uprv_strcmp(name, "nfkc")) {
            umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
            allModes=nfkcSingleton;
        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
            umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
            allModes=nfkc_cfSingleton;
        }
    }

    if(allModes==NULL && U_SUCCESS(errorCode)) {
        // First look for an already cached instance.
        {
            Mutex lock;
            if(cache!=NULL) {
                allModes=(Norm2AllModes *)uhash_get(cache, name);
            }
        }
        if(allModes==NULL) {
            // Create the instance before taking the mutex again;
            // the LocalPointer deletes it if it does not end up in the cache.
            LocalPointer<Norm2AllModes> created(
                Norm2AllModes::createInstance(packageName, name, errorCode));
            if(U_SUCCESS(errorCode)) {
                Mutex lock;
                if(cache==NULL) {
                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
                    if(U_FAILURE(errorCode)) {
                        return NULL;
                    }
                    uhash_setKeyDeleter(cache, uprv_free);
                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
                }
                void *existing=uhash_get(cache, name);
                if(existing!=NULL) {
                    // race condition: the name was cached in the meantime; use that entry
                    allModes=(Norm2AllModes *)existing;
                } else {
                    // The cache gets its own copy of the name as the key.
                    int32_t keyLength=uprv_strlen(name)+1;
                    char *nameCopy=(char *)uprv_malloc(keyLength);
                    if(nameCopy==NULL) {
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }
                    uprv_memcpy(nameCopy, name, keyLength);
                    allModes=created.orphan();
                    uhash_put(cache, nameCopy, allModes, &errorCode);
                }
            }
        }
    }

    if(allModes==NULL || U_FAILURE(errorCode)) {
        return NULL;
    }
    switch(mode) {
    case UNORM2_COMPOSE:
        return &allModes->comp;
    case UNORM2_DECOMPOSE:
        return &allModes->decomp;
    case UNORM2_FCD:
        return &allModes->fcd;
    case UNORM2_COMPOSE_CONTIGUOUS:
        return &allModes->fcc;
    default:
        return NULL;  // unhandled mode
    }
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// C API ------------------------------------------------------------------- ***
|
||||
|
@ -709,29 +269,6 @@ unorm2_getNFDInstance(UErrorCode *pErrorCode) {
|
|||
return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
|
||||
}
|
||||
|
||||
// C API wrapper around Normalizer2::getNFKCInstance().
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *instance=Normalizer2::getNFKCInstance(*pErrorCode);
    return (const UNormalizer2 *)instance;
}
|
||||
|
||||
// C API wrapper around Normalizer2::getNFKDInstance().
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *instance=Normalizer2::getNFKDInstance(*pErrorCode);
    return (const UNormalizer2 *)instance;
}
|
||||
|
||||
// C API wrapper around Normalizer2::getNFKCCasefoldInstance().
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *instance=Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    return (const UNormalizer2 *)instance;
}
|
||||
|
||||
// C API wrapper around Normalizer2::getInstance(); all arguments are passed through.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
                   const char *name,
                   UNormalization2Mode mode,
                   UErrorCode *pErrorCode) {
    const Normalizer2 *instance=
        Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    return (const UNormalizer2 *)instance;
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
unorm2_close(UNormalizer2 *norm2) {
|
||||
delete (Normalizer2 *)norm2;
|
||||
|
@ -962,7 +499,7 @@ unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
|
|||
U_CAPI uint8_t U_EXPORT2
|
||||
u_getCombiningClass(UChar32 c) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
|
||||
const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
return nfd->getCombiningClass(c);
|
||||
} else {
|
||||
|
|
|
@ -253,50 +253,12 @@ struct CanonIterData : public UMemory {
|
|||
};
|
||||
|
||||
// Closes the data memory and the trie, then deletes the canonical-iterator data.
Normalizer2Impl::~Normalizer2Impl()
{
    udata_close(memory);
    utrie2_close(normTrie);
    delete fCanonIterData;
}
|
||||
|
||||
// Data acceptance callback (passed to udata_openChoice() with the
// Normalizer2Impl as context).
// Accepts the data only if the info struct is large enough, its endianness and
// charset family match this platform, the data format is "Nrm2", and the major
// format version is 2. On acceptance, the data version is copied into the
// Normalizer2Impl that context points to.
UBool U_CALLCONV
Normalizer2Impl::isAcceptable(void *context,
                              const char * /* type */, const char * /*name*/,
                              const UDataInfo *pInfo) {
    if(pInfo->size<20 ||
            pInfo->isBigEndian!=U_IS_BIG_ENDIAN ||
            pInfo->charsetFamily!=U_CHARSET_FAMILY ||
            pInfo->dataFormat[0]!=0x4e ||  /* dataFormat="Nrm2" */
            pInfo->dataFormat[1]!=0x72 ||
            pInfo->dataFormat[2]!=0x6d ||
            pInfo->dataFormat[3]!=0x32 ||
            pInfo->formatVersion[0]!=2) {
        return FALSE;
    }
    Normalizer2Impl *me=(Normalizer2Impl *)context;
    uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
    return TRUE;
}
|
||||
|
||||
void
|
||||
Normalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
|
||||
const int32_t *inIndexes=(const int32_t *)inBytes;
|
||||
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
|
||||
if(indexesLength<=IX_MIN_MAYBE_YES) {
|
||||
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
|
||||
return;
|
||||
}
|
||||
|
||||
Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,
|
||||
const uint16_t *inExtraData, const uint8_t *inSmallFCD) {
|
||||
minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
|
||||
minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
|
||||
|
||||
|
@ -306,23 +268,12 @@ Normalizer2Impl::load(const char *packageName, const char *name, UErrorCode &err
|
|||
limitNoNo=inIndexes[IX_LIMIT_NO_NO];
|
||||
minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
|
||||
|
||||
int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
|
||||
int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
|
||||
normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
|
||||
inBytes+offset, nextOffset-offset, NULL,
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
normTrie=inTrie;
|
||||
|
||||
offset=nextOffset;
|
||||
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
|
||||
maybeYesCompositions=(const uint16_t *)(inBytes+offset);
|
||||
maybeYesCompositions=inExtraData;
|
||||
extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes);
|
||||
|
||||
// smallFCD: new in formatVersion 2
|
||||
offset=nextOffset;
|
||||
smallFCD=inBytes+offset;
|
||||
smallFCD=inSmallFCD;
|
||||
|
||||
// Build tccc180[].
|
||||
// gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
@ -217,14 +216,15 @@ private:
|
|||
UChar *codePointStart, *codePointLimit;
|
||||
};
|
||||
|
||||
class U_COMMON_API Normalizer2Impl : public UMemory {
|
||||
class U_COMMON_API Normalizer2Impl : public UObject {
|
||||
public:
|
||||
Normalizer2Impl() : memory(NULL), normTrie(NULL), fCanonIterData(NULL) {
|
||||
Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) {
|
||||
fCanonIterDataInitOnce.reset();
|
||||
}
|
||||
~Normalizer2Impl();
|
||||
virtual ~Normalizer2Impl();
|
||||
|
||||
void load(const char *packageName, const char *name, UErrorCode &errorCode);
|
||||
void init(const int32_t *inIndexes, const UTrie2 *inTrie,
|
||||
const uint16_t *inExtraData, const uint8_t *inSmallFCD);
|
||||
|
||||
void addLcccChars(UnicodeSet &set) const;
|
||||
void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
|
||||
|
@ -478,9 +478,6 @@ public:
|
|||
}
|
||||
UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
|
||||
private:
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
|
||||
|
||||
UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
|
||||
UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
|
||||
static UBool isInert(uint16_t norm16) { return norm16==0; }
|
||||
|
@ -584,8 +581,7 @@ private:
|
|||
int32_t getCanonValue(UChar32 c) const;
|
||||
const UnicodeSet &getCanonStartSet(int32_t n) const;
|
||||
|
||||
UDataMemory *memory;
|
||||
UVersionInfo dataVersion;
|
||||
// UVersionInfo dataVersion;
|
||||
|
||||
// Code point thresholds for quick check codes.
|
||||
UChar32 minDecompNoCP;
|
||||
|
@ -598,13 +594,13 @@ private:
|
|||
uint16_t limitNoNo;
|
||||
uint16_t minMaybeYes;
|
||||
|
||||
UTrie2 *normTrie;
|
||||
const UTrie2 *normTrie;
|
||||
const uint16_t *maybeYesCompositions;
|
||||
const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
|
||||
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
|
||||
uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F
|
||||
|
||||
public: // CanonIterData is public to allow access from C callback functions.
|
||||
public: // CanonIterData is public to allow access from C callback functions.
|
||||
UInitOnce fCanonIterDataInitOnce;
|
||||
CanonIterData *fCanonIterData;
|
||||
};
|
||||
|
@ -620,13 +616,8 @@ private:
|
|||
*/
|
||||
class U_COMMON_API Normalizer2Factory {
|
||||
public:
|
||||
static const Normalizer2 *getNFCInstance(UErrorCode &errorCode);
|
||||
static const Normalizer2 *getNFDInstance(UErrorCode &errorCode);
|
||||
static const Normalizer2 *getFCDInstance(UErrorCode &errorCode);
|
||||
static const Normalizer2 *getFCCInstance(UErrorCode &errorCode);
|
||||
static const Normalizer2 *getNFKCInstance(UErrorCode &errorCode);
|
||||
static const Normalizer2 *getNFKDInstance(UErrorCode &errorCode);
|
||||
static const Normalizer2 *getNFKC_CFInstance(UErrorCode &errorCode);
|
||||
static const Normalizer2 *getNoopInstance(UErrorCode &errorCode);
|
||||
|
||||
static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode);
|
||||
|
|
|
@ -41,6 +41,7 @@ typedef enum ECleanupCommonType {
|
|||
UCLN_COMMON_LOCALE,
|
||||
UCLN_COMMON_LOCALE_AVAILABLE,
|
||||
UCLN_COMMON_ULOC,
|
||||
UCLN_COMMON_LOADED_NORMALIZER2,
|
||||
UCLN_COMMON_NORMALIZER2,
|
||||
UCLN_COMMON_USET,
|
||||
UCLN_COMMON_UNAMES,
|
||||
|
|
|
@ -603,7 +603,7 @@ unorm_compare(const UChar *s1, int32_t length1,
|
|||
if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
|
||||
const Normalizer2 *n2;
|
||||
if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
|
||||
n2=Normalizer2Factory::getNFDInstance(*pErrorCode);
|
||||
n2=Normalizer2::getNFDInstance(*pErrorCode);
|
||||
} else {
|
||||
n2=Normalizer2Factory::getFCDInstance(*pErrorCode);
|
||||
}
|
||||
|
|
|
@ -106,7 +106,7 @@ static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) {
|
|||
static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
|
||||
UnicodeString nfd;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode);
|
||||
const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -569,7 +569,7 @@ u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *p
|
|||
// (What could be useful is a custom normalization table that combines
|
||||
// case folding and NFKC.)
|
||||
// For the derivation, see Unicode's DerivedNormalizationProps.txt.
|
||||
const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode);
|
||||
const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode);
|
||||
const UCaseProps *csp=ucase_getSingleton();
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
|
|
|
@ -243,7 +243,7 @@ package390: $(OUTTMPDIR)/icudata390.lst $(PKGDATA_LIST) ./icupkg.inc packagedata
|
|||
# 2010-dec Removed pnames.icu.
|
||||
# These are now hardcoded in ICU4C and only loaded in ICU4J.
|
||||
#
|
||||
DAT_FILES_SHORT=unames.icu cnvalias.icu coll/ucadata.icu nfc.nrm nfkc.nrm nfkc_cf.nrm uts46.nrm
|
||||
DAT_FILES_SHORT=unames.icu cnvalias.icu coll/ucadata.icu nfkc.nrm nfkc_cf.nrm uts46.nrm
|
||||
DAT_FILES=$(DAT_FILES_SHORT:%=$(BUILDDIR)/%)
|
||||
|
||||
## BRK files
|
||||
|
@ -431,7 +431,7 @@ COLL_FILES_LIST=$(COLLATION_FILES_SHORT) $(COLLATION_INDEX_RES_SHORT)
|
|||
BRK_FILES_LIST=$(BRK_FILES_SHORT) $(BRK_RES_FILES_SHORT) $(BRK_RES_INDEX_RES_SHORT) $(DICT_FILES_SHORT)
|
||||
LOCALE_FILES_LIST= $(RES_FILES_SHORT) $(LANG_FILES_SHORT) $(REGION_FILES_SHORT) $(ZONE_FILES_SHORT)
|
||||
MISC_FILES_LIST=$(DAT_FILES_SHORT) $(CNV_FILES_SHORT) $(CNV_FILES_SHORT_SPECIAL) $(CURR_FILES_SHORT) $(RBNF_FILES_SHORT) $(RBNF_INDEX_RES_SHORT) $(TRANSLIT_FILES_SHORT) $(SPREP_FILES_SHORT) $(CFU_FILES_SHORT)
|
||||
UNI_CORE_DATA=pnames.icu uprops.icu ucase.icu ubidi.icu
|
||||
UNI_CORE_DATA=pnames.icu uprops.icu ucase.icu ubidi.icu nfc.nrm
|
||||
UNI_CORE_TARGET_DATA=$(UNI_CORE_DATA:%=$(BUILDDIR)/%)
|
||||
|
||||
ifneq ($(INCLUDE_UNI_CORE_DATA),)
|
||||
|
@ -815,7 +815,7 @@ ICU4J_TZDATA_PATHS=$(ICU4J_TZDATA_FILES:%="$(ICU4J_DATA_DIRNAME)/%.res")
|
|||
generate-data: build-dir packagedata $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat uni-core-data
|
||||
mkdir -p $(OUTDIR)/icu4j/$(ICU4J_DATA_DIRNAME)
|
||||
mkdir -p $(OUTDIR)/icu4j/tzdata/$(ICU4J_DATA_DIRNAME)
|
||||
echo pnames.icu ubidi.icu ucase.icu uprops.icu > $(OUTDIR)/icu4j/add.txt
|
||||
echo $(UNI_CORE_DATA) > $(OUTDIR)/icu4j/add.txt
|
||||
$(INVOKE) $(TOOLBINDIR)/icupkg $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat $(OUTDIR)/icu4j/$(ICUDATA_BASENAME_VERSION)b.dat -a $(OUTDIR)/icu4j/add.txt -s $(BUILDDIR) -x '*' -tb -d $(OUTDIR)/icu4j/$(ICU4J_DATA_DIRNAME)
|
||||
mv $(ICU4J_TZDATA_PATHS:%=$(OUTDIR)/icu4j/%) "$(OUTDIR)/icu4j/tzdata/$(ICU4J_DATA_DIRNAME)"
|
||||
|
||||
|
|
|
@ -489,7 +489,7 @@ ALL : GODATA "$(ICU_LIB_TARGET)" "$(TESTDATAOUT)\testdata.dat"
|
|||
# 2010-dec Removed pnames.icu.
|
||||
# Command line:
|
||||
# C:\svn\icuproj\icu\trunk\source\data>nmake -f makedata.mak ICUMAKE=C:\svn\icuproj\icu\trunk\source\data\ CFG=x86\Debug uni-core-data
|
||||
uni-core-data: GODATA "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
|
||||
uni-core-data: GODATA "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\nfc.nrm"
|
||||
@echo Unicode .icu files built to "$(ICUBLD_PKG)"
|
||||
|
||||
# Build the ICU4J icudata.jar and testdata.jar.
|
||||
|
@ -502,7 +502,7 @@ ICU4J_TZDATA_PATHS=$(ICU4J_DATA_DIRNAME)\zoneinfo64.res $(ICU4J_DATA_DIRNAME)\me
|
|||
generate-data: GODATA "$(ICUOUT)\$(ICUPKG).dat" uni-core-data
|
||||
if not exist "$(ICUOUT)\icu4j\$(ICU4J_DATA_DIRNAME)" mkdir "$(ICUOUT)\icu4j\$(ICU4J_DATA_DIRNAME)"
|
||||
if not exist "$(ICUOUT)\icu4j\tzdata\$(ICU4J_DATA_DIRNAME)" mkdir "$(ICUOUT)\icu4j\tzdata\$(ICU4J_DATA_DIRNAME)"
|
||||
echo pnames.icu ubidi.icu ucase.icu uprops.icu > "$(ICUOUT)\icu4j\add.txt"
|
||||
echo pnames.icu ubidi.icu ucase.icu uprops.icu nfc.nrm > "$(ICUOUT)\icu4j\add.txt"
|
||||
"$(ICUPBIN)\icupkg" "$(ICUOUT)\$(ICUPKG).dat" "$(ICUOUT)\icu4j\$(U_ICUDATA_NAME)b.dat" -a "$(ICUOUT)\icu4j\add.txt" -s "$(ICUBLD_PKG)" -x * -tb -d "$(ICUOUT)\icu4j\$(ICU4J_DATA_DIRNAME)"
|
||||
@for %f in ($(ICU4J_TZDATA_PATHS)) do @move "$(ICUOUT)\icu4j\%f" "$(ICUOUT)\icu4j\tzdata\$(ICU4J_DATA_DIRNAME)"
|
||||
|
||||
|
@ -602,7 +602,7 @@ icu4j-data-install :
|
|||
copy "$(ICUTMP)\$(ICUPKG).dat" "$(ICUOUT)\$(U_ICUDATA_NAME)$(U_ICUDATA_ENDIAN_SUFFIX).dat"
|
||||
-@erase "$(ICUTMP)\$(ICUPKG).dat"
|
||||
!ELSE
|
||||
"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) $(CNV_FILES_SPECIAL) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\nfc.nrm" "$(ICUBLD_PKG)\nfkc.nrm" "$(ICUBLD_PKG)\nfkc_cf.nrm" "$(ICUBLD_PKG)\uts46.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" $(CURR_RES_FILES) $(LANG_RES_FILES) $(REGION_RES_FILES) $(ZONE_RES_FILES) $(BRK_FILES) $(BRK_DICT_FILES) $(BRK_RES_FILES) $(ALL_RES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu"
|
||||
"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) $(CNV_FILES_SPECIAL) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\nfkc.nrm" "$(ICUBLD_PKG)\nfkc_cf.nrm" "$(ICUBLD_PKG)\uts46.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" $(CURR_RES_FILES) $(LANG_RES_FILES) $(REGION_RES_FILES) $(ZONE_RES_FILES) $(BRK_FILES) $(BRK_DICT_FILES) $(BRK_RES_FILES) $(ALL_RES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu"
|
||||
@echo Building icu data
|
||||
cd "$(ICUBLD_PKG)"
|
||||
"$(ICUPBIN)\pkgdata" $(COMMON_ICUDATA_ARGUMENTS) <<"$(ICUTMP)\icudata.lst"
|
||||
|
@ -610,7 +610,6 @@ unames.icu
|
|||
confusables.cfu
|
||||
$(ICUCOL)\ucadata.icu
|
||||
cnvalias.icu
|
||||
nfc.nrm
|
||||
nfkc.nrm
|
||||
nfkc_cf.nrm
|
||||
uts46.nrm
|
||||
|
@ -1006,10 +1005,9 @@ $(UCM_SOURCE_SPECIAL): {"$(ICUTOOLS)\makeconv\$(CFG)"}makeconv.exe
|
|||
# This used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
|
||||
# This data is now hard coded as a part of the library.
|
||||
# See Jitterbug 4497 for details.
|
||||
$(MISC_SOURCE) $(RB_FILES) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(BRK_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD_PKG)\nfc.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu"
|
||||
$(MISC_SOURCE) $(RB_FILES) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(BRK_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu"
|
||||
|
||||
# This used to depend on "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
|
||||
# These are now hardcoded in ICU4C and only loaded in ICU4J.
|
||||
$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\nfc.nrm"
|
||||
$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu"
|
||||
!ENDIF
|
||||
|
||||
|
|
|
@ -236,13 +236,15 @@ http://www.unicode.org/reports/tr44/tr44-13.html
|
|||
~/svn.icutools/trunk/src/unicode$ py/parsescriptmetadata.py $ICU_SRC_DIR/source/common/unicode/uscript.h ~/svn.cldr/trunk/common/properties/scriptMetadata.txt
|
||||
|
||||
* generate normalization data files
|
||||
- ~/svn.icu/uni70/dbg$ export LD_LIBRARY_PATH=~/svn.icu/uni70/dbg/lib
|
||||
- ~/svn.icu/uni70/dbg$ SRC_DATA_IN=$ICU_SRC_DIR/source/data/in
|
||||
- ~/svn.icu/uni70/dbg$ UNIDATA=$ICU_SRC_DIR/source/data/unidata
|
||||
- ~/svn.icu/uni70/dbg$ bin/gennorm2 -o $SRC_DATA_IN/nfc.nrm -s $UNIDATA/norm2 nfc.txt
|
||||
- ~/svn.icu/uni70/dbg$ bin/gennorm2 -o $SRC_DATA_IN/nfkc.nrm -s $UNIDATA/norm2 nfc.txt nfkc.txt
|
||||
- ~/svn.icu/uni70/dbg$ bin/gennorm2 -o $SRC_DATA_IN/nfkc_cf.nrm -s $UNIDATA/norm2 nfc.txt nfkc.txt nfkc_cf.txt
|
||||
- ~/svn.icu/uni70/dbg$ bin/gennorm2 -o $SRC_DATA_IN/uts46.nrm -s $UNIDATA/norm2 nfc.txt uts46.txt
|
||||
- cd $ICU_ROOT/dbg
|
||||
- export LD_LIBRARY_PATH=$ICU_ROOT/dbg/lib
|
||||
- SRC_DATA_IN=$ICU_SRC_DIR/source/data/in
|
||||
- UNIDATA=$ICU_SRC_DIR/source/data/unidata
|
||||
- bin/gennorm2 -o $ICU_SRC_DIR/source/common/norm2_nfc_data.h -s $UNIDATA/norm2 nfc.txt --csource
|
||||
- bin/gennorm2 -o $SRC_DATA_IN/nfc.nrm -s $UNIDATA/norm2 nfc.txt
|
||||
- bin/gennorm2 -o $SRC_DATA_IN/nfkc.nrm -s $UNIDATA/norm2 nfc.txt nfkc.txt
|
||||
- bin/gennorm2 -o $SRC_DATA_IN/nfkc_cf.nrm -s $UNIDATA/norm2 nfc.txt nfkc.txt nfkc_cf.txt
|
||||
- bin/gennorm2 -o $SRC_DATA_IN/uts46.nrm -s $UNIDATA/norm2 nfc.txt uts46.txt
|
||||
|
||||
* build ICU (make install)
|
||||
so that the tools build can pick up the new definitions from the installed header files.
|
||||
|
|
|
@ -2685,7 +2685,7 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
|
|||
UCOL_SHIFTED;
|
||||
result->variableTop = ucol_getVariableTop(collator, status);
|
||||
|
||||
result->nfd = Normalizer2Factory::getNFDInstance(*status);
|
||||
result->nfd = Normalizer2::getNFDInstance(*status);
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
uprv_free(result);
|
||||
|
|
|
@ -1368,14 +1368,14 @@ static const struct {
|
|||
* to testdata) for code coverage in tests.
|
||||
* See Jitterbug 4497.
|
||||
*
|
||||
* ICU4C 4.4 adds normalization data files again, e.g., nfc.nrm.
|
||||
* ICU4C 4.4 adds normalization data files again, e.g., nfkc.nrm.
|
||||
*/
|
||||
{"uprops", "icu", uprops_swap},
|
||||
{"ucase", "icu", ucase_swap},
|
||||
{"ubidi", "icu", ubidi_swap},
|
||||
#endif
|
||||
#if !UCONFIG_NO_NORMALIZATION && !UCONFIG_ONLY_COLLATION
|
||||
{"nfc", "nrm", unorm2_swap},
|
||||
{"nfkc", "nrm", unorm2_swap},
|
||||
{"confusables", "cfu", uspoof_swap},
|
||||
#endif
|
||||
{"unames", "icu", uchar_swapNames}
|
||||
|
|
|
@ -1729,7 +1729,7 @@ void CollationTest::TestDataDriven() {
|
|||
IcuTestErrorCode errorCode(*this, "TestDataDriven");
|
||||
|
||||
fcd = Normalizer2Factory::getFCDInstance(errorCode);
|
||||
nfd = Normalizer2Factory::getNFDInstance(errorCode);
|
||||
nfd = Normalizer2::getNFDInstance(errorCode);
|
||||
if(errorCode.logDataIfFailureAndReset("Normalizer2Factory::getFCDInstance() or getNFDInstance()")) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1123,7 +1123,7 @@ BasicNormalizerTest::TestCompare() {
|
|||
}
|
||||
|
||||
// test all of these precomposed characters
|
||||
const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode);
|
||||
const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode);
|
||||
UnicodeSetIterator it(set);
|
||||
while(it.next() && !it.isString()) {
|
||||
UChar32 c=it.getCodepoint();
|
||||
|
@ -1484,9 +1484,9 @@ BasicNormalizerTest::TestCustomFCC() {
|
|||
void
|
||||
BasicNormalizerTest::TestFilteredNormalizer2Coverage() {
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode);
|
||||
const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
dataerrln("Normalizer2Factory::getNFCInstance() call failed - %s", u_errorName(status));
|
||||
dataerrln("Normalizer2::getNFCInstance() call failed - %s", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff\\u0310-\\u031f]"), errorCode);
|
||||
|
|
|
@ -58,6 +58,7 @@ enum {
|
|||
SOURCEDIR,
|
||||
OUTPUT_FILENAME,
|
||||
UNICODE_VERSION,
|
||||
WRITE_C_SOURCE,
|
||||
OPT_FAST
|
||||
};
|
||||
|
||||
|
@ -69,6 +70,7 @@ static UOption options[]={
|
|||
UOPTION_SOURCEDIR,
|
||||
UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG),
|
||||
UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
|
||||
UOPTION_DEF("csource", '\1', UOPT_NO_ARG),
|
||||
UOPTION_DEF("fast", '\1', UOPT_NO_ARG)
|
||||
};
|
||||
|
||||
|
@ -100,7 +102,7 @@ main(int argc, char* argv[]) {
|
|||
"Usage: %s [-options] infiles+ -o outputfilename\n"
|
||||
"\n"
|
||||
"Reads the infiles with normalization data and\n"
|
||||
"creates a binary file (outputfilename) with the data.\n"
|
||||
"creates a binary or C source file (outputfilename) with the data.\n"
|
||||
"\n",
|
||||
argv[0]);
|
||||
fprintf(stderr,
|
||||
|
@ -111,9 +113,10 @@ main(int argc, char* argv[]) {
|
|||
"\t-u or --unicode Unicode version, followed by the version like 5.2.0\n");
|
||||
fprintf(stderr,
|
||||
"\t-s or --sourcedir source directory, followed by the path\n"
|
||||
"\t-o or --output output filename\n");
|
||||
"\t-o or --output output filename\n"
|
||||
"\t --csource writes a C source file with initializers\n");
|
||||
fprintf(stderr,
|
||||
"\t --fast optimize the .nrm file for fast normalization,\n"
|
||||
"\t --fast optimize the data for fast normalization,\n"
|
||||
"\t which might increase its size (Writes fully decomposed\n"
|
||||
"\t regular mappings instead of delta mappings.\n"
|
||||
"\t You should measure the runtime speed to make sure that\n"
|
||||
|
@ -174,7 +177,11 @@ main(int argc, char* argv[]) {
|
|||
filename.truncate(pathLength);
|
||||
}
|
||||
|
||||
builder->writeBinaryFile(options[OUTPUT_FILENAME].value);
|
||||
if(options[WRITE_C_SOURCE].doesOccur) {
|
||||
builder->writeCSourceFile(options[OUTPUT_FILENAME].value);
|
||||
} else {
|
||||
builder->writeBinaryFile(options[OUTPUT_FILENAME].value);
|
||||
}
|
||||
|
||||
return errorCode.get();
|
||||
|
||||
|
|
|
@ -33,12 +33,14 @@
|
|||
#include "unicode/uniset.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "charstr.h"
|
||||
#include "hash.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "toolutil.h"
|
||||
#include "unewdata.h"
|
||||
#include "utrie2.h"
|
||||
#include "uvectr32.h"
|
||||
#include "writesrc.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
|
@ -168,7 +170,8 @@ enumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value
|
|||
U_CDECL_END
|
||||
|
||||
Normalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) :
|
||||
phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL) {
|
||||
phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL),
|
||||
norm16TrieLength(0) {
|
||||
memset(unicodeVersion, 0, sizeof(unicodeVersion));
|
||||
normTrie=utrie2_open(0, 0, &errorCode);
|
||||
normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm));
|
||||
|
@ -1143,23 +1146,15 @@ void Normalizer2DataBuilder::processData() {
|
|||
if(minCP>=0x10000) {
|
||||
indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=U16_LEAD(minCP);
|
||||
}
|
||||
}
|
||||
|
||||
void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
|
||||
processData();
|
||||
|
||||
IcuToolErrorCode errorCode("gennorm2/writeBinaryFile()");
|
||||
utrie2_freeze(norm16Trie, UTRIE2_16_VALUE_BITS, errorCode);
|
||||
int32_t norm16TrieLength=utrie2_serialize(norm16Trie, NULL, 0, errorCode);
|
||||
norm16TrieLength=utrie2_serialize(norm16Trie, NULL, 0, errorCode);
|
||||
if(errorCode.get()!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
fprintf(stderr, "gennorm2 error: unable to freeze/serialize the normalization trie - %s\n",
|
||||
errorCode.errorName());
|
||||
exit(errorCode.reset());
|
||||
}
|
||||
errorCode.reset();
|
||||
LocalArray<uint8_t> norm16TrieBytes(new uint8_t[norm16TrieLength]);
|
||||
utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode);
|
||||
errorCode.assertSuccess();
|
||||
|
||||
int32_t offset=(int32_t)sizeof(indexes);
|
||||
indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]=offset;
|
||||
|
@ -1192,6 +1187,16 @@ void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
|
|||
u_versionFromString(unicodeVersion, U_UNICODE_VERSION);
|
||||
}
|
||||
memcpy(dataInfo.dataVersion, unicodeVersion, 4);
|
||||
}
|
||||
|
||||
void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
|
||||
processData();
|
||||
|
||||
IcuToolErrorCode errorCode("gennorm2/writeBinaryFile()");
|
||||
LocalArray<uint8_t> norm16TrieBytes(new uint8_t[norm16TrieLength]);
|
||||
utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode);
|
||||
errorCode.assertSuccess();
|
||||
|
||||
UNewDataMemory *pData=
|
||||
udata_create(NULL, NULL, filename, &dataInfo,
|
||||
haveCopyright ? U_COPYRIGHT_STRING : NULL, errorCode);
|
||||
|
@ -1209,6 +1214,7 @@ void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
|
|||
fprintf(stderr, "gennorm2: error %s writing the output file\n", errorCode.errorName());
|
||||
exit(errorCode.reset());
|
||||
}
|
||||
int32_t totalSize=indexes[Normalizer2Impl::IX_TOTAL_SIZE];
|
||||
if(writtenSize!=totalSize) {
|
||||
fprintf(stderr, "gennorm2 error: written size %ld != calculated size %ld\n",
|
||||
(long)writtenSize, (long)totalSize);
|
||||
|
@ -1216,6 +1222,74 @@ void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
Normalizer2DataBuilder::writeCSourceFile(const char *filename) {
|
||||
processData();
|
||||
|
||||
IcuToolErrorCode errorCode("gennorm2/writeCSourceFile()");
|
||||
const char *basename=findBasename(filename);
|
||||
CharString path(filename, (int32_t)(basename-filename), errorCode);
|
||||
CharString dataName(basename, errorCode);
|
||||
const char *extension=strrchr(basename, '.');
|
||||
if(extension!=NULL) {
|
||||
dataName.truncate((int32_t)(extension-basename));
|
||||
}
|
||||
errorCode.assertSuccess();
|
||||
|
||||
LocalArray<uint8_t> norm16TrieBytes(new uint8_t[norm16TrieLength]);
|
||||
utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode);
|
||||
errorCode.assertSuccess();
|
||||
|
||||
FILE *f=usrc_create(path.data(), basename, "icu/source/tools/gennorm2/n2builder.cpp");
|
||||
if(f==NULL) {
|
||||
fprintf(stderr, "gennorm2/writeCSourceFile() error: unable to create the output file %s\n",
|
||||
filename);
|
||||
exit(U_FILE_ACCESS_ERROR);
|
||||
return;
|
||||
}
|
||||
char line[100];
|
||||
sprintf(line, "static const UVersionInfo %s_formatVersion={", dataName.data());
|
||||
usrc_writeArray(f, line, dataInfo.formatVersion, 8, 4, "};\n");
|
||||
sprintf(line, "static const UVersionInfo %s_dataVersion={", dataName.data());
|
||||
usrc_writeArray(f, line, dataInfo.dataVersion, 8, 4, "};\n\n");
|
||||
sprintf(line, "static const int32_t %s_indexes[Normalizer2Impl::IX_COUNT]={\n",
|
||||
dataName.data());
|
||||
usrc_writeArray(f,
|
||||
line,
|
||||
indexes, 32, Normalizer2Impl::IX_COUNT,
|
||||
"\n};\n\n");
|
||||
sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", dataName.data());
|
||||
usrc_writeUTrie2Arrays(f,
|
||||
line, NULL,
|
||||
norm16Trie,
|
||||
"\n};\n\n");
|
||||
sprintf(line, "static const uint16_t %s_extraData[%%ld]={\n", dataName.data());
|
||||
usrc_writeArray(f,
|
||||
line,
|
||||
extraData.getBuffer(), 16, extraData.length(),
|
||||
"\n};\n\n");
|
||||
sprintf(line, "static const uint8_t %s_smallFCD[%%ld]={\n", dataName.data());
|
||||
usrc_writeArray(f,
|
||||
line,
|
||||
smallFCD, 8, sizeof(smallFCD),
|
||||
"\n};\n\n");
|
||||
/*fputs( // TODO
|
||||
"static const UCaseProps %s_singleton={\n"
|
||||
" NULL,\n"
|
||||
" %s_indexes,\n"
|
||||
" %s_extraData,\n"
|
||||
" %s_smallFCD,\n",
|
||||
f);*/
|
||||
sprintf(line, "static const UTrie2 %s_trie {\n", dataName.data());
|
||||
char line2[100];
|
||||
sprintf(line2, "%s_trieIndex", dataName.data());
|
||||
usrc_writeUTrie2Struct(f,
|
||||
line,
|
||||
norm16Trie, line2, NULL,
|
||||
"};\n");
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2011, International Business Machines
|
||||
* Copyright (C) 2009-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -64,6 +64,7 @@ public:
|
|||
void setUnicodeVersion(const char *v);
|
||||
|
||||
void writeBinaryFile(const char *filename);
|
||||
void writeCSourceFile(const char *filename);
|
||||
|
||||
private:
|
||||
friend class CompositionBuilder;
|
||||
|
@ -110,6 +111,7 @@ private:
|
|||
|
||||
int32_t indexes[Normalizer2Impl::IX_COUNT];
|
||||
UTrie2 *norm16Trie;
|
||||
int32_t norm16TrieLength;
|
||||
UnicodeString extraData;
|
||||
uint8_t smallFCD[0x100];
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue