ICU-9644 re-hardcode some normalization data: nfc.nrm

X-SVN-Rev: 36384
This commit is contained in:
Markus Scherer 2014-09-08 03:05:56 +00:00
parent 823e3a9372
commit 0f78abc7ee
23 changed files with 2035 additions and 662 deletions

View file

@ -94,7 +94,7 @@ dictionarydata.o \
appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o \
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o loadednormalizer2impl.o \
chariter.o schriter.o uchriter.o uiter.o \
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
uscript.o uscript_props.o usc_impl.o unames.o \

View file

@ -1,7 +1,7 @@
/*
*****************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*****************************************************************************
*/
@ -71,7 +71,7 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode
pieces_lengths(NULL),
current(NULL),
current_length(0),
nfd(*Normalizer2Factory::getNFDInstance(status)),
nfd(*Normalizer2::getNFDInstance(status)),
nfcImpl(*Normalizer2Factory::getNFCImpl(status))
{
if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {

View file

@ -381,10 +381,9 @@
<ClCompile Include="caniter.cpp">
</ClCompile>
<ClCompile Include="filterednormalizer2.cpp" />
<ClCompile Include="normalizer2.cpp">
</ClCompile>
<ClCompile Include="normalizer2impl.cpp">
</ClCompile>
<ClCompile Include="loadednormalizer2impl.cpp" />
<ClCompile Include="normalizer2.cpp" />
<ClCompile Include="normalizer2impl.cpp" />
<ClCompile Include="normlzr.cpp">
</ClCompile>
<ClCompile Include="unorm.cpp" />
@ -1145,6 +1144,7 @@
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="norm2allmodes.h" />
<CustomBuild Include="unicode\normalizer2.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>

View file

@ -349,6 +349,9 @@
<ClCompile Include="filterednormalizer2.cpp">
<Filter>normalization</Filter>
</ClCompile>
<ClCompile Include="loadednormalizer2impl.cpp">
<Filter>normalization</Filter>
</ClCompile>
<ClCompile Include="normalizer2.cpp">
<Filter>normalization</Filter>
</ClCompile>
@ -760,6 +763,9 @@
<ClInclude Include="ureslocs.h">
<Filter>locales &amp; resources</Filter>
</ClInclude>
<ClInclude Include="norm2allmodes.h">
<Filter>normalization</Filter>
</ClInclude>
<ClInclude Include="normalizer2impl.h">
<Filter>normalization</Filter>
</ClInclude>

View file

@ -0,0 +1,342 @@
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* loadednormalizer2impl.cpp
*
* created on: 2014sep03
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/udata.h"
#include "unicode/localpointer.h"
#include "unicode/normalizer2.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cstring.h"
#include "mutex.h"
#include "norm2allmodes.h"
#include "normalizer2impl.h"
#include "uassert.h"
#include "ucln_cmn.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
/**
 * Normalizer2Impl whose data is read from a ".nrm" data item via udata_openChoice().
 *
 * An instance owns the UDataMemory opened by load() and the UTrie2 that load()
 * opens from the serialized bytes; the destructor closes both.
 * Because of that ownership the class is not copyable.
 */
class LoadedNormalizer2Impl : public Normalizer2Impl {
public:
    LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
    virtual ~LoadedNormalizer2Impl();

    /**
     * Opens the data item and initializes this object from it.
     * @param packageName passed through to udata_openChoice()
     * @param name        data item name (the type is always "nrm")
     * @param errorCode   in/out ICU error code; nothing is done if it already indicates a failure
     */
    void load(const char *packageName, const char *name, UErrorCode &errorCode);

private:
    // Declared but intentionally not defined: a member-wise copy would make two
    // objects close the same UDataMemory and UTrie2 in their destructors.
    LoadedNormalizer2Impl(const LoadedNormalizer2Impl &other);
    LoadedNormalizer2Impl &operator=(const LoadedNormalizer2Impl &other);

    /** udata_openChoice() callback: accepts only "Nrm2" data with formatVersion 2. */
    static UBool U_CALLCONV
    isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);

    UDataMemory *memory;  // data item opened by load(); NULL until then
    UTrie2 *ownedTrie;    // trie opened from the data item's bytes; NULL until then
};
// Releases what load() acquired: the data item and the trie opened from it.
// Both members are NULL if load() was never called or failed before setting them,
// so this relies on udata_close()/utrie2_close() tolerating NULL — NOTE(review): confirm.
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
udata_close(memory);
utrie2_close(ownedTrie);
}
/**
 * Data-acceptance callback handed to udata_openChoice() by load().
 * Accepts a data item only if its header is at least 20 bytes, matches this
 * platform's endianness and charset family, carries the data format "Nrm2",
 * and has major format version 2. The other arguments are ignored.
 */
UBool U_CALLCONV
LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
                                    const char * /* type */, const char * /*name*/,
                                    const UDataInfo *pInfo) {
    // Check the size first, before looking at any other header field.
    if(pInfo->size<20) {
        return FALSE;
    }
    if(pInfo->isBigEndian!=U_IS_BIG_ENDIAN || pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;
    }
    // dataFormat must spell "Nrm2".
    if(pInfo->dataFormat[0]!=0x4e ||
            pInfo->dataFormat[1]!=0x72 ||
            pInfo->dataFormat[2]!=0x6d ||
            pInfo->dataFormat[3]!=0x32) {
        return FALSE;
    }
    if(pInfo->formatVersion[0]!=2) {
        return FALSE;
    }
    // The data version is deliberately not copied out of pInfo here:
    // Normalizer2Impl *me=(Normalizer2Impl *)context;
    // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
    return TRUE;
}
void
LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
if(U_FAILURE(errorCode)) {
return;
}
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
const int32_t *inIndexes=(const int32_t *)inBytes;
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
if(indexesLength<=IX_MIN_MAYBE_YES) {
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
return;
}
int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
inBytes+offset, nextOffset-offset, NULL,
&errorCode);
if(U_FAILURE(errorCode)) {
return;
}
offset=nextOffset;
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
// smallFCD: new in formatVersion 2
offset=nextOffset;
const uint8_t *inSmallFCD=inBytes+offset;
init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
}
// instance cache ---------------------------------------------------------- ***
/**
 * Creates a Norm2AllModes whose implementation is loaded from the named data item.
 * Returns NULL and leaves/sets a failure in errorCode if errorCode already failed
 * or the implementation object cannot be allocated. Otherwise the loaded
 * implementation and the resulting errorCode are passed to
 * createInstance(impl, errorCode), which decides the final result.
 */
Norm2AllModes *
Norm2AllModes::createInstance(const char *packageName,
                              const char *name,
                              UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        LoadedNormalizer2Impl *loaded=new LoadedNormalizer2Impl;
        if(loaded!=NULL) {
            // load() may fail; the impl is handed on either way together with errorCode.
            loaded->load(packageName, name, errorCode);
            return createInstance(loaded, errorCode);
        }
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return NULL;
}
// Forward declaration: initSingletons() registers this cleanup function
// before its definition further down in this file.
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
U_CDECL_END
// Lazily created singletons for the "nfkc" and "nfkc_cf" data; set by initSingletons().
static Norm2AllModes *nfkcSingleton;
static Norm2AllModes *nfkc_cfSingleton;
// Maps a data name to its Norm2AllModes; created on demand in Normalizer2::getInstance()
// and only read or modified there while a Mutex is held.
static UHashtable *cache=NULL;
// One-time initialization guards for the two singletons above.
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
// UInitOnce initialization function shared by both singletons.
// what selects the singleton to create: "nfkc" or "nfkc_cf"; anything else
// trips an assertion. The cleanup function is registered in every case.
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
    const UBool wantsNFKC = uprv_strcmp(what, "nfkc") == 0;
    if (wantsNFKC) {
        nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
    } else {
        const UBool wantsNFKC_CF = uprv_strcmp(what, "nfkc_cf") == 0;
        if (wantsNFKC_CF) {
            nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
        } else {
            U_ASSERT(FALSE);   // Unknown singleton
        }
    }
    ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
}
U_CDECL_BEGIN
// Value deleter installed on the cache hashtable: destroys one Norm2AllModes.
static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    Norm2AllModes *doomed=static_cast<Norm2AllModes *>(allModes);
    delete doomed;
}
// Library cleanup function for this file: destroys both singletons and the
// instance cache, and re-arms the init-once guards so that the singletons can
// be created again afterwards. Always returns TRUE.
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
    // NFKC singleton and its guard.
    delete nfkcSingleton;
    nfkcSingleton = NULL;

    // NFKC_Casefold singleton.
    delete nfkc_cfSingleton;
    nfkc_cfSingleton = NULL;

    // Closing the table also runs its key/value deleters, if it was ever created.
    uhash_close(cache);
    cache = NULL;

    nfkcInitOnce.reset();
    nfkc_cfInitOnce.reset();
    return TRUE;
}
U_CDECL_END
/**
 * Returns the shared NFKC Norm2AllModes, creating it on first use.
 * Returns NULL if errorCode already indicates a failure on entry; otherwise
 * returns whatever the one-time initialization stored in the singleton.
 */
const Norm2AllModes *
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
        return nfkcSingleton;
    }
    return NULL;
}
/**
 * Returns the shared NFKC_Casefold Norm2AllModes, creating it on first use.
 * Returns NULL if errorCode already indicates a failure on entry; otherwise
 * returns whatever the one-time initialization stored in the singleton.
 */
const Norm2AllModes *
Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
        return nfkc_cfSingleton;
    }
    return NULL;
}
/** Returns the composing normalizer of the NFKC singleton, or NULL if that singleton is unavailable. */
const Normalizer2 *
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *modes=Norm2AllModes::getNFKCInstance(errorCode);
    if(modes==NULL) {
        return NULL;
    }
    return &modes->comp;
}
/**
 * Returns the decomposing normalizer of the NFKC singleton (NFKD uses the
 * same data as NFKC), or NULL if that singleton is unavailable.
 */
const Normalizer2 *
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *modes=Norm2AllModes::getNFKCInstance(errorCode);
    if(modes==NULL) {
        return NULL;
    }
    return &modes->decomp;
}
/** Returns the composing normalizer of the NFKC_Casefold singleton, or NULL if that singleton is unavailable. */
const Normalizer2 *
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    const Norm2AllModes *modes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    if(modes==NULL) {
        return NULL;
    }
    return &modes->comp;
}
/**
 * Returns a Normalizer2 for the given data and mode.
 *
 * With packageName==NULL the names "nfc", "nfkc" and "nfkc_cf" map to the
 * corresponding singletons. Any other combination is looked up in, and on a
 * miss loaded into, a process-wide cache that is keyed by name only.
 *
 * @param packageName data package, or NULL for the default data
 * @param name        data item name; must be non-NULL and non-empty,
 *                    otherwise U_ILLEGAL_ARGUMENT_ERROR is set
 * @param mode        selects compose / decompose / FCD / contiguous-compose
 * @param errorCode   in/out ICU error code
 * @return the normalizer; NULL on failure or for an unknown mode.
 *         The returned object is owned by the singleton/cache, not the caller.
 */
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
                         const char *name,
                         UNormalization2Mode mode,
                         UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    if(name==NULL || *name==0) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    const Norm2AllModes *allModes=NULL;
    if(packageName==NULL) {
        if(0==uprv_strcmp(name, "nfc")) {
            allModes=Norm2AllModes::getNFCInstance(errorCode);
        } else if(0==uprv_strcmp(name, "nfkc")) {
            allModes=Norm2AllModes::getNFKCInstance(errorCode);
        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
            allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
        }
    }
    if(allModes==NULL && U_SUCCESS(errorCode)) {
        {
            Mutex lock;
            if(cache!=NULL) {
                allModes=(Norm2AllModes *)uhash_get(cache, name);
            }
        }
        if(allModes==NULL) {
            // The cache created below is only released by the cleanup function.
            // Register it here as well, so that the cache is not leaked when
            // neither singleton in this file was ever initialized.
            // This is deliberately done outside of the Mutex scopes.
            // NOTE(review): this uses the same UCLN_COMMON_NORMALIZER2 slot as
            // initSingletons(); confirm that no other file registers a different
            // function under that slot.
            ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
            // Load outside of the lock; a concurrent thread may do the same,
            // in which case the loser's instance is discarded below.
            LocalPointer<Norm2AllModes> localAllModes(
                Norm2AllModes::createInstance(packageName, name, errorCode));
            if(U_SUCCESS(errorCode)) {
                Mutex lock;
                if(cache==NULL) {
                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
                    if(U_FAILURE(errorCode)) {
                        return NULL;
                    }
                    uhash_setKeyDeleter(cache, uprv_free);
                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
                }
                void *temp=uhash_get(cache, name);
                if(temp==NULL) {
                    // The table owns its keys, so it needs its own copy of the name.
                    int32_t keyLength=(int32_t)uprv_strlen(name)+1;
                    char *nameCopy=(char *)uprv_malloc(keyLength);
                    if(nameCopy==NULL) {
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }
                    uprv_memcpy(nameCopy, name, keyLength);
                    allModes=localAllModes.getAlias();
                    // Ownership of the instance moves to the cache.
                    uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
                } else {
                    // race condition: another thread cached this name first;
                    // use its instance and let localAllModes delete ours.
                    allModes=(Norm2AllModes *)temp;
                }
            }
        }
    }
    // allModes is only used if no error occurred along the way.
    if(allModes!=NULL && U_SUCCESS(errorCode)) {
        switch(mode) {
        case UNORM2_COMPOSE:
            return &allModes->comp;
        case UNORM2_DECOMPOSE:
            return &allModes->decomp;
        case UNORM2_FCD:
            return &allModes->fcd;
        case UNORM2_COMPOSE_CONTIGUOUS:
            return &allModes->fcc;
        default:
            break;  // do nothing
        }
    }
    return NULL;
}
/**
 * Maps an old-style UNormalizationMode to the matching Normalizer2 instance.
 * Returns NULL if errorCode already indicates a failure. Any mode without an
 * explicit mapping (such as UNORM_NONE) yields the no-op instance.
 */
const Normalizer2 *
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    if(mode==UNORM_NFD) {
        return Normalizer2::getNFDInstance(errorCode);
    }
    if(mode==UNORM_NFKD) {
        return Normalizer2::getNFKDInstance(errorCode);
    }
    if(mode==UNORM_NFC) {
        return Normalizer2::getNFCInstance(errorCode);
    }
    if(mode==UNORM_NFKC) {
        return Normalizer2::getNFKCInstance(errorCode);
    }
    if(mode==UNORM_FCD) {
        return getFCDInstance(errorCode);
    }
    // UNORM_NONE and everything else
    return getNoopInstance(errorCode);
}
/** Returns the implementation object behind the NFKC singleton, or NULL if that singleton is unavailable. */
const Normalizer2Impl *
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *modes=Norm2AllModes::getNFKCInstance(errorCode);
    if(modes==NULL) {
        return NULL;
    }
    return modes->impl;
}
/** Returns the implementation object behind the NFKC_Casefold singleton, or NULL if that singleton is unavailable. */
const Normalizer2Impl *
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    const Norm2AllModes *modes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    if(modes==NULL) {
        return NULL;
    }
    return modes->impl;
}
U_NAMESPACE_END
// C API ------------------------------------------------------------------- ***
U_NAMESPACE_USE
/* C wrapper for Normalizer2::getNFKCInstance(); pErrorCode is dereferenced unconditionally. */
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getNFKCInstance(*pErrorCode);
    return (const UNormalizer2 *)norm2;
}
/* C wrapper for Normalizer2::getNFKDInstance(); pErrorCode is dereferenced unconditionally. */
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getNFKDInstance(*pErrorCode);
    return (const UNormalizer2 *)norm2;
}
/* C wrapper for Normalizer2::getNFKCCasefoldInstance(); pErrorCode is dereferenced unconditionally. */
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    return (const UNormalizer2 *)norm2;
}
/* C wrapper for Normalizer2::getInstance(); pErrorCode is dereferenced unconditionally. */
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
                   const char *name,
                   UNormalization2Mode mode,
                   UErrorCode *pErrorCode) {
    const Normalizer2 *norm2=Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    return (const UNormalizer2 *)norm2;
}
#endif // !UCONFIG_NO_NORMALIZATION

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,341 @@
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* norm2allmodes.h
*
* created on: 2014sep07
* created by: Markus W. Scherer
*/
#ifndef __NORM2ALLMODES_H__
#define __NORM2ALLMODES_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/normalizer2.h"
#include "unicode/unistr.h"
#include "cpputils.h"
#include "normalizer2impl.h"
U_NAMESPACE_BEGIN
// Intermediate class:
// Has Normalizer2Impl and does boilerplate argument checking and setup.
// Normalizer2 base class that holds a reference to a Normalizer2Impl.
// The UnicodeString-based entry points validate their arguments and set up a
// ReorderingBuffer, then delegate to the pure virtual pointer-range functions
// (normalize, normalizeAndAppend, spanQuickCheckYes) that subclasses implement.
class Normalizer2WithImpl : public Normalizer2 {
public:
// Stores a reference only; ni must outlive this object.
Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
virtual ~Normalizer2WithImpl();
// normalize
// Writes the normalized form of src into dest and returns dest.
// dest is set to bogus if errorCode already failed, or (with
// U_ILLEGAL_ARGUMENT_ERROR) if src and dest are the same object or
// src has no buffer.
virtual UnicodeString &
normalize(const UnicodeString &src,
UnicodeString &dest,
UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
dest.setToBogus();
return dest;
}
const UChar *sArray=src.getBuffer();
if(&dest==&src || sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
dest.setToBogus();
return dest;
}
dest.remove();
ReorderingBuffer buffer(impl, dest);
if(buffer.init(src.length(), errorCode)) {
normalize(sArray, sArray+src.length(), buffer, errorCode);
}
return dest;
}
// Subclass hook: normalizes [src, limit) into buffer.
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
// normalize and append
// Appends the normalized form of second to first (doNormalize=TRUE).
virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, TRUE, errorCode);
}
// Appends second to first without normalizing second as a whole (doNormalize=FALSE).
virtual UnicodeString &
append(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, FALSE, errorCode);
}
// Shared implementation of normalizeSecondAndAppend() and append().
// Fails with U_ILLEGAL_ARGUMENT_ERROR if first and second are the same
// object or second has no buffer. If the subclass hook fails, the part of
// first that was modified is restored from safeMiddle.
UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UBool doNormalize,
UErrorCode &errorCode) const {
// NOTE(review): presumably sets an error if first cannot provide a buffer — confirm in cpputils.h.
uprv_checkCanGetBuffer(first, errorCode);
if(U_FAILURE(errorCode)) {
return first;
}
const UChar *secondArray=second.getBuffer();
if(&first==&second || secondArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return first;
}
int32_t firstLength=first.length();
UnicodeString safeMiddle;
{
ReorderingBuffer buffer(impl, first);
if(buffer.init(firstLength+second.length(), errorCode)) {
normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
safeMiddle, buffer, errorCode);
}
} // The ReorderingBuffer destructor finalizes the first string.
if(U_FAILURE(errorCode)) {
// Restore the modified suffix of the first string.
first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
}
return first;
}
// Subclass hook: appends [src, limit) to buffer, normalizing it if doNormalize;
// safeMiddle is used by the caller above to undo changes on failure.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
// Sets decomposition to the decomposition that impl reports for c.
// Returns FALSE, leaving decomposition untouched, if impl returns none.
virtual UBool
getDecomposition(UChar32 c, UnicodeString &decomposition) const {
UChar buffer[4];
int32_t length;
const UChar *d=impl.getDecomposition(c, buffer, length);
if(d==NULL) {
return FALSE;
}
if(d==buffer) {
decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
} else {
decomposition.setTo(FALSE, d, length); // read-only alias
}
return TRUE;
}
// Same as getDecomposition() but using impl.getRawDecomposition(),
// which gets a larger scratch buffer.
virtual UBool
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
UChar buffer[30];
int32_t length;
const UChar *d=impl.getRawDecomposition(c, buffer, length);
if(d==NULL) {
return FALSE;
}
if(d==buffer) {
decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
} else {
decomposition.setTo(FALSE, d, length); // read-only alias
}
return TRUE;
}
// Forwards to impl.composePair().
virtual UChar32
composePair(UChar32 a, UChar32 b) const {
return impl.composePair(a, b);
}
// Returns the combining class derived from c's norm16 value.
virtual uint8_t
getCombiningClass(UChar32 c) const {
return impl.getCC(impl.getNorm16(c));
}
// quick checks
// TRUE if the quick-check-yes span covers all of s.
// Fails with U_ILLEGAL_ARGUMENT_ERROR if s has no buffer.
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return FALSE;
}
const UChar *sArray=s.getBuffer();
if(sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
const UChar *sLimit=sArray+s.length();
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
}
// Default quick check: never returns UNORM_MAYBE. The explicit class
// qualification calls this class's isNormalized(), not a subclass override.
virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
}
// Returns the length of the prefix of s reported by the pointer-based
// spanQuickCheckYes(); 0 on error.
virtual int32_t
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return 0;
}
const UChar *sArray=s.getBuffer();
if(sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
}
// Subclass hook: returns a pointer into [src, limit] marking the end of the
// quick-check-yes prefix.
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
// Per-code-point quick check; this default answers UNORM_YES for everything.
virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
return UNORM_YES;
}
// The normalization data and algorithms; not owned by this object.
const Normalizer2Impl &impl;
};
/**
 * Normalizer2 that decomposes: every hook forwards to the decompose-related
 * functions of the Normalizer2Impl held by the base class.
 */
class DecomposeNormalizer2 : public Normalizer2WithImpl {
public:
    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    virtual ~DecomposeNormalizer2();

private:
    // Keep the base class overloads visible alongside the overrides below
    // (avoids warnings about hiding base class functions).
    using Normalizer2WithImpl::normalize;
    using Normalizer2WithImpl::spanQuickCheckYes;

    virtual void
    normalize(const UChar *src, const UChar *limit,
              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.decompose(src, limit, &buffer, errorCode);
    }

    virtual void
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
                       UnicodeString &safeMiddle,
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    }

    // Runs decompose() without an output buffer and returns its result pointer.
    virtual const UChar *
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
        return impl.decompose(src, limit, NULL, errorCode);
    }

    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
        if(impl.isDecompYes(impl.getNorm16(c))) {
            return UNORM_YES;
        }
        return UNORM_NO;
    }

    virtual UBool hasBoundaryBefore(UChar32 c) const {
        return impl.hasDecompBoundary(c, TRUE);
    }

    virtual UBool hasBoundaryAfter(UChar32 c) const {
        return impl.hasDecompBoundary(c, FALSE);
    }

    virtual UBool isInert(UChar32 c) const {
        return impl.isDecompInert(c);
    }
};
/**
 * Normalizer2 that composes: every hook forwards to the compose-related
 * functions of the Normalizer2Impl held by the base class.
 * The fcc constructor flag is stored as onlyContiguous and passed along to
 * each of those calls.
 */
class ComposeNormalizer2 : public Normalizer2WithImpl {
public:
    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
    virtual ~ComposeNormalizer2();

private:
    // Keep the base class overloads visible alongside the overrides below
    // (avoids warnings about hiding base class functions).
    using Normalizer2WithImpl::normalize;
    using Normalizer2WithImpl::spanQuickCheckYes;

    virtual void
    normalize(const UChar *src, const UChar *limit,
              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
    }

    virtual void
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
                       UnicodeString &safeMiddle,
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
    }

    // Runs compose() with its fourth argument FALSE into a small scratch
    // buffer and returns compose()'s result.
    virtual UBool
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
        if(U_FAILURE(errorCode)) {
            return FALSE;
        }
        const UChar *text=s.getBuffer();
        if(text==NULL) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return FALSE;
        }
        UnicodeString scratch;
        ReorderingBuffer buffer(impl, scratch);
        // small destCapacity for substring normalization
        if(buffer.init(5, errorCode)) {
            return impl.compose(text, text+s.length(), onlyContiguous, FALSE, buffer, errorCode);
        }
        return FALSE;
    }

    // Unlike the base class default, this can report whatever
    // composeQuickCheck() stores in the result; UNORM_MAYBE is returned on error.
    virtual UNormalizationCheckResult
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
        if(U_FAILURE(errorCode)) {
            return UNORM_MAYBE;
        }
        const UChar *text=s.getBuffer();
        if(text==NULL) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return UNORM_MAYBE;
        }
        UNormalizationCheckResult result=UNORM_YES;
        impl.composeQuickCheck(text, text+s.length(), onlyContiguous, &result);
        return result;
    }

    virtual const UChar *
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
    }

    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
        return impl.getCompQuickCheck(impl.getNorm16(c));
    }

    virtual UBool hasBoundaryBefore(UChar32 c) const {
        return impl.hasCompBoundaryBefore(c);
    }

    virtual UBool hasBoundaryAfter(UChar32 c) const {
        return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
    }

    // Same query as hasBoundaryAfter() but with the last argument TRUE.
    virtual UBool isInert(UChar32 c) const {
        return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
    }

    const UBool onlyContiguous;
};
/**
 * Normalizer2 for FCD: every hook forwards to the FCD-related functions of
 * the Normalizer2Impl held by the base class.
 */
class FCDNormalizer2 : public Normalizer2WithImpl {
public:
    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    virtual ~FCDNormalizer2();

private:
    // Keep the base class overloads visible alongside the overrides below
    // (avoids warnings about hiding base class functions).
    using Normalizer2WithImpl::normalize;
    using Normalizer2WithImpl::spanQuickCheckYes;

    virtual void
    normalize(const UChar *src, const UChar *limit,
              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.makeFCD(src, limit, &buffer, errorCode);
    }

    virtual void
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
                       UnicodeString &safeMiddle,
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    }

    // Runs makeFCD() without an output buffer and returns its result pointer.
    virtual const UChar *
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
        return impl.makeFCD(src, limit, NULL, errorCode);
    }

    virtual UBool hasBoundaryBefore(UChar32 c) const {
        return impl.hasFCDBoundaryBefore(c);
    }

    virtual UBool hasBoundaryAfter(UChar32 c) const {
        return impl.hasFCDBoundaryAfter(c);
    }

    virtual UBool isInert(UChar32 c) const {
        return impl.isFCDInert(c);
    }
};
// Bundles one Normalizer2Impl with the four Normalizer2 objects that use it:
// compose, decompose, FCD and contiguous compose (FCC).
struct Norm2AllModes : public UMemory {
// i is dereferenced in the initializer list, so it must not be NULL.
// comp and fcc differ only in the flag passed to ComposeNormalizer2.
Norm2AllModes(Normalizer2Impl *i)
: impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
// Defined out of line.
~Norm2AllModes();
// Wraps an existing implementation object together with the error state
// of creating/loading it.
static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
// Loads the named data item and wraps the resulting implementation.
static Norm2AllModes *createInstance(const char *packageName,
const char *name,
UErrorCode &errorCode);
// Accessors for the shared instances.
static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
// impl is declared first; the four normalizers below hold references to *impl.
Normalizer2Impl *impl;
ComposeNormalizer2 comp;
DecomposeNormalizer2 decomp;
FCDNormalizer2 fcd;
ComposeNormalizer2 fcc;
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_NORMALIZATION
#endif // __NORM2ALLMODES_H__

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2009-2013, International Business Machines
* Copyright (C) 2009-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -18,17 +18,20 @@
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/localpointer.h"
#include "unicode/normalizer2.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cpputils.h"
#include "cstring.h"
#include "mutex.h"
#include "norm2allmodes.h"
#include "normalizer2impl.h"
#include "uassert.h"
#include "ucln_cmn.h"
#include "uhash.h"
using icu::Normalizer2Impl;
// NFC/NFD data machine-generated by gennorm2 --csource
#include "norm2_nfc_data.h"
U_NAMESPACE_BEGIN
@ -118,470 +121,131 @@ class NoopNormalizer2 : public Normalizer2 {
NoopNormalizer2::~NoopNormalizer2() {}
// Intermediate class:
// Has Normalizer2Impl and does boilerplate argument checking and setup.
class Normalizer2WithImpl : public Normalizer2 {
public:
Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
virtual ~Normalizer2WithImpl();
// normalize
virtual UnicodeString &
normalize(const UnicodeString &src,
UnicodeString &dest,
UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
dest.setToBogus();
return dest;
}
const UChar *sArray=src.getBuffer();
if(&dest==&src || sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
dest.setToBogus();
return dest;
}
dest.remove();
ReorderingBuffer buffer(impl, dest);
if(buffer.init(src.length(), errorCode)) {
normalize(sArray, sArray+src.length(), buffer, errorCode);
}
return dest;
}
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
// normalize and append
virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, TRUE, errorCode);
}
virtual UnicodeString &
append(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, FALSE, errorCode);
}
UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UBool doNormalize,
UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(first, errorCode);
if(U_FAILURE(errorCode)) {
return first;
}
const UChar *secondArray=second.getBuffer();
if(&first==&second || secondArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return first;
}
int32_t firstLength=first.length();
UnicodeString safeMiddle;
{
ReorderingBuffer buffer(impl, first);
if(buffer.init(firstLength+second.length(), errorCode)) {
normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
safeMiddle, buffer, errorCode);
}
} // The ReorderingBuffer destructor finalizes the first string.
if(U_FAILURE(errorCode)) {
// Restore the modified suffix of the first string.
first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
}
return first;
}
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
virtual UBool
getDecomposition(UChar32 c, UnicodeString &decomposition) const {
UChar buffer[4];
int32_t length;
const UChar *d=impl.getDecomposition(c, buffer, length);
if(d==NULL) {
return FALSE;
}
if(d==buffer) {
decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
} else {
decomposition.setTo(FALSE, d, length); // read-only alias
}
return TRUE;
}
virtual UBool
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
UChar buffer[30];
int32_t length;
const UChar *d=impl.getRawDecomposition(c, buffer, length);
if(d==NULL) {
return FALSE;
}
if(d==buffer) {
decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
} else {
decomposition.setTo(FALSE, d, length); // read-only alias
}
return TRUE;
}
virtual UChar32
composePair(UChar32 a, UChar32 b) const {
return impl.composePair(a, b);
}
virtual uint8_t
getCombiningClass(UChar32 c) const {
return impl.getCC(impl.getNorm16(c));
}
// quick checks
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return FALSE;
}
const UChar *sArray=s.getBuffer();
if(sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
const UChar *sLimit=sArray+s.length();
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
}
virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
}
virtual int32_t
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return 0;
}
const UChar *sArray=s.getBuffer();
if(sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
return UNORM_YES;
}
const Normalizer2Impl &impl;
};
Normalizer2WithImpl::~Normalizer2WithImpl() {}
class DecomposeNormalizer2 : public Normalizer2WithImpl {
public:
DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
virtual ~DecomposeNormalizer2();
private:
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.decompose(src, limit, &buffer, errorCode);
}
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
return impl.decompose(src, limit, NULL, errorCode);
}
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
}
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
};
DecomposeNormalizer2::~DecomposeNormalizer2() {}
class ComposeNormalizer2 : public Normalizer2WithImpl {
public:
ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
virtual ~ComposeNormalizer2();
private:
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
}
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
}
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return FALSE;
}
const UChar *sArray=s.getBuffer();
if(sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
UnicodeString temp;
ReorderingBuffer buffer(impl, temp);
if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
return FALSE;
}
return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
}
virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return UNORM_MAYBE;
}
const UChar *sArray=s.getBuffer();
if(sArray==NULL) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return UNORM_MAYBE;
}
UNormalizationCheckResult qcResult=UNORM_YES;
impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
return qcResult;
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
}
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
return impl.getCompQuickCheck(impl.getNorm16(c));
}
virtual UBool hasBoundaryBefore(UChar32 c) const {
return impl.hasCompBoundaryBefore(c);
}
virtual UBool hasBoundaryAfter(UChar32 c) const {
return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
}
virtual UBool isInert(UChar32 c) const {
return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
}
const UBool onlyContiguous;
};
// Out-of-line definition of the (empty) destructor.
ComposeNormalizer2::~ComposeNormalizer2() {}
// Normalizer2 wrapper for the FCD mode: every operation is forwarded to the
// corresponding makeFCD*/FCD-boundary function of the Normalizer2Impl that
// was handed to the constructor.
class FCDNormalizer2 : public Normalizer2WithImpl {
public:
    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    virtual ~FCDNormalizer2();

private:
    // Forwards [src, limit) to impl.makeFCD(), writing into buffer.
    virtual void
    normalize(const UChar *src, const UChar *limit,
              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.makeFCD(src, limit, &buffer, errorCode);
    }
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.

    // Forwards all arguments unchanged to impl.makeFCDAndAppend().
    virtual void
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
                       UnicodeString &safeMiddle,
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    }

    // Calls impl.makeFCD() without a buffer (NULL) and returns its pointer result.
    virtual const UChar *
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
        const UChar *spanEnd=impl.makeFCD(src, limit, NULL, errorCode);
        return spanEnd;
    }
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.

    // Per-code-point properties, each answered by the impl's FCD variant.
    virtual UBool hasBoundaryBefore(UChar32 c) const {
        return impl.hasFCDBoundaryBefore(c);
    }
    virtual UBool hasBoundaryAfter(UChar32 c) const {
        return impl.hasFCDBoundaryAfter(c);
    }
    virtual UBool isInert(UChar32 c) const {
        return impl.isFCDInert(c);
    }
};

// Out-of-line definition of the (empty) virtual destructor.
FCDNormalizer2::~FCDNormalizer2() {}
// instance cache ---------------------------------------------------------- ***
// Bundles one Normalizer2Impl (held by value) with four Normalizer2 wrappers
// that are all constructed from that same impl.
struct Norm2AllModes : public UMemory {
// Factory taking a data package name and a data name; defined outside the struct body.
static Norm2AllModes *createInstance(const char *packageName,
const char *name,
UErrorCode &errorCode);
// comp and fcc are both ComposeNormalizer2 objects; they differ only in the
// second constructor argument (FALSE vs. TRUE) — presumably the
// onlyContiguous flag; confirm against the ComposeNormalizer2 constructor.
Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
// impl is declared first, so it is constructed before the wrappers that refer to it.
Normalizer2Impl impl;
ComposeNormalizer2 comp;
DecomposeNormalizer2 decomp;
FCDNormalizer2 fcd;
ComposeNormalizer2 fcc;
};
// Destructor: releases the impl object with delete, i.e. impl is treated
// here as a heap-allocated object that this Norm2AllModes is responsible for.
Norm2AllModes::~Norm2AllModes() {
delete impl;
}
Norm2AllModes *
Norm2AllModes::createInstance(const char *packageName,
const char *name,
UErrorCode &errorCode) {
Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
delete impl;
return NULL;
}
Norm2AllModes *allModes=new Norm2AllModes(impl);
if(allModes==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
delete impl;
return NULL;
}
return allModes;
}
Norm2AllModes *
Norm2AllModes::createNFCInstance(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return NULL;
}
LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
if(allModes.isNull()) {
Normalizer2Impl *impl=new Normalizer2Impl;
if(impl==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
allModes->impl.load(packageName, name, errorCode);
return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
impl->init(norm2_nfc_data_indexes, &norm2_nfc_data_trie,
norm2_nfc_data_extraData, norm2_nfc_data_smallFCD);
return createInstance(impl, errorCode);
}
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_normalizer2_cleanup();
U_CDECL_END
static Norm2AllModes *nfcSingleton;
static Norm2AllModes *nfkcSingleton;
static Norm2AllModes *nfkc_cfSingleton;
static Normalizer2 *noopSingleton;
static UHashtable *cache=NULL;
static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
// UInitOnce singleton initialization function
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
if (uprv_strcmp(what, "nfc") == 0) {
nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
} else if (uprv_strcmp(what, "nfkc") == 0) {
nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
} else if (uprv_strcmp(what, "noop") == 0) {
noopSingleton = new NoopNormalizer2;
} else {
U_ASSERT(FALSE); // Unknown singleton
// UInitOnce singleton initialization functions
// Init-once callback for the NFC singleton: stores whatever
// Norm2AllModes::createNFCInstance() returns, then registers the cleanup
// function. The registration is not conditional on errorCode.
static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) {
    nfcSingleton=Norm2AllModes::createNFCInstance(errorCode);
    ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2,
                                uprv_normalizer2_cleanup);
}
// Init-once callback for the no-op normalizer singleton.
// Does nothing if errorCode already indicates failure. Otherwise allocates a
// NoopNormalizer2; on success the cleanup function is registered, on
// allocation failure errorCode becomes U_MEMORY_ALLOCATION_ERROR.
static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        noopSingleton=new NoopNormalizer2;
        if(noopSingleton!=NULL) {
            ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
        } else {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
        }
    }
}
U_CDECL_BEGIN
// Deleter callback with a void* signature: the argument is interpreted as a
// Norm2AllModes pointer and deleted.
static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    Norm2AllModes *modes=static_cast<Norm2AllModes *>(allModes);
    delete modes;
}
// Library cleanup callback: deletes every singleton, closes the instance
// cache, and resets the init-once objects.
// Always returns TRUE.
static UBool U_CALLCONV uprv_normalizer2_cleanup() {
// Each pointer is set back to NULL right after its object is deleted.
delete nfcSingleton;
nfcSingleton = NULL;
delete nfkcSingleton;
nfkcSingleton = NULL;
delete nfkc_cfSingleton;
nfkc_cfSingleton = NULL;
delete noopSingleton;
noopSingleton = NULL;
// cache may still be NULL here if it was never opened.
uhash_close(cache);
cache=NULL;
// Reset the init-once state for all four singletons.
nfcInitOnce.reset();
nfkcInitOnce.reset();
nfkc_cfInitOnce.reset();
noopInitOnce.reset();
return TRUE;
}
U_CDECL_END
// Returns the comp wrapper of the "nfc" singleton (initialized once on
// demand), or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL;
}
// Returns the decomp wrapper of the "nfc" singleton (initialized once on
// demand), or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL;
}
// Returns the fcd wrapper of the "nfc" singleton (initialized once on
// demand), or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL;
}
// Returns the fcc wrapper of the "nfc" singleton (initialized once on
// demand), or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL;
}
// Returns the comp wrapper of the "nfkc" singleton (initialized once on
// demand), or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL;
}
// Returns the decomp wrapper of the "nfkc" singleton (initialized once on
// demand), or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL;
}
// Returns the comp wrapper of the "nfkc_cf" singleton (initialized once on
// demand), or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL;
}
const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode);
return noopSingleton;
// Returns the NFC Norm2AllModes singleton, running its one-time
// initialization if needed. Returns NULL immediately when errorCode already
// indicates failure; otherwise returns whatever nfcSingleton holds after
// initialization.
const Norm2AllModes *
Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfcInitOnce, &initNFCSingleton, errorCode);
        return nfcSingleton;
    }
    return NULL;
}
const Normalizer2 *
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return NULL;
}
switch(mode) {
case UNORM_NFD:
return getNFDInstance(errorCode);
case UNORM_NFKD:
return getNFKDInstance(errorCode);
case UNORM_NFC:
return getNFCInstance(errorCode);
case UNORM_NFKC:
return getNFKCInstance(errorCode);
case UNORM_FCD:
return getFCDInstance(errorCode);
default: // UNORM_NONE
return getNoopInstance(errorCode);
}
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
return allModes!=NULL ? &allModes->comp : NULL;
}
// Returns the decomp wrapper of the NFC Norm2AllModes instance,
// or NULL if that instance could not be obtained.
const Normalizer2 *
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return &nfcModes->decomp;
}
// Returns the fcd wrapper of the NFC Norm2AllModes instance,
// or NULL if that instance could not be obtained.
const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return &nfcModes->fcd;
}
// Returns the fcc wrapper of the NFC Norm2AllModes instance,
// or NULL if that instance could not be obtained.
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return &nfcModes->fcc;
}
// Returns the no-op normalizer singleton, running its one-time
// initialization if needed. Returns NULL immediately when errorCode already
// indicates failure.
const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode);
        return noopSingleton;
    }
    return NULL;
}
// Returns the address of the impl member of the "nfc" singleton
// (initialized once on demand), or NULL if that singleton is not available.
const Normalizer2Impl *
Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL;
}
// Returns the address of the impl member of the "nfkc" singleton
// (initialized once on demand), or NULL if that singleton is not available.
const Normalizer2Impl *
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL;
}
const Normalizer2Impl *
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL;
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
return allModes!=NULL ? allModes->impl : NULL;
}
const Normalizer2Impl *
@ -589,110 +253,6 @@ Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
return &((Normalizer2WithImpl *)norm2)->impl;
}
// Public entry point; simply forwards to Normalizer2Factory::getNFCInstance().
const Normalizer2 *
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
return Normalizer2Factory::getNFCInstance(errorCode);
}
// Public entry point; simply forwards to Normalizer2Factory::getNFDInstance().
const Normalizer2 *
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
return Normalizer2Factory::getNFDInstance(errorCode);
}
// Public entry point; simply forwards to Normalizer2Factory::getNFKCInstance().
const Normalizer2 *
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
return Normalizer2Factory::getNFKCInstance(errorCode);
}
// Public entry point; simply forwards to Normalizer2Factory::getNFKDInstance().
const Normalizer2 *
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
return Normalizer2Factory::getNFKDInstance(errorCode);
}
// Public entry point; simply forwards to Normalizer2Factory::getNFKC_CFInstance().
const Normalizer2 *
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
return Normalizer2Factory::getNFKC_CFInstance(errorCode);
}
// Returns the Normalizer2 wrapper selected by mode for the data identified by
// (packageName, name).
//
// Lookup order:
//   1. With packageName==NULL, the names "nfc", "nfkc" and "nfkc_cf" map to
//      the corresponding init-once singletons.
//   2. Otherwise the hash table `cache` is consulted, and a new Norm2AllModes
//      is created and inserted if no entry exists.
//
// Returns NULL and sets errorCode to U_ILLEGAL_ARGUMENT_ERROR if name is NULL
// or empty. Also returns NULL if errorCode indicates failure on entry or at
// any later step, or if mode is not one of the four handled
// UNormalization2Mode values.
//
// NOTE(review): the cache is keyed by name only; packageName is not part of
// the key, so equal names from different packages would share one entry.
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
const char *name,
UNormalization2Mode mode,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return NULL;
}
if(name==NULL || *name==0) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
Norm2AllModes *allModes=NULL;
// Step 1: well-known names without a package use the singletons.
if(packageName==NULL) {
if(0==uprv_strcmp(name, "nfc")) {
umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
allModes=nfcSingleton;
} else if(0==uprv_strcmp(name, "nfkc")) {
umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
allModes=nfkcSingleton;
} else if(0==uprv_strcmp(name, "nfkc_cf")) {
umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
allModes=nfkc_cfSingleton;
}
}
// Step 2: fall back to the cache of custom instances.
if(allModes==NULL && U_SUCCESS(errorCode)) {
{
// The lock is held only for this lookup; the inner scope releases it.
Mutex lock;
if(cache!=NULL) {
allModes=(Norm2AllModes *)uhash_get(cache, name);
}
}
if(allModes==NULL) {
// The new instance is created before the lock below is taken.
LocalPointer<Norm2AllModes> localAllModes(
Norm2AllModes::createInstance(packageName, name, errorCode));
if(U_SUCCESS(errorCode)) {
Mutex lock;
// Create the cache on first use; it takes over deleting its keys and values.
if(cache==NULL) {
cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
if(U_FAILURE(errorCode)) {
return NULL;
}
uhash_setKeyDeleter(cache, uprv_free);
uhash_setValueDeleter(cache, deleteNorm2AllModes);
}
// Look again under the lock: another caller may have inserted the
// same name between the first lookup and this point.
void *temp=uhash_get(cache, name);
if(temp==NULL) {
// The cache stores its own heap copy of the name (including the NUL).
int32_t keyLength=uprv_strlen(name)+1;
char *nameCopy=(char *)uprv_malloc(keyLength);
if(nameCopy==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
uprv_memcpy(nameCopy, name, keyLength);
// orphan() hands the new instance over to the cache.
uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
} else {
// race condition
// An entry already exists, so use it; localAllModes is not orphaned
// and still holds the instance created above.
allModes=(Norm2AllModes *)temp;
}
}
}
}
// Pick the wrapper that matches the requested mode.
if(allModes!=NULL && U_SUCCESS(errorCode)) {
switch(mode) {
case UNORM2_COMPOSE:
return &allModes->comp;
case UNORM2_DECOMPOSE:
return &allModes->decomp;
case UNORM2_FCD:
return &allModes->fcd;
case UNORM2_COMPOSE_CONTIGUOUS:
return &allModes->fcc;
default:
break; // do nothing
}
}
return NULL;
}
U_NAMESPACE_END
// C API ------------------------------------------------------------------- ***
@ -709,29 +269,6 @@ unorm2_getNFDInstance(UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
}
// C API wrapper: returns Normalizer2::getNFKCInstance() cast to the C handle type.
// pErrorCode is dereferenced without a NULL check.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
}
// C API wrapper: returns Normalizer2::getNFKDInstance() cast to the C handle type.
// pErrorCode is dereferenced without a NULL check.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
}
// C API wrapper: returns Normalizer2::getNFKCCasefoldInstance() cast to the C handle type.
// pErrorCode is dereferenced without a NULL check.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
}
// C API wrapper: passes all arguments through to Normalizer2::getInstance()
// and returns the result cast to the C handle type.
// pErrorCode is dereferenced without a NULL check.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
const char *name,
UNormalization2Mode mode,
UErrorCode *pErrorCode) {
return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
}
U_CAPI void U_EXPORT2
unorm2_close(UNormalizer2 *norm2) {
delete (Normalizer2 *)norm2;
@ -962,7 +499,7 @@ unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
U_CAPI uint8_t U_EXPORT2
u_getCombiningClass(UChar32 c) {
UErrorCode errorCode=U_ZERO_ERROR;
const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode);
if(U_SUCCESS(errorCode)) {
return nfd->getCombiningClass(c);
} else {

View file

@ -253,50 +253,12 @@ struct CanonIterData : public UMemory {
};
// Destructor: closes the data memory handle and the normalization trie,
// then deletes the canonical-iterator data object.
Normalizer2Impl::~Normalizer2Impl() {
udata_close(memory);
utrie2_close(normTrie);
delete fCanonIterData;
}
// Data-acceptance callback (passed to udata_openChoice() by load()).
// Accepts the data only if all of the following hold for pInfo:
//   - size is at least 20,
//   - endianness and charset family match this platform,
//   - dataFormat is "Nrm2",
//   - formatVersion[0] is 2.
// On acceptance, the 4-byte dataVersion is copied into the Normalizer2Impl
// that context points to, and TRUE is returned; otherwise FALSE is returned
// and context is left untouched. The type and name arguments are ignored.
UBool U_CALLCONV
Normalizer2Impl::isAcceptable(void *context,
                              const char * /* type */, const char * /*name*/,
                              const UDataInfo *pInfo) {
    if(!(pInfo->size>=20 &&
         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
         pInfo->charsetFamily==U_CHARSET_FAMILY &&
         pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
         pInfo->dataFormat[1]==0x72 &&
         pInfo->dataFormat[2]==0x6d &&
         pInfo->dataFormat[3]==0x32 &&
         pInfo->formatVersion[0]==2)) {
        return FALSE;
    }
    Normalizer2Impl *self=(Normalizer2Impl *)context;
    uprv_memcpy(self->dataVersion, pInfo->dataVersion, 4);
    return TRUE;
}
void
Normalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
if(U_FAILURE(errorCode)) {
return;
}
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
const int32_t *inIndexes=(const int32_t *)inBytes;
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
if(indexesLength<=IX_MIN_MAYBE_YES) {
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
return;
}
Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,
const uint16_t *inExtraData, const uint8_t *inSmallFCD) {
minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
@ -306,23 +268,12 @@ Normalizer2Impl::load(const char *packageName, const char *name, UErrorCode &err
limitNoNo=inIndexes[IX_LIMIT_NO_NO];
minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
inBytes+offset, nextOffset-offset, NULL,
&errorCode);
if(U_FAILURE(errorCode)) {
return;
}
normTrie=inTrie;
offset=nextOffset;
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
maybeYesCompositions=(const uint16_t *)(inBytes+offset);
maybeYesCompositions=inExtraData;
extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes);
// smallFCD: new in formatVersion 2
offset=nextOffset;
smallFCD=inBytes+offset;
smallFCD=inSmallFCD;
// Build tccc180[].
// gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.

View file

@ -22,7 +22,6 @@
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/normalizer2.h"
#include "unicode/udata.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "unicode/utf16.h"
@ -217,14 +216,15 @@ private:
UChar *codePointStart, *codePointLimit;
};
class U_COMMON_API Normalizer2Impl : public UMemory {
class U_COMMON_API Normalizer2Impl : public UObject {
public:
Normalizer2Impl() : memory(NULL), normTrie(NULL), fCanonIterData(NULL) {
Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) {
fCanonIterDataInitOnce.reset();
}
~Normalizer2Impl();
virtual ~Normalizer2Impl();
void load(const char *packageName, const char *name, UErrorCode &errorCode);
void init(const int32_t *inIndexes, const UTrie2 *inTrie,
const uint16_t *inExtraData, const uint8_t *inSmallFCD);
void addLcccChars(UnicodeSet &set) const;
void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
@ -478,9 +478,6 @@ public:
}
UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
private:
static UBool U_CALLCONV
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
static UBool isInert(uint16_t norm16) { return norm16==0; }
@ -584,8 +581,7 @@ private:
int32_t getCanonValue(UChar32 c) const;
const UnicodeSet &getCanonStartSet(int32_t n) const;
UDataMemory *memory;
UVersionInfo dataVersion;
// UVersionInfo dataVersion;
// Code point thresholds for quick check codes.
UChar32 minDecompNoCP;
@ -598,13 +594,13 @@ private:
uint16_t limitNoNo;
uint16_t minMaybeYes;
UTrie2 *normTrie;
const UTrie2 *normTrie;
const uint16_t *maybeYesCompositions;
const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F
public: // CanonIterData is public to allow access from C callback functions.
public: // CanonIterData is public to allow access from C callback functions.
UInitOnce fCanonIterDataInitOnce;
CanonIterData *fCanonIterData;
};
@ -620,13 +616,8 @@ private:
*/
class U_COMMON_API Normalizer2Factory {
public:
static const Normalizer2 *getNFCInstance(UErrorCode &errorCode);
static const Normalizer2 *getNFDInstance(UErrorCode &errorCode);
static const Normalizer2 *getFCDInstance(UErrorCode &errorCode);
static const Normalizer2 *getFCCInstance(UErrorCode &errorCode);
static const Normalizer2 *getNFKCInstance(UErrorCode &errorCode);
static const Normalizer2 *getNFKDInstance(UErrorCode &errorCode);
static const Normalizer2 *getNFKC_CFInstance(UErrorCode &errorCode);
static const Normalizer2 *getNoopInstance(UErrorCode &errorCode);
static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode);

View file

@ -41,6 +41,7 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_LOCALE,
UCLN_COMMON_LOCALE_AVAILABLE,
UCLN_COMMON_ULOC,
UCLN_COMMON_LOADED_NORMALIZER2,
UCLN_COMMON_NORMALIZER2,
UCLN_COMMON_USET,
UCLN_COMMON_UNAMES,

View file

@ -603,7 +603,7 @@ unorm_compare(const UChar *s1, int32_t length1,
if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
const Normalizer2 *n2;
if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
n2=Normalizer2Factory::getNFDInstance(*pErrorCode);
n2=Normalizer2::getNFDInstance(*pErrorCode);
} else {
n2=Normalizer2Factory::getFCDInstance(*pErrorCode);
}

View file

@ -106,7 +106,7 @@ static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) {
static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
UnicodeString nfd;
UErrorCode errorCode=U_ZERO_ERROR;
const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode);
const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode);
if(U_FAILURE(errorCode)) {
return FALSE;
}
@ -569,7 +569,7 @@ u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *p
// (What could be useful is a custom normalization table that combines
// case folding and NFKC.)
// For the derivation, see Unicode's DerivedNormalizationProps.txt.
const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode);
const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode);
const UCaseProps *csp=ucase_getSingleton();
if(U_FAILURE(*pErrorCode)) {
return 0;

View file

@ -243,7 +243,7 @@ package390: $(OUTTMPDIR)/icudata390.lst $(PKGDATA_LIST) ./icupkg.inc packagedata
# 2010-dec Removed pnames.icu.
# These are now hardcoded in ICU4C and only loaded in ICU4J.
#
DAT_FILES_SHORT=unames.icu cnvalias.icu coll/ucadata.icu nfc.nrm nfkc.nrm nfkc_cf.nrm uts46.nrm
DAT_FILES_SHORT=unames.icu cnvalias.icu coll/ucadata.icu nfkc.nrm nfkc_cf.nrm uts46.nrm
DAT_FILES=$(DAT_FILES_SHORT:%=$(BUILDDIR)/%)
## BRK files
@ -431,7 +431,7 @@ COLL_FILES_LIST=$(COLLATION_FILES_SHORT) $(COLLATION_INDEX_RES_SHORT)
BRK_FILES_LIST=$(BRK_FILES_SHORT) $(BRK_RES_FILES_SHORT) $(BRK_RES_INDEX_RES_SHORT) $(DICT_FILES_SHORT)
LOCALE_FILES_LIST= $(RES_FILES_SHORT) $(LANG_FILES_SHORT) $(REGION_FILES_SHORT) $(ZONE_FILES_SHORT)
MISC_FILES_LIST=$(DAT_FILES_SHORT) $(CNV_FILES_SHORT) $(CNV_FILES_SHORT_SPECIAL) $(CURR_FILES_SHORT) $(RBNF_FILES_SHORT) $(RBNF_INDEX_RES_SHORT) $(TRANSLIT_FILES_SHORT) $(SPREP_FILES_SHORT) $(CFU_FILES_SHORT)
UNI_CORE_DATA=pnames.icu uprops.icu ucase.icu ubidi.icu
UNI_CORE_DATA=pnames.icu uprops.icu ucase.icu ubidi.icu nfc.nrm
UNI_CORE_TARGET_DATA=$(UNI_CORE_DATA:%=$(BUILDDIR)/%)
ifneq ($(INCLUDE_UNI_CORE_DATA),)
@ -815,7 +815,7 @@ ICU4J_TZDATA_PATHS=$(ICU4J_TZDATA_FILES:%="$(ICU4J_DATA_DIRNAME)/%.res")
generate-data: build-dir packagedata $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat uni-core-data
mkdir -p $(OUTDIR)/icu4j/$(ICU4J_DATA_DIRNAME)
mkdir -p $(OUTDIR)/icu4j/tzdata/$(ICU4J_DATA_DIRNAME)
echo pnames.icu ubidi.icu ucase.icu uprops.icu > $(OUTDIR)/icu4j/add.txt
echo $(UNI_CORE_DATA) > $(OUTDIR)/icu4j/add.txt
$(INVOKE) $(TOOLBINDIR)/icupkg $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat $(OUTDIR)/icu4j/$(ICUDATA_BASENAME_VERSION)b.dat -a $(OUTDIR)/icu4j/add.txt -s $(BUILDDIR) -x '*' -tb -d $(OUTDIR)/icu4j/$(ICU4J_DATA_DIRNAME)
mv $(ICU4J_TZDATA_PATHS:%=$(OUTDIR)/icu4j/%) "$(OUTDIR)/icu4j/tzdata/$(ICU4J_DATA_DIRNAME)"

View file

@ -489,7 +489,7 @@ ALL : GODATA "$(ICU_LIB_TARGET)" "$(TESTDATAOUT)\testdata.dat"
# 2010-dec Removed pnames.icu.
# Command line:
# C:\svn\icuproj\icu\trunk\source\data>nmake -f makedata.mak ICUMAKE=C:\svn\icuproj\icu\trunk\source\data\ CFG=x86\Debug uni-core-data
uni-core-data: GODATA "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
uni-core-data: GODATA "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\nfc.nrm"
@echo Unicode .icu files built to "$(ICUBLD_PKG)"
# Build the ICU4J icudata.jar and testdata.jar.
@ -502,7 +502,7 @@ ICU4J_TZDATA_PATHS=$(ICU4J_DATA_DIRNAME)\zoneinfo64.res $(ICU4J_DATA_DIRNAME)\me
generate-data: GODATA "$(ICUOUT)\$(ICUPKG).dat" uni-core-data
if not exist "$(ICUOUT)\icu4j\$(ICU4J_DATA_DIRNAME)" mkdir "$(ICUOUT)\icu4j\$(ICU4J_DATA_DIRNAME)"
if not exist "$(ICUOUT)\icu4j\tzdata\$(ICU4J_DATA_DIRNAME)" mkdir "$(ICUOUT)\icu4j\tzdata\$(ICU4J_DATA_DIRNAME)"
echo pnames.icu ubidi.icu ucase.icu uprops.icu > "$(ICUOUT)\icu4j\add.txt"
echo pnames.icu ubidi.icu ucase.icu uprops.icu nfc.nrm > "$(ICUOUT)\icu4j\add.txt"
"$(ICUPBIN)\icupkg" "$(ICUOUT)\$(ICUPKG).dat" "$(ICUOUT)\icu4j\$(U_ICUDATA_NAME)b.dat" -a "$(ICUOUT)\icu4j\add.txt" -s "$(ICUBLD_PKG)" -x * -tb -d "$(ICUOUT)\icu4j\$(ICU4J_DATA_DIRNAME)"
@for %f in ($(ICU4J_TZDATA_PATHS)) do @move "$(ICUOUT)\icu4j\%f" "$(ICUOUT)\icu4j\tzdata\$(ICU4J_DATA_DIRNAME)"
@ -602,7 +602,7 @@ icu4j-data-install :
copy "$(ICUTMP)\$(ICUPKG).dat" "$(ICUOUT)\$(U_ICUDATA_NAME)$(U_ICUDATA_ENDIAN_SUFFIX).dat"
-@erase "$(ICUTMP)\$(ICUPKG).dat"
!ELSE
"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) $(CNV_FILES_SPECIAL) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\nfc.nrm" "$(ICUBLD_PKG)\nfkc.nrm" "$(ICUBLD_PKG)\nfkc_cf.nrm" "$(ICUBLD_PKG)\uts46.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" $(CURR_RES_FILES) $(LANG_RES_FILES) $(REGION_RES_FILES) $(ZONE_RES_FILES) $(BRK_FILES) $(BRK_DICT_FILES) $(BRK_RES_FILES) $(ALL_RES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu"
"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) $(CNV_FILES_SPECIAL) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\nfkc.nrm" "$(ICUBLD_PKG)\nfkc_cf.nrm" "$(ICUBLD_PKG)\uts46.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" $(CURR_RES_FILES) $(LANG_RES_FILES) $(REGION_RES_FILES) $(ZONE_RES_FILES) $(BRK_FILES) $(BRK_DICT_FILES) $(BRK_RES_FILES) $(ALL_RES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu"
@echo Building icu data
cd "$(ICUBLD_PKG)"
"$(ICUPBIN)\pkgdata" $(COMMON_ICUDATA_ARGUMENTS) <<"$(ICUTMP)\icudata.lst"
@ -610,7 +610,6 @@ unames.icu
confusables.cfu
$(ICUCOL)\ucadata.icu
cnvalias.icu
nfc.nrm
nfkc.nrm
nfkc_cf.nrm
uts46.nrm
@ -1006,10 +1005,9 @@ $(UCM_SOURCE_SPECIAL): {"$(ICUTOOLS)\makeconv\$(CFG)"}makeconv.exe
# This used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
# This data is now hard coded as a part of the library.
# See Jitterbug 4497 for details.
$(MISC_SOURCE) $(RB_FILES) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(BRK_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD_PKG)\nfc.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu"
$(MISC_SOURCE) $(RB_FILES) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(BRK_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu"
# This used to depend on "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
# These are now hardcoded in ICU4C and only loaded in ICU4J.
$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\nfc.nrm"
$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu"
!ENDIF

View file

@ -236,13 +236,15 @@ http://www.unicode.org/reports/tr44/tr44-13.html
~/svn.icutools/trunk/src/unicode$ py/parsescriptmetadata.py $ICU_SRC_DIR/source/common/unicode/uscript.h ~/svn.cldr/trunk/common/properties/scriptMetadata.txt
* generate normalization data files
- ~/svn.icu/uni70/dbg$ export LD_LIBRARY_PATH=~/svn.icu/uni70/dbg/lib
- ~/svn.icu/uni70/dbg$ SRC_DATA_IN=$ICU_SRC_DIR/source/data/in
- ~/svn.icu/uni70/dbg$ UNIDATA=$ICU_SRC_DIR/source/data/unidata
- ~/svn.icu/uni70/dbg$ bin/gennorm2 -o $SRC_DATA_IN/nfc.nrm -s $UNIDATA/norm2 nfc.txt
- ~/svn.icu/uni70/dbg$ bin/gennorm2 -o $SRC_DATA_IN/nfkc.nrm -s $UNIDATA/norm2 nfc.txt nfkc.txt
- ~/svn.icu/uni70/dbg$ bin/gennorm2 -o $SRC_DATA_IN/nfkc_cf.nrm -s $UNIDATA/norm2 nfc.txt nfkc.txt nfkc_cf.txt
- ~/svn.icu/uni70/dbg$ bin/gennorm2 -o $SRC_DATA_IN/uts46.nrm -s $UNIDATA/norm2 nfc.txt uts46.txt
- cd $ICU_ROOT/dbg
- export LD_LIBRARY_PATH=$ICU_ROOT/dbg/lib
- SRC_DATA_IN=$ICU_SRC_DIR/source/data/in
- UNIDATA=$ICU_SRC_DIR/source/data/unidata
- bin/gennorm2 -o $ICU_SRC_DIR/source/common/norm2_nfc_data.h -s $UNIDATA/norm2 nfc.txt --csource
- bin/gennorm2 -o $SRC_DATA_IN/nfc.nrm -s $UNIDATA/norm2 nfc.txt
- bin/gennorm2 -o $SRC_DATA_IN/nfkc.nrm -s $UNIDATA/norm2 nfc.txt nfkc.txt
- bin/gennorm2 -o $SRC_DATA_IN/nfkc_cf.nrm -s $UNIDATA/norm2 nfc.txt nfkc.txt nfkc_cf.txt
- bin/gennorm2 -o $SRC_DATA_IN/uts46.nrm -s $UNIDATA/norm2 nfc.txt uts46.txt
* build ICU (make install)
so that the tools build can pick up the new definitions from the installed header files.

View file

@ -2685,7 +2685,7 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
UCOL_SHIFTED;
result->variableTop = ucol_getVariableTop(collator, status);
result->nfd = Normalizer2Factory::getNFDInstance(*status);
result->nfd = Normalizer2::getNFDInstance(*status);
if (U_FAILURE(*status)) {
uprv_free(result);

View file

@ -1368,14 +1368,14 @@ static const struct {
* to testdata) for code coverage in tests.
* See Jitterbug 4497.
*
* ICU4C 4.4 adds normalization data files again, e.g., nfc.nrm.
* ICU4C 4.4 adds normalization data files again, e.g., nfkc.nrm.
*/
{"uprops", "icu", uprops_swap},
{"ucase", "icu", ucase_swap},
{"ubidi", "icu", ubidi_swap},
#endif
#if !UCONFIG_NO_NORMALIZATION && !UCONFIG_ONLY_COLLATION
{"nfc", "nrm", unorm2_swap},
{"nfkc", "nrm", unorm2_swap},
{"confusables", "cfu", uspoof_swap},
#endif
{"unames", "icu", uchar_swapNames}

View file

@ -1729,7 +1729,7 @@ void CollationTest::TestDataDriven() {
IcuTestErrorCode errorCode(*this, "TestDataDriven");
fcd = Normalizer2Factory::getFCDInstance(errorCode);
nfd = Normalizer2Factory::getNFDInstance(errorCode);
nfd = Normalizer2::getNFDInstance(errorCode);
if(errorCode.logDataIfFailureAndReset("Normalizer2Factory::getFCDInstance() or getNFDInstance()")) {
return;
}

View file

@ -1123,7 +1123,7 @@ BasicNormalizerTest::TestCompare() {
}
// test all of these precomposed characters
const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode);
const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode);
UnicodeSetIterator it(set);
while(it.next() && !it.isString()) {
UChar32 c=it.getCodepoint();
@ -1484,9 +1484,9 @@ BasicNormalizerTest::TestCustomFCC() {
void
BasicNormalizerTest::TestFilteredNormalizer2Coverage() {
UErrorCode errorCode = U_ZERO_ERROR;
const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode);
const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode);
if (U_FAILURE(errorCode)) {
dataerrln("Normalizer2Factory::getNFCInstance() call failed - %s", u_errorName(status));
dataerrln("Normalizer2::getNFCInstance() call failed - %s", u_errorName(status));
return;
}
UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff\\u0310-\\u031f]"), errorCode);

View file

@ -58,6 +58,7 @@ enum {
SOURCEDIR,
OUTPUT_FILENAME,
UNICODE_VERSION,
WRITE_C_SOURCE,
OPT_FAST
};
@ -69,6 +70,7 @@ static UOption options[]={
UOPTION_SOURCEDIR,
UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG),
UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
UOPTION_DEF("csource", '\1', UOPT_NO_ARG),
UOPTION_DEF("fast", '\1', UOPT_NO_ARG)
};
@ -100,7 +102,7 @@ main(int argc, char* argv[]) {
"Usage: %s [-options] infiles+ -o outputfilename\n"
"\n"
"Reads the infiles with normalization data and\n"
"creates a binary file (outputfilename) with the data.\n"
"creates a binary or C source file (outputfilename) with the data.\n"
"\n",
argv[0]);
fprintf(stderr,
@ -111,9 +113,10 @@ main(int argc, char* argv[]) {
"\t-u or --unicode Unicode version, followed by the version like 5.2.0\n");
fprintf(stderr,
"\t-s or --sourcedir source directory, followed by the path\n"
"\t-o or --output output filename\n");
"\t-o or --output output filename\n"
"\t --csource writes a C source file with initializers\n");
fprintf(stderr,
"\t --fast optimize the .nrm file for fast normalization,\n"
"\t --fast optimize the data for fast normalization,\n"
"\t which might increase its size (Writes fully decomposed\n"
"\t regular mappings instead of delta mappings.\n"
"\t You should measure the runtime speed to make sure that\n"
@ -174,7 +177,11 @@ main(int argc, char* argv[]) {
filename.truncate(pathLength);
}
builder->writeBinaryFile(options[OUTPUT_FILENAME].value);
if(options[WRITE_C_SOURCE].doesOccur) {
builder->writeCSourceFile(options[OUTPUT_FILENAME].value);
} else {
builder->writeBinaryFile(options[OUTPUT_FILENAME].value);
}
return errorCode.get();

View file

@ -33,12 +33,14 @@
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/ustring.h"
#include "charstr.h"
#include "hash.h"
#include "normalizer2impl.h"
#include "toolutil.h"
#include "unewdata.h"
#include "utrie2.h"
#include "uvectr32.h"
#include "writesrc.h"
#if !UCONFIG_NO_NORMALIZATION
@ -168,7 +170,8 @@ enumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value
U_CDECL_END
Normalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) :
phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL) {
phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL),
norm16TrieLength(0) {
memset(unicodeVersion, 0, sizeof(unicodeVersion));
normTrie=utrie2_open(0, 0, &errorCode);
normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm));
@ -1143,23 +1146,15 @@ void Normalizer2DataBuilder::processData() {
if(minCP>=0x10000) {
indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=U16_LEAD(minCP);
}
}
void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
processData();
IcuToolErrorCode errorCode("gennorm2/writeBinaryFile()");
utrie2_freeze(norm16Trie, UTRIE2_16_VALUE_BITS, errorCode);
int32_t norm16TrieLength=utrie2_serialize(norm16Trie, NULL, 0, errorCode);
norm16TrieLength=utrie2_serialize(norm16Trie, NULL, 0, errorCode);
if(errorCode.get()!=U_BUFFER_OVERFLOW_ERROR) {
fprintf(stderr, "gennorm2 error: unable to freeze/serialize the normalization trie - %s\n",
errorCode.errorName());
exit(errorCode.reset());
}
errorCode.reset();
LocalArray<uint8_t> norm16TrieBytes(new uint8_t[norm16TrieLength]);
utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode);
errorCode.assertSuccess();
int32_t offset=(int32_t)sizeof(indexes);
indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]=offset;
@ -1192,6 +1187,16 @@ void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
u_versionFromString(unicodeVersion, U_UNICODE_VERSION);
}
memcpy(dataInfo.dataVersion, unicodeVersion, 4);
}
void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
processData();
IcuToolErrorCode errorCode("gennorm2/writeBinaryFile()");
LocalArray<uint8_t> norm16TrieBytes(new uint8_t[norm16TrieLength]);
utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode);
errorCode.assertSuccess();
UNewDataMemory *pData=
udata_create(NULL, NULL, filename, &dataInfo,
haveCopyright ? U_COPYRIGHT_STRING : NULL, errorCode);
@ -1209,6 +1214,7 @@ void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
fprintf(stderr, "gennorm2: error %s writing the output file\n", errorCode.errorName());
exit(errorCode.reset());
}
int32_t totalSize=indexes[Normalizer2Impl::IX_TOTAL_SIZE];
if(writtenSize!=totalSize) {
fprintf(stderr, "gennorm2 error: written size %ld != calculated size %ld\n",
(long)writtenSize, (long)totalSize);
@ -1216,6 +1222,74 @@ void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
}
}
void
Normalizer2DataBuilder::writeCSourceFile(const char *filename) {
processData();
IcuToolErrorCode errorCode("gennorm2/writeCSourceFile()");
const char *basename=findBasename(filename);
CharString path(filename, (int32_t)(basename-filename), errorCode);
CharString dataName(basename, errorCode);
const char *extension=strrchr(basename, '.');
if(extension!=NULL) {
dataName.truncate((int32_t)(extension-basename));
}
errorCode.assertSuccess();
LocalArray<uint8_t> norm16TrieBytes(new uint8_t[norm16TrieLength]);
utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode);
errorCode.assertSuccess();
FILE *f=usrc_create(path.data(), basename, "icu/source/tools/gennorm2/n2builder.cpp");
if(f==NULL) {
fprintf(stderr, "gennorm2/writeCSourceFile() error: unable to create the output file %s\n",
filename);
exit(U_FILE_ACCESS_ERROR);
return;
}
char line[100];
sprintf(line, "static const UVersionInfo %s_formatVersion={", dataName.data());
usrc_writeArray(f, line, dataInfo.formatVersion, 8, 4, "};\n");
sprintf(line, "static const UVersionInfo %s_dataVersion={", dataName.data());
usrc_writeArray(f, line, dataInfo.dataVersion, 8, 4, "};\n\n");
sprintf(line, "static const int32_t %s_indexes[Normalizer2Impl::IX_COUNT]={\n",
dataName.data());
usrc_writeArray(f,
line,
indexes, 32, Normalizer2Impl::IX_COUNT,
"\n};\n\n");
sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", dataName.data());
usrc_writeUTrie2Arrays(f,
line, NULL,
norm16Trie,
"\n};\n\n");
sprintf(line, "static const uint16_t %s_extraData[%%ld]={\n", dataName.data());
usrc_writeArray(f,
line,
extraData.getBuffer(), 16, extraData.length(),
"\n};\n\n");
sprintf(line, "static const uint8_t %s_smallFCD[%%ld]={\n", dataName.data());
usrc_writeArray(f,
line,
smallFCD, 8, sizeof(smallFCD),
"\n};\n\n");
/*fputs( // TODO
"static const UCaseProps %s_singleton={\n"
" NULL,\n"
" %s_indexes,\n"
" %s_extraData,\n"
" %s_smallFCD,\n",
f);*/
sprintf(line, "static const UTrie2 %s_trie {\n", dataName.data());
char line2[100];
sprintf(line2, "%s_trieIndex", dataName.data());
usrc_writeUTrie2Struct(f,
line,
norm16Trie, line2, NULL,
"};\n");
fclose(f);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2009-2011, International Business Machines
* Copyright (C) 2009-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -64,6 +64,7 @@ public:
void setUnicodeVersion(const char *v);
/**
 * Processes the collected data and writes it as a binary data file
 * created with udata_create().
 * Exits the process if the file cannot be written or if the written size
 * differs from the calculated total size.
 * @param filename name of the output file
 */
void writeBinaryFile(const char *filename);
/**
 * Processes the collected data and writes it as a C source file with
 * static initializers (versions, indexes, trie, extra data, smallFCD).
 * The generated identifiers are prefixed with the base name of the output
 * file without its extension.
 * Exits the process if the output file cannot be created.
 * @param filename name of the output file
 */
void writeCSourceFile(const char *filename);
private:
friend class CompositionBuilder;
@ -110,6 +111,7 @@ private:
int32_t indexes[Normalizer2Impl::IX_COUNT];
UTrie2 *norm16Trie;
int32_t norm16TrieLength;
UnicodeString extraData;
uint8_t smallFCD[0x100];