mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 13:35:32 +00:00
ICU-9573 Removing Boyer-Moore string search implementation. CollData has moved from i18n package to intltest.
X-SVN-Rev: 32994
This commit is contained in:
parent
49d85a9d59
commit
7586fcf7d4
18 changed files with 106 additions and 3403 deletions
|
@ -1,5 +1,5 @@
|
|||
Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
# Visual Studio 2010
|
||||
# Visual C++ Express 2010
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcxproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cintltst", "..\test\cintltst\cintltst.vcxproj", "{3D1246AE-1B32-479B-BECA-AEFA97BE2321}"
|
||||
|
@ -52,7 +52,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "iotest", "..\test\iotest\io
|
|||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icupkg", "..\tools\icupkg\icupkg.vcxproj", "{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}"
|
||||
EndProject
|
||||
Project("{9D4211F7-2C77-439C-82F0-30A4E43BA569}") = "gendict", "..\tools\gendict\gendict.vcxproj", "{9D4211F7-2C77-439C-82F0-30A4E43BA569}"
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gendict", "..\tools\gendict\gendict.vcxproj", "{9D4211F7-2C77-439C-82F0-30A4E43BA569}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "letest", "..\test\letest\letest.vcxproj", "{67351485-4D18-4245-BE39-A7EF0675ACD2}"
|
||||
EndProject
|
||||
|
@ -65,10 +65,6 @@ EndProject
|
|||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testplug", "..\tools\icuinfo\testplug.vcxproj", "{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SubversionScc) = preSolution
|
||||
Svn-Managed = True
|
||||
Manager = AnkhSVN - Subversion Support for Visual Studio
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Debug|x64 = Debug|x64
|
||||
|
@ -174,12 +170,10 @@ Global
|
|||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.Build.0 = Release|x64
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.Build.0 = Debug|x64
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.ActiveCfg = Debug|Win32
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.Build.0 = Release|Win32
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Release|x64.ActiveCfg = Release|x64
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Release|x64.Build.0 = Release|x64
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Release|x64.ActiveCfg = Release|Win32
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.ActiveCfg = Debug|x64
|
||||
|
@ -336,4 +330,8 @@ Global
|
|||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(SubversionScc) = preSolution
|
||||
Svn-Managed = True
|
||||
Manager = AnkhSVN - Subversion Support for Visual Studio
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
|
|
@ -82,7 +82,7 @@ ulocdata.o measfmt.o currfmt.o curramt.o currunit.o measure.o utmscale.o \
|
|||
csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o \
|
||||
wintzimpl.o windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o zonemeta.o \
|
||||
upluralrules.o plurrule.o plurfmt.o selfmt.o dtitvfmt.o dtitvinf.o udateintervalformat.o \
|
||||
tmunit.o tmutamt.o tmutfmt.o colldata.o bmsearch.o bms.o currpinf.o \
|
||||
tmunit.o tmutamt.o tmutfmt.o currpinf.o \
|
||||
uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o uspoof_wsconf.o decfmtst.o smpdtfst.o \
|
||||
ztrans.o zrule.o vzone.o fphdlimp.o fpositer.o locdspnm.o \
|
||||
decNumber.o decContext.o alphaindex.o tznames.o tznames_impl.o tzgnames.o \
|
||||
|
|
|
@ -1,155 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2008-2011, International Business Machines Corporation and Others.
|
||||
* All rights reserved.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "unicode/bms.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/colldata.h"
|
||||
#include "unicode/bmsearch.h"
|
||||
|
||||
|
||||
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
|
||||
//#define USE_SAFE_CASTS
|
||||
#ifdef USE_SAFE_CASTS
|
||||
#define STATIC_CAST(type,value) static_cast<type>(value)
|
||||
#define CONST_CAST(type,value) const_cast<type>(value)
|
||||
#else
|
||||
#define STATIC_CAST(type,value) (type) (value)
|
||||
#define CONST_CAST(type,value) (type) (value)
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/*
 * C entry point: opens a CollData for the given collator and returns it
 * as an opaque UCD handle. Any error is reported through *status.
 */
U_CAPI UCD * U_EXPORT2
ucd_open(UCollator *coll, UErrorCode *status)
{
    CollData *collData = CollData::open(coll, *status);

    return STATIC_CAST(UCD *, collData);
}
|
||||
|
||||
/*
 * C entry point: releases a UCD handle through CollData::close().
 * A NULL handle is accepted and ignored.
 */
U_CAPI void U_EXPORT2
ucd_close(UCD *ucd)
{
    if (ucd == NULL) {
        return;
    }

    CollData::close(STATIC_CAST(CollData *, ucd));
}
|
||||
|
||||
/*
 * C entry point: returns the collator held by the CollData behind the
 * given UCD handle.
 *
 * Returns NULL when ucd is NULL, mirroring the NULL tolerance of
 * ucd_close(), instead of dereferencing a NULL pointer.
 */
U_CAPI UCollator * U_EXPORT2
ucd_getCollator(UCD *ucd)
{
    if (ucd == NULL) {
        return NULL;
    }

    CollData *data = STATIC_CAST(CollData *, ucd);

    return data->getCollator();
}
|
||||
|
||||
/* C entry point: forwards to CollData::freeCollDataCache(). */
U_CAPI void U_EXPORT2
ucd_freeCache()
{
    CollData::freeCollDataCache();

    return;
}
|
||||
|
||||
/* C entry point: forwards to CollData::flushCollDataCache(). */
U_CAPI void U_EXPORT2
ucd_flushCache()
{
    CollData::flushCollDataCache();

    return;
}
|
||||
|
||||
struct BMS
|
||||
{
|
||||
BoyerMooreSearch *bms;
|
||||
const UnicodeString *targetString;
|
||||
};
|
||||
|
||||
/*
 * C entry point: allocates a BMS handle, copies the pattern (and the target,
 * when one is supplied) into UnicodeStrings and builds the BoyerMooreSearch.
 *
 * Returns NULL only when the handle itself cannot be allocated. On later
 * allocation failures *status is set to U_MEMORY_ALLOCATION_ERROR and the
 * partially built handle is still returned, so the caller must release it
 * with bms_close().
 */
U_CAPI BMS * U_EXPORT2
bms_open(UCD *ucd,
         const UChar *pattern, int32_t patternLength,
         const UChar *target, int32_t targetLength,
         UErrorCode *status)
{
    BMS *handle = STATIC_CAST(BMS *, uprv_malloc(sizeof(BMS)));

    if (handle == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    /* Start from a fully defined, empty handle. */
    handle->bms = NULL;
    handle->targetString = NULL;

    if (target != NULL) {
        handle->targetString = new UnicodeString(target, targetLength);

        if (handle->targetString == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return handle;
        }
    }

    CollData *collData = (CollData *) ucd;
    UnicodeString patternText(pattern, patternLength);

    handle->bms = new BoyerMooreSearch(collData, patternText, handle->targetString, *status);

    if (handle->bms == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
    }

    return handle;
}
|
||||
|
||||
/*
 * C entry point: destroys the search object and the owned target string,
 * then frees the handle itself.
 *
 * A NULL handle is ignored, matching ucd_close(); previously it was
 * dereferenced.
 */
U_CAPI void U_EXPORT2
bms_close(BMS *bms)
{
    if (bms == NULL) {
        return;
    }

    delete bms->bms;

    delete bms->targetString;

    uprv_free(bms);
}
|
||||
|
||||
/* C entry point: forwards to BoyerMooreSearch::empty() on the wrapped search object. */
U_CAPI UBool U_EXPORT2
bms_empty(BMS *bms)
{
    BoyerMooreSearch *searcher = bms->bms;

    return searcher->empty();
}
|
||||
|
||||
/* C entry point: returns the CollData used by the wrapped search object as an opaque UCD handle. */
U_CAPI UCD * U_EXPORT2
bms_getData(BMS *bms)
{
    BoyerMooreSearch *searcher = bms->bms;

    return STATIC_CAST(UCD *, searcher->getData());
}
|
||||
|
||||
/*
 * C entry point: forwards to BoyerMooreSearch::search().
 * The match bounds are written through the start and end pointers,
 * which are dereferenced unconditionally.
 */
U_CAPI UBool U_EXPORT2
bms_search(BMS *bms, int32_t offset, int32_t *start, int32_t *end)
{
    BoyerMooreSearch *searcher = bms->bms;

    return searcher->search(offset, *start, *end);
}
|
||||
|
||||
/*
 * C entry point: replaces the target text of an open BMS handle.
 *
 * The previous copy of the target is deleted, a new copy is made when
 * target is non-NULL, and the search object is pointed at the new string
 * (or at NULL). Does nothing when *status already indicates a failure.
 *
 * If the copy cannot be allocated, the search object is still reset to
 * "no target" so that it does not keep referring to the deleted string,
 * and *status is then set to U_MEMORY_ALLOCATION_ERROR.
 */
U_CAPI void U_EXPORT2
bms_setTargetString(BMS *bms, const UChar *target, int32_t targetLength, UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return;
    }

    /* deleting NULL is a no-op, so no separate check is needed */
    delete bms->targetString;
    bms->targetString = NULL;

    UBool allocationFailed = FALSE;

    if (target != NULL) {
        bms->targetString = new UnicodeString(target, targetLength);

        if (bms->targetString == NULL) {
            allocationFailed = TRUE;
        }
    }

    /*
     * Must run while *status is still a success code: the search object
     * ignores the call otherwise and would keep the freed string.
     */
    bms->bms->setTargetString(bms->targetString, *status);

    if (allocationFailed && U_SUCCESS(*status)) {
        *status = U_MEMORY_ALLOCATION_ERROR;
    }
}
|
||||
|
||||
#endif
|
|
@ -1,827 +0,0 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1996-2012, International Business Machines *
|
||||
* Corporation and others. All Rights Reserved. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/usearch.h"
|
||||
|
||||
#include "cmemory.h"
|
||||
#include "unicode/coll.h"
|
||||
#include "unicode/tblcoll.h"
|
||||
#include "unicode/coleitr.h"
|
||||
#include "unicode/ucoleitr.h"
|
||||
|
||||
#include "unicode/regex.h" // TODO: make conditional on regexp being built.
|
||||
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "hash.h"
|
||||
#include "uhash.h"
|
||||
#include "ucol_imp.h"
|
||||
#include "normalizer2impl.h"
|
||||
|
||||
#include "unicode/colldata.h"
|
||||
#include "unicode/bmsearch.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
|
||||
#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
|
||||
#define DELETE_ARRAY(array) uprv_free((void *) (array))
|
||||
|
||||
|
||||
struct CEI
|
||||
{
|
||||
uint32_t order;
|
||||
int32_t lowOffset;
|
||||
int32_t highOffset;
|
||||
};
|
||||
|
||||
class Target : public UMemory
|
||||
{
|
||||
public:
|
||||
Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status);
|
||||
~Target();
|
||||
|
||||
void setTargetString(const UnicodeString *target);
|
||||
|
||||
const CEI *nextCE(int32_t offset);
|
||||
const CEI *prevCE(int32_t offset);
|
||||
|
||||
int32_t stringLength();
|
||||
UChar charAt(int32_t offset);
|
||||
|
||||
UBool isBreakBoundary(int32_t offset);
|
||||
int32_t nextBreakBoundary(int32_t offset);
|
||||
int32_t nextSafeBoundary(int32_t offset);
|
||||
|
||||
UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end);
|
||||
|
||||
void setOffset(int32_t offset);
|
||||
void setLast(int32_t last);
|
||||
int32_t getOffset();
|
||||
|
||||
private:
|
||||
CEI *ceb;
|
||||
int32_t bufferSize;
|
||||
int32_t bufferMin;
|
||||
int32_t bufferMax;
|
||||
|
||||
uint32_t strengthMask;
|
||||
UCollationStrength strength;
|
||||
uint32_t variableTop;
|
||||
UBool toShift;
|
||||
UCollator *coll;
|
||||
const Normalizer2 &nfd;
|
||||
|
||||
const UnicodeString *targetString;
|
||||
const UChar *targetBuffer;
|
||||
int32_t targetLength;
|
||||
|
||||
UCollationElements *elements;
|
||||
UBreakIterator *charBreakIterator;
|
||||
};
|
||||
|
||||
/*
 * Captures the collator's strength, alternate handling and variable top,
 * sizes and allocates the collation element buffer from the pattern length
 * and the collator's largest expansion, attaches the target string (when
 * given) and finally derives the strength mask.
 *
 * On allocation failure status is set to U_MEMORY_ALLOCATION_ERROR and the
 * constructor returns before the target is attached or the mask is built.
 */
Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
    : bufferSize(0), bufferMin(0), bufferMax(0),
      strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
      nfd(*Normalizer2Factory::getNFDInstance(status)),
      targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
{
    strength    = ucol_getStrength(coll);
    toShift     = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
    variableTop = ucol_getVariableTop(coll, &status);

    // Scan the zero-terminated expansion size list for its largest entry.
    uint8_t maxExpansion = 0;
    const uint8_t *sizes = coll->expansionCESize;

    while (*sizes != 0) {
        if (*sizes > maxExpansion) {
            maxExpansion = *sizes;
        }

        sizes += 1;
    }

    // room for an extra character on each end, plus 4 for safety
    bufferSize = patternLength + (2 * maxExpansion) + 4;
    ceb = NEW_ARRAY(CEI, bufferSize);

    if (ceb == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (target != NULL) {
        setTargetString(target);
    }

    // Primary bits always count; secondary bits unless the strength is
    // primary; tertiary bits for every strength other than primary/secondary.
    strengthMask |= UCOL_PRIMARYORDERMASK;

    if (strength != UCOL_PRIMARY) {
        strengthMask |= UCOL_SECONDARYORDERMASK;

        if (strength != UCOL_SECONDARY) {
            strengthMask |= UCOL_TERTIARYORDERMASK;
        }
    }
}
|
||||
|
||||
/* Closes both iterators and frees the collation element buffer. */
Target::~Target()
{
    // iterators first ...
    ubrk_close(charBreakIterator);
    ucol_closeElements(elements);

    // ... then the CE buffer
    DELETE_ARRAY(ceb);
}
|
||||
|
||||
void Target::setTargetString(const UnicodeString *target)
|
||||
{
|
||||
if (charBreakIterator != NULL) {
|
||||
ubrk_close(charBreakIterator);
|
||||
ucol_closeElements(elements);
|
||||
}
|
||||
|
||||
targetString = target;
|
||||
|
||||
if (targetString != NULL) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
targetBuffer = targetString->getBuffer();
|
||||
targetLength = targetString->length();
|
||||
|
||||
elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status);
|
||||
ucol_forceHanImplicit(elements, &status);
|
||||
|
||||
charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
|
||||
targetBuffer, targetLength, &status);
|
||||
} else {
|
||||
targetBuffer = NULL;
|
||||
targetLength = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Returns the buffered collation element at index offset, reading forwards.
 *
 * An index inside [bufferMin, bufferMax) is served from the buffer. The only
 * other index accepted is bufferMax itself, and only while the buffer has
 * room; anything else yields NULL. To fill the new slot the iterator is
 * advanced past ignorable elements (including shifted variable elements
 * below quaternary strength) and the first remaining element, or
 * UCOL_NULLORDER at the end of the text, is stored with the iterator offsets
 * before and after it.
 */
const CEI *Target::nextCE(int32_t offset)
{
    if (offset >= bufferMin && offset < bufferMax) {
        return &ceb[offset];
    }

    if (bufferMax >= bufferSize || offset != bufferMax) {
        return NULL;
    }

    UErrorCode status = U_ZERO_ERROR;
    int32_t before = -1, after = -1;
    uint32_t order;
    UBool cont = FALSE;

    for (;;) {
        before = ucol_getOffset(elements);
        order  = ucol_next(elements, &status);
        after  = ucol_getOffset(elements);

        if (order == (uint32_t)UCOL_NULLORDER) {
            break;
        }

        cont   = isContinuation(order);
        order &= strengthMask;

        if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
            if (strength >= UCOL_QUATERNARY) {
                order &= UCOL_PRIMARYORDERMASK;
            } else {
                order = UCOL_IGNORABLE;
            }
        }

        if (order != UCOL_IGNORABLE) {
            break;
        }
    }

    if (cont) {
        order |= UCOL_CONTINUATION_MARKER;
    }

    CEI *slot = &ceb[offset];

    slot->order      = order;
    slot->lowOffset  = before;
    slot->highOffset = after;

    bufferMax += 1;

    return slot;
}
|
||||
|
||||
/*
 * Returns the buffered collation element at index offset, reading backwards.
 *
 * Works like nextCE() but steps the iterator with ucol_previous(), so the
 * offset taken before the step becomes highOffset and the offset taken after
 * it becomes lowOffset. Returns NULL when offset is neither buffered nor the
 * next free slot, or when the buffer is full.
 */
const CEI *Target::prevCE(int32_t offset)
{
    if (offset >= bufferMin && offset < bufferMax) {
        return &ceb[offset];
    }

    if (bufferMax >= bufferSize || offset != bufferMax) {
        return NULL;
    }

    UErrorCode status = U_ZERO_ERROR;
    int32_t lower = -1, upper = -1;
    uint32_t order;
    UBool cont = FALSE;

    for (;;) {
        upper = ucol_getOffset(elements);
        order = ucol_previous(elements, &status);
        lower = ucol_getOffset(elements);

        if (order == (uint32_t)UCOL_NULLORDER) {
            break;
        }

        cont   = isContinuation(order);
        order &= strengthMask;

        if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
            if (strength >= UCOL_QUATERNARY) {
                order &= UCOL_PRIMARYORDERMASK;
            } else {
                order = UCOL_IGNORABLE;
            }
        }

        if (order != UCOL_IGNORABLE) {
            break;
        }
    }

    bufferMax += 1;

    if (cont) {
        order |= UCOL_CONTINUATION_MARKER;
    }

    CEI *slot = &ceb[offset];

    slot->order      = order;
    slot->lowOffset  = lower;
    slot->highOffset = upper;

    return slot;
}
|
||||
|
||||
/* Length of the current target string, or 0 when no target is set. */
int32_t Target::stringLength()
{
    return (targetString != NULL) ? targetLength : 0;
}
|
||||
|
||||
/*
 * Code unit of the target string at offset, or 0x0000 when no target is set.
 * The offset is not range checked.
 */
UChar Target::charAt(int32_t offset)
{
    if (targetString == NULL) {
        return 0x0000;
    }

    return targetBuffer[offset];
}
|
||||
|
||||
/*
 * Empties the collation element buffer and repositions the collation
 * element iterator at offset. The iterator's error code is discarded.
 */
void Target::setOffset(int32_t offset)
{
    bufferMin = bufferMax = 0;

    UErrorCode status = U_ZERO_ERROR;

    ucol_setOffset(elements, offset, &status);
}
|
||||
|
||||
/*
 * Resets the buffer to a single UCOL_NULLORDER entry whose low and high
 * offsets are both last, and repositions the collation element iterator
 * at last. The iterator's error code is discarded.
 */
void Target::setLast(int32_t last)
{
    CEI &sentinel = ceb[0];

    sentinel.order      = (uint32_t)UCOL_NULLORDER;
    sentinel.lowOffset  = last;
    sentinel.highOffset = last;

    bufferMin = 0;
    bufferMax = 1;

    UErrorCode status = U_ZERO_ERROR;

    ucol_setOffset(elements, last, &status);
}
|
||||
|
||||
/* Current offset of the collation element iterator. */
int32_t Target::getOffset()
{
    const int32_t current = ucol_getOffset(elements);

    return current;
}
|
||||
|
||||
/* Asks the character break iterator whether offset is a boundary. */
UBool Target::isBreakBoundary(int32_t offset)
{
    const UBool atBoundary = ubrk_isBoundary(charBreakIterator, offset);

    return atBoundary;
}
|
||||
|
||||
/* First character break boundary following offset, as reported by ubrk_following(). */
int32_t Target::nextBreakBoundary(int32_t offset)
{
    const int32_t boundary = ubrk_following(charBreakIterator, offset);

    return boundary;
}
|
||||
|
||||
/*
 * Scans forward from offset and returns the first index whose code unit is
 * a lead surrogate or is not flagged unsafe by ucol_unsafeCP() for this
 * collator. Returns the target length when no such index exists.
 */
int32_t Target::nextSafeBoundary(int32_t offset)
{
    for (int32_t index = offset; index < targetLength; index += 1) {
        const UChar ch = targetBuffer[index];

        if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) {
            return index;
        }
    }

    return targetLength;
}
|
||||
|
||||
/*
 * Identical-strength check for a candidate match.
 *
 * Below UCOL_IDENTICAL strength every candidate passes. Otherwise the target
 * range [start, end) and the pattern are both normalized to NFD and compared;
 * a normalization failure counts as "not identical".
 */
UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)
{
    if (strength < UCOL_IDENTICAL) {
        return TRUE;
    }

    // Note: We could use Normalizer::compare() or similar, but for short strings
    // which may not be in FCD it might be faster to just NFD them.
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString normalizedTarget, normalizedPattern;

    // read-only alias onto the target range, no copy is made
    nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), normalizedTarget, status);
    nfd.normalize(pattern, normalizedPattern, status);

    // return FALSE if NFD failed
    return U_SUCCESS(status) && normalizedTarget == normalizedPattern;
}
|
||||
|
||||
#define HASH_TABLE_SIZE 257
|
||||
|
||||
class BadCharacterTable : public UMemory
|
||||
{
|
||||
public:
|
||||
BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status);
|
||||
~BadCharacterTable();
|
||||
|
||||
int32_t operator[](uint32_t ce) const;
|
||||
int32_t getMaxSkip() const;
|
||||
int32_t minLengthInChars(int32_t index);
|
||||
|
||||
private:
|
||||
static int32_t hash(uint32_t ce);
|
||||
|
||||
int32_t maxSkip;
|
||||
int32_t badCharacterTable[HASH_TABLE_SIZE];
|
||||
|
||||
int32_t *minLengthCache;
|
||||
};
|
||||
|
||||
/*
 * Builds the bad-character skip table for a pattern's collation elements.
 *
 * minLengthCache[p] receives the minimum character length reported by
 * CollData::minLengthInChars() for the pattern suffix starting at CE index p,
 * clamped so the values never increase with p; minLengthCache[plen] is 0.
 * Every hash bucket starts at maxSkip (the value for the whole pattern) and
 * the bucket of each pattern CE except the last is overwritten with the
 * cached length of the suffix that follows it.
 *
 * maxSkip and all buckets are zeroed before any early return, so
 * getMaxSkip() and operator[] never read indeterminate values when the
 * pattern is empty, status already failed, or an allocation fails.
 * A non-positive CE count is treated as an empty pattern.
 *
 * On allocation failure status is set to U_MEMORY_ALLOCATION_ERROR.
 */
BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status)
    : maxSkip(0), minLengthCache(NULL)
{
    for (int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {
        badCharacterTable[j] = 0;
    }

    int32_t plen = patternCEs.size();

    // **** need a better way to deal with this ****
    if (U_FAILURE(status) || plen <= 0) {
        return;
    }

    int32_t *history = NEW_ARRAY(int32_t, plen);

    if (history == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    for (int32_t i = 0; i < plen; i += 1) {
        history[i] = -1;
    }

    minLengthCache = NEW_ARRAY(int32_t, plen + 1);

    if (minLengthCache == NULL) {
        DELETE_ARRAY(history);
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history);

    for (int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {
        badCharacterTable[j] = maxSkip;
    }

    for (int32_t p = 1; p < plen; p += 1) {
        minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history);

        // Make sure this entry is not bigger than the previous one.
        // Otherwise, we might skip too far in some cases.
        if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) {
            minLengthCache[p] = minLengthCache[p - 1];
        }
    }

    minLengthCache[plen] = 0;

    for (int32_t p = 0; p < plen - 1; p += 1) {
        badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1];
    }

    DELETE_ARRAY(history);
}
|
||||
|
||||
/* Releases the minimum-length cache (the skip table itself is an inline array). */
BadCharacterTable::~BadCharacterTable()
{
    DELETE_ARRAY(this->minLengthCache);
}
|
||||
|
||||
/* Skip distance stored in the hash bucket that ce maps to. */
int32_t BadCharacterTable::operator[](uint32_t ce) const
{
    const int32_t bucket = hash(ce);

    return badCharacterTable[bucket];
}
|
||||
|
||||
/* Returns the stored maxSkip value. */
int32_t BadCharacterTable::getMaxSkip() const
{
    return this->maxSkip;
}
|
||||
|
||||
/*
 * Cached minimum length for the pattern suffix starting at CE index.
 * The index is not range checked.
 */
int32_t BadCharacterTable::minLengthInChars(int32_t index)
{
    const int32_t *cache = minLengthCache;

    return cache[index];
}
|
||||
|
||||
/* Maps a collation element to a bucket: its primary order modulo HASH_TABLE_SIZE. */
int32_t BadCharacterTable::hash(uint32_t ce)
{
    const int32_t bucket = UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE;

    return bucket;
}
|
||||
|
||||
class GoodSuffixTable : public UMemory
|
||||
{
|
||||
public:
|
||||
GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status);
|
||||
~GoodSuffixTable();
|
||||
|
||||
int32_t operator[](int32_t offset) const;
|
||||
|
||||
private:
|
||||
int32_t *goodSuffixTable;
|
||||
};
|
||||
|
||||
/*
 * Builds the good-suffix skip table.
 *
 * First computes suff[i], the length of the longest run of CEs ending at
 * index i that matches a suffix of the pattern, then turns that into skip
 * distances expressed through the bad character table's cached minimum
 * lengths. Leaves the table NULL when status already failed or the pattern
 * has no CEs; sets U_MEMORY_ALLOCATION_ERROR when an allocation fails.
 */
GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status)
    : goodSuffixTable(NULL)
{
    const int32_t patlen = patternCEs.size();

    // **** need a better way to deal with this ****
    if (U_FAILURE(status) || patlen <= 0) {
        return;
    }

    int32_t *suff = NEW_ARRAY(int32_t, patlen);

    if (suff == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    const int32_t last    = patlen - 1;
    const int32_t maxSkip = badCharacterTable.getMaxSkip();
    int32_t matchStart = last, matchEnd = -1;

    // initialize suff
    suff[last] = patlen;

    for (int32_t i = patlen - 2; i >= 0; i -= 1) {
        // (i > matchStart) means we're inside the last suffix match we found
        // (last - matchEnd) is how far the end of that match is from end of pattern
        // (i - matchStart) is how far we are from start of that match
        // (i + last - matchEnd) is index of same character at end of pattern
        // so if any suffix match at that character doesn't extend beyond the last match,
        // it's the suffix for this character as well
        if (i > matchStart && suff[i + last - matchEnd] < i - matchStart) {
            suff[i] = suff[i + last - matchEnd];
            continue;
        }

        matchStart = matchEnd = i;

        // Walk backwards from i and from the end of the pattern in step
        // for as long as the CEs agree.
        int32_t tail = patlen;

        while (matchStart >= 0) {
            tail -= 1;

            if (!(patternCEs[matchStart] == patternCEs[tail])) {
                break;
            }

            matchStart -= 1;
        }

        suff[i] = matchEnd - matchStart;
    }

    // now build goodSuffixTable
    goodSuffixTable = NEW_ARRAY(int32_t, patlen);

    if (goodSuffixTable == NULL) {
        DELETE_ARRAY(suff);
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // initialize entries to minLengthInChars of the pattern
    for (int32_t k = 0; k < patlen; k += 1) {
        goodSuffixTable[k] = maxSkip;
    }

    int32_t prefix = 0;

    for (int32_t i = patlen - 2; i >= 0; i -= 1) {
        if (suff[i] != i + 1) {
            continue;
        }

        // this matching suffix is a prefix of the pattern
        const int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1);

        // for any mis-match before this suffix, we should skip
        // so that the front of the pattern (i.e. the prefix)
        // lines up with the front of the suffix.
        // (last - i) is the start of the suffix
        for (; prefix < last - i; prefix += 1) {
            // value of maxSkip means never set...
            if (goodSuffixTable[prefix] == maxSkip) {
                goodSuffixTable[prefix] = prefixSkip;
            }
        }
    }

    for (int32_t i = 0; i < last; i += 1) {
        goodSuffixTable[last - suff[i]] = badCharacterTable.minLengthInChars(i + 1);
    }

    DELETE_ARRAY(suff);
}
|
||||
|
||||
/* Releases the skip table. */
GoodSuffixTable::~GoodSuffixTable()
{
    DELETE_ARRAY(this->goodSuffixTable);
}
|
||||
|
||||
/* Skip distance for pattern CE index offset. The index is not range checked. */
int32_t GoodSuffixTable::operator[](int32_t offset) const
{
    const int32_t *table = goodSuffixTable;

    return table[offset];
}
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)
|
||||
|
||||
|
||||
/*
 * Reports whether the pattern produced no collation elements.
 *
 * Also returns TRUE when the CE list was never created (the constructor
 * returns early on failure and leaves patCEs NULL), rather than
 * dereferencing a NULL pointer.
 */
UBool BoyerMooreSearch::empty()
{
    return patCEs == NULL || patCEs->size() <= 0;
}
|
||||
|
||||
/* The CollData this search object was constructed with. */
CollData *BoyerMooreSearch::getData()
{
    return this->data;
}
|
||||
|
||||
/* The pattern's collation element list; NULL if construction failed before it was created. */
CEList *BoyerMooreSearch::getPatternCEs()
{
    return this->patCEs;
}
|
||||
|
||||
/* The bad-character skip table; NULL if construction failed before it was created. */
BadCharacterTable *BoyerMooreSearch::getBadCharacterTable()
{
    return this->badCharacterTable;
}
|
||||
|
||||
/* The good-suffix skip table; NULL if construction failed before it was created. */
GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable()
{
    return this->goodSuffixTable;
}
|
||||
|
||||
/*
 * Builds the pattern's CE list and both skip tables and, when targetString
 * is non-NULL, the Target wrapper for the text to search.
 *
 * Does nothing when status already indicates a failure. Construction stops
 * at the first failing step, leaving the remaining members NULL.
 *
 * Every `new` result is checked: a NULL result sets
 * U_MEMORY_ALLOCATION_ERROR, so a caller can no longer see a success status
 * on an object whose members are missing.
 */
BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString,
                                   UErrorCode &status)
    : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL)
{
    if (U_FAILURE(status)) {
        return;
    }

    UCollator *collator = data->getCollator();

    patCEs = new CEList(collator, patternString, status);

    if (patCEs == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (U_FAILURE(status)) {
        return;
    }

    badCharacterTable = new BadCharacterTable(*patCEs, data, status);

    if (badCharacterTable == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (U_FAILURE(status)) {
        return;
    }

    goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status);

    if (goodSuffixTable == NULL && U_SUCCESS(status)) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }

    if (targetString != NULL) {
        target = new Target(collator, targetString, patCEs->size(), status);

        if (target == NULL && U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }
}
|
||||
|
||||
/* Deletes the owned helper objects in reverse order of their creation. */
BoyerMooreSearch::~BoyerMooreSearch()
{
    // target wrapper
    delete target;

    // skip tables
    delete goodSuffixTable;
    delete badCharacterTable;

    // pattern collation elements
    delete patCEs;
}
|
||||
|
||||
void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status)
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (target == NULL) {
|
||||
target = new Target(data->getCollator(), targetString, patCEs->size(), status);
|
||||
} else {
|
||||
target->setTargetString(targetString);
|
||||
}
|
||||
}
|
||||
|
||||
// **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. ****
|
||||
/*
|
||||
* TODO:
|
||||
* * deal with trailing (and leading?) ignorables.
|
||||
* * Adding BoyerMooreSearch object slowed it down. How can we speed it up?
|
||||
*/
|
||||
/*
 * Searches the target for the pattern, starting at offset.
 *
 * On success returns TRUE and stores the matched range in start and end.
 * On failure returns FALSE and sets both to -1.
 *
 * Differences from the previous version, all before the main loop:
 *  - returns "no match" when the pattern CE list, either skip table or the
 *    target is missing (failed construction, or no target ever set) instead
 *    of dereferencing NULL;
 *  - the empty-pattern check runs before the skip tables are consulted;
 *  - the discarded data->getCollator() call is gone.
 * The matching loop itself is unchanged.
 *
 * NOTE(review): Target::prevCE() and Target::nextCE() can return NULL when
 * the CE buffer is full; the loop below dereferences their results without
 * checking, as it always did. Confirm the buffer sizing in Target's
 * constructor makes that unreachable.
 */
UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end)
{
    if (patCEs == NULL || badCharacterTable == NULL || goodSuffixTable == NULL || target == NULL) {
        start = end = -1;
        return FALSE;
    }

    int32_t plen = patCEs->size();

    if (plen <= 0) {
        // Searching for a zero length pattern always fails.
        start = end = -1;
        return FALSE;
    }

    int32_t tlen = target->stringLength();
    int32_t maxSkip = badCharacterTable->getMaxSkip();
    int32_t tOffset = offset + maxSkip;

    while (tOffset <= tlen) {
        int32_t pIndex = plen - 1;
        int32_t tIndex = 0;
        int32_t lIndex = 0;

        if (tOffset < tlen) {
            // **** we really want to skip ahead enough to  ****
            // **** be sure we get at least 1 non-ignorable ****
            // **** CE after the end of the pattern.        ****
            int32_t next = target->nextSafeBoundary(tOffset + 1);

            target->setOffset(next);

            for (lIndex = 0; ; lIndex += 1) {
                const CEI *cei = target->prevCE(lIndex);
                int32_t low = cei->lowOffset;
                int32_t high = cei->highOffset;

                if (high == 0 || (low < high && low <= tOffset)) {
                    if (low < tOffset) {
                        while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) {
                            lIndex -= 1;
                        }

                        if (high > tOffset) {
                            tOffset = high;
                        }
                    }

                    break;
                }
            }
        } else {
            target->setLast(tOffset);
            lIndex = 0;
        }

        tIndex = ++lIndex;

        // Iterate backward until we hit the beginning of the pattern
        while (pIndex >= 0) {
            uint32_t pce = (*patCEs)[pIndex];
            const CEI *tcei = target->prevCE(tIndex++);

            if (tcei->order != pce) {
                // There is a mismatch at this position.  Decide how far
                // over to shift the pattern, then try again.

                int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex];
#ifdef EXTRA_CAUTIOUS
                int32_t old = tOffset;
#endif

                tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1);

                if (gsOffset > tOffset) {
                    tOffset = gsOffset;
                }

#ifdef EXTRA_CAUTIOUS
                // Make sure we don't skip backwards...
                if (tOffset <= old) {
                    tOffset = old + 1;
                }
#endif

                break;
            }

            pIndex -= 1;
        }

        if (pIndex < 0) {
            // We made it back to the beginning of the pattern,
            // which means we matched it all.  Return the location.
            const CEI firstCEI = *target->prevCE(tIndex - 1);
            const CEI lastCEI  = *target->prevCE(lIndex);
            int32_t mStart   = firstCEI.lowOffset;
            int32_t minLimit = lastCEI.lowOffset;
            int32_t maxLimit = lastCEI.highOffset;
            int32_t mLimit;
            UBool found = TRUE;

            target->setOffset(/*tOffset*/maxLimit);

            const CEI nextCEI = *target->nextCE(0);

            if (nextCEI.lowOffset > maxLimit) {
                maxLimit = nextCEI.lowOffset;
            }

            if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) {
                found = FALSE;
            }

            if (! target->isBreakBoundary(mStart)) {
                found = FALSE;
            }

            if (firstCEI.lowOffset == firstCEI.highOffset) {
                found = FALSE;
            }

            mLimit = maxLimit;
            if (minLimit < maxLimit) {
                // When the last CE's low index is the same as its high index, the CE is
                // likely part of an expansion. In that case the index is located just after
                // the character corresponding to the CEs compared above. If the index is
                // already at a break boundary, moving to the next boundary would give an
                // incorrect match length when ignorable characters lie between this
                // position and the next character that produces CEs. See ticket#8482.
                if (minLimit == lastCEI.highOffset && target->isBreakBoundary(minLimit)) {
                    mLimit = minLimit;
                } else {
                    int32_t nbb = target->nextBreakBoundary(minLimit);

                    if (nbb >= lastCEI.highOffset) {
                        mLimit = nbb;
                    }
                }
            }

            if (mLimit > maxLimit) {
                found = FALSE;
            }

            if (! target->isBreakBoundary(mLimit)) {
                found = FALSE;
            }

            if (! target->isIdentical(pattern, mStart, mLimit)) {
                found = FALSE;
            }

            if (found) {
                start = mStart;
                end   = mLimit;

                return TRUE;
            }

            tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip?
        }
        // Otherwise, we're here because of a mismatch, so keep going....
    }

    // no match
    start = -1;
    end = -1;
    return FALSE;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // #if !UCONFIG_NO_COLLATION
|
|
@ -245,12 +245,9 @@
|
|||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="alphaindex.cpp" />
|
||||
<ClCompile Include="bms.cpp" />
|
||||
<ClCompile Include="bmsearch.cpp" />
|
||||
<ClCompile Include="bocsu.cpp" />
|
||||
<ClCompile Include="coleitr.cpp" />
|
||||
<ClCompile Include="coll.cpp" />
|
||||
<ClCompile Include="colldata.cpp" />
|
||||
<ClCompile Include="search.cpp" />
|
||||
<ClCompile Include="sortkey.cpp" />
|
||||
<ClCompile Include="stsearch.cpp" />
|
||||
|
@ -436,34 +433,6 @@
|
|||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\bms.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\bmsearch.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
|
@ -493,20 +462,6 @@
|
|||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\colldata.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
|
@ -1604,4 +1559,4 @@
|
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
|
@ -24,21 +24,12 @@
|
|||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="bms.cpp">
|
||||
<Filter>collation</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="bmsearch.cpp">
|
||||
<Filter>collation</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="coleitr.cpp">
|
||||
<Filter>collation</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="coll.cpp">
|
||||
<Filter>collation</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="colldata.cpp">
|
||||
<Filter>collation</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="search.cpp">
|
||||
<Filter>collation</Filter>
|
||||
</ClCompile>
|
||||
|
@ -796,21 +787,12 @@
|
|||
</ResourceCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="unicode\bms.h">
|
||||
<Filter>collation</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\bmsearch.h">
|
||||
<Filter>collation</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\coleitr.h">
|
||||
<Filter>collation</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\coll.h">
|
||||
<Filter>collation</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\colldata.h">
|
||||
<Filter>collation</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\search.h">
|
||||
<Filter>collation</Filter>
|
||||
</CustomBuild>
|
||||
|
@ -1026,4 +1008,4 @@
|
|||
<Filter>formatting</Filter>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
|
@ -48,7 +48,6 @@ typedef enum ECleanupI18NType {
|
|||
UCLN_I18N_UCOL_RES,
|
||||
UCLN_I18N_UCOL_BLD,
|
||||
UCLN_I18N_CSDET,
|
||||
UCLN_I18N_COLL_DATA,
|
||||
UCLN_I18N_INDEX_CHARACTERS,
|
||||
UCLN_I18N_GENDERINFO,
|
||||
UCLN_I18N_CDFINFO,
|
||||
|
|
|
@ -1,280 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and Others.
|
||||
* All rights reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Boyer-Moore StringSearch prototype.
|
||||
* \internal
|
||||
*/
|
||||
|
||||
#ifndef _BMS_H
|
||||
#define _BMS_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/ucol.h"
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
|
||||
/**
|
||||
* A <code>UCD</code> object holds the Collator-specific data needed to
|
||||
* compute the length of the shortest string that can
|
||||
* generate a particular list of CEs.
|
||||
*
|
||||
* <code>UCD</code> objects are quite expensive to compute. Because
|
||||
* of this, they are cached. When you call <code>ucd_open</code> it
|
||||
* returns a reference counted cached object. When you call <code>ucd_close</code>
|
||||
* the reference count on the object is decremented but the object is not deleted.
|
||||
*
|
||||
* If you do not need to reuse any unreferenced objects in the cache, you can call
|
||||
* <code>ucd_flushCache</code>. If you no longer need any <code>UCD</code>
|
||||
* objects, you can call <code>ucd_freeCache</code>
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
typedef void UCD;
|
||||
|
||||
/**
|
||||
* Open a <code>UCD</code> object.
|
||||
*
|
||||
* @param coll - the collator
|
||||
* @param status - will be set if any errors occur.
|
||||
*
|
||||
* @return the <code>UCD</code> object. You must call
|
||||
* <code>ucd_close</code> when you are done using the object.
|
||||
*
|
||||
* Note: if on return status is set to an error, the only safe
|
||||
* thing to do with the returned object is to call <code>ucd_close</code>.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL UCD * U_EXPORT2
|
||||
ucd_open(UCollator *coll, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Release a <code>UCD</code> object.
|
||||
*
|
||||
* @param ucd - the object
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
ucd_close(UCD *ucd);
|
||||
|
||||
/**
|
||||
* Get the <code>UCollator</code> object used to create a <code>UCD</code> object.
|
||||
* The <code>UCollator</code> object returned may not be the exact
|
||||
* object that was used to create this object, but it will have the
|
||||
* same behavior.
|
||||
*
|
||||
* @param ucd - the <code>UCD</code> object
|
||||
*
|
||||
* @return the <code>UCollator</code> used to create the given
|
||||
* <code>UCD</code> object.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL UCollator * U_EXPORT2
|
||||
ucd_getCollator(UCD *ucd);
|
||||
|
||||
/**
|
||||
* <code>UCD</code> objects are expensive to compute, and so
|
||||
* may be cached. This routine will free the cached objects and delete
|
||||
* the cache.
|
||||
*
|
||||
* WARNING: Don't call this until you have called <code>ucd_close</code>
|
||||
* for each <code>UCD</code> object that you have used. Also,
|
||||
* DO NOT call this if another thread may be calling <code>ucd_flushCache</code>
|
||||
* at the same time.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
ucd_freeCache();
|
||||
|
||||
/**
|
||||
* <code>UCD</code> objects are expensive to compute, and so
|
||||
* may be cached. This routine will remove any unused <code>UCD</code>
|
||||
* objects from the cache.
|
||||
*
|
||||
* @internal 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
ucd_flushCache();
|
||||
|
||||
/**
|
||||
* BMS
|
||||
*
|
||||
* This object holds the information needed to do a Collation sensitive Boyer-Moore search. It encapsulates
|
||||
* the pattern, the "bad character" and "good suffix" tables, the Collator-based data needed to compute them,
|
||||
* and a reference to the text being searched.
|
||||
*
|
||||
* To do a search, you first need to get a <code>UCD</code> object by calling <code>ucd_open</code>.
|
||||
* Then you construct a <code>BMS</code> object from the <code>UCD</code> object, the pattern
|
||||
* string and the target string. Then you call the <code>search</code> method. Here's a code sample:
|
||||
*
|
||||
* <pre>
|
||||
* void boyerMooreExample(UCollator *collator, UChar *pattern, int32_t patternLen, UChar *target, int32_t targetLength)
|
||||
* {
|
||||
* UErrorCode status = U_ZERO_ERROR;
|
||||
* int32_t offset = 0, start = -1, end = -1;
|
||||
* UCD *ucd = NULL;
|
||||
* BMS *bms = NULL;
|
||||
*
|
||||
* ucd = ucd_open(collator, &status);
|
||||
* if (U_FAILURE(status)) {
|
||||
* // could not create a UCD object
|
||||
* return;
|
||||
* }
|
||||
*
|
||||
* bms = bms_open(ucd, pattern, patternLen, target, targetLength, &status);
|
||||
* if (U_FAILURE(status)) {
|
||||
* // could not create a BMS object
|
||||
* ucd_close(ucd);
|
||||
* return;
|
||||
* }
|
||||
*
|
||||
*
|
||||
* // Find all matches
|
||||
* while (bms_search(bms, offset, &start, &end)) {
|
||||
* // process the match between start and end
|
||||
* ...
|
||||
*
|
||||
* // advance past the match
|
||||
* offset = end;
|
||||
* }
|
||||
*
|
||||
* // at this point, if offset == 0, there were no matches
|
||||
* if (offset == 0) {
|
||||
* // handle the case of no matches
|
||||
* }
|
||||
*
|
||||
* bms_close(bms);
|
||||
* ucd_close(ucd);
|
||||
*
|
||||
* // UCD objects are cached, so the call to
|
||||
* // ucd_close doesn't delete the object.
|
||||
* // Call this if you don't need the object any more.
|
||||
* ucd_flushCache();
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* NOTE: This is a technology preview. The final version of this API may not bear any resemblance to this API.
|
||||
*
|
||||
* Known limitations:
|
||||
* 1) Backwards searching has not been implemented.
|
||||
*
|
||||
* 2) For Han and Hangul characters, this code ignores any Collation tailorings. In general,
|
||||
* this isn't a problem, but in Korean locales, at strength 1, Hangul characters are tailored
|
||||
* to be equal to Han characters with the same pronunciation. Because this code ignores
|
||||
* tailorings, searching for a Hangul character will not find a Han character and vice versa.
|
||||
*
|
||||
* 3) In some cases, searching for a pattern that needs to be normalized and ends
|
||||
* in a discontiguous contraction may fail. The only known cases of this are with
|
||||
* the Tibetan script. For example searching for the pattern
|
||||
* "\u0F7F\u0F80\u0F81\u0F82\u0F83\u0F84\u0F85" will fail. (This case is artificial. We've
|
||||
* been unable to find a practical, real-world example of this failure.)
|
||||
*
|
||||
* NOTE: This is a technology preview. The final version of this API may not bear any resemblance to this API.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
struct BMS;
|
||||
typedef struct BMS BMS; /**< @see BMS */
|
||||
|
||||
/**
|
||||
* Construct a <code>BMS</code> object.
|
||||
*
|
||||
* @param ucd - A <code>UCD</code> object holding the Collator-sensitive data
|
||||
* @param pattern - the string for which to search
|
||||
* @param patternLength - the length of the string for which to search
|
||||
* @param target - the string in which to search
|
||||
* @param targetLength - the length of the string in which to search
|
||||
* @param status - will be set if any errors occur.
|
||||
*
|
||||
* @return the <code>BMS</code> object.
|
||||
*
|
||||
* Note: if on return status is set to an error, the only safe
|
||||
* thing to do with the returned object is to call
|
||||
* <code>bms_close</code>.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL BMS * U_EXPORT2
|
||||
bms_open(UCD *ucd,
|
||||
const UChar *pattern, int32_t patternLength,
|
||||
const UChar *target, int32_t targetLength,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Close a <code>BMS</code> object and release all the
|
||||
* storage associated with it.
|
||||
*
|
||||
* @param bms - the <code>BMS</code> object to close.
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
bms_close(BMS *bms);
|
||||
|
||||
/**
|
||||
* Test the pattern to see if it generates any CEs.
|
||||
*
|
||||
* @param bms - the <code>BMS</code> object
|
||||
* @return <code>TRUE</code> if the pattern string did not generate any CEs
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL UBool U_EXPORT2
|
||||
bms_empty(BMS *bms);
|
||||
|
||||
/**
|
||||
* Get the <code>UCD</code> object used to create
|
||||
* a given <code>BMS</code> object.
|
||||
*
|
||||
* @param bms - the <code>BMS</code> object
|
||||
*
|
||||
* @return - the <code>UCD</code> object used to create
|
||||
* the given <code>BMS</code> object.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL UCD * U_EXPORT2
|
||||
bms_getData(BMS *bms);
|
||||
|
||||
/**
|
||||
* Search for the pattern string in the target string.
|
||||
*
|
||||
* @param bms - the <code>BMS</code> object
|
||||
* @param offset - the offset in the target string at which to begin the search
|
||||
* @param start - will be set to the starting offset of the match, or -1 if there's no match
|
||||
* @param end - will be set to the ending offset of the match, or -1 if there's no match
|
||||
*
|
||||
* @return <code>TRUE</code> if the match succeeds, <code>FALSE</code> otherwise.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL UBool U_EXPORT2
|
||||
bms_search(BMS *bms, int32_t offset, int32_t *start, int32_t *end);
|
||||
|
||||
/**
|
||||
* Set the target string for the match.
|
||||
*
|
||||
* @param bms - the <code>BMS</code> object
|
||||
* @param target - the new target string
|
||||
* @param targetLength - the length of the new target string
|
||||
* @param status - will be set if any errors occur.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
bms_setTargetString(BMS *bms, const UChar *target, int32_t targetLength, UErrorCode *status);
|
||||
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _BMS_H */
|
|
@ -1,228 +0,0 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1996-2011, International Business Machines *
|
||||
* Corporation and others. All Rights Reserved. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Boyer-Moore StringSearch technology preview
|
||||
* \internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
|
||||
#ifndef B_M_SEARCH_H
|
||||
#define B_M_SEARCH_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ucol.h"
|
||||
|
||||
#include "unicode/colldata.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class BadCharacterTable;
|
||||
class GoodSuffixTable;
|
||||
class Target;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* BoyerMooreSearch
|
||||
*
|
||||
* This object holds the information needed to do a Collation sensitive Boyer-Moore search. It encapsulates
|
||||
* the pattern, the "bad character" and "good suffix" tables, the Collator-based data needed to compute them,
|
||||
* and a reference to the text being searched.
|
||||
*
|
||||
* To do a search, you first need to get a <code>CollData</code> object by calling <code>CollData::open</code>.
|
||||
* Then you construct a <code>BoyerMooreSearch</code> object from the <code>CollData</code> object, the pattern
|
||||
* string and the target string. Then you call the <code>search</code> method. Here's a code sample:
|
||||
*
|
||||
* <pre>
|
||||
* void boyerMooreExample(UCollator *collator, UnicodeString *pattern, UnicodeString *target)
|
||||
* {
|
||||
* UErrorCode status = U_ZERO_ERROR;
|
||||
* CollData *collData = CollData::open(collator, status);
|
||||
*
|
||||
* if (U_FAILURE(status)) {
|
||||
* // could not create a CollData object
|
||||
* return;
|
||||
* }
|
||||
*
|
||||
* BoyerMooreSearch *search = new BoyerMooreSearch(collData, *pattern, target, status);
|
||||
*
|
||||
* if (U_FAILURE(status)) {
|
||||
* // could not create a BoyerMooreSearch object
|
||||
* CollData::close(collData);
|
||||
* return;
|
||||
* }
|
||||
*
|
||||
* int32_t offset = 0, start = -1, end = -1;
|
||||
*
|
||||
* // Find all matches
|
||||
* while (search->search(offset, start, end)) {
|
||||
* // process the match between start and end
|
||||
* ...
|
||||
* // advance past the match
|
||||
* offset = end;
|
||||
* }
|
||||
*
|
||||
* // at this point, if offset == 0, there were no matches
|
||||
* if (offset == 0) {
|
||||
* // handle the case of no matches
|
||||
* }
|
||||
*
|
||||
* delete search;
|
||||
* CollData::close(collData);
|
||||
*
|
||||
* // CollData objects are cached, so the call to
|
||||
* // CollData::close doesn't delete the object.
|
||||
* // Call this if you don't need the object any more.
|
||||
* CollData::flushCollDataCache();
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* NOTE: This is a technology preview. The final version of this API may not bear any resemblance to this API.
|
||||
*
|
||||
* Known limitations:
|
||||
* 1) Backwards searching has not been implemented.
|
||||
*
|
||||
* 2) For Han and Hangul characters, this code ignores any Collation tailorings. In general,
|
||||
* this isn't a problem, but in Korean locales, at strength 1, Hangul characters are tailored
|
||||
* to be equal to Han characters with the same pronunciation. Because this code ignores
|
||||
* tailorings, searching for a Hangul character will not find a Han character and vice versa.
|
||||
*
|
||||
* 3) In some cases, searching for a pattern that needs to be normalized and ends
|
||||
* in a discontiguous contraction may fail. The only known cases of this are with
|
||||
* the Tibetan script. For example searching for the pattern
|
||||
* "\u0F7F\u0F80\u0F81\u0F82\u0F83\u0F84\u0F85" will fail. (This case is artificial. We've
|
||||
* been unable to find a practical, real-world example of this failure.)
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*
|
||||
* @see CollData
|
||||
*/
|
||||
class U_I18N_API BoyerMooreSearch : public UObject
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Construct a <code>BoyerMooreSearch</code> object.
|
||||
*
|
||||
* @param theData - A <code>CollData</code> object holding the Collator-sensitive data
|
||||
* @param patternString - the string for which to search
|
||||
* @param targetString - the string in which to search or <code>NULL</code> if you will
|
||||
* set it later by calling <code>setTargetString</code>.
|
||||
* @param status - will be set if any errors occur.
|
||||
*
|
||||
* Note: if on return, status is set to an error code,
|
||||
* the only safe thing to do with this object is to call
|
||||
* the destructor.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* The destructor
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
~BoyerMooreSearch();
|
||||
|
||||
/**
|
||||
* Test the pattern to see if it generates any CEs.
|
||||
*
|
||||
* @return <code>TRUE</code> if the pattern string did not generate any CEs
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
UBool empty();
|
||||
|
||||
/**
|
||||
* Search for the pattern string in the target string.
|
||||
*
|
||||
* @param offset - the offset in the target string at which to begin the search
|
||||
* @param start - will be set to the starting offset of the match, or -1 if there's no match
|
||||
* @param end - will be set to the ending offset of the match, or -1 if there's no match
|
||||
*
|
||||
* @return <code>TRUE</code> if the match succeeds, <code>FALSE</code> otherwise.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
UBool search(int32_t offset, int32_t &start, int32_t &end);
|
||||
|
||||
/**
|
||||
* Set the target string for the match.
|
||||
*
|
||||
* @param targetString - the new target string
|
||||
* @param status - will be set if any errors occur.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
void setTargetString(const UnicodeString *targetString, UErrorCode &status);
|
||||
|
||||
// **** no longer need these? ****
|
||||
/**
|
||||
* Return the <code>CollData</code> object used for searching
|
||||
*
|
||||
* @return the <code>CollData</code> object used for searching
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
CollData *getData();
|
||||
|
||||
/**
|
||||
* Return the CEs generated by the pattern string.
|
||||
*
|
||||
* @return a <code>CEList</code> object holding the CEs generated by the pattern string.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
CEList *getPatternCEs();
|
||||
|
||||
/**
|
||||
* Return the <code>BadCharacterTable</code> object computed for the pattern string.
|
||||
*
|
||||
* @return the <code>BadCharacterTable</code> object.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
BadCharacterTable *getBadCharacterTable();
|
||||
|
||||
/**
|
||||
* Return the <code>GoodSuffixTable</code> object computed for the pattern string.
|
||||
*
|
||||
* @return the <code>GoodSuffixTable</code> object computed for the pattern string.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
GoodSuffixTable *getGoodSuffixTable();
|
||||
|
||||
/**
|
||||
* UObject glue...
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
/**
|
||||
* UObject glue...
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
static UClassID getStaticClassID();
|
||||
|
||||
private:
|
||||
CollData *data;
|
||||
CEList *patCEs;
|
||||
BadCharacterTable *badCharacterTable;
|
||||
GoodSuffixTable *goodSuffixTable;
|
||||
UnicodeString pattern;
|
||||
Target *target;
|
||||
};
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // #if !UCONFIG_NO_COLLATION
|
||||
#endif // #ifndef B_M_SEARCH_H
|
|
@ -37,7 +37,7 @@ DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"'
|
|||
LIBS = $(LIBCTESTFW) $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M) $(LIB_THREAD)
|
||||
|
||||
OBJECTS = aliastst.o allcoll.o apicoll.o astrotst.o callimts.o calregts.o caltest.o \
|
||||
caltztst.o canittst.o citrtest.o cntabcol.o convtest.o currcoll.o \
|
||||
caltztst.o canittst.o citrtest.o cntabcol.o colldata.o convtest.o currcoll.o \
|
||||
fldset.o dadrfmt.o dadrcal.o dadrcoll.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts.o dtfmttst.o \
|
||||
dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \
|
||||
itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
#if !UCONFIG_NO_COLLATION
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/usearch.h"
|
||||
|
||||
#include "cmemory.h"
|
||||
|
@ -26,27 +25,16 @@
|
|||
#include "unicode/ustring.h"
|
||||
#include "hash.h"
|
||||
#include "uhash.h"
|
||||
#include "ucln_in.h"
|
||||
#include "ucol_imp.h"
|
||||
#include "umutex.h"
|
||||
#include "uassert.h"
|
||||
|
||||
#include "unicode/colldata.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
#include "colldata.h"
|
||||
|
||||
#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
|
||||
#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
|
||||
#define DELETE_ARRAY(array) uprv_free((void *) (array))
|
||||
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CEList)
|
||||
|
||||
#ifdef INSTRUMENT_CELIST
|
||||
int32_t CEList::_active = 0;
|
||||
int32_t CEList::_histogram[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
#endif
|
||||
|
||||
CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
|
||||
: ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0)
|
||||
{
|
||||
|
@ -78,11 +66,6 @@ CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
|
|||
strengthMask |= UCOL_PRIMARYORDERMASK;
|
||||
}
|
||||
|
||||
#ifdef INSTRUMENT_CELIST
|
||||
_active += 1;
|
||||
_histogram[0] += 1;
|
||||
#endif
|
||||
|
||||
ces = ceBuffer;
|
||||
|
||||
while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) {
|
||||
|
@ -114,10 +97,6 @@ CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
|
|||
|
||||
CEList::~CEList()
|
||||
{
|
||||
#ifdef INSTRUMENT_CELIST
|
||||
_active -= 1;
|
||||
#endif
|
||||
|
||||
if (ces != ceBuffer) {
|
||||
DELETE_ARRAY(ces);
|
||||
}
|
||||
|
@ -131,11 +110,6 @@ void CEList::add(uint32_t ce, UErrorCode &status)
|
|||
|
||||
if (listSize >= listMax) {
|
||||
int32_t newMax = listMax + CELIST_BUFFER_SIZE;
|
||||
|
||||
#ifdef INSTRUMENT_CELIST
|
||||
_histogram[listSize / CELIST_BUFFER_SIZE] += 1;
|
||||
#endif
|
||||
|
||||
uint32_t *newCEs = NEW_ARRAY(uint32_t, newMax);
|
||||
|
||||
if (newCEs == NULL) {
|
||||
|
@ -190,14 +164,6 @@ int32_t CEList::size() const
|
|||
return listSize;
|
||||
}
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringList)
|
||||
|
||||
#ifdef INSTRUMENT_STRING_LIST
|
||||
int32_t StringList::_lists = 0;
|
||||
int32_t StringList::_strings = 0;
|
||||
int32_t StringList::_histogram[101] = {0};
|
||||
#endif
|
||||
|
||||
StringList::StringList(UErrorCode &status)
|
||||
: strings(NULL), listMax(STRING_LIST_BUFFER_SIZE), listSize(0)
|
||||
{
|
||||
|
@ -211,11 +177,6 @@ StringList::StringList(UErrorCode &status)
|
|||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef INSTRUMENT_STRING_LIST
|
||||
_lists += 1;
|
||||
_histogram[0] += 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
StringList::~StringList()
|
||||
|
@ -228,11 +189,6 @@ void StringList::add(const UnicodeString *string, UErrorCode &status)
|
|||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef INSTRUMENT_STRING_LIST
|
||||
_strings += 1;
|
||||
#endif
|
||||
|
||||
if (listSize >= listMax) {
|
||||
int32_t newMax = listMax + STRING_LIST_BUFFER_SIZE;
|
||||
UnicodeString *newStrings = new UnicodeString[newMax];
|
||||
|
@ -243,17 +199,6 @@ void StringList::add(const UnicodeString *string, UErrorCode &status)
|
|||
for (int32_t i=0; i<listSize; ++i) {
|
||||
newStrings[i] = strings[i];
|
||||
}
|
||||
|
||||
#ifdef INSTRUMENT_STRING_LIST
|
||||
int32_t _h = listSize / STRING_LIST_BUFFER_SIZE;
|
||||
|
||||
if (_h > 100) {
|
||||
_h = 100;
|
||||
}
|
||||
|
||||
_histogram[_h] += 1;
|
||||
#endif
|
||||
|
||||
delete[] strings;
|
||||
strings = newStrings;
|
||||
listMax = newMax;
|
||||
|
@ -295,38 +240,11 @@ deleteStringList(void *obj)
|
|||
|
||||
delete strings;
|
||||
}
|
||||
static void U_CALLCONV
|
||||
deleteCEList(void *obj)
|
||||
{
|
||||
CEList *list = (CEList *) obj;
|
||||
|
||||
delete list;
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
deleteUnicodeStringKey(void *obj)
|
||||
{
|
||||
UnicodeString *key = (UnicodeString *) obj;
|
||||
|
||||
delete key;
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
deleteChars(void * /*obj*/)
|
||||
{
|
||||
// char *chars = (char *) obj;
|
||||
// All the key strings are owned by the
|
||||
// CollData objects and don't need to
|
||||
// be freed here.
|
||||
//DELETE_ARRAY(chars);
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
class CEToStringsMap : public UMemory
|
||||
class CEToStringsMap
|
||||
{
|
||||
public:
|
||||
|
||||
CEToStringsMap(UErrorCode &status);
|
||||
~CEToStringsMap();
|
||||
|
||||
|
@ -334,7 +252,6 @@ public:
|
|||
StringList *getStringList(uint32_t ce) const;
|
||||
|
||||
private:
|
||||
|
||||
void putStringList(uint32_t ce, StringList *stringList, UErrorCode &status);
|
||||
UHashtable *map;
|
||||
};
|
||||
|
@ -390,260 +307,10 @@ void CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCo
|
|||
uhash_iput(map, ce, (void *) stringList, &status);
|
||||
}
|
||||
|
||||
class StringToCEsMap : public UMemory
|
||||
{
|
||||
public:
|
||||
StringToCEsMap(UErrorCode &status);
|
||||
~StringToCEsMap();
|
||||
|
||||
void put(const UnicodeString *string, const CEList *ces, UErrorCode &status);
|
||||
const CEList *get(const UnicodeString *string);
|
||||
void free(const CEList *list);
|
||||
|
||||
private:
|
||||
|
||||
|
||||
UHashtable *map;
|
||||
};
|
||||
|
||||
/**
 * Opens the hash table and installs the deleters for its keys and values.
 *
 * Does nothing if status already indicates a failure; if opening the
 * table fails, the deleters are not installed.
 *
 * @param status - in/out error code
 */
StringToCEsMap::StringToCEsMap(UErrorCode &status)
    : map(NULL)
{
    if (U_SUCCESS(status)) {
        map = uhash_open(uhash_hashUnicodeString,
                         uhash_compareUnicodeString,
                         uhash_compareLong,
                         &status);

        if (U_SUCCESS(status)) {
            uhash_setValueDeleter(map, deleteCEList);
            uhash_setKeyDeleter(map, deleteUnicodeStringKey);
        }
    }
}
|
||||
|
||||
/**
 * Closes the underlying hash table.
 */
StringToCEsMap::~StringToCEsMap()
{
    uhash_close(this->map);
}
|
||||
|
||||
void StringToCEsMap::put(const UnicodeString *string, const CEList *ces, UErrorCode &status)
|
||||
{
|
||||
uhash_put(map, (void *) string, (void *) ces, &status);
|
||||
}
|
||||
|
||||
/**
 * Looks up the CEList stored under a string key.
 *
 * @param string - the key
 *
 * @return whatever uhash_get returns for the key, viewed as a CEList
 */
const CEList *StringToCEsMap::get(const UnicodeString *string)
{
    const void *found = uhash_get(map, string);

    return static_cast<const CEList *>(found);
}
|
||||
|
||||
class CollDataCacheEntry : public UMemory
|
||||
{
|
||||
public:
|
||||
CollDataCacheEntry(CollData *theData);
|
||||
~CollDataCacheEntry();
|
||||
|
||||
CollData *data;
|
||||
int32_t refCount;
|
||||
};
|
||||
|
||||
/**
 * Creates an entry for theData with a reference count of 1.
 *
 * @param theData - the CollData to wrap; it is deleted by the destructor
 */
CollDataCacheEntry::CollDataCacheEntry(CollData *theData)
    : data(theData),
      refCount(1)
{
}
|
||||
|
||||
/**
 * Deletes the wrapped CollData. The reference count is not consulted.
 */
CollDataCacheEntry::~CollDataCacheEntry()
{
    // TODO: check refCount?
    delete this->data;
}
|
||||
|
||||
class CollDataCache : public UMemory
|
||||
{
|
||||
public:
|
||||
CollDataCache(UErrorCode &status);
|
||||
~CollDataCache();
|
||||
|
||||
CollData *get(UCollator *collator, UErrorCode &status);
|
||||
void unref(CollData *collData);
|
||||
|
||||
void flush();
|
||||
|
||||
private:
|
||||
static char *getKey(UCollator *collator, char *keyBuffer, int32_t *charBufferLength);
|
||||
static void deleteKey(char *key);
|
||||
|
||||
UHashtable *cache;
|
||||
};
|
||||
static UMutex lock = U_MUTEX_INITIALIZER;
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static void U_CALLCONV
|
||||
deleteCollDataCacheEntry(void *obj)
|
||||
{
|
||||
CollDataCacheEntry *entry = (CollDataCacheEntry *) obj;
|
||||
|
||||
delete entry;
|
||||
}
|
||||
U_CDECL_END
|
||||
|
||||
/**
 * Opens the hash table and installs its deleters.
 *
 * Does nothing if status already indicates a failure; if opening the
 * table fails, the deleters are not installed.
 *
 * @param status - in/out error code
 */
CollDataCache::CollDataCache(UErrorCode &status)
    : cache(NULL)
{
    if (U_SUCCESS(status)) {
        cache = uhash_open(uhash_hashChars, uhash_compareChars, uhash_compareLong, &status);

        if (U_SUCCESS(status)) {
            uhash_setValueDeleter(cache, deleteCollDataCacheEntry);
            uhash_setKeyDeleter(cache, deleteChars);
        }
    }
}
|
||||
|
||||
/**
 * Closes the hash table while holding the cache mutex and clears the
 * pointer to it.
 */
CollDataCache::~CollDataCache()
{
    umtx_lock(&lock);

    UHashtable *table = cache;

    uhash_close(table);
    cache = NULL;

    umtx_unlock(&lock);
}
|
||||
|
||||
/**
 * Return the cached CollData for a collator, building and caching a new
 * one if the cache does not hold one yet.
 *
 * The new CollData is constructed without holding the cache mutex, so two
 * threads may build the same object concurrently. The cache is checked
 * again before inserting, and the thread that loses the race discards
 * its copy and shares the cached one.
 *
 * @param collator - the collator
 * @param status   - in/out error code. A failure reported while building
 *                   the CollData is passed through unchanged;
 *                   U_MEMORY_ALLOCATION_ERROR is set only when an
 *                   allocation returned NULL.
 *
 * @return the CollData with its reference count incremented, or NULL on failure.
 */
CollData *CollDataCache::get(UCollator *collator, UErrorCode &status)
{
    char keyBuffer[KEY_BUFFER_SIZE];
    int32_t keyLength = KEY_BUFFER_SIZE;
    char *key = getKey(collator, keyBuffer, &keyLength);
    CollData *result = NULL;
    CollDataCacheEntry *newEntry = NULL;

    umtx_lock(&lock);
    CollDataCacheEntry *entry = (CollDataCacheEntry *) uhash_get(cache, key);

    if (entry == NULL) {
        // Cache miss: build the object without holding the mutex.
        umtx_unlock(&lock);

        CollData *newData = new CollData(collator, key, keyLength, status);
        newEntry = new CollDataCacheEntry(newData);

        if (U_FAILURE(status) || newData == NULL || newEntry == NULL) {
            if (U_SUCCESS(status)) {
                status = U_MEMORY_ALLOCATION_ERROR;
            }

            // The entry's destructor deletes newData, so delete exactly one of them.
            if (newEntry != NULL) {
                delete newEntry;
            } else {
                delete newData;
            }

            if (key != keyBuffer) {
                deleteKey(key);
            }

            return NULL;
        }

        umtx_lock(&lock);
        entry = (CollDataCacheEntry *) uhash_get(cache, key);

        if (entry == NULL) {
            // Still missing: hand the new entry to the hash table. The
            // entry was constructed with a reference count of 1.
            uhash_put(cache, newData->key, newEntry, &status);

            if (U_SUCCESS(status)) {
                result = newData;
            }

            // The table has a value deleter, so it owns newEntry from here on.
            // NOTE(review): on failure uhash_put is expected to have run the
            // value deleter already - confirm against uhash.h. Either way the
            // entry must not be deleted again here.
            newEntry = NULL;
        }
    }

    if (entry != NULL) {
        // Cache hit, or another thread inserted the entry while we were building ours.
        result = entry->data;
        entry->refCount += 1;
    }

    umtx_unlock(&lock);

    if (key != keyBuffer) {
        deleteKey(key);
    }

    // Non-NULL only if we lost the race; deleting the entry also deletes its CollData.
    delete newEntry;

    return result;
}
|
||||
|
||||
/**
 * Drops one reference to a cached CollData.
 *
 * The entry is looked up by the CollData's key while holding the cache
 * mutex; if no entry is found, nothing happens. The entry is not removed
 * here, only its count is decremented.
 *
 * @param collData - the object being released
 */
void CollDataCache::unref(CollData *collData)
{
    umtx_lock(&lock);

    CollDataCacheEntry *found = (CollDataCacheEntry *) uhash_get(cache, collData->key);

    if (found != NULL) {
        --found->refCount;
    }

    umtx_unlock(&lock);
}
|
||||
|
||||
/**
 * Builds the cache key for a collator from its short definition string.
 *
 * @param collator        - the collator
 * @param keyBuffer       - caller-supplied buffer to try first
 * @param keyBufferLength - in: the capacity of keyBuffer; out: the capacity
 *                          of the allocated buffer if one was needed,
 *                          otherwise unchanged
 *
 * @return keyBuffer if the string fit, otherwise a buffer allocated with
 *         NEW_ARRAY. In both cases the result is NUL-terminated.
 */
char *CollDataCache::getKey(UCollator *collator, char *keyBuffer, int32_t *keyBufferLength)
{
    UErrorCode ec = U_ZERO_ERROR;
    char *out = keyBuffer;
    int32_t needed = ucol_getShortDefinitionString(collator, NULL, out, *keyBufferLength, &ec);

    if (needed >= *keyBufferLength) {
        // round to even length, leaving room for terminating null
        *keyBufferLength = (needed + 2) & ~1;

        // NOTE(review): the result of NEW_ARRAY is not checked before use.
        out = NEW_ARRAY(char, *keyBufferLength);
        ec = U_ZERO_ERROR;

        needed = ucol_getShortDefinitionString(collator, NULL, out, *keyBufferLength, &ec);
    }

    out[needed] = '\0';

    return out;
}
|
||||
|
||||
void CollDataCache::flush()
|
||||
{
|
||||
const UHashElement *element;
|
||||
int32_t pos = -1;
|
||||
|
||||
umtx_lock(&lock);
|
||||
while ((element = uhash_nextElement(cache, &pos)) != NULL) {
|
||||
CollDataCacheEntry *entry = (CollDataCacheEntry *) element->value.pointer;
|
||||
|
||||
if (entry->refCount <= 0) {
|
||||
uhash_removeElement(cache, element);
|
||||
}
|
||||
}
|
||||
umtx_unlock(&lock);
|
||||
}
|
||||
|
||||
void CollDataCache::deleteKey(char *key)
|
||||
{
|
||||
DELETE_ARRAY(key);
|
||||
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
/**
 * Cleanup callback: frees the CollData cache.
 *
 * @return always TRUE
 */
static UBool coll_data_cleanup(void)
{
    CollData::freeCollDataCache();

    return TRUE;
}
|
||||
U_CDECL_END
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollData)
|
||||
|
||||
/**
 * Default constructor; performs no initialization.
 */
CollData::CollData()
{
}
|
||||
|
||||
#define CLONE_COLLATOR
|
||||
|
||||
//#define CACHE_CELISTS
|
||||
CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength, UErrorCode &status)
|
||||
: coll(NULL), charsToCEList(NULL), ceToCharsStartingWith(NULL), key(NULL)
|
||||
CollData::CollData(UCollator *collator, UErrorCode &status)
|
||||
: coll(NULL), ceToCharsStartingWith(NULL)
|
||||
{
|
||||
// [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]]
|
||||
// i.e. other, control, private use, format, surrogate
|
||||
|
@ -665,35 +332,12 @@ CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength,
|
|||
USet *contractions = uset_openEmpty();
|
||||
int32_t itemCount;
|
||||
|
||||
#ifdef CACHE_CELISTS
|
||||
charsToCEList = new StringToCEsMap(status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
goto bail;
|
||||
}
|
||||
#else
|
||||
charsToCEList = NULL;
|
||||
#endif
|
||||
|
||||
ceToCharsStartingWith = new CEToStringsMap(status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (cacheKeyLength > KEY_BUFFER_SIZE) {
|
||||
key = NEW_ARRAY(char, cacheKeyLength);
|
||||
|
||||
if (key == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto bail;
|
||||
}
|
||||
} else {
|
||||
key = keyBuffer;
|
||||
}
|
||||
|
||||
ARRAY_COPY(key, cacheKey, cacheKeyLength);
|
||||
|
||||
#ifdef CLONE_COLLATOR
|
||||
coll = ucol_safeClone(collator, NULL, NULL, &status);
|
||||
|
||||
|
@ -730,12 +374,8 @@ CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength,
|
|||
|
||||
ceToCharsStartingWith->put(ceList->get(0), st, status);
|
||||
|
||||
#ifdef CACHE_CELISTS
|
||||
charsToCEList->put(st, ceList, status);
|
||||
#else
|
||||
delete ceList;
|
||||
delete st;
|
||||
#endif
|
||||
}
|
||||
} else if (len > 0) {
|
||||
UnicodeString *st = new UnicodeString(buffer, len);
|
||||
|
@ -749,12 +389,8 @@ CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength,
|
|||
|
||||
ceToCharsStartingWith->put(ceList->get(0), st, status);
|
||||
|
||||
#ifdef CACHE_CELISTS
|
||||
charsToCEList->put(st, ceList, status);
|
||||
#else
|
||||
delete ceList;
|
||||
delete st;
|
||||
#endif
|
||||
} else {
|
||||
// shouldn't happen...
|
||||
}
|
||||
|
@ -821,15 +457,7 @@ CollData::~CollData()
|
|||
ucol_close(coll);
|
||||
#endif
|
||||
|
||||
if (key != keyBuffer) {
|
||||
DELETE_ARRAY(key);
|
||||
}
|
||||
|
||||
delete ceToCharsStartingWith;
|
||||
|
||||
#ifdef CACHE_CELISTS
|
||||
delete charsToCEList;
|
||||
#endif
|
||||
}
|
||||
|
||||
UCollator *CollData::getCollator() const
|
||||
|
@ -844,9 +472,6 @@ const StringList *CollData::getStringList(int32_t ce) const
|
|||
|
||||
const CEList *CollData::getCEList(const UnicodeString *string) const
|
||||
{
|
||||
#ifdef CACHE_CELISTS
|
||||
return charsToCEList->get(string);
|
||||
#else
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const CEList *list = new CEList(coll, *string, status);
|
||||
|
||||
|
@ -856,14 +481,11 @@ const CEList *CollData::getCEList(const UnicodeString *string) const
|
|||
}
|
||||
|
||||
return list;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Release a CEList.
 *
 * When CACHE_CELISTS is not defined the list is deleted here; when it is
 * defined this function does nothing.
 *
 * @param list - the CEList to release
 */
void CollData::freeCEList(const CEList *list)
{
#ifndef CACHE_CELISTS
    delete list;
#endif
}
|
||||
|
||||
int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t *history) const
|
||||
|
@ -885,9 +507,6 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t
|
|||
|
||||
for (int32_t s = 0; s < stringCount; s += 1) {
|
||||
const UnicodeString *string = strings->get(s);
|
||||
#ifdef CACHE_CELISTS
|
||||
const CEList *ceList2 = charsToCEList->get(string);
|
||||
#else
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const CEList *ceList2 = new CEList(coll, *string, status);
|
||||
|
||||
|
@ -895,7 +514,6 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t
|
|||
delete ceList2;
|
||||
ceList2 = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ceList->matchesAt(offset, ceList2)) {
|
||||
U_ASSERT(ceList2 != NULL);
|
||||
|
@ -909,9 +527,8 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t
|
|||
|
||||
if (rlength <= 0) {
|
||||
// delete before continue to avoid memory leak.
|
||||
#ifndef CACHE_CELISTS
|
||||
delete ceList2;
|
||||
#endif
|
||||
|
||||
// ignore any dead ends
|
||||
continue;
|
||||
}
|
||||
|
@ -922,9 +539,7 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef CACHE_CELISTS
|
||||
delete ceList2;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1020,89 +635,4 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset) const
|
|||
return minLength;
|
||||
}
|
||||
|
||||
/**
 * Get the shared CollData object for a collator from the global cache.
 *
 * @param collator - the collator
 * @param status   - in/out error code. Set to U_MEMORY_ALLOCATION_ERROR
 *                   if the global cache could not be created.
 *
 * @return the CollData object, or NULL on failure.
 */
CollData *CollData::open(UCollator *collator, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    CollDataCache *cache = getCollDataCache();

    // getCollDataCache() returns NULL when it cannot create the cache.
    if (cache == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    return cache->get(collator, status);
}
|
||||
|
||||
/**
 * Release a CollData object obtained from the global cache.
 *
 * This only drops a reference in the cache. Passing NULL is a no-op.
 *
 * @param collData - the object to release
 */
void CollData::close(CollData *collData)
{
    if (collData == NULL) {
        return;
    }

    CollDataCache *cache = getCollDataCache();

    // getCollDataCache() returns NULL when it cannot create the cache.
    if (cache != NULL) {
        cache->unref(collData);
    }
}
|
||||
|
||||
CollDataCache *CollData::collDataCache = NULL;
|
||||
|
||||
/**
 * Return the global CollData cache, creating it on first use.
 *
 * A new cache is built outside the global mutex. Under the mutex it is
 * installed only if no other thread has installed one in the meantime;
 * otherwise the new cache is discarded and the installed one is returned.
 * The cleanup function is registered by the thread that installs the cache.
 *
 * @return the global cache, or NULL if it could not be created.
 */
CollDataCache *CollData::getCollDataCache()
{
    CollDataCache *cache = NULL;

    UMTX_CHECK(NULL, collDataCache, cache);

    if (cache != NULL) {
        return cache;
    }

    UErrorCode status = U_ZERO_ERROR;

    cache = new CollDataCache(status);

    // Never install a missing or half-built cache.
    if (cache == NULL || U_FAILURE(status)) {
        delete cache;
        return NULL;
    }

    umtx_lock(NULL);
    if (collDataCache == NULL) {
        collDataCache = cache;

        ucln_i18n_registerCleanup(UCLN_I18N_COLL_DATA, coll_data_cleanup);
    }

    // Take the snapshot while still holding the mutex.
    CollDataCache *installed = collDataCache;
    umtx_unlock(NULL);

    if (installed != cache) {
        // Another thread installed its cache first; drop ours.
        delete cache;
    }

    return installed;
}
|
||||
|
||||
void CollData::freeCollDataCache()
|
||||
{
|
||||
CollDataCache *cache = NULL;
|
||||
|
||||
UMTX_CHECK(NULL, collDataCache, cache);
|
||||
|
||||
if (cache != NULL) {
|
||||
umtx_lock(NULL);
|
||||
if (collDataCache != NULL) {
|
||||
collDataCache = NULL;
|
||||
} else {
|
||||
cache = NULL;
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
||||
delete cache;
|
||||
}
|
||||
}
|
||||
|
||||
void CollData::flushCollDataCache()
|
||||
{
|
||||
CollDataCache *cache = NULL;
|
||||
|
||||
UMTX_CHECK(NULL, collDataCache, cache);
|
||||
|
||||
// **** this will fail if the another ****
|
||||
// **** thread deletes the cache here ****
|
||||
if (cache != NULL) {
|
||||
cache->flush();
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // #if !UCONFIG_NO_COLLATION
|
|
@ -7,10 +7,17 @@
|
|||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Collation data used to compute minLengthInChars.
|
||||
* \brief Originally, added as C++ API for Collation data used to compute minLengthInChars
|
||||
* \internal
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Note: This module was included in ICU 4.0.1 as @internal technology preview for supporting
|
||||
* Boyer-Moore string search API. For now, only SSearchTest depends on this module. I temporarily
|
||||
* moved the module from i18n directory to intltest, because we have no plan to publish this
|
||||
* as public API. (2012-12-18 yoshito)
|
||||
*/
|
||||
|
||||
#ifndef COLL_DATA_H
|
||||
#define COLL_DATA_H
|
||||
|
||||
|
@ -18,21 +25,10 @@
|
|||
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ucol.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* The size of the internal buffer for the Collator's short description string.
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
#define KEY_BUFFER_SIZE 64
|
||||
|
||||
/**
|
||||
* The size of the internal CE buffer in a <code>CEList</code> object
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
#define CELIST_BUFFER_SIZE 4
|
||||
|
||||
|
@ -40,31 +36,19 @@ U_NAMESPACE_BEGIN
|
|||
* \def INSTRUMENT_CELIST
|
||||
* Define this to enable the <code>CEList</code> objects to collect
|
||||
* statistics.
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
//#define INSTRUMENT_CELIST
|
||||
|
||||
/**
|
||||
* The size of the initial list in a <code>StringList</code> object.
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
#define STRING_LIST_BUFFER_SIZE 16
|
||||
|
||||
/**
|
||||
* \def INSTRUMENT_STRING_LIST
|
||||
* Define this to enable the <code>StringList</code> objects to
|
||||
* collect statistics.
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
//#define INSTRUMENT_STRING_LIST
|
||||
|
||||
/**
|
||||
* This object holds a list of CEs generated from a particular
|
||||
* <code>UnicodeString</code>
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
class U_I18N_API CEList : public UObject
|
||||
class CEList
|
||||
{
|
||||
public:
|
||||
/**
|
||||
|
@ -77,14 +61,11 @@ public:
|
|||
* Note: if on return, status is set to an error code,
|
||||
* the only safe thing to do with this object is to call
|
||||
* the destructor.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* The destructor.
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
~CEList();
|
||||
|
||||
|
@ -92,8 +73,6 @@ public:
|
|||
* Return the number of CEs in the list.
|
||||
*
|
||||
* @return the number of CEs in the list.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
int32_t size() const;
|
||||
|
||||
|
@ -103,8 +82,6 @@ public:
|
|||
* @param index - the index of the CE to return
|
||||
*
|
||||
* @return the CE, or <code>0</code> if <code>index</code> is out of range
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
uint32_t get(int32_t index) const;
|
||||
|
||||
|
@ -116,8 +93,6 @@ public:
|
|||
* @param other - the other <code>CEList</code>
|
||||
*
|
||||
* @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
UBool matchesAt(int32_t offset, const CEList *other) const;
|
||||
|
||||
|
@ -127,22 +102,9 @@ public:
|
|||
* @param index - the index
|
||||
*
|
||||
* @return a reference to the given CE in the list
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
uint32_t &operator[](int32_t index) const;
|
||||
|
||||
/**
|
||||
* UObject glue...
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
/**
|
||||
* UObject glue...
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
static UClassID getStaticClassID();
|
||||
|
||||
private:
|
||||
void add(uint32_t ce, UErrorCode &status);
|
||||
|
||||
|
@ -150,21 +112,14 @@ private:
|
|||
uint32_t *ces;
|
||||
int32_t listMax;
|
||||
int32_t listSize;
|
||||
|
||||
#ifdef INSTRUMENT_CELIST
|
||||
static int32_t _active;
|
||||
static int32_t _histogram[10];
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* StringList
|
||||
*
|
||||
* This object holds a list of <code>UnicodeString</code> objects.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
class U_I18N_API StringList : public UObject
|
||||
class StringList
|
||||
{
|
||||
public:
|
||||
/**
|
||||
|
@ -175,15 +130,11 @@ public:
|
|||
* Note: if on return, status is set to an error code,
|
||||
* the only safe thing to do with this object is to call
|
||||
* the destructor.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
StringList(UErrorCode &status);
|
||||
|
||||
/**
|
||||
* The destructor.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
~StringList();
|
||||
|
||||
|
@ -192,8 +143,6 @@ public:
|
|||
*
|
||||
* @param string - the string to add
|
||||
* @param status - will be set if any errors occur.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
void add(const UnicodeString *string, UErrorCode &status);
|
||||
|
||||
|
@ -203,8 +152,6 @@ public:
|
|||
* @param chars - the address of the array of code points
|
||||
* @param count - the number of code points in the array
|
||||
* @param status - will be set if any errors occur.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
void add(const UChar *chars, int32_t count, UErrorCode &status);
|
||||
|
||||
|
@ -215,8 +162,6 @@ public:
|
|||
*
|
||||
* @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
|
||||
* if <code>index</code> is out of bounds.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
const UnicodeString *get(int32_t index) const;
|
||||
|
||||
|
@ -224,43 +169,22 @@ public:
|
|||
* Get the number of strings in the list.
|
||||
*
|
||||
* @return the number of strings in the list.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
int32_t size() const;
|
||||
|
||||
/**
|
||||
* the UObject glue...
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
/**
|
||||
* the UObject glue...
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
static UClassID getStaticClassID();
|
||||
|
||||
private:
|
||||
UnicodeString *strings;
|
||||
int32_t listMax;
|
||||
int32_t listSize;
|
||||
|
||||
#ifdef INSTRUMENT_STRING_LIST
|
||||
static int32_t _lists;
|
||||
static int32_t _strings;
|
||||
static int32_t _histogram[101];
|
||||
#endif
|
||||
};
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
|
||||
/*
|
||||
* Forward references to internal classes.
|
||||
*/
|
||||
class StringToCEsMap;
|
||||
class CEToStringsMap;
|
||||
class CollDataCache;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* CollData
|
||||
*
|
||||
|
@ -276,10 +200,8 @@ class CollDataCache;
|
|||
* If you do not need to reuse any unreferenced objects in the cache, you can call
|
||||
* <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
|
||||
* objects, you can call <code>CollData::freeCollDataCache</code>
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
class U_I18N_API CollData : public UObject
|
||||
class CollData
|
||||
{
|
||||
public:
|
||||
/**
|
||||
|
@ -287,32 +209,18 @@ public:
|
|||
*
|
||||
* @param collator - the collator
|
||||
* @param status - will be set if any errors occur.
|
||||
*
|
||||
* @return the <code>CollData</code> object. You must call
|
||||
* <code>close</code> when you are done using the object.
|
||||
*
|
||||
* Note: if on return, status is set to an error code,
|
||||
* the only safe thing to do with this object is to call
|
||||
* <code>CollData::close</code>.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
static CollData *open(UCollator *collator, UErrorCode &status);
|
||||
CollData(UCollator *collator, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Release a <code>CollData</code> object.
|
||||
*
|
||||
* @param collData - the object
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
* The destructor.
|
||||
*/
|
||||
static void close(CollData *collData);
|
||||
~CollData();
|
||||
|
||||
/**
|
||||
* Get the <code>UCollator</code> object used to create this object.
|
||||
* The object returned may not be the exact object that was used to
|
||||
* create this object, but it will have the same behavior.
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
UCollator *getCollator() const;
|
||||
|
||||
|
@ -325,8 +233,6 @@ public:
|
|||
* return a <code>StringList</code> object containing all
|
||||
* the strings, or <code>NULL</code> if there are
|
||||
* no such strings.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview.
|
||||
*/
|
||||
const StringList *getStringList(int32_t ce) const;
|
||||
|
||||
|
@ -338,8 +244,6 @@ public:
|
|||
* @return a <code>CEList</code> object containing the CEs. You
|
||||
* must call <code>freeCEList</code> when you are finished
|
||||
* using the <code>CEList</code>.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview.
|
||||
*/
|
||||
const CEList *getCEList(const UnicodeString *string) const;
|
||||
|
||||
|
@ -347,8 +251,6 @@ public:
|
|||
* Release a <code>CEList</code> returned by <code>getCEList</code>.
|
||||
*
|
||||
* @param list - the <code>CEList</code> to free.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
void freeCEList(const CEList *list);
|
||||
|
||||
|
@ -360,8 +262,6 @@ public:
|
|||
* @param offset - the offset of the first CE in the list to use.
|
||||
*
|
||||
* @return the length of the shortest string.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
|
||||
|
||||
|
@ -382,75 +282,18 @@ public:
|
|||
* the number of CEs in the <code>CEList</code>
|
||||
*
|
||||
* @return the length of the shortest string.
|
||||
*
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
|
||||
|
||||
/**
|
||||
* UObject glue...
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
/**
|
||||
* UObject glue...
|
||||
* @internal ICU 4.0.1 technology preview
|
||||
*/
|
||||
static UClassID getStaticClassID();
|
||||
|
||||
/**
|
||||
* <code>CollData</code> objects are expensive to compute, and so
|
||||
* may be cached. This routine will free the cached objects and delete
|
||||
* the cache.
|
||||
*
|
||||
* WARNING: Don't call this until you have called <code>close</code>
|
||||
* for each <code>CollData</code> object that you have used. Also,
|
||||
* DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
|
||||
* at the same time.
|
||||
*
|
||||
* @internal 4.0.1 technology preview
|
||||
*/
|
||||
static void freeCollDataCache();
|
||||
|
||||
/**
|
||||
* <code>CollData</code> objects are expensive to compute, and so
|
||||
* may be cached. This routine will remove any unused <code>CollData</code>
|
||||
* objects from the cache.
|
||||
*
|
||||
* @internal 4.0.1 technology preview
|
||||
*/
|
||||
static void flushCollDataCache();
|
||||
|
||||
private:
|
||||
friend class CollDataCache;
|
||||
friend class CollDataCacheEntry;
|
||||
|
||||
CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status);
|
||||
~CollData();
|
||||
|
||||
CollData();
|
||||
|
||||
static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength);
|
||||
|
||||
static CollDataCache *getCollDataCache();
|
||||
|
||||
UCollator *coll;
|
||||
StringToCEsMap *charsToCEList;
|
||||
CEToStringsMap *ceToCharsStartingWith;
|
||||
|
||||
char keyBuffer[KEY_BUFFER_SIZE];
|
||||
char *key;
|
||||
|
||||
static CollDataCache *collDataCache;
|
||||
|
||||
uint32_t minHan;
|
||||
uint32_t maxHan;
|
||||
|
||||
uint32_t jamoLimits[4];
|
||||
};
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // #if !UCONFIG_NO_COLLATION
|
||||
#endif // #ifndef COLL_DATA_H
|
|
@ -1,4 +1,4 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
|
@ -224,6 +224,7 @@
|
|||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="bytestrietest.cpp" />
|
||||
<ClCompile Include="colldata.cpp" />
|
||||
<ClCompile Include="ucharstrietest.cpp" />
|
||||
<ClCompile Include="itrbbi.cpp" />
|
||||
<ClCompile Include="rbbiapts.cpp" />
|
||||
|
@ -392,6 +393,7 @@
|
|||
<ClCompile Include="listformattertest.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="colldata.h" />
|
||||
<ClInclude Include="itrbbi.h" />
|
||||
<ClInclude Include="rbbiapts.h" />
|
||||
<ClInclude Include="rbbitst.h" />
|
||||
|
@ -537,4 +539,4 @@
|
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
|
@ -450,6 +450,10 @@
|
|||
<ClCompile Include="alphaindextst.cpp">
|
||||
<Filter>collation</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="listformattertest.cpp" />
|
||||
<ClCompile Include="colldata.cpp">
|
||||
<Filter>collation</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="itrbbi.h">
|
||||
|
@ -818,5 +822,9 @@
|
|||
<ClInclude Include="alphaindextst.h">
|
||||
<Filter>collation</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="listformattertest.h" />
|
||||
<ClInclude Include="colldata.h">
|
||||
<Filter>collation</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2005-2009, International Business Machines
|
||||
* Copyright (C) 2005-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
@ -11,7 +11,6 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/ucol.h"
|
||||
#include "unicode/bmsearch.h"
|
||||
|
||||
#include "intltest.h"
|
||||
|
||||
|
@ -34,30 +33,17 @@ public:
|
|||
virtual void searchTest();
|
||||
virtual void offsetTest();
|
||||
virtual void monkeyTest(char *params);
|
||||
|
||||
virtual void bmMonkeyTest(char *params);
|
||||
virtual void boyerMooreTest();
|
||||
virtual void sharpSTest();
|
||||
virtual void goodSuffixTest();
|
||||
virtual void searchTime();
|
||||
|
||||
virtual void bmsTest();
|
||||
virtual void bmSearchTest();
|
||||
|
||||
virtual void udhrTest();
|
||||
|
||||
virtual void stringListTest();
|
||||
private:
|
||||
virtual const char *getPath(char buffer[2048], const char *filename);
|
||||
virtual int32_t monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
|
||||
const char *name, const char *strength, uint32_t seed);
|
||||
|
||||
virtual int32_t bmMonkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
|
||||
BoyerMooreSearch *bms, BoyerMooreSearch *abms,
|
||||
const char *name, const char *strength, uint32_t seed);
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (C) 2008-2009 IBM, Inc. All Rights Reserved.
|
||||
* Copyright (C) 2008-2012 IBM, Inc. All Rights Reserved.
|
||||
*
|
||||
********************************************************************/
|
||||
/**
|
||||
|
@ -14,13 +14,7 @@
|
|||
StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
|
||||
:UPerfTest(argc,argv,status){
|
||||
int32_t start, end;
|
||||
|
||||
#ifdef TEST_BOYER_MOORE_SEARCH
|
||||
bms = NULL;
|
||||
#else
|
||||
srch = NULL;
|
||||
#endif
|
||||
|
||||
pttrn = NULL;
|
||||
if(status== U_ILLEGAL_ARGUMENT_ERROR || line_mode){
|
||||
fprintf(stderr,gUsageString, "strsrchperf");
|
||||
|
@ -65,17 +59,8 @@ StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const cha
|
|||
pttrn = temp; /* store word in pttrn */
|
||||
#endif
|
||||
|
||||
#ifdef TEST_BOYER_MOORE_SEARCH
|
||||
UnicodeString patternString(pttrn, pttrnLen);
|
||||
UCollator *coll = ucol_open(locale, &status);
|
||||
CollData *data = CollData::open(coll, status);
|
||||
|
||||
targetString = new UnicodeString(src, srcLen);
|
||||
bms = new BoyerMooreSearch(data, patternString, targetString, status);
|
||||
#else
|
||||
/* Create the StringSearch object to be use in performance test. */
|
||||
srch = usearch_open(pttrn, pttrnLen, src, srcLen, locale, NULL, &status);
|
||||
#endif
|
||||
|
||||
if(U_FAILURE(status)){
|
||||
fprintf(stderr, "FAILED to create UPerfTest object. Error: %s\n", u_errorName(status));
|
||||
|
@ -85,23 +70,12 @@ StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const cha
|
|||
}
|
||||
|
||||
/**
 * Release everything the test acquired.
 *
 * The constructor initialises bms (or srch) to NULL, so each handle is
 * checked before it is used. The Boyer-Moore members are only touched
 * when TEST_BOYER_MOORE_SEARCH is defined, matching the constructor.
 */
StringSearchPerformanceTest::~StringSearchPerformanceTest() {
#ifdef TEST_BOYER_MOORE_SEARCH
    CollData *data = NULL;
    UCollator *coll = NULL;

    if (bms != NULL) {
        data = bms->getData();

        if (data != NULL) {
            // NOTE(review): CollData appears to close its own collator in its
            // destructor - confirm that closing it here as well is intended.
            coll = data->getCollator();
        }

        delete bms;
    }

    delete targetString;

    if (data != NULL) {
        CollData::close(data);
    }

    if (coll != NULL) {
        ucol_close(coll);
    }
#endif

    if (pttrn != NULL) {
        free(pttrn);
    }

#ifndef TEST_BOYER_MOORE_SEARCH
    if (srch != NULL) {
        usearch_close(srch);
    }
#endif
}
|
||||
|
||||
UPerfFunction* StringSearchPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par) {
|
||||
|
@ -117,20 +91,12 @@ UPerfFunction* StringSearchPerformanceTest::runIndexedTest(int32_t index, UBool
|
|||
}
|
||||
|
||||
/**
 * Create the performance function for the forward search, using the
 * Boyer-Moore searcher when TEST_BOYER_MOORE_SEARCH is defined and the
 * UStringSearch handle otherwise.
 *
 * @return a newly allocated StringSearchPerfFunction
 */
UPerfFunction* StringSearchPerformanceTest::Test_ICU_Forward_Search(){
#ifdef TEST_BOYER_MOORE_SEARCH
    return new StringSearchPerfFunction(ICUForwardSearch, bms, src, srcLen, pttrn, pttrnLen);
#else
    return new StringSearchPerfFunction(ICUForwardSearch, srch, src, srcLen, pttrn, pttrnLen);
#endif
}
|
||||
|
||||
/**
 * Builds the perf-function wrapper that runs ICUBackwardSearch over the
 * loaded source text and pattern. The caller receives a newly allocated
 * object; which search handle is passed depends on TEST_BOYER_MOORE_SEARCH.
 */
UPerfFunction* StringSearchPerformanceTest::Test_ICU_Backward_Search(){
#ifdef TEST_BOYER_MOORE_SEARCH
    return new StringSearchPerfFunction(ICUBackwardSearch, bms, src, srcLen, pttrn, pttrnLen);
#else
    return new StringSearchPerfFunction(ICUBackwardSearch, srch, src, srcLen, pttrn, pttrnLen);
#endif
}
|
||||
|
||||
|
|
|
@ -1,26 +1,17 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (C) 2008-2009 IBM, Inc. All Rights Reserved.
|
||||
* Copyright (C) 2008-2012 IBM, Inc. All Rights Reserved.
|
||||
*
|
||||
********************************************************************/
|
||||
#ifndef _STRSRCHPERF_H
|
||||
#define _STRSRCHPERF_H
|
||||
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/usearch.h"
|
||||
#include "unicode/colldata.h"
|
||||
#include "unicode/bmsearch.h"
|
||||
#include "unicode/uperf.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Build-time switch: when defined, the test drives a BoyerMooreSearch object;
 * otherwise it drives a UStringSearch handle from the usearch C API. */
#define TEST_BOYER_MOORE_SEARCH
|
||||
|
||||
#ifdef TEST_BOYER_MOORE_SEARCH
|
||||
/* Signature of a benchmarked search routine: it receives the search object,
 * the source text and its length, the pattern and its length, and an error
 * code pointer. The first parameter's type follows the switch above. */
typedef void (*StrSrchFn) (BoyerMooreSearch * bms, const UChar *src, int32_t srcLen, const UChar *pttrn, int32_t pttrnLen, UErrorCode *status);
|
||||
#else
|
||||
/* Same callback shape, taking a UStringSearch handle as the search object. */
typedef void (*StrSrchFn)(UStringSearch* srch, const UChar* src,int32_t srcLen, const UChar* pttrn, int32_t pttrnLen, UErrorCode* status);
|
||||
#endif
|
||||
|
||||
class StringSearchPerfFunction : public UPerfFunction {
|
||||
private:
|
||||
|
@ -29,39 +20,17 @@ private:
|
|||
int32_t srcLen;
|
||||
const UChar* pttrn;
|
||||
int32_t pttrnLen;
|
||||
#ifdef TEST_BOYER_MOORE_SEARCH
|
||||
BoyerMooreSearch *bms;
|
||||
#else
|
||||
UStringSearch* srch;
|
||||
#endif
|
||||
|
||||
public:
|
||||
virtual void call(UErrorCode* status) {
|
||||
#ifdef TEST_BOYER_MOORE_SEARCH
|
||||
(*fn)(bms, src, srcLen, pttrn, pttrnLen, status);
|
||||
#else
|
||||
(*fn)(srch, src, srcLen, pttrn, pttrnLen, status);
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual long getOperationsPerIteration() {
|
||||
#if 0
|
||||
return (long)(srcLen/pttrnLen);
|
||||
#else
|
||||
return (long) srcLen;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef TEST_BOYER_MOORE_SEARCH
|
||||
StringSearchPerfFunction(StrSrchFn func, BoyerMooreSearch *search, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen) {
|
||||
fn = func;
|
||||
src = source;
|
||||
srcLen = sourceLen;
|
||||
pttrn = pattern;
|
||||
pttrnLen = patternLen;
|
||||
bms = search;
|
||||
}
|
||||
#else
|
||||
StringSearchPerfFunction(StrSrchFn func, UStringSearch* search, const UChar* source,int32_t sourceLen, const UChar* pattern, int32_t patternLen) {
|
||||
fn = func;
|
||||
src = source;
|
||||
|
@ -70,7 +39,6 @@ public:
|
|||
pttrnLen = patternLen;
|
||||
srch = search;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
class StringSearchPerformanceTest : public UPerfTest {
|
||||
|
@ -79,42 +47,17 @@ private:
|
|||
int32_t srcLen;
|
||||
UChar* pttrn;
|
||||
int32_t pttrnLen;
|
||||
#ifdef TEST_BOYER_MOORE_SEARCH
|
||||
UnicodeString *targetString;
|
||||
BoyerMooreSearch *bms;
|
||||
#else
|
||||
UStringSearch* srch;
|
||||
#endif
|
||||
|
||||
public:
|
||||
StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status);
|
||||
~StringSearchPerformanceTest();
|
||||
virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = NULL);
|
||||
|
||||
UPerfFunction* Test_ICU_Forward_Search();
|
||||
|
||||
UPerfFunction* Test_ICU_Backward_Search();
|
||||
};
|
||||
|
||||
|
||||
#ifdef TEST_BOYER_MOORE_SEARCH
|
||||
/**
 * Benchmark body for the Boyer-Moore build: repeatedly calls bms->search(),
 * restarting each search at the end of the previous match, until search()
 * reports no further match. The source/pattern arguments and the status
 * pointer are not used here.
 */
void ICUForwardSearch(BoyerMooreSearch *bms, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen, UErrorCode * /*status*/) {
    int32_t matchStart = -1;
    int32_t matchEnd = -1;

    for (int32_t from = 0; bms->search(from, matchStart, matchEnd); from = matchEnd) {
        /* nothing to do per match; the loop header advances the scan */
    }
}
|
||||
|
||||
/**
 * Benchmark body registered as the "backward" search in the Boyer-Moore
 * build. It performs the same forward scan as ICUForwardSearch: each call to
 * bms->search() starts at the end of the previous match, until no match is
 * reported. The source/pattern arguments and the status pointer are not used.
 */
void ICUBackwardSearch(BoyerMooreSearch *bms, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen, UErrorCode * /*status*/) {
    int32_t matchStart = -1;
    int32_t matchEnd = -1;

    /* NOTE: No Boyer-Moore backward search yet, so this scans forward. */
    for (int32_t from = 0; bms->search(from, matchStart, matchEnd); from = matchEnd) {
        /* nothing to do per match; the loop header advances the scan */
    }
}
|
||||
#else
|
||||
void ICUForwardSearch(UStringSearch *srch, const UChar* source, int32_t sourceLen, const UChar* pattern, int32_t patternLen, UErrorCode* status) {
|
||||
int32_t match;
|
||||
|
||||
|
@ -132,6 +75,5 @@ void ICUBackwardSearch(UStringSearch *srch, const UChar* source, int32_t sourceL
|
|||
match = usearch_previous(srch, status);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _STRSRCHPERF_H */
|
||||
|
|
Loading…
Add table
Reference in a new issue