diff --git a/icu4c/source/allinone/allinone.sln b/icu4c/source/allinone/allinone.sln index 0101caea0ab..191ed18c1d5 100644 --- a/icu4c/source/allinone/allinone.sln +++ b/icu4c/source/allinone/allinone.sln @@ -1,5 +1,5 @@ Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual Studio 2010 +# Visual C++ Express 2010 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcxproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cintltst", "..\test\cintltst\cintltst.vcxproj", "{3D1246AE-1B32-479B-BECA-AEFA97BE2321}" @@ -52,7 +52,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "iotest", "..\test\iotest\io EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icupkg", "..\tools\icupkg\icupkg.vcxproj", "{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}" EndProject -Project("{9D4211F7-2C77-439C-82F0-30A4E43BA569}") = "gendict", "..\tools\gendict\gendict.vcxproj", "{9D4211F7-2C77-439C-82F0-30A4E43BA569}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gendict", "..\tools\gendict\gendict.vcxproj", "{9D4211F7-2C77-439C-82F0-30A4E43BA569}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "letest", "..\test\letest\letest.vcxproj", "{67351485-4D18-4245-BE39-A7EF0675ACD2}" EndProject @@ -65,10 +65,6 @@ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testplug", "..\tools\icuinfo\testplug.vcxproj", "{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}" EndProject Global - GlobalSection(SubversionScc) = preSolution - Svn-Managed = True - Manager = AnkhSVN - Subversion Support for Visual Studio - EndGlobalSection GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 Debug|x64 = Debug|x64 @@ -174,12 +170,10 @@ Global {77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.Build.0 = Release|x64 {0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.ActiveCfg = Debug|Win32 {0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.Build.0 = Debug|Win32 - {0178B127-6269-407D-B112-93877BB62776}.Debug|x64.ActiveCfg = Debug|x64 - {0178B127-6269-407D-B112-93877BB62776}.Debug|x64.Build.0 = Debug|x64 + {0178B127-6269-407D-B112-93877BB62776}.Debug|x64.ActiveCfg = Debug|Win32 {0178B127-6269-407D-B112-93877BB62776}.Release|Win32.ActiveCfg = Release|Win32 {0178B127-6269-407D-B112-93877BB62776}.Release|Win32.Build.0 = Release|Win32 - {0178B127-6269-407D-B112-93877BB62776}.Release|x64.ActiveCfg = Release|x64 - {0178B127-6269-407D-B112-93877BB62776}.Release|x64.Build.0 = Release|x64 + {0178B127-6269-407D-B112-93877BB62776}.Release|x64.ActiveCfg = Release|Win32 {73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.ActiveCfg = Debug|Win32 {73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.Build.0 = Debug|Win32 {73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.ActiveCfg = Debug|x64 @@ -336,4 +330,8 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(SubversionScc) = preSolution + Svn-Managed = True + Manager = AnkhSVN - Subversion Support for Visual Studio + EndGlobalSection EndGlobal diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index e2132cc7ad9..a8cfef606a7 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -82,7 +82,7 @@ ulocdata.o measfmt.o currfmt.o curramt.o currunit.o measure.o utmscale.o \ csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o \ wintzimpl.o windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o zonemeta.o \ upluralrules.o plurrule.o plurfmt.o selfmt.o dtitvfmt.o dtitvinf.o udateintervalformat.o \ -tmunit.o tmutamt.o tmutfmt.o colldata.o bmsearch.o bms.o currpinf.o \ +tmunit.o tmutamt.o tmutfmt.o currpinf.o \ uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o uspoof_wsconf.o decfmtst.o smpdtfst.o \ ztrans.o zrule.o vzone.o fphdlimp.o fpositer.o locdspnm.o \ decNumber.o decContext.o alphaindex.o tznames.o tznames_impl.o tzgnames.o \ diff --git a/icu4c/source/i18n/bms.cpp b/icu4c/source/i18n/bms.cpp deleted file mode 100644 index 0a1de08b6fc..00000000000 --- a/icu4c/source/i18n/bms.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (C) 2008-2011, International Business Machines Corporation and Others. - * All rights reserved. - */ - -#include "unicode/utypes.h" -#include "cmemory.h" -#include "unicode/bms.h" -#include "unicode/unistr.h" -#include "unicode/colldata.h" -#include "unicode/bmsearch.h" - - -#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION - - -//#define USE_SAFE_CASTS -#ifdef USE_SAFE_CASTS -#define STATIC_CAST(type,value) static_cast(value) -#define CONST_CAST(type,value) const_cast(value) -#else -#define STATIC_CAST(type,value) (type) (value) -#define CONST_CAST(type,value) (type) (value) -#endif - -U_NAMESPACE_USE - -U_CAPI UCD * U_EXPORT2 -ucd_open(UCollator *coll, UErrorCode *status) -{ - return STATIC_CAST(UCD *, CollData::open(coll, *status)); -} - -U_CAPI void U_EXPORT2 -ucd_close(UCD *ucd) -{ - if (ucd != NULL) { - CollData *data = STATIC_CAST(CollData *, ucd); - - CollData::close(data); - } -} - -U_CAPI UCollator * U_EXPORT2 -ucd_getCollator(UCD *ucd) -{ - CollData *data = STATIC_CAST(CollData *, ucd); - - return data->getCollator(); -} - -U_CAPI void U_EXPORT2 -ucd_freeCache() -{ - CollData::freeCollDataCache(); -} - -U_CAPI void U_EXPORT2 -ucd_flushCache() -{ - CollData::flushCollDataCache(); -} - -struct BMS -{ - BoyerMooreSearch *bms; - const UnicodeString *targetString; -}; - -U_CAPI BMS * U_EXPORT2 -bms_open(UCD *ucd, - const UChar *pattern, int32_t patternLength, - const UChar *target, int32_t targetLength, - UErrorCode *status) -{ - BMS *bms = STATIC_CAST(BMS *, uprv_malloc(sizeof(BMS))); - - if (bms == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - CollData *data = (CollData *) ucd; - UnicodeString patternString(pattern, patternLength); - - if (target != NULL) { - bms->targetString = new UnicodeString(target, targetLength); - - if (bms->targetString == NULL) { - bms->bms = NULL; - *status = U_MEMORY_ALLOCATION_ERROR; - return bms; - } - } else { - bms->targetString = NULL; - } - - bms->bms = new BoyerMooreSearch(data, patternString, bms->targetString, *status); - - if (bms->bms == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } - - return bms; -} - -U_CAPI void U_EXPORT2 -bms_close(BMS *bms) -{ - delete bms->bms; - - delete bms->targetString; - - uprv_free(bms); -} - -U_CAPI UBool U_EXPORT2 -bms_empty(BMS *bms) -{ - return bms->bms->empty(); -} - -U_CAPI UCD * U_EXPORT2 -bms_getData(BMS *bms) -{ - return STATIC_CAST(UCD *, bms->bms->getData()); -} - -U_CAPI UBool U_EXPORT2 -bms_search(BMS *bms, int32_t offset, int32_t *start, int32_t *end) -{ - return bms->bms->search(offset, *start, *end); -} - -U_CAPI void U_EXPORT2 -bms_setTargetString(BMS *bms, const UChar *target, int32_t targetLength, UErrorCode *status) -{ - if (U_FAILURE(*status)) { - return; - } - - if (bms->targetString != NULL) { - delete bms->targetString; - } - - if (target != NULL) { - bms->targetString = new UnicodeString(target, targetLength); - } else { - bms->targetString = NULL; - } - - bms->bms->setTargetString(bms->targetString, *status); -} - -#endif diff --git a/icu4c/source/i18n/bmsearch.cpp b/icu4c/source/i18n/bmsearch.cpp deleted file mode 100644 index e6d81594fa8..00000000000 --- a/icu4c/source/i18n/bmsearch.cpp +++ /dev/null @@ -1,827 +0,0 @@ -/* - ****************************************************************************** - * Copyright (C) 1996-2012, International Business Machines * - * Corporation and others. All Rights Reserved. * - ****************************************************************************** - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/unistr.h" -#include "unicode/putil.h" -#include "unicode/usearch.h" - -#include "cmemory.h" -#include "unicode/coll.h" -#include "unicode/tblcoll.h" -#include "unicode/coleitr.h" -#include "unicode/ucoleitr.h" - -#include "unicode/regex.h" // TODO: make conditional on regexp being built. - -#include "unicode/uniset.h" -#include "unicode/uset.h" -#include "unicode/ustring.h" -#include "hash.h" -#include "uhash.h" -#include "ucol_imp.h" -#include "normalizer2impl.h" - -#include "unicode/colldata.h" -#include "unicode/bmsearch.h" - -U_NAMESPACE_BEGIN - -#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) -#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) -#define DELETE_ARRAY(array) uprv_free((void *) (array)) - - -struct CEI -{ - uint32_t order; - int32_t lowOffset; - int32_t highOffset; -}; - -class Target : public UMemory -{ -public: - Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status); - ~Target(); - - void setTargetString(const UnicodeString *target); - - const CEI *nextCE(int32_t offset); - const CEI *prevCE(int32_t offset); - - int32_t stringLength(); - UChar charAt(int32_t offset); - - UBool isBreakBoundary(int32_t offset); - int32_t nextBreakBoundary(int32_t offset); - int32_t nextSafeBoundary(int32_t offset); - - UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end); - - void setOffset(int32_t offset); - void setLast(int32_t last); - int32_t getOffset(); - -private: - CEI *ceb; - int32_t bufferSize; - int32_t bufferMin; - int32_t bufferMax; - - uint32_t strengthMask; - UCollationStrength strength; - uint32_t variableTop; - UBool toShift; - UCollator *coll; - const Normalizer2 &nfd; - - const UnicodeString *targetString; - const UChar *targetBuffer; - int32_t targetLength; - - UCollationElements *elements; - UBreakIterator *charBreakIterator; -}; - -Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status) - : bufferSize(0), bufferMin(0), bufferMax(0), - strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator), - nfd(*Normalizer2Factory::getNFDInstance(status)), - targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL) -{ - strength = ucol_getStrength(coll); - toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; - variableTop = ucol_getVariableTop(coll, &status); - - // find the largest expansion - uint8_t maxExpansion = 0; - for (const uint8_t *expansion = coll->expansionCESize; *expansion != 0; expansion += 1) { - if (*expansion > maxExpansion) { - maxExpansion = *expansion; - } - } - - // room for an extra character on each end, plus 4 for safety - bufferSize = patternLength + (2 * maxExpansion) + 4; - - ceb = NEW_ARRAY(CEI, bufferSize); - - if (ceb == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - if (target != NULL) { - setTargetString(target); - } - - switch (strength) - { - default: - strengthMask |= UCOL_TERTIARYORDERMASK; - /* fall through */ - - case UCOL_SECONDARY: - strengthMask |= UCOL_SECONDARYORDERMASK; - /* fall through */ - - case UCOL_PRIMARY: - strengthMask |= UCOL_PRIMARYORDERMASK; - } -} - -Target::~Target() -{ - ubrk_close(charBreakIterator); - ucol_closeElements(elements); - - DELETE_ARRAY(ceb); -} - -void Target::setTargetString(const UnicodeString *target) -{ - if (charBreakIterator != NULL) { - ubrk_close(charBreakIterator); - ucol_closeElements(elements); - } - - targetString = target; - - if (targetString != NULL) { - UErrorCode status = U_ZERO_ERROR; - - targetBuffer = targetString->getBuffer(); - targetLength = targetString->length(); - - elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status); - ucol_forceHanImplicit(elements, &status); - - charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status), - targetBuffer, targetLength, &status); - } else { - targetBuffer = NULL; - targetLength = 0; - } -} - -const CEI *Target::nextCE(int32_t offset) -{ - UErrorCode status = U_ZERO_ERROR; - int32_t low = -1, high = -1; - uint32_t order; - UBool cont = FALSE; - - if (offset >= bufferMin && offset < bufferMax) { - return &ceb[offset]; - } - - if (bufferMax >= bufferSize || offset != bufferMax) { - return NULL; - } - - do { - low = ucol_getOffset(elements); - order = ucol_next(elements, &status); - high = ucol_getOffset(elements); - - if (order == (uint32_t)UCOL_NULLORDER) { - //high = low = -1; - break; - } - - cont = isContinuation(order); - order &= strengthMask; - - if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) { - if (strength >= UCOL_QUATERNARY) { - order &= UCOL_PRIMARYORDERMASK; - } else { - order = UCOL_IGNORABLE; - } - } - } while (order == UCOL_IGNORABLE); - - if (cont) { - order |= UCOL_CONTINUATION_MARKER; - } - - ceb[offset].order = order; - ceb[offset].lowOffset = low; - ceb[offset].highOffset = high; - - bufferMax += 1; - - return &ceb[offset]; -} - -const CEI *Target::prevCE(int32_t offset) -{ - UErrorCode status = U_ZERO_ERROR; - int32_t low = -1, high = -1; - uint32_t order; - UBool cont = FALSE; - - if (offset >= bufferMin && offset < bufferMax) { - return &ceb[offset]; - } - - if (bufferMax >= bufferSize || offset != bufferMax) { - return NULL; - } - - do { - high = ucol_getOffset(elements); - order = ucol_previous(elements, &status); - low = ucol_getOffset(elements); - - if (order == (uint32_t)UCOL_NULLORDER) { - break; - } - - cont = isContinuation(order); - order &= strengthMask; - - if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) { - if (strength >= UCOL_QUATERNARY) { - order &= UCOL_PRIMARYORDERMASK; - } else { - order = UCOL_IGNORABLE; - } - } - } while (order == UCOL_IGNORABLE); - - bufferMax += 1; - - if (cont) { - order |= UCOL_CONTINUATION_MARKER; - } - - ceb[offset].order = order; - ceb[offset].lowOffset = low; - ceb[offset].highOffset = high; - - return &ceb[offset]; -} - -int32_t Target::stringLength() -{ - if (targetString != NULL) { - return targetLength; - } - - return 0; -} - -UChar Target::charAt(int32_t offset) -{ - if (targetString != NULL) { - return targetBuffer[offset]; - } - - return 0x0000; -} - -void Target::setOffset(int32_t offset) -{ - UErrorCode status = U_ZERO_ERROR; - - bufferMin = 0; - bufferMax = 0; - - ucol_setOffset(elements, offset, &status); -} - -void Target::setLast(int32_t last) -{ - UErrorCode status = U_ZERO_ERROR; - - bufferMin = 0; - bufferMax = 1; - - ceb[0].order = (uint32_t)UCOL_NULLORDER; - ceb[0].lowOffset = last; - ceb[0].highOffset = last; - - ucol_setOffset(elements, last, &status); -} - -int32_t Target::getOffset() -{ - return ucol_getOffset(elements); -} - -UBool Target::isBreakBoundary(int32_t offset) -{ - return ubrk_isBoundary(charBreakIterator, offset); -} - -int32_t Target::nextBreakBoundary(int32_t offset) -{ - return ubrk_following(charBreakIterator, offset); -} - -int32_t Target::nextSafeBoundary(int32_t offset) -{ - while (offset < targetLength) { - //UChar ch = charAt(offset); - UChar ch = targetBuffer[offset]; - - if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) { - return offset; - } - - offset += 1; - } - - return targetLength; -} - -UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end) -{ - if (strength < UCOL_IDENTICAL) { - return TRUE; - } - - // Note: We could use Normalizer::compare() or similar, but for short strings - // which may not be in FCD it might be faster to just NFD them. - UErrorCode status = U_ZERO_ERROR; - UnicodeString t2, p2; - nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), t2, status); - nfd.normalize(pattern, p2, status); - // return FALSE if NFD failed - return U_SUCCESS(status) && t2 == p2; -} - -#define HASH_TABLE_SIZE 257 - -class BadCharacterTable : public UMemory -{ -public: - BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status); - ~BadCharacterTable(); - - int32_t operator[](uint32_t ce) const; - int32_t getMaxSkip() const; - int32_t minLengthInChars(int32_t index); - -private: - static int32_t hash(uint32_t ce); - - int32_t maxSkip; - int32_t badCharacterTable[HASH_TABLE_SIZE]; - - int32_t *minLengthCache; -}; - -BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status) - : minLengthCache(NULL) -{ - int32_t plen = patternCEs.size(); - - // **** need a better way to deal with this **** - if (U_FAILURE(status) || plen == 0) { - return; - } - - int32_t *history = NEW_ARRAY(int32_t, plen); - - if (history == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - for (int32_t i = 0; i < plen; i += 1) { - history[i] = -1; - } - - minLengthCache = NEW_ARRAY(int32_t, plen + 1); - - if (minLengthCache == NULL) { - DELETE_ARRAY(history); - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history); - - for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) { - badCharacterTable[j] = maxSkip; - } - - for(int32_t p = 1; p < plen; p += 1) { - minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history); - - // Make sure this entry is not bigger than the previous one. - // Otherwise, we might skip too far in some cases. - if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) { - minLengthCache[p] = minLengthCache[p - 1]; - } - } - - minLengthCache[plen] = 0; - - for(int32_t p = 0; p < plen - 1; p += 1) { - badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1]; - } - - DELETE_ARRAY(history); -} - -BadCharacterTable::~BadCharacterTable() -{ - DELETE_ARRAY(minLengthCache); -} - -int32_t BadCharacterTable::operator[](uint32_t ce) const -{ - return badCharacterTable[hash(ce)]; -} - -int32_t BadCharacterTable::getMaxSkip() const -{ - return maxSkip; -} - -int32_t BadCharacterTable::minLengthInChars(int32_t index) -{ - return minLengthCache[index]; -} - -int32_t BadCharacterTable::hash(uint32_t ce) -{ - return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE; -} - -class GoodSuffixTable : public UMemory -{ -public: - GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status); - ~GoodSuffixTable(); - - int32_t operator[](int32_t offset) const; - -private: - int32_t *goodSuffixTable; -}; - -GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status) - : goodSuffixTable(NULL) -{ - int32_t patlen = patternCEs.size(); - - // **** need a better way to deal with this **** - if (U_FAILURE(status) || patlen <= 0) { - return; - } - - int32_t *suff = NEW_ARRAY(int32_t, patlen); - int32_t start = patlen - 1, end = - 1; - int32_t maxSkip = badCharacterTable.getMaxSkip(); - - if (suff == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - // initialze suff - suff[patlen - 1] = patlen; - - for (int32_t i = patlen - 2; i >= 0; i -= 1) { - // (i > start) means we're inside the last suffix match we found - // ((patlen - 1) - end) is how far the end of that match is from end of pattern - // (i - start) is how far we are from start of that match - // (i + (patlen - 1) - end) is index of same character at end of pattern - // so if any suffix match at that character doesn't extend beyond the last match, - // it's the suffix for this character as well - if (i > start && suff[i + patlen - 1 - end] < i - start) { - suff[i] = suff[i + patlen - 1 - end]; - } else { - start = end = i; - - int32_t s = patlen; - - while (start >= 0 && patternCEs[start] == patternCEs[--s]) { - start -= 1; - } - - suff[i] = end - start; - } - } - - // now build goodSuffixTable - goodSuffixTable = NEW_ARRAY(int32_t, patlen); - - if (goodSuffixTable == NULL) { - DELETE_ARRAY(suff); - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - - // initialize entries to minLengthInChars of the pattern - for (int32_t i = 0; i < patlen; i += 1) { - goodSuffixTable[i] = maxSkip; - } - - int32_t prefix = 0; - - for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) { - if (suff[i] == i + 1) { - // this matching suffix is a prefix of the pattern - int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1); - - // for any mis-match before this suffix, we should skip - // so that the front of the pattern (i.e. the prefix) - // lines up with the front of the suffix. - // (patlen - 1 - i) is the start of the suffix - while (prefix < patlen - 1 - i) { - // value of maxSkip means never set... - if (goodSuffixTable[prefix] == maxSkip) { - goodSuffixTable[prefix] = prefixSkip; - } - - prefix += 1; - } - } - } - - for (int32_t i = 0; i < patlen - 1; i += 1) { - goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1); - } - - DELETE_ARRAY(suff); -} - -GoodSuffixTable::~GoodSuffixTable() -{ - DELETE_ARRAY(goodSuffixTable); -} - -int32_t GoodSuffixTable::operator[](int32_t offset) const -{ - return goodSuffixTable[offset]; -} - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch) - - -UBool BoyerMooreSearch::empty() -{ - return patCEs->size() <= 0; -} - -CollData *BoyerMooreSearch::getData() -{ - return data; -} - -CEList *BoyerMooreSearch::getPatternCEs() -{ - return patCEs; -} - -BadCharacterTable *BoyerMooreSearch::getBadCharacterTable() -{ - return badCharacterTable; -} - -GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable() -{ - return goodSuffixTable; -} - -BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString, - UErrorCode &status) - : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL) -{ - - if (U_FAILURE(status)) { - return; - } - - UCollator *collator = data->getCollator(); - - patCEs = new CEList(collator, patternString, status); - - if (patCEs == NULL || U_FAILURE(status)) { - return; - } - - badCharacterTable = new BadCharacterTable(*patCEs, data, status); - - if (badCharacterTable == NULL || U_FAILURE(status)) { - return; - } - - goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status); - - if (targetString != NULL) { - target = new Target(collator, targetString, patCEs->size(), status); - } -} - -BoyerMooreSearch::~BoyerMooreSearch() -{ - delete target; - delete goodSuffixTable; - delete badCharacterTable; - delete patCEs; -} - -void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status) -{ - if (U_FAILURE(status)) { - return; - } - - if (target == NULL) { - target = new Target(data->getCollator(), targetString, patCEs->size(), status); - } else { - target->setTargetString(targetString); - } -} - -// **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. **** -/* - * TODO: - * * deal with trailing (and leading?) ignorables. - * * Adding BoyerMooreSearch object slowed it down. How can we speed it up? - */ -UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end) -{ - /*UCollator *coll =*/ data->getCollator(); - int32_t plen = patCEs->size(); - int32_t tlen = target->stringLength(); - int32_t maxSkip = badCharacterTable->getMaxSkip(); - int32_t tOffset = offset + maxSkip; - - if (plen <= 0) { - // Searching for a zero length pattern always fails. - start = end = -1; - return FALSE; - } - - while (tOffset <= tlen) { - int32_t pIndex = plen - 1; - int32_t tIndex = 0; - int32_t lIndex = 0; - - if (tOffset < tlen) { - // **** we really want to skip ahead enough to **** - // **** be sure we get at least 1 non-ignorable **** - // **** CE after the end of the pattern. **** - int32_t next = target->nextSafeBoundary(tOffset + 1); - - target->setOffset(next); - - for (lIndex = 0; ; lIndex += 1) { - const CEI *cei = target->prevCE(lIndex); - int32_t low = cei->lowOffset; - int32_t high = cei->highOffset; - - if (high == 0 || (low < high && low <= tOffset)) { - if (low < tOffset) { - while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) { - lIndex -= 1; - } - - if (high > tOffset) { - tOffset = high; - } - } - - break; - } - } - } else { - target->setLast(tOffset); - lIndex = 0; - } - - tIndex = ++lIndex; - - // Iterate backward until we hit the beginning of the pattern - while (pIndex >= 0) { - uint32_t pce = (*patCEs)[pIndex]; - const CEI *tcei = target->prevCE(tIndex++); - - - if (tcei->order != pce) { - // There is a mismatch at this position. Decide how far - // over to shift the pattern, then try again. - - int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex]; -#ifdef EXTRA_CAUTIOUS - int32_t old = tOffset; -#endif - - tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1); - - if (gsOffset > tOffset) { - tOffset = gsOffset; - } - -#ifdef EXTRA_CAUTIOUS - // Make sure we don't skip backwards... - if (tOffset <= old) { - tOffset = old + 1; - } -#endif - - break; - } - - pIndex -= 1; - } - - if (pIndex < 0) { - // We made it back to the beginning of the pattern, - // which means we matched it all. Return the location. - const CEI firstCEI = *target->prevCE(tIndex - 1); - const CEI lastCEI = *target->prevCE(lIndex); - int32_t mStart = firstCEI.lowOffset; - int32_t minLimit = lastCEI.lowOffset; - int32_t maxLimit = lastCEI.highOffset; - int32_t mLimit; - UBool found = TRUE; - - target->setOffset(/*tOffset*/maxLimit); - - const CEI nextCEI = *target->nextCE(0); - - if (nextCEI.lowOffset > maxLimit) { - maxLimit = nextCEI.lowOffset; - } - - if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) { - found = FALSE; - } - - if (! target->isBreakBoundary(mStart)) { - found = FALSE; - } - - if (firstCEI.lowOffset == firstCEI.highOffset) { - found = FALSE; - } - - mLimit = maxLimit; - if (minLimit < maxLimit) { - // When the last CE's low index is same with its high index, the CE is likely - // a part of expansion. In this case, the index is located just after the - // character corresponding to the CEs compared above. If the index is right - // at the break boundary, move the position to the next boundary will result - // incorrect match length when there are ignorable characters exist between - // the position and the next character produces CE(s). See ticket#8482. - if (minLimit == lastCEI.highOffset && target->isBreakBoundary(minLimit)) { - mLimit = minLimit; - } else { - int32_t nbb = target->nextBreakBoundary(minLimit); - - if (nbb >= lastCEI.highOffset) { - mLimit = nbb; - } - } - } - - if (mLimit > maxLimit) { - found = FALSE; - } - - if (! target->isBreakBoundary(mLimit)) { - found = FALSE; - } - - if (! target->isIdentical(pattern, mStart, mLimit)) { - found = FALSE; - } - - if (found) { - start = mStart; - end = mLimit; - - return TRUE; - } - - tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip? - } - // Otherwise, we're here because of a mismatch, so keep going.... - } - - // no match - start = -1; - end = -1; - return FALSE; -} - -U_NAMESPACE_END - -#endif // #if !UCONFIG_NO_COLLATION diff --git a/icu4c/source/i18n/i18n.vcxproj b/icu4c/source/i18n/i18n.vcxproj index 0cedfe517a0..bfde1503f4d 100644 --- a/icu4c/source/i18n/i18n.vcxproj +++ b/icu4c/source/i18n/i18n.vcxproj @@ -245,12 +245,9 @@ - - - @@ -436,34 +433,6 @@ ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - - - copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - - - copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - copy "%(FullPath)" ..\..\include\unicode ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) @@ -493,20 +462,6 @@ ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - - - copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - copy "%(FullPath)" ..\..\include\unicode - - ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - copy "%(FullPath)" ..\..\include\unicode ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) @@ -1604,4 +1559,4 @@ - + \ No newline at end of file diff --git a/icu4c/source/i18n/i18n.vcxproj.filters b/icu4c/source/i18n/i18n.vcxproj.filters index 9657b633ce5..4a5f64eeddf 100644 --- a/icu4c/source/i18n/i18n.vcxproj.filters +++ b/icu4c/source/i18n/i18n.vcxproj.filters @@ -24,21 +24,12 @@ - - collation - - - collation - collation collation - - collation - collation @@ -796,21 +787,12 @@ - - collation - - - collation - collation collation - - collation - collation @@ -1026,4 +1008,4 @@ formatting - + \ No newline at end of file diff --git a/icu4c/source/i18n/ucln_in.h b/icu4c/source/i18n/ucln_in.h index 4022f34496e..59fa80e0bba 100644 --- a/icu4c/source/i18n/ucln_in.h +++ b/icu4c/source/i18n/ucln_in.h @@ -48,7 +48,6 @@ typedef enum ECleanupI18NType { UCLN_I18N_UCOL_RES, UCLN_I18N_UCOL_BLD, UCLN_I18N_CSDET, - UCLN_I18N_COLL_DATA, UCLN_I18N_INDEX_CHARACTERS, UCLN_I18N_GENDERINFO, UCLN_I18N_CDFINFO, diff --git a/icu4c/source/i18n/unicode/bms.h b/icu4c/source/i18n/unicode/bms.h deleted file mode 100644 index 3310248de0a..00000000000 --- a/icu4c/source/i18n/unicode/bms.h +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright (C) 1996-2012, International Business Machines Corporation and Others. - * All rights reserved. - */ - -/** - * \file - * \brief C API: Boyer-Moore StringSearch prototype. - * \internal - */ - -#ifndef _BMS_H -#define _BMS_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/ucol.h" - -#ifndef U_HIDE_INTERNAL_API - -/** - * A UCD object holds the Collator-specific data needed to - * compute the length of the shortest string that can - * generate a partcular list of CEs. - * - * UCD objects are quite expensive to compute. Because - * of this, they are cached. When you call ucd_open it - * returns a reference counted cached object. When you call ucd_close - * the reference count on the object is decremented but the object is not deleted. - * - * If you do not need to reuse any unreferenced objects in the cache, you can call - * ucd_flushCCache. If you no longer need any UCD - * objects, you can call ucd_freeCache - * - * @internal ICU 4.0.1 technology preview - */ -typedef void UCD; - -/** - * Open a UCD object. - * - * @param coll - the collator - * @param status - will be set if any errors occur. - * - * @return the UCD object. You must call - * ucd_close when you are done using the object. - * - * Note: if on return status is set to an error, the only safe - * thing to do with the returned object is to call ucd_close. - * - * @internal ICU 4.0.1 technology preview - */ -U_INTERNAL UCD * U_EXPORT2 -ucd_open(UCollator *coll, UErrorCode *status); - -/** - * Release a UCD object. - * - * @param ucd - the object - * - * @internal ICU 4.0.1 technology preview - */ -U_INTERNAL void U_EXPORT2 -ucd_close(UCD *ucd); - -/** - * Get the UCollator object used to create a UCD object. - * The UCollator object returned may not be the exact - * object that was used to create this object, but it will have the - * same behavior. - * - * @param ucd - the UCD object - * - * @return the UCollator used to create the given - * UCD object. - * - * @internal ICU 4.0.1 technology preview - */ -U_INTERNAL UCollator * U_EXPORT2 -ucd_getCollator(UCD *ucd); - -/** - * UCD objects are expensive to compute, and so - * may be cached. This routine will free the cached objects and delete - * the cache. - * - * WARNING: Don't call this until you are have called close - * for each UCD object that you have used. also, - * DO NOT call this if another thread may be calling ucd_flushCache - * at the same time. - * - * @internal ICU 4.0.1 technology preview - */ -U_INTERNAL void U_EXPORT2 -ucd_freeCache(); - -/** - * UCD objects are expensive to compute, and so - * may be cached. This routine will remove any unused UCD - * objects from the cache. - * - * @internal 4.0.1 technology preview - */ -U_INTERNAL void U_EXPORT2 -ucd_flushCache(); - -/** - * BMS - * - * This object holds the information needed to do a Collation sensitive Boyer-Moore search. It encapulates - * the pattern, the "bad character" and "good suffix" tables, the Collator-based data needed to compute them, - * and a reference to the text being searched. - * - * To do a search, you first need to get a UCD object by calling ucd_open. - * Then you construct a BMS object from the UCD object, the pattern - * string and the target string. Then you call the search method. Here's a code sample: - * - *
- * void boyerMooreExample(UCollator *collator, UChar *pattern, int32_t patternLen, UChar *target, int32_t targetLength)
- * {
- *     UErrorCode status = U_ZERO_ERROR;
- *     int32_t offset = 0, start = -1, end = -1;
- *     UCD *ucd = NULL);
- *     BMS *bms = NULL;
- *
- *     ucd = ucd_open(collator, &status);
- *     if (U_FAILURE(status)) {
- *         // could not create a UCD object
- *         return;
- *     }
- *
- *     BMS *bms = bms_open(ucd, pattern, patternLength, target, targetlength, &status);
- *     if (U_FAILURE(status)) {
- *         // could not create a BMS object
- *         ucd_close(ucd);
- *         return;
- *     }
- *
- *
- *     // Find all matches
- *     while (bms_search(bms, offset, &start, &end)) {
- *         // process the match between start and end
- *         ...
- *
- *         // advance past the match
- *         offset = end; 
- *     }
- *
- *     // at this point, if offset == 0, there were no matches
- *     if (offset == 0) {
- *         // handle the case of no matches
- *     }
- *
- *     bms_close(bms);
- *     ucd_close(ucd);
- *
- *     // UCD objects are cached, so the call to
- *     // ucd_close doesn't delete the object.
- *     // Call this if you don't need the object any more.
- *     ucd_flushCache();
- * }
- * 
- * - * NOTE: This is a technology preview. The final version of this API may not bear any resenblence to this API. - * - * Knows linitations: - * 1) Backwards searching has not been implemented. - * - * 2) For Han and Hangul characters, this code ignores any Collation tailorings. In general, - * this isn't a problem, but in Korean locals, at strength 1, Hangul characters are tailored - * to be equal to Han characters with the same pronounciation. Because this code ignroes - * tailorings, searching for a Hangul character will not find a Han character and visa-versa. - * - * 3) In some cases, searching for a pattern that needs to be normalized and ends - * in a discontiguous contraction may fail. The only known cases of this are with - * the Tibetan script. For example searching for the pattern - * "\u0F7F\u0F80\u0F81\u0F82\u0F83\u0F84\u0F85" will fail. (This case is artificial. We've - * been unable to find a pratical, real-world example of this failure.) - * - * NOTE: This is a technology preview. The final version of this API may not bear any resenblence to this API. - * - * @internal ICU 4.0.1 technology preview - */ -struct BMS; -typedef struct BMS BMS; /**< @see BMS */ - -/** - * Construct a MBS object. - * - * @param ucd - A UCD object holding the Collator-sensitive data - * @param pattern - the string for which to search - * @param patternLength - the length of the string for which to search - * @param target - the string in which to search - * @param targetLength - the length of the string in which to search - * @param status - will be set if any errors occur. - * - * @return the BMS object. - * - * Note: if on return status is set to an error, the only safe - * thing to do with the returned object is to call - * bms_close. - * - * @internal ICU 4.0.1 technology preview - */ -U_INTERNAL BMS * U_EXPORT2 -bms_open(UCD *ucd, - const UChar *pattern, int32_t patternLength, - const UChar *target, int32_t targetLength, - UErrorCode *status); - -/** - * Close a BMS object and release all the - * storage associated with it. - * - * @param bms - the BMS object to close. - * @internal ICU 4.0.1 technology preview - */ -U_INTERNAL void U_EXPORT2 -bms_close(BMS *bms); - -/** - * Test the pattern to see if it generates any CEs. - * - * @param bms - the BMS object - * @return TRUE if the pattern string did not generate any CEs - * - * @internal ICU 4.0.1 technology preview - */ -U_INTERNAL UBool U_EXPORT2 -bms_empty(BMS *bms); - -/** - * Get the UCD object used to create - * a given BMS object. - * - * @param bms - the BMS object - * - * @return - the UCD object used to create - * the given BMS object. - * - * @internal ICU 4.0.1 technology preview - */ -U_INTERNAL UCD * U_EXPORT2 -bms_getData(BMS *bms); - -/** - * Search for the pattern string in the target string. - * - * @param bms - the BMS object - * @param offset - the offset in the target string at which to begin the search - * @param start - will be set to the starting offset of the match, or -1 if there's no match - * @param end - will be set to the ending offset of the match, or -1 if there's no match - * - * @return TRUE if the match succeeds, FALSE otherwise. - * - * @internal ICU 4.0.1 technology preview - */ -U_INTERNAL UBool U_EXPORT2 -bms_search(BMS *bms, int32_t offset, int32_t *start, int32_t *end); - -/** - * Set the target string for the match. - * - * @param bms - the BMS object - * @param target - the new target string - * @param targetLength - the length of the new target string - * @param status - will be set if any errors occur. - * - * @internal ICU 4.0.1 technology preview - */ -U_INTERNAL void U_EXPORT2 -bms_setTargetString(BMS *bms, const UChar *target, int32_t targetLength, UErrorCode *status); - -#endif /* U_HIDE_INTERNAL_API */ - -#endif - -#endif /* _BMS_H */ diff --git a/icu4c/source/i18n/unicode/bmsearch.h b/icu4c/source/i18n/unicode/bmsearch.h deleted file mode 100644 index 81e1edde4dd..00000000000 --- a/icu4c/source/i18n/unicode/bmsearch.h +++ /dev/null @@ -1,228 +0,0 @@ -/* - ****************************************************************************** - * Copyright (C) 1996-2011, International Business Machines * - * Corporation and others. All Rights Reserved. * - ****************************************************************************** - */ - -/** - * \file - * \brief C++ API: Boyer-Moore StringSearch technology preview - * \internal ICU 4.0.1 technology preview - */ - -#ifndef B_M_SEARCH_H -#define B_M_SEARCH_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/uobject.h" -#include "unicode/ucol.h" - -#include "unicode/colldata.h" - -U_NAMESPACE_BEGIN - -class BadCharacterTable; -class GoodSuffixTable; -class Target; - -#ifndef U_HIDE_INTERNAL_API -/** - * BoyerMooreSearch - * - * This object holds the information needed to do a Collation sensitive Boyer-Moore search. It encapulates - * the pattern, the "bad character" and "good suffix" tables, the Collator-based data needed to compute them, - * and a reference to the text being searched. - * - * To do a search, you fist need to get a CollData object by calling CollData::open. - * Then you construct a BoyerMooreSearch object from the CollData object, the pattern - * string and the target string. Then you call the search method. Here's a code sample: - * - *
- * void boyerMooreExample(UCollator *collator, UnicodeString *pattern, UnicodeString *target)
- * {
- *     UErrorCode status = U_ZERO_ERROR;
- *     CollData *collData = CollData::open(collator, status);
- *
- *     if (U_FAILURE(status)) {
- *         // could not create a CollData object
- *         return;
- *     }
- *
- *     BoyerMooreSearch *search = new BoyerMooreSearch(collData, *patternString, target, status);
- *
- *     if (U_FAILURE(status)) {
- *         // could not create a BoyerMooreSearch object
- *         CollData::close(collData);
- *         return;
- *     }
- *
- *     int32_t offset = 0, start = -1, end = -1;
- *
- *     // Find all matches
- *     while (search->search(offset, start, end)) {
- *         // process the match between start and end
- *         ...
- *         // advance past the match
- *         offset = end; 
- *     }
- *
- *     // at this point, if offset == 0, there were no matches
- *     if (offset == 0) {
- *         // handle the case of no matches
- *     }
- *
- *     delete search;
- *     CollData::close(collData);
- *
- *     // CollData objects are cached, so the call to
- *     // CollData::close doesn't delete the object.
- *     // Call this if you don't need the object any more.
- *     CollData::flushCollDataCache();
- * }
- * 
- * - * NOTE: This is a technology preview. The final version of this API may not bear any resenblence to this API. - * - * Knows linitations: - * 1) Backwards searching has not been implemented. - * - * 2) For Han and Hangul characters, this code ignores any Collation tailorings. In general, - * this isn't a problem, but in Korean locals, at strength 1, Hangul characters are tailored - * to be equal to Han characters with the same pronounciation. Because this code ignroes - * tailorings, searching for a Hangul character will not find a Han character and visa-versa. - * - * 3) In some cases, searching for a pattern that needs to be normalized and ends - * in a discontiguous contraction may fail. The only known cases of this are with - * the Tibetan script. For example searching for the pattern - * "\u0F7F\u0F80\u0F81\u0F82\u0F83\u0F84\u0F85" will fail. (This case is artificial. We've - * been unable to find a pratical, real-world example of this failure.) - * - * @internal ICU 4.0.1 technology preview - * - * @see CollData - */ -class U_I18N_API BoyerMooreSearch : public UObject -{ -public: - /** - * Construct a BoyerMooreSearch object. - * - * @param theData - A CollData object holding the Collator-sensitive data - * @param patternString - the string for which to search - * @param targetString - the string in which to search or NULL if youu will - * set it later by calling setTargetString. - * @param status - will be set if any errors occur. - * - * Note: if on return, status is set to an error code, - * the only safe thing to do with this object is to call - * the destructor. - * - * @internal ICU 4.0.1 technology preview - */ - BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString, UErrorCode &status); - - /** - * The desstructor - * - * @internal ICU 4.0.1 technology preview - */ - ~BoyerMooreSearch(); - - /** - * Test the pattern to see if it generates any CEs. - * - * @return TRUE if the pattern string did not generate any CEs - * - * @internal ICU 4.0.1 technology preview - */ - UBool empty(); - - /** - * Search for the pattern string in the target string. - * - * @param offset - the offset in the target string at which to begin the search - * @param start - will be set to the starting offset of the match, or -1 if there's no match - * @param end - will be set to the ending offset of the match, or -1 if there's no match - * - * @return TRUE if the match succeeds, FALSE otherwise. - * - * @internal ICU 4.0.1 technology preview - */ - UBool search(int32_t offset, int32_t &start, int32_t &end); - - /** - * Set the target string for the match. - * - * @param targetString - the new target string - * @param status - will be set if any errors occur. - * - * @internal ICU 4.0.1 technology preview - */ - void setTargetString(const UnicodeString *targetString, UErrorCode &status); - - // **** no longer need these? **** - /** - * Return the CollData object used for searching - * - * @return the CollData object used for searching - * - * @internal ICU 4.0.1 technology preview - */ - CollData *getData(); - - /** - * Return the CEs generated by the pattern string. - * - * @return a CEList object holding the CEs generated by the pattern string. - * - * @internal ICU 4.0.1 technology preview - */ - CEList *getPatternCEs(); - - /** - * Return the BadCharacterTable object computed for the pattern string. - * - * @return the BadCharacterTable object. - * - * @internal ICU 4.0.1 technology preview - */ - BadCharacterTable *getBadCharacterTable(); - - /** - * Return the GoodSuffixTable object computed for the pattern string. - * - * @return the GoodSuffixTable object computed for the pattern string. - * - * @internal ICU 4.0.1 technology preview - */ - GoodSuffixTable *getGoodSuffixTable(); - - /** - * UObject glue... - * @internal ICU 4.0.1 technology preview - */ - virtual UClassID getDynamicClassID() const; - /** - * UObject glue... - * @internal ICU 4.0.1 technology preview - */ - static UClassID getStaticClassID(); - -private: - CollData *data; - CEList *patCEs; - BadCharacterTable *badCharacterTable; - GoodSuffixTable *goodSuffixTable; - UnicodeString pattern; - Target *target; -}; -#endif /* U_HIDE_INTERNAL_API */ - -U_NAMESPACE_END - -#endif // #if !UCONFIG_NO_COLLATION -#endif // #ifndef B_M_SEARCH_H diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index 31fc8b382d0..e02f097ba9b 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -37,7 +37,7 @@ DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"' LIBS = $(LIBCTESTFW) $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M) $(LIB_THREAD) OBJECTS = aliastst.o allcoll.o apicoll.o astrotst.o callimts.o calregts.o caltest.o \ -caltztst.o canittst.o citrtest.o cntabcol.o convtest.o currcoll.o \ +caltztst.o canittst.o citrtest.o cntabcol.o colldata.o convtest.o currcoll.o \ fldset.o dadrfmt.o dadrcal.o dadrcoll.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts.o dtfmttst.o \ dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \ itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \ diff --git a/icu4c/source/i18n/colldata.cpp b/icu4c/source/test/intltest/colldata.cpp similarity index 62% rename from icu4c/source/i18n/colldata.cpp rename to icu4c/source/test/intltest/colldata.cpp index 875aa9da52b..2f561c67086 100644 --- a/icu4c/source/i18n/colldata.cpp +++ b/icu4c/source/test/intltest/colldata.cpp @@ -10,7 +10,6 @@ #if !UCONFIG_NO_COLLATION #include "unicode/unistr.h" -#include "unicode/putil.h" #include "unicode/usearch.h" #include "cmemory.h" @@ -26,27 +25,16 @@ #include "unicode/ustring.h" #include "hash.h" #include "uhash.h" -#include "ucln_in.h" #include "ucol_imp.h" -#include "umutex.h" #include "uassert.h" -#include "unicode/colldata.h" - -U_NAMESPACE_BEGIN +#include "colldata.h" #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) #define DELETE_ARRAY(array) uprv_free((void *) (array)) #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CEList) - -#ifdef INSTRUMENT_CELIST -int32_t CEList::_active = 0; -int32_t CEList::_histogram[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -#endif - CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0) { @@ -78,11 +66,6 @@ CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) strengthMask |= UCOL_PRIMARYORDERMASK; } -#ifdef INSTRUMENT_CELIST - _active += 1; - _histogram[0] += 1; -#endif - ces = ceBuffer; while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) { @@ -114,10 +97,6 @@ CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) CEList::~CEList() { -#ifdef INSTRUMENT_CELIST - _active -= 1; -#endif - if (ces != ceBuffer) { DELETE_ARRAY(ces); } @@ -131,11 +110,6 @@ void CEList::add(uint32_t ce, UErrorCode &status) if (listSize >= listMax) { int32_t newMax = listMax + CELIST_BUFFER_SIZE; - -#ifdef INSTRUMENT_CELIST - _histogram[listSize / CELIST_BUFFER_SIZE] += 1; -#endif - uint32_t *newCEs = NEW_ARRAY(uint32_t, newMax); if (newCEs == NULL) { @@ -190,14 +164,6 @@ int32_t CEList::size() const return listSize; } -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringList) - -#ifdef INSTRUMENT_STRING_LIST -int32_t StringList::_lists = 0; -int32_t StringList::_strings = 0; -int32_t StringList::_histogram[101] = {0}; -#endif - StringList::StringList(UErrorCode &status) : strings(NULL), listMax(STRING_LIST_BUFFER_SIZE), listSize(0) { @@ -211,11 +177,6 @@ StringList::StringList(UErrorCode &status) status = U_MEMORY_ALLOCATION_ERROR; return; } - -#ifdef INSTRUMENT_STRING_LIST - _lists += 1; - _histogram[0] += 1; -#endif } StringList::~StringList() @@ -228,11 +189,6 @@ void StringList::add(const UnicodeString *string, UErrorCode &status) if (U_FAILURE(status)) { return; } - -#ifdef INSTRUMENT_STRING_LIST - _strings += 1; -#endif - if (listSize >= listMax) { int32_t newMax = listMax + STRING_LIST_BUFFER_SIZE; UnicodeString *newStrings = new UnicodeString[newMax]; @@ -243,17 +199,6 @@ void StringList::add(const UnicodeString *string, UErrorCode &status) for (int32_t i=0; i 100) { - _h = 100; - } - - _histogram[_h] += 1; -#endif - delete[] strings; strings = newStrings; listMax = newMax; @@ -295,38 +240,11 @@ deleteStringList(void *obj) delete strings; } -static void U_CALLCONV -deleteCEList(void *obj) -{ - CEList *list = (CEList *) obj; - - delete list; -} - -static void U_CALLCONV -deleteUnicodeStringKey(void *obj) -{ - UnicodeString *key = (UnicodeString *) obj; - - delete key; -} - -static void U_CALLCONV -deleteChars(void * /*obj*/) -{ - // char *chars = (char *) obj; - // All the key strings are owned by the - // CollData objects and don't need to - // be freed here. - //DELETE_ARRAY(chars); -} - U_CDECL_END -class CEToStringsMap : public UMemory +class CEToStringsMap { public: - CEToStringsMap(UErrorCode &status); ~CEToStringsMap(); @@ -334,7 +252,6 @@ public: StringList *getStringList(uint32_t ce) const; private: - void putStringList(uint32_t ce, StringList *stringList, UErrorCode &status); UHashtable *map; }; @@ -390,260 +307,10 @@ void CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCo uhash_iput(map, ce, (void *) stringList, &status); } -class StringToCEsMap : public UMemory -{ -public: - StringToCEsMap(UErrorCode &status); - ~StringToCEsMap(); - - void put(const UnicodeString *string, const CEList *ces, UErrorCode &status); - const CEList *get(const UnicodeString *string); - void free(const CEList *list); - -private: - - - UHashtable *map; -}; - -StringToCEsMap::StringToCEsMap(UErrorCode &status) - : map(NULL) -{ - if (U_FAILURE(status)) { - return; - } - - map = uhash_open(uhash_hashUnicodeString, - uhash_compareUnicodeString, - uhash_compareLong, - &status); - - if (U_FAILURE(status)) { - return; - } - - uhash_setValueDeleter(map, deleteCEList); - uhash_setKeyDeleter(map, deleteUnicodeStringKey); -} - -StringToCEsMap::~StringToCEsMap() -{ - uhash_close(map); -} - -void StringToCEsMap::put(const UnicodeString *string, const CEList *ces, UErrorCode &status) -{ - uhash_put(map, (void *) string, (void *) ces, &status); -} - -const CEList *StringToCEsMap::get(const UnicodeString *string) -{ - return (const CEList *) uhash_get(map, string); -} - -class CollDataCacheEntry : public UMemory -{ -public: - CollDataCacheEntry(CollData *theData); - ~CollDataCacheEntry(); - - CollData *data; - int32_t refCount; -}; - -CollDataCacheEntry::CollDataCacheEntry(CollData *theData) - : data(theData), refCount(1) -{ - // nothing else to do -} - -CollDataCacheEntry::~CollDataCacheEntry() -{ - // check refCount? - delete data; -} - -class CollDataCache : public UMemory -{ -public: - CollDataCache(UErrorCode &status); - ~CollDataCache(); - - CollData *get(UCollator *collator, UErrorCode &status); - void unref(CollData *collData); - - void flush(); - -private: - static char *getKey(UCollator *collator, char *keyBuffer, int32_t *charBufferLength); - static void deleteKey(char *key); - - UHashtable *cache; -}; -static UMutex lock = U_MUTEX_INITIALIZER; - -U_CDECL_BEGIN -static void U_CALLCONV -deleteCollDataCacheEntry(void *obj) -{ - CollDataCacheEntry *entry = (CollDataCacheEntry *) obj; - - delete entry; -} -U_CDECL_END - -CollDataCache::CollDataCache(UErrorCode &status) - : cache(NULL) -{ - if (U_FAILURE(status)) { - return; - } - - cache = uhash_open(uhash_hashChars, uhash_compareChars, uhash_compareLong, &status); - - if (U_FAILURE(status)) { - return; - } - - uhash_setValueDeleter(cache, deleteCollDataCacheEntry); - uhash_setKeyDeleter(cache, deleteChars); -} - -CollDataCache::~CollDataCache() -{ - umtx_lock(&lock); - uhash_close(cache); - cache = NULL; - umtx_unlock(&lock); -} - -CollData *CollDataCache::get(UCollator *collator, UErrorCode &status) -{ - char keyBuffer[KEY_BUFFER_SIZE]; - int32_t keyLength = KEY_BUFFER_SIZE; - char *key = getKey(collator, keyBuffer, &keyLength); - CollData *result = NULL, *newData = NULL; - CollDataCacheEntry *entry = NULL, *newEntry = NULL; - - umtx_lock(&lock); - entry = (CollDataCacheEntry *) uhash_get(cache, key); - - if (entry == NULL) { - umtx_unlock(&lock); - - newData = new CollData(collator, key, keyLength, status); - newEntry = new CollDataCacheEntry(newData); - - if (U_FAILURE(status) || newData == NULL || newEntry == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - umtx_lock(&lock); - entry = (CollDataCacheEntry *) uhash_get(cache, key); - - if (entry == NULL) { - uhash_put(cache, newData->key, newEntry, &status); - umtx_unlock(&lock); - - if (U_FAILURE(status)) { - delete newEntry; - delete newData; - - return NULL; - } - - return newData; - } - } - - result = entry->data; - entry->refCount += 1; - umtx_unlock(&lock); - - if (key != keyBuffer) { - deleteKey(key); - } - - if (newEntry != NULL) { - delete newEntry; - delete newData; - } - - return result; -} - -void CollDataCache::unref(CollData *collData) -{ - CollDataCacheEntry *entry = NULL; - - umtx_lock(&lock); - entry = (CollDataCacheEntry *) uhash_get(cache, collData->key); - - if (entry != NULL) { - entry->refCount -= 1; - } - umtx_unlock(&lock); -} - -char *CollDataCache::getKey(UCollator *collator, char *keyBuffer, int32_t *keyBufferLength) -{ - UErrorCode status = U_ZERO_ERROR; - int32_t len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status); - - if (len >= *keyBufferLength) { - *keyBufferLength = (len + 2) & ~1; // round to even length, leaving room for terminating null - keyBuffer = NEW_ARRAY(char, *keyBufferLength); - status = U_ZERO_ERROR; - - len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status); - } - - keyBuffer[len] = '\0'; - - return keyBuffer; -} - -void CollDataCache::flush() -{ - const UHashElement *element; - int32_t pos = -1; - - umtx_lock(&lock); - while ((element = uhash_nextElement(cache, &pos)) != NULL) { - CollDataCacheEntry *entry = (CollDataCacheEntry *) element->value.pointer; - - if (entry->refCount <= 0) { - uhash_removeElement(cache, element); - } - } - umtx_unlock(&lock); -} - -void CollDataCache::deleteKey(char *key) -{ - DELETE_ARRAY(key); -} - -U_CDECL_BEGIN -static UBool coll_data_cleanup(void) { - CollData::freeCollDataCache(); - return TRUE; -} -U_CDECL_END - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollData) - -CollData::CollData() -{ - // nothing -} - #define CLONE_COLLATOR -//#define CACHE_CELISTS -CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength, UErrorCode &status) - : coll(NULL), charsToCEList(NULL), ceToCharsStartingWith(NULL), key(NULL) +CollData::CollData(UCollator *collator, UErrorCode &status) + : coll(NULL), ceToCharsStartingWith(NULL) { // [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]] // i.e. other, control, private use, format, surrogate @@ -665,35 +332,12 @@ CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength, USet *contractions = uset_openEmpty(); int32_t itemCount; -#ifdef CACHE_CELISTS - charsToCEList = new StringToCEsMap(status); - - if (U_FAILURE(status)) { - goto bail; - } -#else - charsToCEList = NULL; -#endif - ceToCharsStartingWith = new CEToStringsMap(status); if (U_FAILURE(status)) { goto bail; } - if (cacheKeyLength > KEY_BUFFER_SIZE) { - key = NEW_ARRAY(char, cacheKeyLength); - - if (key == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - goto bail; - } - } else { - key = keyBuffer; - } - - ARRAY_COPY(key, cacheKey, cacheKeyLength); - #ifdef CLONE_COLLATOR coll = ucol_safeClone(collator, NULL, NULL, &status); @@ -730,12 +374,8 @@ CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength, ceToCharsStartingWith->put(ceList->get(0), st, status); -#ifdef CACHE_CELISTS - charsToCEList->put(st, ceList, status); -#else delete ceList; delete st; -#endif } } else if (len > 0) { UnicodeString *st = new UnicodeString(buffer, len); @@ -749,12 +389,8 @@ CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength, ceToCharsStartingWith->put(ceList->get(0), st, status); -#ifdef CACHE_CELISTS - charsToCEList->put(st, ceList, status); -#else delete ceList; delete st; -#endif } else { // shouldn't happen... } @@ -821,15 +457,7 @@ CollData::~CollData() ucol_close(coll); #endif - if (key != keyBuffer) { - DELETE_ARRAY(key); - } - delete ceToCharsStartingWith; - -#ifdef CACHE_CELISTS - delete charsToCEList; -#endif } UCollator *CollData::getCollator() const @@ -844,9 +472,6 @@ const StringList *CollData::getStringList(int32_t ce) const const CEList *CollData::getCEList(const UnicodeString *string) const { -#ifdef CACHE_CELISTS - return charsToCEList->get(string); -#else UErrorCode status = U_ZERO_ERROR; const CEList *list = new CEList(coll, *string, status); @@ -856,14 +481,11 @@ const CEList *CollData::getCEList(const UnicodeString *string) const } return list; -#endif } void CollData::freeCEList(const CEList *list) { -#ifndef CACHE_CELISTS delete list; -#endif } int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t *history) const @@ -885,9 +507,6 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t for (int32_t s = 0; s < stringCount; s += 1) { const UnicodeString *string = strings->get(s); -#ifdef CACHE_CELISTS - const CEList *ceList2 = charsToCEList->get(string); -#else UErrorCode status = U_ZERO_ERROR; const CEList *ceList2 = new CEList(coll, *string, status); @@ -895,7 +514,6 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t delete ceList2; ceList2 = NULL; } -#endif if (ceList->matchesAt(offset, ceList2)) { U_ASSERT(ceList2 != NULL); @@ -909,9 +527,8 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t if (rlength <= 0) { // delete before continue to avoid memory leak. -#ifndef CACHE_CELISTS delete ceList2; -#endif + // ignore any dead ends continue; } @@ -922,9 +539,7 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t } } -#ifndef CACHE_CELISTS delete ceList2; -#endif } } @@ -1020,89 +635,4 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset) const return minLength; } -CollData *CollData::open(UCollator *collator, UErrorCode &status) -{ - if (U_FAILURE(status)) { - return NULL; - } - - CollDataCache *cache = getCollDataCache(); - - return cache->get(collator, status); -} - -void CollData::close(CollData *collData) -{ - CollDataCache *cache = getCollDataCache(); - - cache->unref(collData); -} - -CollDataCache *CollData::collDataCache = NULL; - -CollDataCache *CollData::getCollDataCache() -{ - UErrorCode status = U_ZERO_ERROR; - CollDataCache *cache = NULL; - - UMTX_CHECK(NULL, collDataCache, cache); - - if (cache == NULL) { - cache = new CollDataCache(status); - - if (U_FAILURE(status)) { - delete cache; - return NULL; - } - - umtx_lock(NULL); - if (collDataCache == NULL) { - collDataCache = cache; - - ucln_i18n_registerCleanup(UCLN_I18N_COLL_DATA, coll_data_cleanup); - } - umtx_unlock(NULL); - - if (collDataCache != cache) { - delete cache; - } - } - - return collDataCache; -} - -void CollData::freeCollDataCache() -{ - CollDataCache *cache = NULL; - - UMTX_CHECK(NULL, collDataCache, cache); - - if (cache != NULL) { - umtx_lock(NULL); - if (collDataCache != NULL) { - collDataCache = NULL; - } else { - cache = NULL; - } - umtx_unlock(NULL); - - delete cache; - } -} - -void CollData::flushCollDataCache() -{ - CollDataCache *cache = NULL; - - UMTX_CHECK(NULL, collDataCache, cache); - - // **** this will fail if the another **** - // **** thread deletes the cache here **** - if (cache != NULL) { - cache->flush(); - } -} - -U_NAMESPACE_END - #endif // #if !UCONFIG_NO_COLLATION diff --git a/icu4c/source/i18n/unicode/colldata.h b/icu4c/source/test/intltest/colldata.h similarity index 61% rename from icu4c/source/i18n/unicode/colldata.h rename to icu4c/source/test/intltest/colldata.h index d9153d04818..0af1017d3e5 100644 --- a/icu4c/source/i18n/unicode/colldata.h +++ b/icu4c/source/test/intltest/colldata.h @@ -7,10 +7,17 @@ /** * \file - * \brief C++ API: Collation data used to compute minLengthInChars. + * \brief Originally, added as C++ API for Collation data used to compute minLengthInChars * \internal */ - + +/* + * Note: This module was incldued in ICU 4.0.1 as @internal technology preview for supporting + * Boyer-Moore string search API. For now, only SSearchTest depends on this module. I temporaly + * moved the module from i18n directory to intltest, because we have no plan to publish this + * as public API. (2012-12-18 yoshito) + */ + #ifndef COLL_DATA_H #define COLL_DATA_H @@ -18,21 +25,10 @@ #if !UCONFIG_NO_COLLATION -#include "unicode/uobject.h" #include "unicode/ucol.h" -U_NAMESPACE_BEGIN - -#ifndef U_HIDE_INTERNAL_API -/** - * The size of the internal buffer for the Collator's short description string. - * @internal ICU 4.0.1 technology preview - */ -#define KEY_BUFFER_SIZE 64 - /** * The size of the internal CE buffer in a CEList object - * @internal ICU 4.0.1 technology preview */ #define CELIST_BUFFER_SIZE 4 @@ -40,31 +36,19 @@ U_NAMESPACE_BEGIN * \def INSTRUMENT_CELIST * Define this to enable the CEList objects to collect * statistics. - * @internal ICU 4.0.1 technology preview */ -//#define INSTRUMENT_CELIST /** * The size of the initial list in a StringList object. - * @internal ICU 4.0.1 technology preview */ #define STRING_LIST_BUFFER_SIZE 16 -/** - * \def INSTRUMENT_STRING_LIST - * Define this to enable the StringList objects to - * collect statistics. - * @internal ICU 4.0.1 technology preview - */ -//#define INSTRUMENT_STRING_LIST - /** * This object holds a list of CEs generated from a particular * UnicodeString * - * @internal ICU 4.0.1 technology preview */ -class U_I18N_API CEList : public UObject +class CEList { public: /** @@ -77,14 +61,11 @@ public: * Note: if on return, status is set to an error code, * the only safe thing to do with this object is to call * the destructor. - * - * @internal ICU 4.0.1 technology preview */ CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status); /** * The destructor. - * @internal ICU 4.0.1 technology preview */ ~CEList(); @@ -92,8 +73,6 @@ public: * Return the number of CEs in the list. * * @return the number of CEs in the list. - * - * @internal ICU 4.0.1 technology preview */ int32_t size() const; @@ -103,8 +82,6 @@ public: * @param index - the index of the CE to return * * @return the CE, or 0 if index is out of range - * - * @internal ICU 4.0.1 technology preview */ uint32_t get(int32_t index) const; @@ -116,8 +93,6 @@ public: * @param other - the other CEList * * @return TRUE if the CEs match, FALSE otherwise. - * - * @internal ICU 4.0.1 technology preview */ UBool matchesAt(int32_t offset, const CEList *other) const; @@ -127,22 +102,9 @@ public: * @param index - the index * * @return a reference to the given CE in the list - * - * @internal ICU 4.0.1 technology preview */ uint32_t &operator[](int32_t index) const; - /** - * UObject glue... - * @internal ICU 4.0.1 technology preview - */ - virtual UClassID getDynamicClassID() const; - /** - * UObject glue... - * @internal ICU 4.0.1 technology preview - */ - static UClassID getStaticClassID(); - private: void add(uint32_t ce, UErrorCode &status); @@ -150,21 +112,14 @@ private: uint32_t *ces; int32_t listMax; int32_t listSize; - -#ifdef INSTRUMENT_CELIST - static int32_t _active; - static int32_t _histogram[10]; -#endif }; /** * StringList * * This object holds a list of UnicodeString objects. - * - * @internal ICU 4.0.1 technology preview */ -class U_I18N_API StringList : public UObject +class StringList { public: /** @@ -175,15 +130,11 @@ public: * Note: if on return, status is set to an error code, * the only safe thing to do with this object is to call * the destructor. - * - * @internal ICU 4.0.1 technology preview */ StringList(UErrorCode &status); /** * The destructor. - * - * @internal ICU 4.0.1 technology preview */ ~StringList(); @@ -192,8 +143,6 @@ public: * * @param string - the string to add * @param status - will be set if any errors occur. - * - * @internal ICU 4.0.1 technology preview */ void add(const UnicodeString *string, UErrorCode &status); @@ -203,8 +152,6 @@ public: * @param chars - the address of the array of code points * @param count - the number of code points in the array * @param status - will be set if any errors occur. - * - * @internal ICU 4.0.1 technology preview */ void add(const UChar *chars, int32_t count, UErrorCode &status); @@ -215,8 +162,6 @@ public: * * @return a pointer to the UnicodeString or NULL * if index is out of bounds. - * - * @internal ICU 4.0.1 technology preview */ const UnicodeString *get(int32_t index) const; @@ -224,43 +169,22 @@ public: * Get the number of stings in the list. * * @return the number of strings in the list. - * - * @internal ICU 4.0.1 technology preview */ int32_t size() const; - /** - * the UObject glue... - * @internal ICU 4.0.1 technology preview - */ - virtual UClassID getDynamicClassID() const; - /** - * the UObject glue... - * @internal ICU 4.0.1 technology preview - */ - static UClassID getStaticClassID(); - private: UnicodeString *strings; int32_t listMax; int32_t listSize; - -#ifdef INSTRUMENT_STRING_LIST - static int32_t _lists; - static int32_t _strings; - static int32_t _histogram[101]; -#endif }; -#endif /* U_HIDE_INTERNAL_API */ + /* * Forward references to internal classes. */ class StringToCEsMap; class CEToStringsMap; -class CollDataCache; -#ifndef U_HIDE_INTERNAL_API /** * CollData * @@ -276,10 +200,8 @@ class CollDataCache; * If you do not need to reuse any unreferenced objects in the cache, you can call * CollData::flushCollDataCache. If you no longer need any CollData * objects, you can call CollData::freeCollDataCache - * - * @internal ICU 4.0.1 technology preview */ -class U_I18N_API CollData : public UObject +class CollData { public: /** @@ -287,32 +209,18 @@ public: * * @param collator - the collator * @param status - will be set if any errors occur. - * - * @return the CollData object. You must call - * close when you are done using the object. - * - * Note: if on return, status is set to an error code, - * the only safe thing to do with this object is to call - * CollData::close. - * - * @internal ICU 4.0.1 technology preview */ - static CollData *open(UCollator *collator, UErrorCode &status); + CollData(UCollator *collator, UErrorCode &status); /** - * Release a CollData object. - * - * @param collData - the object - * - * @internal ICU 4.0.1 technology preview + * The destructor. */ - static void close(CollData *collData); + ~CollData(); /** * Get the UCollator object used to create this object. * The object returned may not be the exact object that was used to * create this object, but it will have the same behavior. - * @internal ICU 4.0.1 technology preview */ UCollator *getCollator() const; @@ -325,8 +233,6 @@ public: * return a StringList object containing all * the stirngs, or NULL if there are * no such strings. - * - * @internal ICU 4.0.1 technology preview. */ const StringList *getStringList(int32_t ce) const; @@ -338,8 +244,6 @@ public: * @return a CEList object containt the CEs. You * must call freeCEList when you are finished * using the CEList/ - * - * @internal ICU 4.0.1 technology preview. */ const CEList *getCEList(const UnicodeString *string) const; @@ -347,8 +251,6 @@ public: * Release a CEList returned by getCEList. * * @param list - the CEList to free. - * - * @internal ICU 4.0.1 technology preview */ void freeCEList(const CEList *list); @@ -360,8 +262,6 @@ public: * @param offset - the offset of the first CE in the list to use. * * @return the length of the shortest string. - * - * @internal ICU 4.0.1 technology preview */ int32_t minLengthInChars(const CEList *ces, int32_t offset) const; @@ -382,75 +282,18 @@ public: * the number of cEs in the CEList * * @return the length of the shortest string. - * - * @internal ICU 4.0.1 technology preview */ int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const; - /** - * UObject glue... - * @internal ICU 4.0.1 technology preview - */ - virtual UClassID getDynamicClassID() const; - /** - * UObject glue... - * @internal ICU 4.0.1 technology preview - */ - static UClassID getStaticClassID(); - - /** - * CollData objects are expensive to compute, and so - * may be cached. This routine will free the cached objects and delete - * the cache. - * - * WARNING: Don't call this until you are have called close - * for each CollData object that you have used. also, - * DO NOT call this if another thread may be calling flushCollDataCache - * at the same time. - * - * @internal 4.0.1 technology preview - */ - static void freeCollDataCache(); - - /** - * CollData objects are expensive to compute, and so - * may be cached. This routine will remove any unused CollData - * objects from the cache. - * - * @internal 4.0.1 technology preview - */ - static void flushCollDataCache(); - private: - friend class CollDataCache; - friend class CollDataCacheEntry; - - CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status); - ~CollData(); - - CollData(); - - static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength); - - static CollDataCache *getCollDataCache(); - UCollator *coll; - StringToCEsMap *charsToCEList; CEToStringsMap *ceToCharsStartingWith; - char keyBuffer[KEY_BUFFER_SIZE]; - char *key; - - static CollDataCache *collDataCache; - uint32_t minHan; uint32_t maxHan; uint32_t jamoLimits[4]; }; -#endif /* U_HIDE_INTERNAL_API */ - -U_NAMESPACE_END #endif // #if !UCONFIG_NO_COLLATION #endif // #ifndef COLL_DATA_H diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj index f93621b1d5f..2820333e97b 100644 --- a/icu4c/source/test/intltest/intltest.vcxproj +++ b/icu4c/source/test/intltest/intltest.vcxproj @@ -1,4 +1,4 @@ - + @@ -224,6 +224,7 @@ + @@ -392,6 +393,7 @@ + @@ -537,4 +539,4 @@ - + \ No newline at end of file diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters index 5b9196e2f71..fdd84ad0a0e 100644 --- a/icu4c/source/test/intltest/intltest.vcxproj.filters +++ b/icu4c/source/test/intltest/intltest.vcxproj.filters @@ -450,6 +450,10 @@ collation + + + collation + @@ -818,5 +822,9 @@ collation + + + collation + - + \ No newline at end of file diff --git a/icu4c/source/test/intltest/ssearch.cpp b/icu4c/source/test/intltest/ssearch.cpp index deb43a7c94a..6d303ca5e38 100644 --- a/icu4c/source/test/intltest/ssearch.cpp +++ b/icu4c/source/test/intltest/ssearch.cpp @@ -5,43 +5,28 @@ ********************************************************************** */ - #include "unicode/utypes.h" #if !UCONFIG_NO_COLLATION -#include "unicode/unistr.h" -#include "unicode/putil.h" -#include "unicode/usearch.h" - #include "cmemory.h" -#include "unicode/coll.h" -#include "unicode/tblcoll.h" -#include "unicode/coleitr.h" -#include "unicode/ucoleitr.h" - -#include "unicode/regex.h" // TODO: make conditional on regexp being built. - -#include "unicode/uniset.h" -#include "unicode/uset.h" -#include "unicode/ustring.h" -#include "hash.h" -#include "uhash.h" +#include "cstring.h" #include "ucol_imp.h" -#include "intltest.h" +#include "unicode/coll.h" +#include "unicode/tblcoll.h" +#include "unicode/usearch.h" +#include "unicode/uset.h" +#include "unicode/ustring.h" + +#include "unicode/coleitr.h" +#include "unicode/regex.h" // TODO: make conditional on regexp being built. + +#include "colldata.h" #include "ssearch.h" - -#include "unicode/colldata.h" -#include "unicode/bmsearch.h" -#include "unicode/bms.h" - #include "xmlparser.h" -#include "ucbuf.h" -#include -#include -#include +#include // for sprintf char testId[100]; @@ -89,36 +74,17 @@ void SSearchTest::runIndexedTest( int32_t index, UBool exec, const char* &name, if (exec) monkeyTest(params); break; - case 3: name = "bmMonkeyTest"; - if (exec) bmMonkeyTest(params); + case 3: name = "sharpSTest"; + if (exec) sharpSTest(); break; - case 4: name = "boyerMooreTest"; - if (exec) boyerMooreTest(); - break; - - case 5: name = "goodSuffixTest"; + case 4: name = "goodSuffixTest"; if (exec) goodSuffixTest(); break; - case 6: name = "searchTime"; + case 5: name = "searchTime"; if (exec) searchTime(); break; - - case 7: name = "bmsTest"; - if (exec) bmsTest(); - break; - - case 8: name = "bmSearchTest"; - if (exec) bmSearchTest(); - break; - - case 9: name = "udhrTest"; - if (exec) udhrTest(); - break; - case 10: name = "stringListTest"; - if (exec) stringListTest(); - break; #endif default: name = ""; break; //needed to end loop @@ -354,323 +320,6 @@ void SSearchTest::searchTest() #endif } -struct UdhrTestCase -{ - const char *locale; - const char *file; -}; - -void SSearchTest::udhrTest() -{ - UErrorCode status = U_ZERO_ERROR; - char path[PATH_BUFFER_SIZE]; - const char *udhrPath = getPath(path, "udhr"); - - if (udhrPath == NULL) { - // couldn't get path: error message already output... - return; - } - - UdhrTestCase testCases[] = { - {"en", "udhr_eng.txt"}, - {"de", "udhr_deu_1996.txt"}, - {"fr", "udhr_fra.txt"}, - {"ru", "udhr_rus.txt"}, - {"th", "udhr_tha.txt"}, - {"ja", "udhr_jpn.txt"}, - {"ko", "udhr_kor.txt"}, - {"zh", "udhr_cmn_hans.txt"}, - {"zh_Hant", "udhr_cmn_hant.txt"} - }; - - int32_t testCount = ARRAY_SIZE(testCases); - - for (int32_t t = 0; t < testCount; t += 1) { - int32_t len = 0; - char *resolvedFileName = NULL; - const char *encoding = NULL; - UCHARBUF *ucharBuf = NULL; - - ucbuf_resolveFileName(udhrPath, testCases[t].file, NULL, &len, &status); - resolvedFileName = NEW_ARRAY(char, len); - - if(resolvedFileName == NULL){ - continue; - } - - if(status == U_BUFFER_OVERFLOW_ERROR){ - status = U_ZERO_ERROR; - } - - ucbuf_resolveFileName(udhrPath, testCases[t].file, resolvedFileName, &len, &status); - ucharBuf = ucbuf_open(resolvedFileName, &encoding, TRUE, FALSE, &status); - - DELETE_ARRAY(resolvedFileName); - - if(U_FAILURE(status)){ - infoln("Could not open the input file %s. Test skipped\n", testCases[t].file); - continue; - } - - int32_t targetLen = 0; - const UChar *target = ucbuf_getBuffer(ucharBuf, &targetLen, &status); - - /* The first line of the file contains the pattern */ - int32_t start = 0, end = 0, plen = 0; - - for(end = start; ; end += 1) { - UChar ch = target[end]; - - if (ch == 0x000A || ch == 0x000D || ch == 0x2028) { - break; - } - } - - plen = end - start; - - UChar *pattern = NEW_ARRAY(UChar, plen); - for (int32_t i = 0; i < plen; i += 1) { - pattern[i] = target[start++]; - } - - int32_t offset = 0; - UCollator *coll = ucol_open(testCases[t].locale, &status); - UCD *ucd = NULL; - BMS *bms = NULL; - - if (U_FAILURE(status)) { - errln("Could not open collator for %s", testCases[t].locale); - goto delete_collator; - } - - ucd = ucd_open(coll, &status); - - if (U_FAILURE(status)) { - errln("Could not open CollData object for %s", testCases[t].locale); - goto delete_ucd; - } - - bms = bms_open(ucd, pattern, plen, target, targetLen, &status); - - if (U_FAILURE(status)) { - errln("Could not open search object for %s", testCases[t].locale); - goto delete_bms; - } - - start = end = -1; - while (bms_search(bms, offset, &start, &end)) { - offset = end; - } - - if (offset == 0) { - errln("Could not find pattern - locale: %s, file: %s ", testCases[t].locale, testCases[t].file); - } - -delete_bms: - bms_close(bms); - -delete_ucd: - ucd_close(ucd); - -delete_collator: - ucol_close(coll); - - DELETE_ARRAY(pattern); - ucbuf_close(ucharBuf); - } - - ucd_flushCache(); -} - -void SSearchTest::bmSearchTest() -{ -#if !UCONFIG_NO_REGULAR_EXPRESSIONS - UErrorCode status = U_ZERO_ERROR; - char path[PATH_BUFFER_SIZE]; - const char *testFilePath = getPath(path, "ssearch.xml"); - - if (testFilePath == NULL) { - return; /* Couldn't get path: error message already output. */ - } - - UXMLParser *parser = UXMLParser::createParser(status); - TEST_ASSERT_SUCCESS(status); - UXMLElement *root = parser->parseFile(testFilePath, status); - TEST_ASSERT_SUCCESS(status); - if (U_FAILURE(status)) { - return; - } - - const UnicodeString *debugTestCase = root->getAttribute("debug"); - if (debugTestCase != NULL) { -// setenv("USEARCH_DEBUG", "1", 1); - } - - - const UXMLElement *testCase; - int32_t tc = 0; - - while((testCase = root->nextChildElement(tc)) != NULL) { - - if (testCase->getTagName().compare("test-case") != 0) { - errln("ssearch, unrecognized XML Element in test file"); - continue; - } - const UnicodeString *id = testCase->getAttribute("id"); - *testId = 0; - if (id != NULL) { - id->extract(0, id->length(), testId, sizeof(testId), US_INV); - } - - // If debugging test case has been specified and this is not it, skip to next. - if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) { - continue; - } - // - // Get the requested collation strength. - // Default is tertiary if the XML attribute is missing from the test case. - // - const UnicodeString *strength = testCase->getAttribute("strength"); - UColAttributeValue collatorStrength = UCOL_PRIMARY; - if (strength==NULL) { collatorStrength = UCOL_TERTIARY;} - else if (*strength=="PRIMARY") { collatorStrength = UCOL_PRIMARY;} - else if (*strength=="SECONDARY") { collatorStrength = UCOL_SECONDARY;} - else if (*strength=="TERTIARY") { collatorStrength = UCOL_TERTIARY;} - else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;} - else if (*strength=="IDENTICAL") { collatorStrength = UCOL_IDENTICAL;} - else { - // Bogus value supplied for strength. Shouldn't happen, even from - // typos, if the XML source has been validated. - // This assert is a little deceiving in that strength can be - // any of the allowed values, not just TERTIARY, but it will - // do the job of getting the error output. - TEST_ASSERT(*strength=="TERTIARY") - } - - // - // Get the collator normalization flag. Default is UCOL_OFF. - // - UColAttributeValue normalize = UCOL_OFF; - const UnicodeString *norm = testCase->getAttribute("norm"); - TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF"); - if (norm!=NULL && *norm=="ON") { - normalize = UCOL_ON; - } - - // - // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE. - // - UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE; - const UnicodeString *alt = testCase->getAttribute("alternate_handling"); - TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE"); - if (alt != NULL && *alt == "SHIFTED") { - alternateHandling = UCOL_SHIFTED; - } - - const UnicodeString defLocale("en"); - char clocale[100]; - const UnicodeString *locale = testCase->getAttribute("locale"); - if (locale == NULL || locale->length()==0) { - locale = &defLocale; - }; - locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL); - - - UnicodeString text; - UnicodeString target; - UnicodeString pattern; - int32_t expectedMatchStart = -1; - int32_t expectedMatchLimit = -1; - const UXMLElement *n; - int32_t nodeCount = 0; - - n = testCase->getChildElement("pattern"); - TEST_ASSERT(n != NULL); - if (n==NULL) { - continue; - } - text = n->getText(FALSE); - text = text.unescape(); - pattern.append(text); - nodeCount++; - - n = testCase->getChildElement("pre"); - if (n!=NULL) { - text = n->getText(FALSE); - text = text.unescape(); - target.append(text); - nodeCount++; - } - - n = testCase->getChildElement("m"); - if (n!=NULL) { - expectedMatchStart = target.length(); - text = n->getText(FALSE); - text = text.unescape(); - target.append(text); - expectedMatchLimit = target.length(); - nodeCount++; - } - - n = testCase->getChildElement("post"); - if (n!=NULL) { - text = n->getText(FALSE); - text = text.unescape(); - target.append(text); - nodeCount++; - } - - // Check that there weren't extra things in the XML - TEST_ASSERT(nodeCount == testCase->countChildren()); - - // Open a collator and StringSearch based on the parameters - // obtained from the XML. - // - status = U_ZERO_ERROR; - UCollator *collator = ucol_open(clocale, &status); - ucol_setStrength(collator, collatorStrength); - ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status); - ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, alternateHandling, &status); - UCD *ucd = ucd_open(collator, &status); - BMS *bms = bms_open(ucd, pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), &status); - - TEST_ASSERT_SUCCESS(status); - if (U_FAILURE(status)) { - bms_close(bms); - ucd_close(ucd); - ucol_close(collator); - continue; - } - - int32_t foundStart = 0; - int32_t foundLimit = 0; - UBool foundMatch; - - // - // Do the search, check the match result against the expected results. - // - foundMatch = bms_search(bms, 0, &foundStart, &foundLimit); - //TEST_ASSERT_SUCCESS(status); - if ((foundMatch && expectedMatchStart < 0) || - (foundStart != expectedMatchStart) || - (foundLimit != expectedMatchLimit)) { - TEST_ASSERT(FALSE); // ouput generic error position - infoln("Found, expected match start = %d, %d \n" - "Found, expected match limit = %d, %d", - foundStart, expectedMatchStart, foundLimit, expectedMatchLimit); - } - - bms_close(bms); - ucd_close(ucd); - ucol_close(collator); - } - - ucd_flushCache(); - delete root; - delete parser; -#endif -} - struct Order { int32_t order; @@ -1073,375 +722,60 @@ static UnicodeString &escape(const UnicodeString &string, UnicodeString &buffer) } #endif -#if 1 - -struct PCE -{ - uint64_t ce; - int32_t lowOffset; - int32_t highOffset; -}; - -class PCEList -{ -public: - PCEList(UCollator *coll, const UnicodeString &string); - ~PCEList(); - - int32_t size() const; - - const PCE *get(int32_t index) const; - - int32_t getLowOffset(int32_t index) const; - int32_t getHighOffset(int32_t index) const; - uint64_t getOrder(int32_t index) const; - - UBool matchesAt(int32_t offset, const PCEList &other) const; - - uint64_t operator[](int32_t index) const; - -private: - void add(uint64_t ce, int32_t low, int32_t high); - - PCE *list; - int32_t listMax; - int32_t listSize; -}; - -PCEList::PCEList(UCollator *coll, const UnicodeString &string) -{ - UErrorCode status = U_ZERO_ERROR; - UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status); - uint64_t order; - int32_t low, high; - - list = new PCE[listMax]; - - ucol_setOffset(elems, 0, &status); - - do { - order = ucol_nextProcessed(elems, &low, &high, &status); - add(order, low, high); - } while (order != UCOL_PROCESSED_NULLORDER); - - ucol_closeElements(elems); -} - -PCEList::~PCEList() -{ - delete[] list; -} - -void PCEList::add(uint64_t order, int32_t low, int32_t high) -{ - if (listSize >= listMax) { - listMax *= 2; - - PCE *newList = new PCE[listMax]; - - uprv_memcpy(newList, list, listSize * sizeof(Order)); - delete[] list; - list = newList; - } - - list[listSize].ce = order; - list[listSize].lowOffset = low; - list[listSize].highOffset = high; - - listSize += 1; -} - -const PCE *PCEList::get(int32_t index) const -{ - if (index >= listSize) { - return NULL; - } - - return &list[index]; -} - -int32_t PCEList::getLowOffset(int32_t index) const -{ - const PCE *pce = get(index); - - if (pce != NULL) { - return pce->lowOffset; - } - - return -1; -} - -int32_t PCEList::getHighOffset(int32_t index) const -{ - const PCE *pce = get(index); - - if (pce != NULL) { - return pce->highOffset; - } - - return -1; -} - -uint64_t PCEList::getOrder(int32_t index) const -{ - const PCE *pce = get(index); - - if (pce != NULL) { - return pce->ce; - } - - return UCOL_PROCESSED_NULLORDER; -} - -int32_t PCEList::size() const -{ - return listSize; -} - -UBool PCEList::matchesAt(int32_t offset, const PCEList &other) const -{ - // NOTE: sizes include the NULLORDER, which we don't want to compare. - int32_t otherSize = other.size() - 1; - - if (listSize - 1 - offset < otherSize) { - return FALSE; - } - - for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) { - if (getOrder(i) != other.getOrder(j)) { - return FALSE; - } - } - - return TRUE; -} - -uint64_t PCEList::operator[](int32_t index) const -{ - return getOrder(index); -} - -void SSearchTest::boyerMooreTest() +void SSearchTest::sharpSTest() { UErrorCode status = U_ZERO_ERROR; UCollator *coll = NULL; - CollData *data = NULL; - const CEList* ce = NULL; - const CEList* ce1 = NULL; UnicodeString lp = "fuss"; UnicodeString sp = "fu\\u00DF"; - BoyerMooreSearch *longPattern = NULL; - BoyerMooreSearch *shortPattern = NULL; UnicodeString targets[] = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball", "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF", "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"}; int32_t start = -1, end = -1; coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status); - if (U_FAILURE(status)) { - errcheckln(status, "Could not open collator. - %s", u_errorName(status)); - return; - } + TEST_ASSERT_SUCCESS(status); - data = CollData::open(coll, status); - if (U_FAILURE(status)) { - errln("Could not open CollData object."); - goto close_data; - } + UnicodeString lpUnescaped = lp.unescape(); + UnicodeString spUnescaped = sp.unescape(); - data->getDynamicClassID(); - if (U_FAILURE(status)) { - errln("Could not get dynamic class ID of CollData."); - goto close_patterns; - } + LocalUStringSearchPointer ussLong(usearch_openFromCollator(lpUnescaped.getBuffer(), lpUnescaped.length(), + lpUnescaped.getBuffer(), lpUnescaped.length(), // actual test data will be set later + coll, + NULL, // the break iterator + &status)); - data->getStaticClassID(); - if (U_FAILURE(status)) { - errln("Could not get static class ID of CollData."); - goto close_patterns; - } - - longPattern = new BoyerMooreSearch(data, lp.unescape(), NULL, status); - shortPattern = new BoyerMooreSearch(data, sp.unescape(), NULL, status); - if (U_FAILURE(status)) { - errln("Could not create pattern objects."); - goto close_patterns; - } - - longPattern->getBadCharacterTable(); - shortPattern->getBadCharacterTable(); - if (U_FAILURE(status)) { - errln("Could not get bad character table."); - goto close_patterns; - } - - longPattern->getGoodSuffixTable(); - shortPattern->getGoodSuffixTable(); - if (U_FAILURE(status)) { - errln("Could not get good suffix table."); - goto close_patterns; - } - - longPattern->getDynamicClassID(); - shortPattern->getDynamicClassID(); - if (U_FAILURE(status)) { - errln("Could not get dynamic class ID of BoyerMooreSearch."); - goto close_patterns; - } - - longPattern->getStaticClassID(); - shortPattern->getStaticClassID(); - if (U_FAILURE(status)) { - errln("Could not get static class ID of BoyerMooreSearch."); - goto close_patterns; - } - - longPattern->getData(); - shortPattern->getData(); - if (U_FAILURE(status)) { - errln("Could not get collate data."); - goto close_patterns; - } - - ce = longPattern->getPatternCEs(); - ce1 = shortPattern->getPatternCEs(); - if (U_FAILURE(status)) { - errln("Could not get pattern CEs."); - goto close_patterns; - } - - ce->getDynamicClassID(); - ce1->getDynamicClassID(); - if (U_FAILURE(status)) { - errln("Could not get dynamic class ID of CEList."); - goto close_patterns; - } - - ce->getStaticClassID(); - ce1->getStaticClassID(); - if (U_FAILURE(status)) { - errln("Could not get static class ID of CEList."); - goto close_patterns; - } - - if(data->minLengthInChars(ce,0) != 3){ - errln("Minimal Length in Characters for 'data' with 'ce' was suppose to give 3."); - goto close_patterns; - } - - if(data->minLengthInChars(ce1,0) != 3){ - errln("Minimal Length in Characters for 'data' with 'ce1' was suppose to give 3."); - goto close_patterns; - } + LocalUStringSearchPointer ussShort(usearch_openFromCollator(spUnescaped.getBuffer(), spUnescaped.length(), + spUnescaped.getBuffer(), spUnescaped.length(), // actual test data will be set later + coll, + NULL, // the break iterator + &status)); + TEST_ASSERT_SUCCESS(status); for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) { + UBool bFound; UnicodeString target = targets[t].unescape(); - longPattern->setTargetString(&target, status); - if (longPattern->search(0, start, end)) { + start = end = -1; + usearch_setText(ussLong.getAlias(), target.getBuffer(), target.length(), &status); + bFound = usearch_search(ussLong.getAlias(), 0, &start, &end, &status); + TEST_ASSERT_SUCCESS(status); + if (bFound) { logln("Test %d: found long pattern at [%d, %d].", t, start, end); } else { errln("Test %d: did not find long pattern.", t); } - shortPattern->setTargetString(&target, status); - if (shortPattern->search(0, start, end)) { - logln("Test %d: found short pattern at [%d, %d].", t, start, end); - } else { - errln("Test %d: did not find short pattern.", t); - } - - if(longPattern->empty()){ - errln("Test %d: Long pattern should not have been empty."); - } - - if(shortPattern->empty()){ - errln("Test %d: Short pattern should not have been empty."); - } - } - -close_patterns: - delete shortPattern; - delete longPattern; - -close_data: - CollData::close(data); - ucol_close(coll); -} - -void SSearchTest::bmsTest() -{ - UErrorCode status = U_ZERO_ERROR; - UCollator *coll = NULL; - UCD *data = NULL; - UnicodeString lp = "fuss"; - UnicodeString lpu = lp.unescape(); - UnicodeString sp = "fu\\u00DF"; - UnicodeString spu = sp.unescape(); - BMS *longPattern = NULL; - BMS *shortPattern = NULL; - UnicodeString targets[] = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball", - "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF", - "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"}; - int32_t start = -1, end = -1; - - coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status); - if (U_FAILURE(status)) { - errcheckln(status, "Could not open collator. - %s", u_errorName(status)); - return; - } - - data = ucd_open(coll, &status); - if (U_FAILURE(status)) { - errln("Could not open CollData object."); - goto close_data; - } - - longPattern = bms_open(data, lpu.getBuffer(), lpu.length(), NULL, 0, &status); - shortPattern = bms_open(data, spu.getBuffer(), spu.length(), NULL, 0, &status); - if (U_FAILURE(status)) { - errln("Couldn't open pattern objects."); - goto close_patterns; - } - - for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) { - UnicodeString target = targets[t].unescape(); - - bms_setTargetString(longPattern, target.getBuffer(), target.length(), &status); - if (bms_search(longPattern, 0, &start, &end)) { + usearch_setText(ussShort.getAlias(), target.getBuffer(), target.length(), &status); + bFound = usearch_search(ussShort.getAlias(), 0, &start, &end, &status); + TEST_ASSERT_SUCCESS(status); + if (bFound) { logln("Test %d: found long pattern at [%d, %d].", t, start, end); } else { errln("Test %d: did not find long pattern.", t); } - - bms_setTargetString(shortPattern, target.getBuffer(), target.length(), &status); - if (bms_search(shortPattern, 0, &start, &end)) { - logln("Test %d: found short pattern at [%d, %d].", t, start, end); - } else { - errln("Test %d: did not find short pattern.", t); - } } - /* Add better coverage for bms code. */ - if(bms_empty(longPattern)) { - errln("FAIL: longgPattern is empty."); - } - - if (!bms_getData(longPattern)) { - errln("FAIL: bms_getData returned NULL."); - } - - if (!ucd_getCollator(data)) { - errln("FAIL: ucd_getCollator returned NULL."); - } - -close_patterns: - bms_close(shortPattern); - bms_close(longPattern); - -close_data: - ucd_close(data); - ucd_freeCache(); ucol_close(coll); } @@ -1449,41 +783,29 @@ void SSearchTest::goodSuffixTest() { UErrorCode status = U_ZERO_ERROR; UCollator *coll = NULL; - CollData *data = NULL; UnicodeString pat = /*"gcagagag"*/ "fxeld"; UnicodeString target = /*"gcatcgcagagagtatacagtacg"*/ "cloveldfxeld"; - BoyerMooreSearch *pattern = NULL; int32_t start = -1, end = -1; + UBool bFound; coll = ucol_open(NULL, &status); - if (U_FAILURE(status)) { - errcheckln(status, "Couldn't open collator. - %s", u_errorName(status)); - return; - } + TEST_ASSERT_SUCCESS(status); - data = CollData::open(coll, status); - if (U_FAILURE(status)) { - errln("Couldn't open CollData object."); - goto close_data; - } + LocalUStringSearchPointer ss(usearch_openFromCollator(pat.getBuffer(), pat.length(), + target.getBuffer(), target.length(), + coll, + NULL, // the break iterator + &status)); + TEST_ASSERT_SUCCESS(status); - pattern = new BoyerMooreSearch(data, pat, &target, status); - if (U_FAILURE(status)) { - errln("Couldn't open pattern object."); - goto close_pattern; - } - - if (pattern->search(0, start, end)) { + bFound = usearch_search(ss.getAlias(), 0, &start, &end, &status); + TEST_ASSERT_SUCCESS(status); + if (bFound) { logln("Found pattern at [%d, %d].", start, end); } else { errln("Did not find pattern."); } -close_pattern: - delete pattern; - -close_data: - CollData::close(data); ucol_close(coll); } @@ -1591,7 +913,6 @@ void SSearchTest::searchTime() { "Neither to been y-buried nor y-brent,\n" "But maketh houndes ete hem in despyt. zet'\n"; -#define TEST_BOYER_MOORE 1 const char *cPattern = "maketh houndes ete hem"; //const char *cPattern = "Whylom"; //const char *cPattern = "zet"; @@ -1601,25 +922,15 @@ const char *cPattern = "maketh houndes ete hem"; LocalUCollatorPointer collator(ucol_open("en", &status)); - CollData *data = CollData::open(collator.getAlias(), status); - if (U_FAILURE(status) || collator.isNull() || data == NULL) { - errcheckln(status, "Unable to open UCollator or CollData. - %s", u_errorName(status)); - return; - } //ucol_setStrength(collator.getAlias(), collatorStrength); //ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status); UnicodeString uPattern = cPattern; -#ifndef TEST_BOYER_MOORE LocalUStringSearchPointer uss(usearch_openFromCollator(uPattern.getBuffer(), uPattern.length(), target.getBuffer(), target.length(), collator.getAlias(), NULL, // the break iterator &status)); TEST_ASSERT_SUCCESS(status); -#else - BoyerMooreSearch bms(data, uPattern, &target, status); - TEST_ASSERT_SUCCESS(status); -#endif // int32_t foundStart; // int32_t foundEnd; @@ -1631,12 +942,8 @@ const char *cPattern = "maketh houndes ete hem"; int32_t refMatchPos = (int32_t)(pm - longishText); int32_t icuMatchPos; int32_t icuMatchEnd; -#ifndef TEST_BOYER_MOORE usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status); TEST_ASSERT_SUCCESS(status); -#else - found = bms.search(0, icuMatchPos, icuMatchEnd); -#endif TEST_ASSERT_M(refMatchPos == icuMatchPos, "strstr and icu give different match positions."); int32_t i; @@ -1645,11 +952,7 @@ const char *cPattern = "maketh houndes ete hem"; // Try loopcounts around 100000 to some millions, depending on the operation, // to get runtimes of at least several seconds. for (i=0; i<10000; i++) { -#ifndef TEST_BOYER_MOORE found = usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status); -#else - found = bms.search(0, icuMatchPos, icuMatchEnd); -#endif //TEST_ASSERT_SUCCESS(status); //TEST_ASSERT(found); @@ -1663,11 +966,7 @@ const char *cPattern = "maketh houndes ete hem"; } //printf("%ld, %d\n", pm-longishText, j); -#ifdef TEST_BOYER_MOORE - CollData::close(data); -#endif } -#endif //---------------------------------------------------------------------------------------- // @@ -1878,83 +1177,6 @@ static void generateTestCase(UCollator *coll, Monkey *monkeys[], int32_t monkeyC } while (! matches); } -// -// Find the next acceptable boundary following the specified starting index -// in the target text being searched. -// TODO: refine what is an acceptable boundary. For the moment, -// choose the next position not within a combining sequence. -// -#if 0 -static int32_t nextBoundaryAfter(const UnicodeString &string, int32_t startIndex) { - const UChar *text = string.getBuffer(); - int32_t textLen = string.length(); - - if (startIndex >= textLen) { - return startIndex; - } - - UChar32 c; - int32_t i = startIndex; - - U16_NEXT(text, i, textLen, c); - - // If we are on a control character, stop without looking for combining marks. - // Control characters do not combine. - int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); - if (gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR) { - return i; - } - - // The initial character was not a control, and can thus accept trailing - // combining characters. Advance over however many of them there are. - int32_t indexOfLastCharChecked; - - for (;;) { - indexOfLastCharChecked = i; - - if (i>=textLen) { - break; - } - - U16_NEXT(text, i, textLen, c); - gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); - - if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) { - break; - } - } - - return indexOfLastCharChecked; -} -#endif - -#if 0 -static UBool isInCombiningSequence(const UnicodeString &string, int32_t index) { - const UChar *text = string.getBuffer(); - int32_t textLen = string.length(); - - if (index>=textLen || index<=0) { - return FALSE; - } - - // If the character at the current index is not a GRAPHEME_EXTEND - // then we can not be within a combining sequence. - UChar32 c; - U16_GET(text, 0, index, textLen, c); - int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); - if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) { - return FALSE; - } - - // We are at a combining mark. If the preceding character is anything - // except a CONTROL, CR or LF, we are in a combining sequence. - U16_PREV(text, 0, index, c); - gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); - - return !(gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR); -} -#endif - static UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t offset, const UnicodeString &pattern, int32_t &matchStart, int32_t &matchEnd) { UErrorCode status = U_ZERO_ERROR; @@ -2065,7 +1287,7 @@ static int32_t getIntParam(UnicodeString name, UnicodeString ¶ms, int32_t d } params.extract(m.start(1, status), paramLength, valString, sizeof(valString)); - val = strtol(valString, NULL, 10); + val = uprv_strtol(valString, NULL, 10); // Delete this parameter from the params string. m.reset(); @@ -2128,54 +1350,6 @@ int32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCa notFoundCount += 1; } - return notFoundCount; -} - -int32_t SSearchTest::bmMonkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern, - BoyerMooreSearch *bms, BoyerMooreSearch *abms, - const char *name, const char *strength, uint32_t seed) -{ - UErrorCode status = U_ZERO_ERROR; - int32_t actualStart = -1, actualEnd = -1; - //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length(); - int32_t expectedStart = -1, expectedEnd = -1; - int32_t notFoundCount = 0; - - // **** TODO: find *all* matches, not just first one **** - simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd); - - bms->setTargetString(&testCase, status); - bms->search(0, actualStart, actualEnd); - - if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) { - errln("Boyer-Moore Search for in <%s> failed: expected [%d, %d], got [%d, %d]\n" - " strength=%s seed=%d", - name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed); - errln(UNICODE_STRING_SIMPLE(" : ") + prettify(pattern)); - } - - if (expectedStart == -1 && actualStart == -1) { - notFoundCount += 1; - } - - // **** TODO: find *all* matches, not just first one **** - simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd); - - abms->setTargetString(&testCase, status); - abms->search(0, actualStart, actualEnd); - - if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) { - errln("Boyer-Moore Search for in <%s> failed: expected [%d, %d], got [%d, %d]\n" - " strength=%s seed=%d", - name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed); - errln(UNICODE_STRING_SIMPLE(" : ") + prettify(altPattern)); - } - - if (expectedStart == -1 && actualStart == -1) { - notFoundCount += 1; - } - - return notFoundCount; } #endif @@ -2192,7 +1366,7 @@ void SSearchTest::monkeyTest(char *params) return; } - CollData *monkeyData = CollData::open(coll, status); + CollData *monkeyData = new CollData(coll, status); USet *expansions = uset_openEmpty(); USet *contractions = uset_openEmpty(); @@ -2312,203 +1486,11 @@ void SSearchTest::monkeyTest(char *params) uset_close(contractions); uset_close(expansions); uset_close(letters); - - CollData::close(monkeyData); + delete monkeyData; ucol_close(coll); } -void SSearchTest::bmMonkeyTest(char *params) -{ - static const UChar skipChars[] = { 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0xAAB5, 0xAAB6, 0xAAB9, 0xAABB, 0xAABC, 0 }; // for timebomb - // ook! - UErrorCode status = U_ZERO_ERROR; - UCollator *coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status); - - if (U_FAILURE(status)) { - errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status)); - return; - } - - CollData *monkeyData = CollData::open(coll, status); - - USet *expansions = uset_openEmpty(); - USet *contractions = uset_openEmpty(); - - ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status); - - U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); - U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); - USet *letters = uset_openPattern(letter_pattern, 39, &status); - SetMonkey letterMonkey(letters); - StringSetMonkey contractionMonkey(contractions, coll, monkeyData); - StringSetMonkey expansionMonkey(expansions, coll, monkeyData); - UnicodeString testCase; - UnicodeString alternate; - UnicodeString pattern, altPattern; - UnicodeString prefix, altPrefix; - UnicodeString suffix, altSuffix; - - Monkey *monkeys[] = { - &letterMonkey, - &contractionMonkey, - &expansionMonkey, - &contractionMonkey, - &expansionMonkey, - &contractionMonkey, - &expansionMonkey, - &contractionMonkey, - &expansionMonkey}; - int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]); - // int32_t nonMatchCount = 0; - - UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY}; - const char *strengthNames[] = {"primary", "secondary", "tertiary"}; - int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]); - int32_t loopCount = quick? 1000 : 10000; - int32_t firstStrength = 0; - int32_t lastStrength = strengthCount - 1; //*/ 0; - - if (params != NULL) { -#if !UCONFIG_NO_REGULAR_EXPRESSIONS - UnicodeString p(params); - - loopCount = getIntParam("loop", p, loopCount); - m_seed = getIntParam("seed", p, m_seed); - - RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status); - if (m.find()) { - UnicodeString breakType = m.group(1, status); - - for (int32_t s = 0; s < strengthCount; s += 1) { - if (breakType == strengthNames[s]) { - firstStrength = lastStrength = s; - break; - } - } - - m.reset(); - p = m.replaceFirst("", status); - } - - if (RegexMatcher("\\S", p, 0, status).find()) { - // Each option is stripped out of the option string as it is processed. - // All options have been checked. The option string should have been completely emptied.. - char buf[100]; - p.extract(buf, sizeof(buf), NULL, status); - buf[sizeof(buf)-1] = 0; - errln("Unrecognized or extra parameter: %s\n", buf); - return; - } -#else - infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters."); -#endif - } - - for(int32_t s = firstStrength; s <= lastStrength; s += 1) { - int32_t notFoundCount = 0; - - logln("Setting strength to %s.", strengthNames[s]); - ucol_setStrength(coll, strengths[s]); - - CollData *data = CollData::open(coll, status); - - UnicodeSet skipSet; - if(isICUVersionBefore(51, 1)) { - // timebomb until ticket #9156 (was #8081) is resolved - UnicodeString skipString(skipChars); - skipSet.addAll(skipString); - } - if(isICUVersionBefore(51, 1)) { - // Time bomb until ticket #9490 is fixed. - skipSet.add(0x12327); - skipSet.add(0x1311b); - skipSet.add(0x1200d); - } - skipSet.freeze(); - // TODO: try alternate prefix and suffix too? - // TODO: alternates are only equal at primary strength. Is this OK? - for(int32_t t = 0; t < loopCount; t += 1) { - uint32_t seed = m_seed; - // int32_t nmc = 0; - - generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern); - generateTestCase(coll, monkeys, monkeyCount, prefix, altPrefix); - generateTestCase(coll, monkeys, monkeyCount, suffix, altSuffix); - - if (skipSet.containsSome(pattern)) { - continue; // time bomb - } - - BoyerMooreSearch pat(data, pattern, NULL, status); - BoyerMooreSearch alt(data, altPattern, NULL, status); - - // **** need a better way to deal with this **** -#if 0 - if (pat.empty() || - alt.empty()) { - continue; - } -#endif - - // pattern - notFoundCount += bmMonkeyTestCase(coll, pattern, pattern, altPattern, &pat, &alt, "pattern", strengthNames[s], seed); - - testCase.remove(); - testCase.append(prefix); - testCase.append(/*alt*/pattern); - - // prefix + pattern - notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern", strengthNames[s], seed); - - testCase.append(suffix); - - // prefix + pattern + suffix - notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern + suffix", strengthNames[s], seed); - - testCase.remove(); - testCase.append(pattern); - testCase.append(suffix); - - // pattern + suffix - notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "pattern + suffix", strengthNames[s], seed); - } - - CollData::close(data); - - logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount); - } - - uset_close(contractions); - uset_close(expansions); - uset_close(letters); - - CollData::close(monkeyData); - - ucol_close(coll); -} - -void SSearchTest::stringListTest(){ - UErrorCode status = U_ZERO_ERROR; - StringList *sl = new StringList(status); - if(U_FAILURE(status)){ - errln("ERROR: stringListTest: Could not start StringList"); - } - - const UChar chars[] = { - 0x0000 - }; - sl->add(chars, (int32_t) 0, status); - if(U_FAILURE(status)){ - errln("ERROR: stringListTest: StringList::add"); - } - - if(sl->getDynamicClassID() != StringList::getStaticClassID()){ - errln("ERROR: stringListTest: getDynamicClassID and getStaticClassID does not match"); - } - delete sl; -} - #endif #endif diff --git a/icu4c/source/test/intltest/ssearch.h b/icu4c/source/test/intltest/ssearch.h index e17f38019b4..ebaeec67829 100644 --- a/icu4c/source/test/intltest/ssearch.h +++ b/icu4c/source/test/intltest/ssearch.h @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2009, International Business Machines + * Copyright (C) 2005-2012, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -11,7 +11,6 @@ #include "unicode/utypes.h" #include "unicode/unistr.h" #include "unicode/ucol.h" -#include "unicode/bmsearch.h" #include "intltest.h" @@ -34,30 +33,17 @@ public: virtual void searchTest(); virtual void offsetTest(); virtual void monkeyTest(char *params); - - virtual void bmMonkeyTest(char *params); - virtual void boyerMooreTest(); + virtual void sharpSTest(); virtual void goodSuffixTest(); virtual void searchTime(); - - virtual void bmsTest(); - virtual void bmSearchTest(); - virtual void udhrTest(); - - virtual void stringListTest(); private: virtual const char *getPath(char buffer[2048], const char *filename); virtual int32_t monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern, const char *name, const char *strength, uint32_t seed); - - virtual int32_t bmMonkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern, - BoyerMooreSearch *bms, BoyerMooreSearch *abms, - const char *name, const char *strength, uint32_t seed); #endif - }; #endif -#endif +#endif diff --git a/icu4c/source/test/perf/strsrchperf/strsrchperf.cpp b/icu4c/source/test/perf/strsrchperf/strsrchperf.cpp index 23d16d7de2a..1c70316ba96 100644 --- a/icu4c/source/test/perf/strsrchperf/strsrchperf.cpp +++ b/icu4c/source/test/perf/strsrchperf/strsrchperf.cpp @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (C) 2008-2009 IBM, Inc. All Rights Reserved. + * Copyright (C) 2008-2012 IBM, Inc. All Rights Reserved. * ********************************************************************/ /** @@ -14,13 +14,7 @@ StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status) :UPerfTest(argc,argv,status){ int32_t start, end; - -#ifdef TEST_BOYER_MOORE_SEARCH - bms = NULL; -#else srch = NULL; -#endif - pttrn = NULL; if(status== U_ILLEGAL_ARGUMENT_ERROR || line_mode){ fprintf(stderr,gUsageString, "strsrchperf"); @@ -65,17 +59,8 @@ StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const cha pttrn = temp; /* store word in pttrn */ #endif -#ifdef TEST_BOYER_MOORE_SEARCH - UnicodeString patternString(pttrn, pttrnLen); - UCollator *coll = ucol_open(locale, &status); - CollData *data = CollData::open(coll, status); - - targetString = new UnicodeString(src, srcLen); - bms = new BoyerMooreSearch(data, patternString, targetString, status); -#else /* Create the StringSearch object to be use in performance test. */ srch = usearch_open(pttrn, pttrnLen, src, srcLen, locale, NULL, &status); -#endif if(U_FAILURE(status)){ fprintf(stderr, "FAILED to create UPerfTest object. Error: %s\n", u_errorName(status)); @@ -85,23 +70,12 @@ StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const cha } StringSearchPerformanceTest::~StringSearchPerformanceTest() { - CollData *data = bms->getData(); - UCollator *coll = data->getCollator(); - - delete bms; - delete targetString; - CollData::close(data); - ucol_close(coll); - if (pttrn != NULL) { free(pttrn); } - -#ifndef TEST_BOYER_MOORE_SEARCH if (srch != NULL) { usearch_close(srch); } -#endif } UPerfFunction* StringSearchPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par) { @@ -117,20 +91,12 @@ UPerfFunction* StringSearchPerformanceTest::runIndexedTest(int32_t index, UBool } UPerfFunction* StringSearchPerformanceTest::Test_ICU_Forward_Search(){ -#ifdef TEST_BOYER_MOORE_SEARCH - StringSearchPerfFunction *func = new StringSearchPerfFunction(ICUForwardSearch, bms, src, srcLen, pttrn, pttrnLen); -#else StringSearchPerfFunction* func = new StringSearchPerfFunction(ICUForwardSearch, srch, src, srcLen, pttrn, pttrnLen); -#endif return func; } UPerfFunction* StringSearchPerformanceTest::Test_ICU_Backward_Search(){ -#ifdef TEST_BOYER_MOORE_SEARCH - StringSearchPerfFunction *func = new StringSearchPerfFunction(ICUBackwardSearch, bms, src, srcLen, pttrn, pttrnLen); -#else StringSearchPerfFunction* func = new StringSearchPerfFunction(ICUBackwardSearch, srch, src, srcLen, pttrn, pttrnLen); -#endif return func; } diff --git a/icu4c/source/test/perf/strsrchperf/strsrchperf.h b/icu4c/source/test/perf/strsrchperf/strsrchperf.h index 6f2281c5855..a68ab3a1b06 100644 --- a/icu4c/source/test/perf/strsrchperf/strsrchperf.h +++ b/icu4c/source/test/perf/strsrchperf/strsrchperf.h @@ -1,26 +1,17 @@ /******************************************************************** * COPYRIGHT: - * Copyright (C) 2008-2009 IBM, Inc. All Rights Reserved. + * Copyright (C) 2008-2012 IBM, Inc. All Rights Reserved. * ********************************************************************/ #ifndef _STRSRCHPERF_H #define _STRSRCHPERF_H -#include "unicode/ubrk.h" #include "unicode/usearch.h" -#include "unicode/colldata.h" -#include "unicode/bmsearch.h" #include "unicode/uperf.h" #include #include -#define TEST_BOYER_MOORE_SEARCH - -#ifdef TEST_BOYER_MOORE_SEARCH -typedef void (*StrSrchFn) (BoyerMooreSearch * bms, const UChar *src, int32_t srcLen, const UChar *pttrn, int32_t pttrnLen, UErrorCode *status); -#else typedef void (*StrSrchFn)(UStringSearch* srch, const UChar* src,int32_t srcLen, const UChar* pttrn, int32_t pttrnLen, UErrorCode* status); -#endif class StringSearchPerfFunction : public UPerfFunction { private: @@ -29,39 +20,17 @@ private: int32_t srcLen; const UChar* pttrn; int32_t pttrnLen; -#ifdef TEST_BOYER_MOORE_SEARCH - BoyerMooreSearch *bms; -#else UStringSearch* srch; -#endif public: virtual void call(UErrorCode* status) { -#ifdef TEST_BOYER_MOORE_SEARCH - (*fn)(bms, src, srcLen, pttrn, pttrnLen, status); -#else (*fn)(srch, src, srcLen, pttrn, pttrnLen, status); -#endif } virtual long getOperationsPerIteration() { -#if 0 - return (long)(srcLen/pttrnLen); -#else return (long) srcLen; -#endif } -#ifdef TEST_BOYER_MOORE_SEARCH - StringSearchPerfFunction(StrSrchFn func, BoyerMooreSearch *search, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen) { - fn = func; - src = source; - srcLen = sourceLen; - pttrn = pattern; - pttrnLen = patternLen; - bms = search; - } -#else StringSearchPerfFunction(StrSrchFn func, UStringSearch* search, const UChar* source,int32_t sourceLen, const UChar* pattern, int32_t patternLen) { fn = func; src = source; @@ -70,7 +39,6 @@ public: pttrnLen = patternLen; srch = search; } -#endif }; class StringSearchPerformanceTest : public UPerfTest { @@ -79,42 +47,17 @@ private: int32_t srcLen; UChar* pttrn; int32_t pttrnLen; -#ifdef TEST_BOYER_MOORE_SEARCH - UnicodeString *targetString; - BoyerMooreSearch *bms; -#else UStringSearch* srch; -#endif public: StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status); ~StringSearchPerformanceTest(); virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = NULL); - UPerfFunction* Test_ICU_Forward_Search(); - UPerfFunction* Test_ICU_Backward_Search(); }; -#ifdef TEST_BOYER_MOORE_SEARCH -void ICUForwardSearch(BoyerMooreSearch *bms, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen, UErrorCode * /*status*/) { - int32_t offset = 0, start = -1, end = -1; - - while (bms->search(offset, start, end)) { - offset = end; - } -} - -void ICUBackwardSearch(BoyerMooreSearch *bms, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen, UErrorCode * /*status*/) { - int32_t offset = 0, start = -1, end = -1; - - /* NOTE: No Boyer-Moore backward search yet... */ - while (bms->search(offset, start, end)) { - offset = end; - } -} -#else void ICUForwardSearch(UStringSearch *srch, const UChar* source, int32_t sourceLen, const UChar* pattern, int32_t patternLen, UErrorCode* status) { int32_t match; @@ -132,6 +75,5 @@ void ICUBackwardSearch(UStringSearch *srch, const UChar* source, int32_t sourceL match = usearch_previous(srch, status); } } -#endif #endif /* _STRSRCHPERF_H */