From 1c37b55ae05d6e752ecdb602a67f5ce5a3ee4a93 Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Tue, 3 Oct 2006 17:41:23 +0000 Subject: [PATCH] ICU-5426 Reduce the amount of unused memory in caches by at least 500KB. X-SVN-Rev: 20476 --- icu4c/source/common/unicode/uniset.h | 16 +- icu4c/source/common/uniset.cpp | 76 ++++++--- icu4c/source/common/uniset_props.cpp | 222 +++++++++++++-------------- 3 files changed, 173 insertions(+), 141 deletions(-) diff --git a/icu4c/source/common/unicode/uniset.h b/icu4c/source/common/unicode/uniset.h index 8f497bafb68..a63a65d6ed8 100644 --- a/icu4c/source/common/unicode/uniset.h +++ b/icu4c/source/common/unicode/uniset.h @@ -262,11 +262,10 @@ class U_COMMON_API UnicodeSet : public UnicodeFilter { int32_t len; // length of list used; 0 <= len <= capacity int32_t capacity; // capacity of list - int32_t bufferCapacity; // capacity of buffer UChar32* list; // MUST be terminated with HIGH UChar32* buffer; // internal buffer, may be NULL - - UVector* strings; // maintained in sorted order + int32_t bufferCapacity; // capacity of buffer + int32_t patLen; /** * The pattern representation of this set. This may not be the @@ -277,7 +276,8 @@ class U_COMMON_API UnicodeSet : public UnicodeFilter { * indicating that toPattern() must generate a pattern * representation from the inversion list. */ - UnicodeString pat; + UChar *pat; + UVector* strings; // maintained in sorted order public: @@ -1309,9 +1309,13 @@ private: UErrorCode &status); /** - * Return a cached copy of the inclusions list for the property source. + * Set the new pattern to cache. */ - static const UnicodeSet* getInclusions(int32_t src, UErrorCode &errorCode); + void setPattern(const UnicodeString& newPat); + /** + * Release existing cached pattern. + */ + void releasePattern(); friend class UnicodeSetIterator; }; diff --git a/icu4c/source/common/uniset.cpp b/icu4c/source/common/uniset.cpp index d7170ee9681..6e2b8a65c7e 100644 --- a/icu4c/source/common/uniset.cpp +++ b/icu4c/source/common/uniset.cpp @@ -138,8 +138,8 @@ static int8_t U_CALLCONV compareUnicodeString(UHashTok t1, UHashTok t2) { * Constructs an empty set. */ UnicodeSet::UnicodeSet() : - len(1), capacity(1 + START_EXTRA), bufferCapacity(0), - list(0), buffer(0), strings(NULL) + len(1), capacity(1 + START_EXTRA), list(0), buffer(0), + bufferCapacity(0), patLen(0), strings(NULL), pat(NULL) { list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); if(list!=NULL){ @@ -158,8 +158,8 @@ UnicodeSet::UnicodeSet() : * @param end last character, inclusive, of range */ UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) : - len(1), capacity(1 + START_EXTRA), bufferCapacity(0), - list(0), buffer(0), strings(NULL) + len(1), capacity(1 + START_EXTRA), list(0), buffer(0), + bufferCapacity(0), patLen(0), strings(NULL), pat(NULL) { list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); if(list!=NULL){ @@ -177,8 +177,8 @@ UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) : */ UnicodeSet::UnicodeSet(const UnicodeSet& o) : UnicodeFilter(o), - len(0), capacity(o.len + GROW_EXTRA), bufferCapacity(0), - list(0), buffer(0), strings(NULL) + len(0), capacity(o.len + GROW_EXTRA), list(0), buffer(0), + bufferCapacity(0), patLen(0), strings(NULL), pat(NULL) { list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); if(list!=NULL){ @@ -199,6 +199,7 @@ UnicodeSet::~UnicodeSet() { uprv_free(buffer); } delete strings; + releasePattern(); } /** @@ -210,7 +211,10 @@ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { uprv_memcpy(list, o.list, len*sizeof(UChar32)); UErrorCode ec = U_ZERO_ERROR; strings->assign(*o.strings, cloneUnicodeString, ec); - pat = o.pat; + releasePattern(); + if (o.pat) { + setPattern(UnicodeString(o.pat, o.patLen)); + } return *this; } @@ -869,7 +873,7 @@ UnicodeSet& UnicodeSet::add(UChar32 c) { } #endif - pat.truncate(0); + releasePattern(); return *this; } @@ -888,7 +892,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) { if (cp < 0) { if (!strings->contains((void*) &s)) { _add(s); - pat.truncate(0); + releasePattern(); } } else { add((UChar32)cp, (UChar32)cp); @@ -1069,7 +1073,7 @@ UnicodeSet& UnicodeSet::remove(const UnicodeString& s) { int32_t cp = getSingleCP(s); if (cp < 0) { strings->removeElement((void*) &s); - pat.truncate(0); + releasePattern(); } else { remove((UChar32)cp, (UChar32)cp); } @@ -1092,7 +1096,7 @@ UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) { UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; exclusiveOr(range, 2, 0); } - pat.truncate(0); + releasePattern(); return *this; } @@ -1116,7 +1120,7 @@ UnicodeSet& UnicodeSet::complement(void) { ++len; } swapBuffers(); - pat.truncate(0); + releasePattern(); return *this; } @@ -1137,7 +1141,7 @@ UnicodeSet& UnicodeSet::complement(const UnicodeString& s) { } else { _add(s); } - pat.truncate(0); + releasePattern(); } else { complement((UChar32)cp, (UChar32)cp); } @@ -1224,7 +1228,7 @@ UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) { UnicodeSet& UnicodeSet::clear(void) { list[0] = UNICODESET_HIGH; len = 1; - pat.truncate(0); + releasePattern(); strings->removeAllElements(); return *this; } @@ -1465,7 +1469,7 @@ void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t pola } } swapBuffers(); - pat.truncate(0); + releasePattern(); } // polarity = 0 is normal: x union y @@ -1570,7 +1574,7 @@ void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) { buffer[k++] = UNICODESET_HIGH; // terminate len = k; swapBuffers(); - pat.truncate(0); + releasePattern(); } // polarity = 0 is normal: x intersect y @@ -1659,7 +1663,7 @@ void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) buffer[k++] = UNICODESET_HIGH; // terminate len = k; swapBuffers(); - pat.truncate(0); + releasePattern(); } /** @@ -1717,13 +1721,14 @@ escapeUnprintable) { * is one. Otherwise it will be generated. */ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result, - UBool escapeUnprintable) const { - if (pat.length() > 0) { + UBool escapeUnprintable) const +{ + if (pat != NULL) { int32_t i; int32_t backslashCount = 0; - for (i=0; iadd(c); +} + +static void U_CALLCONV +_set_addRange(USet *set, UChar32 start, UChar32 end) { + ((UnicodeSet *)set)->add(start, end); +} + +static void U_CALLCONV +_set_addString(USet *set, const UChar *str, int32_t length) { + ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); +} + +/** + * Cleanup function for UnicodeSet + */ +static UBool U_CALLCONV uset_cleanup(void) { + int32_t i; + + for(i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) { + if (INCLUSIONS[i] != NULL) { + delete INCLUSIONS[i]; + INCLUSIONS[i] = NULL; + } + } + + return TRUE; +} + +U_CDECL_END + +U_NAMESPACE_BEGIN + +static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status) { + UBool needInit; + UMTX_CHECK(NULL, (INCLUSIONS[src] == NULL), needInit); + if (needInit) { + UnicodeSet* incl = new UnicodeSet(); + USetAdder sa = { + (USet *)incl, + _set_add, + _set_addRange, + _set_addString, + NULL // don't need remove() + }; + + if (incl != NULL) { + switch(src) { + case UPROPS_SRC_CHAR: + uchar_addPropertyStarts(&sa, &status); + break; + case UPROPS_SRC_PROPSVEC: + upropsvec_addPropertyStarts(&sa, &status); + break; + case UPROPS_SRC_CHAR_AND_PROPSVEC: + uchar_addPropertyStarts(&sa, &status); + upropsvec_addPropertyStarts(&sa, &status); + break; + case UPROPS_SRC_HST: + uhst_addPropertyStarts(&sa, &status); + break; +#if !UCONFIG_NO_NORMALIZATION + case UPROPS_SRC_NORM: + unorm_addPropertyStarts(&sa, &status); + break; +#endif + case UPROPS_SRC_CASE: + ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status); + break; + case UPROPS_SRC_BIDI: + ubidi_addPropertyStarts(ubidi_getSingleton(&status), &sa, &status); + break; + default: + status = U_INTERNAL_PROGRAM_ERROR; + break; + } + if (U_SUCCESS(status)) { + // Compact for caching + incl->compact(); + umtx_lock(NULL); + if (INCLUSIONS[src] == NULL) { + INCLUSIONS[src] = incl; + incl = NULL; + ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); + } + umtx_unlock(NULL); + } + delete incl; + } else { + status = U_MEMORY_ALLOCATION_ERROR; + } + } + return INCLUSIONS[src]; +} + // helper functions for matching of pattern syntax pieces ------------------ *** // these functions are parallel to the PERL_OPEN etc. strings above @@ -143,8 +246,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) { */ UnicodeSet::UnicodeSet(const UnicodeString& pattern, UErrorCode& status) : - len(0), capacity(START_EXTRA), bufferCapacity(0), - list(0), buffer(0), strings(NULL) + len(0), capacity(START_EXTRA), list(0), buffer(0), + bufferCapacity(0), patLen(0), strings(NULL), pat(NULL) { if(U_SUCCESS(status)){ list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); @@ -171,8 +274,8 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, uint32_t options, const SymbolTable* symbols, UErrorCode& status) : - len(0), capacity(START_EXTRA), bufferCapacity(0), - list(0), buffer(0), strings(NULL) + len(0), capacity(START_EXTRA), list(0), buffer(0), + bufferCapacity(0), patLen(0), strings(NULL), pat(NULL) { if(U_SUCCESS(status)){ list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); @@ -191,8 +294,8 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, uint32_t options, const SymbolTable* symbols, UErrorCode& status) : - len(0), capacity(START_EXTRA), bufferCapacity(0), - list(0), buffer(0), strings(NULL) + len(0), capacity(START_EXTRA), list(0), buffer(0), + bufferCapacity(0), patLen(0), strings(NULL), pat(NULL) { if(U_SUCCESS(status)){ list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); @@ -283,7 +386,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, status = U_MALFORMED_SET; return *this; } - pat = rebuiltPat; + setPattern(rebuiltPat); return *this; } @@ -1165,109 +1268,6 @@ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars, rebuiltPat.append(pattern, 0, pos.getIndex()); } -//---------------------------------------------------------------- -// Inclusions list -//---------------------------------------------------------------- - -U_CDECL_BEGIN - -// USetAdder implementation -// Does not use uset.h to reduce code dependencies -static void U_CALLCONV -_set_add(USet *set, UChar32 c) { - ((UnicodeSet *)set)->add(c); -} - -static void U_CALLCONV -_set_addRange(USet *set, UChar32 start, UChar32 end) { - ((UnicodeSet *)set)->add(start, end); -} - -static void U_CALLCONV -_set_addString(USet *set, const UChar *str, int32_t length) { - ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); -} - -/** - * Cleanup function for UnicodeSet - */ -static UBool U_CALLCONV uset_cleanup(void) { - int32_t i; - - for(i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) { - if (INCLUSIONS[i] != NULL) { - delete INCLUSIONS[i]; - INCLUSIONS[i] = NULL; - } - } - - return TRUE; -} - -U_CDECL_END - -const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) { - UBool needInit; - UMTX_CHECK(NULL, (INCLUSIONS[src] == NULL), needInit); - if (needInit) { - UnicodeSet* incl = new UnicodeSet(); - USetAdder sa = { - (USet *)incl, - _set_add, - _set_addRange, - _set_addString, - NULL // don't need remove() - }; - - if (incl != NULL) { - switch(src) { - case UPROPS_SRC_CHAR: - uchar_addPropertyStarts(&sa, &status); - break; - case UPROPS_SRC_PROPSVEC: - upropsvec_addPropertyStarts(&sa, &status); - break; - case UPROPS_SRC_CHAR_AND_PROPSVEC: - uchar_addPropertyStarts(&sa, &status); - upropsvec_addPropertyStarts(&sa, &status); - break; - case UPROPS_SRC_HST: - uhst_addPropertyStarts(&sa, &status); - break; -#if !UCONFIG_NO_NORMALIZATION - case UPROPS_SRC_NORM: - unorm_addPropertyStarts(&sa, &status); - break; -#endif - case UPROPS_SRC_CASE: - ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status); - break; - case UPROPS_SRC_BIDI: - ubidi_addPropertyStarts(ubidi_getSingleton(&status), &sa, &status); - break; - default: - status = U_INTERNAL_PROGRAM_ERROR; - break; - } - if (U_SUCCESS(status)) { - // Compact for caching - incl->compact(); - umtx_lock(NULL); - if (INCLUSIONS[src] == NULL) { - INCLUSIONS[src] = incl; - incl = NULL; - ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); - } - umtx_unlock(NULL); - } - delete incl; - } else { - status = U_MEMORY_ALLOCATION_ERROR; - } - } - return INCLUSIONS[src]; -} - //---------------------------------------------------------------- // Case folding API //----------------------------------------------------------------