ICU-5426 Reduce the amount of unused memory in caches by at least 500KB.

X-SVN-Rev: 20476
2025-04-08 06:53:45 +00:00 · 2006-10-03 17:41:23 +00:00 · 2006-10-03 17:41:23 +00:00 · 1c37b55ae0
commit 1c37b55ae0
parent 70cb51cb3b
3 changed files with 173 additions and 141 deletions
--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@ -262,11 +262,10 @@ class U_COMMON_API UnicodeSet : public UnicodeFilter {

    int32_t len; // length of list used; 0 <= len <= capacity
    int32_t capacity; // capacity of list
-    int32_t bufferCapacity; // capacity of buffer
    UChar32* list; // MUST be terminated with HIGH
    UChar32* buffer; // internal buffer, may be NULL
-
-    UVector* strings; // maintained in sorted order
+    int32_t bufferCapacity; // capacity of buffer
+    int32_t patLen; // number of UChars in pat, not counting its terminating NUL; 0 when pat is NULL

    /**
     * The pattern representation of this set.  This may not be the
@ -277,7 +276,8 @@ class U_COMMON_API UnicodeSet : public UnicodeFilter {
     * indicating that toPattern() must generate a pattern
     * representation from the inversion list.
     */
-    UnicodeString pat;
+    UChar *pat;
+    UVector* strings; // maintained in sorted order

 public:

@ -1309,9 +1309,13 @@ private:
                     UErrorCode &status);

    /**
-     * Return a cached copy of the inclusions list for the property source.
+     * Set the new pattern to cache.
     */
-    static const UnicodeSet* getInclusions(int32_t src, UErrorCode &errorCode);
+    void setPattern(const UnicodeString& newPat);
+    /**
+     * Release existing cached pattern.
+     */
+    void releasePattern();

    friend class UnicodeSetIterator;
 };
--- a/icu4c/source/common/uniset.cpp
+++ b/icu4c/source/common/uniset.cpp
@ -138,8 +138,8 @@ static int8_t U_CALLCONV compareUnicodeString(UHashTok t1, UHashTok t2) {
 * Constructs an empty set.
 */
 UnicodeSet::UnicodeSet() :
-    len(1), capacity(1 + START_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(1), capacity(1 + START_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {
    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
    if(list!=NULL){
@ -158,8 +158,8 @@ UnicodeSet::UnicodeSet() :
 * @param end last character, inclusive, of range
 */
 UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
-    len(1), capacity(1 + START_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(1), capacity(1 + START_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {
    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
    if(list!=NULL){
@ -177,8 +177,8 @@ UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
 */
 UnicodeSet::UnicodeSet(const UnicodeSet& o) :
    UnicodeFilter(o),
-    len(0), capacity(o.len + GROW_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(0), capacity(o.len + GROW_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {
    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
    if(list!=NULL){
@ -199,6 +199,7 @@ UnicodeSet::~UnicodeSet() {
        uprv_free(buffer);
    }
    delete strings;
+    releasePattern();
 }

 /**
@ -210,7 +211,10 @@ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
    uprv_memcpy(list, o.list, len*sizeof(UChar32));
    UErrorCode ec = U_ZERO_ERROR;
    strings->assign(*o.strings, cloneUnicodeString, ec);
-    pat = o.pat;
+    releasePattern();
+    if (o.pat) {
+        setPattern(UnicodeString(o.pat, o.patLen));
+    }
    return *this;
 }

@ -869,7 +873,7 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
    }
 #endif

-    pat.truncate(0);
+    releasePattern();
    return *this;
 }

@ -888,7 +892,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
    if (cp < 0) {
        if (!strings->contains((void*) &s)) {
            _add(s);
-            pat.truncate(0);
+            releasePattern();
        }
    } else {
        add((UChar32)cp, (UChar32)cp);
@ -1069,7 +1073,7 @@ UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
    int32_t cp = getSingleCP(s);
    if (cp < 0) {
        strings->removeElement((void*) &s);
-        pat.truncate(0);
+        releasePattern();
    } else {
        remove((UChar32)cp, (UChar32)cp);
    }
@ -1092,7 +1096,7 @@ UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) {
        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
        exclusiveOr(range, 2, 0);
    }
-    pat.truncate(0);
+    releasePattern();
    return *this;
 }

@ -1116,7 +1120,7 @@ UnicodeSet& UnicodeSet::complement(void) {
        ++len;
    }
    swapBuffers();
-    pat.truncate(0);
+    releasePattern();
    return *this;
 }

@ -1137,7 +1141,7 @@ UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
        } else {
            _add(s);
        }
-        pat.truncate(0);
+        releasePattern();
    } else {
        complement((UChar32)cp, (UChar32)cp);
    }
@ -1224,7 +1228,7 @@ UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
 UnicodeSet& UnicodeSet::clear(void) {
    list[0] = UNICODESET_HIGH;
    len = 1;
-    pat.truncate(0);
+    releasePattern();
    strings->removeAllElements();
    return *this;
 }
@ -1465,7 +1469,7 @@ void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t pola
        }
    }
    swapBuffers();
-    pat.truncate(0);
+    releasePattern();
 }

 // polarity = 0 is normal: x union y
@ -1570,7 +1574,7 @@ void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
    buffer[k++] = UNICODESET_HIGH;    // terminate
    len = k;
    swapBuffers();
-    pat.truncate(0);
+    releasePattern();
 }

 // polarity = 0 is normal: x intersect y
@ -1659,7 +1663,7 @@ void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity)
    buffer[k++] = UNICODESET_HIGH;    // terminate
    len = k;
    swapBuffers();
-    pat.truncate(0);
+    releasePattern();
 }

 /**
@ -1717,13 +1721,14 @@ escapeUnprintable) {
 * is one.  Otherwise it will be generated.
 */
 UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
-                                      UBool escapeUnprintable) const {
-    if (pat.length() > 0) {
+                                      UBool escapeUnprintable) const
+{
+    if (pat != NULL) {
        int32_t i;
        int32_t backslashCount = 0;
-        for (i=0; i<pat.length(); ) {
-            UChar32 c = pat.char32At(i);
-            i += UTF_CHAR_LENGTH(c);
+        for (i=0; i<patLen; ) {
+            UChar32 c;
+            U16_NEXT(pat, i, patLen, c);
            if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
                // If the unprintable character is preceded by an odd
                // number of backslashes, then it has been escaped.
@ -1755,7 +1760,8 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
 * will produce another set that is equal to this one.
 */
 UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
-                                     UBool escapeUnprintable) const {
+                                     UBool escapeUnprintable) const
+{
    result.truncate(0);
    return _toPattern(result, escapeUnprintable);
 }
@ -1766,7 +1772,8 @@ UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
 * passed to applyPattern().
 */
 UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
-                                            UBool escapeUnprintable) const {
+                                            UBool escapeUnprintable) const
+{
    result.append(SET_OPEN);

 //  // Check against the predefined categories.  We implicitly build
@ -1829,5 +1836,26 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
    return result.append(SET_CLOSE);
 }

+/**
+ * Discard the cached pattern string, if one is held.
+ * Frees the buffer and resets pat to NULL and patLen to 0.
+ * Does nothing when no pattern is cached.
+ */
+void UnicodeSet::releasePattern() {
+    if (pat == NULL) {
+        // Nothing cached, so there is nothing to free or reset.
+        return;
+    }
+    uprv_free(pat);
+    pat = NULL;
+    patLen = 0;
+}
+
+/**
+ * Replace the cached pattern with a private, NUL-terminated copy of newPat.
+ *
+ * Any previously cached pattern is released first. If the copy cannot be
+ * allocated, the set is left with no cached pattern (pat == NULL,
+ * patLen == 0); _toPattern() only uses the cache when pat is non-NULL.
+ *
+ * @param newPat the pattern text to cache
+ */
+void UnicodeSet::setPattern(const UnicodeString& newPat) {
+    releasePattern();
+    const int32_t newPatLen = newPat.length();
+    pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar));
+    if (pat == NULL) {
+        // Allocation failed: keep the cache empty rather than writing
+        // through a NULL pointer below.
+        patLen = 0;
+        return;
+    }
+    patLen = newPatLen;
+    newPat.extractBetween(0, patLen, pat);
+    pat[patLen] = 0;    // NUL-terminate the copy
+}

 U_NAMESPACE_END
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@ -89,10 +89,113 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
 */
 //static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */

-U_NAMESPACE_BEGIN
+U_CDECL_BEGIN

 static UnicodeSet *INCLUSIONS[UPROPS_SRC_COUNT] = { NULL }; // cached getInclusions()

+//----------------------------------------------------------------
+// Inclusions list
+//----------------------------------------------------------------
+
+// USetAdder implementation
+// Does not use uset.h to reduce code dependencies
+// USetAdder callback: add one code point to the UnicodeSet behind the USet handle.
+static void U_CALLCONV
+_set_add(USet *set, UChar32 c) {
+    UnicodeSet *target = (UnicodeSet *)set;
+    target->add(c);
+}
+
+// USetAdder callback: add the range start..end to the UnicodeSet behind the USet handle.
+static void U_CALLCONV
+_set_addRange(USet *set, UChar32 start, UChar32 end) {
+    UnicodeSet *target = (UnicodeSet *)set;
+    target->add(start, end);
+}
+
+// USetAdder callback: add a string to the UnicodeSet behind the USet handle.
+// A negative length is passed through with the "terminated" flag set.
+static void U_CALLCONV
+_set_addString(USet *set, const UChar *str, int32_t length) {
+    const UBool isTerminated = (UBool)(length<0);
+    UnicodeSet *target = (UnicodeSet *)set;
+    target->add(UnicodeString(isTerminated, str, length));
+}
+
+/**
+ * Library cleanup hook for the cached inclusion sets.
+ * Deletes every non-NULL entry of INCLUSIONS and resets it to NULL.
+ *
+ * @return always TRUE
+ */
+static UBool U_CALLCONV uset_cleanup(void) {
+    for (int32_t slot = UPROPS_SRC_NONE; slot < UPROPS_SRC_COUNT; ++slot) {
+        UnicodeSet *&cached = INCLUSIONS[slot];
+        if (cached == NULL) {
+            continue;
+        }
+        delete cached;
+        cached = NULL;
+    }
+    return TRUE;
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Return the cached inclusions set for a property source, building and
+ * caching it on first use.
+ *
+ * @param src    index into INCLUSIONS (a UPROPS_SRC_* value)
+ * @param status set to U_INTERNAL_PROGRAM_ERROR when src is out of range or
+ *               has no handler below, and to U_MEMORY_ALLOCATION_ERROR when
+ *               the new set cannot be allocated; the *_addPropertyStarts()
+ *               helpers also report through it
+ * @return the set cached for src, or NULL if nothing is cached for it
+ */
+static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status) {
+    // Validate the index before INCLUSIONS[src] is touched: the default
+    // branch of the switch below runs only after the array has been read.
+    if (src < 0 || src >= UPROPS_SRC_COUNT) {
+        status = U_INTERNAL_PROGRAM_ERROR;
+        return NULL;
+    }
+    UBool needInit;
+    UMTX_CHECK(NULL, (INCLUSIONS[src] == NULL), needInit);
+    if (needInit) {
+        UnicodeSet* incl = new UnicodeSet();
+        USetAdder sa = {
+            (USet *)incl,
+            _set_add,
+            _set_addRange,
+            _set_addString,
+            NULL // don't need remove()
+        };
+
+        if (incl != NULL) {
+            switch(src) {
+            case UPROPS_SRC_CHAR:
+                uchar_addPropertyStarts(&sa, &status);
+                break;
+            case UPROPS_SRC_PROPSVEC:
+                upropsvec_addPropertyStarts(&sa, &status);
+                break;
+            case UPROPS_SRC_CHAR_AND_PROPSVEC:
+                uchar_addPropertyStarts(&sa, &status);
+                upropsvec_addPropertyStarts(&sa, &status);
+                break;
+            case UPROPS_SRC_HST:
+                uhst_addPropertyStarts(&sa, &status);
+                break;
+#if !UCONFIG_NO_NORMALIZATION
+            case UPROPS_SRC_NORM:
+                unorm_addPropertyStarts(&sa, &status);
+                break;
+#endif
+            case UPROPS_SRC_CASE:
+                ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
+                break;
+            case UPROPS_SRC_BIDI:
+                ubidi_addPropertyStarts(ubidi_getSingleton(&status), &sa, &status);
+                break;
+            default:
+                status = U_INTERNAL_PROGRAM_ERROR;
+                break;
+            }
+            if (U_SUCCESS(status)) {
+                // Compact for caching
+                incl->compact();
+                umtx_lock(NULL);
+                // Re-check while holding the lock: another caller may have
+                // stored a set for src since needInit was computed.
+                if (INCLUSIONS[src] == NULL) {
+                    INCLUSIONS[src] = incl;
+                    incl = NULL;    // ownership moved to the cache
+                    ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
+                }
+                umtx_unlock(NULL);
+            }
+            // Still non-NULL only if building failed or the slot was
+            // already filled; otherwise this deletes NULL.
+            delete incl;
+        } else {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+    return INCLUSIONS[src];
+}
+
 // helper functions for matching of pattern syntax pieces ------------------ ***
 // these functions are parallel to the PERL_OPEN etc. strings above

@ -143,8 +246,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
 */
 UnicodeSet::UnicodeSet(const UnicodeString& pattern,
                       UErrorCode& status) :
-    len(0), capacity(START_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(0), capacity(START_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {   
    if(U_SUCCESS(status)){
        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
@ -171,8 +274,8 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern,
                       uint32_t options,
                       const SymbolTable* symbols,
                       UErrorCode& status) :
-    len(0), capacity(START_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(0), capacity(START_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {   
    if(U_SUCCESS(status)){
        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
@ -191,8 +294,8 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
                       uint32_t options,
                       const SymbolTable* symbols,
                       UErrorCode& status) :
-    len(0), capacity(START_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(0), capacity(START_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {
    if(U_SUCCESS(status)){
        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
@ -283,7 +386,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
        status = U_MALFORMED_SET;
        return *this;
    }
-    pat = rebuiltPat;
+    setPattern(rebuiltPat);
    return *this;
 }

@ -1165,109 +1268,6 @@ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
    rebuiltPat.append(pattern, 0, pos.getIndex());
 }

-//----------------------------------------------------------------
-// Inclusions list
-//----------------------------------------------------------------
-
-U_CDECL_BEGIN
-
-// USetAdder implementation
-// Does not use uset.h to reduce code dependencies
-static void U_CALLCONV
-_set_add(USet *set, UChar32 c) {
-    ((UnicodeSet *)set)->add(c);
-}
-
-static void U_CALLCONV
-_set_addRange(USet *set, UChar32 start, UChar32 end) {
-    ((UnicodeSet *)set)->add(start, end);
-}
-
-static void U_CALLCONV
-_set_addString(USet *set, const UChar *str, int32_t length) {
-    ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));
-}
-
-/**
- * Cleanup function for UnicodeSet
- */
-static UBool U_CALLCONV uset_cleanup(void) {
-    int32_t i;
-
-    for(i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) {
-        if (INCLUSIONS[i] != NULL) {
-            delete INCLUSIONS[i];
-            INCLUSIONS[i] = NULL;
-        }
-    }
-
-    return TRUE;
-}
-
-U_CDECL_END
-
-const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
-    UBool needInit;
-    UMTX_CHECK(NULL, (INCLUSIONS[src] == NULL), needInit);
-    if (needInit) {
-        UnicodeSet* incl = new UnicodeSet();
-        USetAdder sa = {
-            (USet *)incl,
-            _set_add,
-            _set_addRange,
-            _set_addString,
-            NULL // don't need remove()
-        };
-
-        if (incl != NULL) {
-            switch(src) {
-            case UPROPS_SRC_CHAR:
-                uchar_addPropertyStarts(&sa, &status);
-                break;
-            case UPROPS_SRC_PROPSVEC:
-                upropsvec_addPropertyStarts(&sa, &status);
-                break;
-            case UPROPS_SRC_CHAR_AND_PROPSVEC:
-                uchar_addPropertyStarts(&sa, &status);
-                upropsvec_addPropertyStarts(&sa, &status);
-                break;
-            case UPROPS_SRC_HST:
-                uhst_addPropertyStarts(&sa, &status);
-                break;
-#if !UCONFIG_NO_NORMALIZATION
-            case UPROPS_SRC_NORM:
-                unorm_addPropertyStarts(&sa, &status);
-                break;
-#endif
-            case UPROPS_SRC_CASE:
-                ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
-                break;
-            case UPROPS_SRC_BIDI:
-                ubidi_addPropertyStarts(ubidi_getSingleton(&status), &sa, &status);
-                break;
-            default:
-                status = U_INTERNAL_PROGRAM_ERROR;
-                break;
-            }
-            if (U_SUCCESS(status)) {
-                // Compact for caching
-                incl->compact();
-                umtx_lock(NULL);
-                if (INCLUSIONS[src] == NULL) {
-                    INCLUSIONS[src] = incl;
-                    incl = NULL;
-                    ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
-                }
-                umtx_unlock(NULL);
-            }
-            delete incl;
-        } else {
-            status = U_MEMORY_ALLOCATION_ERROR;
-        }
-    }
-    return INCLUSIONS[src];
-}
-
 //----------------------------------------------------------------
 // Case folding API
 //----------------------------------------------------------------