From 1c37b55ae05d6e752ecdb602a67f5ce5a3ee4a93 Mon Sep 17 00:00:00 2001
From: George Rhoten <grhoten@users.noreply.github.com>
Date: Tue, 3 Oct 2006 17:41:23 +0000
Subject: [PATCH] ICU-5426 Reduce the amount of unused memory in caches by at
 least 500KB.

X-SVN-Rev: 20476
---
 icu4c/source/common/unicode/uniset.h |  16 +-
 icu4c/source/common/uniset.cpp       |  76 ++++++---
 icu4c/source/common/uniset_props.cpp | 222 +++++++++++++--------------
 3 files changed, 173 insertions(+), 141 deletions(-)

diff --git a/icu4c/source/common/unicode/uniset.h b/icu4c/source/common/unicode/uniset.h
index 8f497bafb68..a63a65d6ed8 100644
--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@@ -262,11 +262,10 @@ class U_COMMON_API UnicodeSet : public UnicodeFilter {
 
     int32_t len; // length of list used; 0 <= len <= capacity
     int32_t capacity; // capacity of list
-    int32_t bufferCapacity; // capacity of buffer
     UChar32* list; // MUST be terminated with HIGH
     UChar32* buffer; // internal buffer, may be NULL
-
-    UVector* strings; // maintained in sorted order
+    int32_t bufferCapacity; // capacity of buffer
+    int32_t patLen;
 
     /**
      * The pattern representation of this set.  This may not be the
@@ -277,7 +276,8 @@ class U_COMMON_API UnicodeSet : public UnicodeFilter {
      * indicating that toPattern() must generate a pattern
      * representation from the inversion list.
      */
-    UnicodeString pat;
+    UChar *pat;
+    UVector* strings; // maintained in sorted order
 
 public:
 
@@ -1309,9 +1309,13 @@ private:
                      UErrorCode &status);
 
     /**
-     * Return a cached copy of the inclusions list for the property source.
+     * Set the new pattern to cache.
      */
-    static const UnicodeSet* getInclusions(int32_t src, UErrorCode &errorCode);
+    void setPattern(const UnicodeString& newPat);
+    /**
+     * Release existing cached pattern.
+     */
+    void releasePattern();
 
     friend class UnicodeSetIterator;
 };
diff --git a/icu4c/source/common/uniset.cpp b/icu4c/source/common/uniset.cpp
index d7170ee9681..6e2b8a65c7e 100644
--- a/icu4c/source/common/uniset.cpp
+++ b/icu4c/source/common/uniset.cpp
@@ -138,8 +138,8 @@ static int8_t U_CALLCONV compareUnicodeString(UHashTok t1, UHashTok t2) {
  * Constructs an empty set.
  */
 UnicodeSet::UnicodeSet() :
-    len(1), capacity(1 + START_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(1), capacity(1 + START_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {
     list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
     if(list!=NULL){
@@ -158,8 +158,8 @@ UnicodeSet::UnicodeSet() :
  * @param end last character, inclusive, of range
  */
 UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
-    len(1), capacity(1 + START_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(1), capacity(1 + START_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {
     list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
     if(list!=NULL){
@@ -177,8 +177,8 @@ UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
  */
 UnicodeSet::UnicodeSet(const UnicodeSet& o) :
     UnicodeFilter(o),
-    len(0), capacity(o.len + GROW_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(0), capacity(o.len + GROW_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {
     list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
     if(list!=NULL){
@@ -199,6 +199,7 @@ UnicodeSet::~UnicodeSet() {
         uprv_free(buffer);
     }
     delete strings;
+    releasePattern();
 }
 
 /**
@@ -210,7 +211,10 @@ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
     uprv_memcpy(list, o.list, len*sizeof(UChar32));
     UErrorCode ec = U_ZERO_ERROR;
     strings->assign(*o.strings, cloneUnicodeString, ec);
-    pat = o.pat;
+    releasePattern();
+    if (o.pat) {
+        setPattern(UnicodeString(o.pat, o.patLen));
+    }
     return *this;
 }
 
@@ -869,7 +873,7 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
     }
 #endif
 
-    pat.truncate(0);
+    releasePattern();
     return *this;
 }
 
@@ -888,7 +892,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
     if (cp < 0) {
         if (!strings->contains((void*) &s)) {
             _add(s);
-            pat.truncate(0);
+            releasePattern();
         }
     } else {
         add((UChar32)cp, (UChar32)cp);
@@ -1069,7 +1073,7 @@ UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
     int32_t cp = getSingleCP(s);
     if (cp < 0) {
         strings->removeElement((void*) &s);
-        pat.truncate(0);
+        releasePattern();
     } else {
         remove((UChar32)cp, (UChar32)cp);
     }
@@ -1092,7 +1096,7 @@ UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) {
         UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
         exclusiveOr(range, 2, 0);
     }
-    pat.truncate(0);
+    releasePattern();
     return *this;
 }
 
@@ -1116,7 +1120,7 @@ UnicodeSet& UnicodeSet::complement(void) {
         ++len;
     }
     swapBuffers();
-    pat.truncate(0);
+    releasePattern();
     return *this;
 }
 
@@ -1137,7 +1141,7 @@ UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
         } else {
             _add(s);
         }
-        pat.truncate(0);
+        releasePattern();
     } else {
         complement((UChar32)cp, (UChar32)cp);
     }
@@ -1224,7 +1228,7 @@ UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
 UnicodeSet& UnicodeSet::clear(void) {
     list[0] = UNICODESET_HIGH;
     len = 1;
-    pat.truncate(0);
+    releasePattern();
     strings->removeAllElements();
     return *this;
 }
@@ -1465,7 +1469,7 @@ void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t pola
         }
     }
     swapBuffers();
-    pat.truncate(0);
+    releasePattern();
 }
 
 // polarity = 0 is normal: x union y
@@ -1570,7 +1574,7 @@ void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
     buffer[k++] = UNICODESET_HIGH;    // terminate
     len = k;
     swapBuffers();
-    pat.truncate(0);
+    releasePattern();
 }
 
 // polarity = 0 is normal: x intersect y
@@ -1659,7 +1663,7 @@ void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity)
     buffer[k++] = UNICODESET_HIGH;    // terminate
     len = k;
     swapBuffers();
-    pat.truncate(0);
+    releasePattern();
 }
 
 /**
@@ -1717,13 +1721,14 @@ escapeUnprintable) {
  * is one.  Otherwise it will be generated.
  */
 UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
-                                      UBool escapeUnprintable) const {
-    if (pat.length() > 0) {
+                                      UBool escapeUnprintable) const
+{
+    if (pat != NULL) {
         int32_t i;
         int32_t backslashCount = 0;
-        for (i=0; i<pat.length(); ) {
-            UChar32 c = pat.char32At(i);
-            i += UTF_CHAR_LENGTH(c);
+        for (i=0; i<patLen; ) {
+            UChar32 c;
+            U16_NEXT(pat, i, patLen, c);
             if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
                 // If the unprintable character is preceded by an odd
                 // number of backslashes, then it has been escaped.
@@ -1755,7 +1760,8 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
  * will produce another set that is equal to this one.
  */
 UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
-                                     UBool escapeUnprintable) const {
+                                     UBool escapeUnprintable) const
+{
     result.truncate(0);
     return _toPattern(result, escapeUnprintable);
 }
@@ -1766,7 +1772,8 @@ UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
  * passed to applyPattern().
  */
 UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
-                                            UBool escapeUnprintable) const {
+                                            UBool escapeUnprintable) const
+{
     result.append(SET_OPEN);
 
 //  // Check against the predefined categories.  We implicitly build
@@ -1829,5 +1836,42 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
     return result.append(SET_CLOSE);
 }
 
+/**
+ * Release the existing cached pattern, if any.
+ * Afterwards pat is NULL and patLen is 0, which is the state _toPattern()
+ * treats as "no cached pattern".
+ */
+void UnicodeSet::releasePattern() {
+    if (pat) {
+        uprv_free(pat);
+        pat = NULL;
+        patLen = 0;
+    }
+}
+
+/**
+ * Set the new pattern to cache.
+ * Any previously cached pattern is released first, then a NUL-terminated
+ * copy of newPat is stored in pat/patLen.
+ *
+ * The cache is only an optimization: if the allocation fails, the set is
+ * left without a cached pattern (pat == NULL, patLen == 0) instead of
+ * writing through a NULL buffer, and the pattern can still be generated
+ * from the inversion list when it is requested.
+ *
+ * @param newPat the pattern text to cache
+ */
+void UnicodeSet::setPattern(const UnicodeString& newPat) {
+    releasePattern();
+    int32_t newPatLen = newPat.length();
+    pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar));
+    if (pat != NULL) {
+        // Publish the length only once the buffer exists, so that pat and
+        // patLen stay consistent on the allocation-failure path.
+        patLen = newPatLen;
+        newPat.extractBetween(0, patLen, pat);
+        pat[patLen] = 0;
+    }
+}
 
 U_NAMESPACE_END
diff --git a/icu4c/source/common/uniset_props.cpp b/icu4c/source/common/uniset_props.cpp
index 414922e023a..dad0e364e15 100644
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@@ -89,10 +89,113 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
  */
 //static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
 
-U_NAMESPACE_BEGIN
+U_CDECL_BEGIN
 
 static UnicodeSet *INCLUSIONS[UPROPS_SRC_COUNT] = { NULL }; // cached getInclusions()
 
+//----------------------------------------------------------------
+// Inclusions list
+//----------------------------------------------------------------
+
+// USetAdder implementation
+// Does not use uset.h to reduce code dependencies
+static void U_CALLCONV _set_add(USet *set, UChar32 c) {
+    UnicodeSet *target = (UnicodeSet *)set;  // opaque handle is the UnicodeSet being filled
+    target->add(c);                          // add the single value c
+}
+
+static void U_CALLCONV _set_addRange(USet *set, UChar32 start, UChar32 end) {
+    UnicodeSet *target = (UnicodeSet *)set;  // opaque handle is the UnicodeSet being filled
+    target->add(start, end);                 // add the range start..end
+}
+
+static void U_CALLCONV _set_addString(USet *set, const UChar *str, int32_t length) {
+    const UnicodeString text((UBool)(length<0), str, length);  // length<0 => str is NUL-terminated (per UnicodeString API)
+    ((UnicodeSet *)set)->add(text);
+}
+
+/**
+ * Cleanup callback for the cached inclusions sets; getInclusions()
+ * registers it through ucln_common_registerCleanup().
+ * Deletes every cached set, resets its INCLUSIONS slot to NULL and
+ * always returns TRUE.
+ */
+static UBool U_CALLCONV uset_cleanup(void) {
+    for (int32_t src = UPROPS_SRC_NONE; src < UPROPS_SRC_COUNT; ++src) {
+        // deleting NULL is a no-op, so empty slots need no special case
+        delete INCLUSIONS[src];
+        INCLUSIONS[src] = NULL;
+    }
+
+    return TRUE;
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+// Return the cached inclusions set for property source src, building and caching it on first use.
+static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status) {
+    UBool needInit;
+    UMTX_CHECK(NULL, (INCLUSIONS[src] == NULL), needInit); // needInit: no set cached for src yet
+    if (needInit) {
+        UnicodeSet* incl = new UnicodeSet(); // built privately; stored in the cache only on success
+        USetAdder sa = { // callback table that routes additions into incl
+            (USet *)incl,
+            _set_add,
+            _set_addRange,
+            _set_addString,
+            NULL // don't need remove()
+        };
+
+        if (incl != NULL) {
+            switch(src) { // pick the addPropertyStarts routine(s) for this source
+            case UPROPS_SRC_CHAR:
+                uchar_addPropertyStarts(&sa, &status);
+                break;
+            case UPROPS_SRC_PROPSVEC:
+                upropsvec_addPropertyStarts(&sa, &status);
+                break;
+            case UPROPS_SRC_CHAR_AND_PROPSVEC: // union of the two sources above
+                uchar_addPropertyStarts(&sa, &status);
+                upropsvec_addPropertyStarts(&sa, &status);
+                break;
+            case UPROPS_SRC_HST:
+                uhst_addPropertyStarts(&sa, &status);
+                break;
+#if !UCONFIG_NO_NORMALIZATION
+            case UPROPS_SRC_NORM:
+                unorm_addPropertyStarts(&sa, &status);
+                break;
+#endif
+            case UPROPS_SRC_CASE:
+                ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
+                break;
+            case UPROPS_SRC_BIDI:
+                ubidi_addPropertyStarts(ubidi_getSingleton(&status), &sa, &status);
+                break;
+            default: // any src without a case above is reported as an error
+                status = U_INTERNAL_PROGRAM_ERROR;
+                break;
+            }
+            if (U_SUCCESS(status)) {
+                // Compact for caching
+                incl->compact();
+                umtx_lock(NULL);
+                if (INCLUSIONS[src] == NULL) { // re-check under the lock; the slot may have been filled meanwhile
+                    INCLUSIONS[src] = incl;
+                    incl = NULL; // the cache owns the set now; makes the delete below a no-op
+                    ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
+                }
+                umtx_unlock(NULL);
+            }
+            delete incl; // frees the set on failure or when the slot was already filled
+        } else {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+    return INCLUSIONS[src]; // still NULL if initialization failed; status carries the reason
+}
+
 // helper functions for matching of pattern syntax pieces ------------------ ***
 // these functions are parallel to the PERL_OPEN etc. strings above
 
@@ -143,8 +246,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
  */
 UnicodeSet::UnicodeSet(const UnicodeString& pattern,
                        UErrorCode& status) :
-    len(0), capacity(START_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(0), capacity(START_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {   
     if(U_SUCCESS(status)){
         list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
@@ -171,8 +274,8 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern,
                        uint32_t options,
                        const SymbolTable* symbols,
                        UErrorCode& status) :
-    len(0), capacity(START_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(0), capacity(START_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {   
     if(U_SUCCESS(status)){
         list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
@@ -191,8 +294,8 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
                        uint32_t options,
                        const SymbolTable* symbols,
                        UErrorCode& status) :
-    len(0), capacity(START_EXTRA), bufferCapacity(0),
-    list(0), buffer(0), strings(NULL)
+    len(0), capacity(START_EXTRA), list(0), buffer(0),
+    bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
 {
     if(U_SUCCESS(status)){
         list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
@@ -283,7 +386,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
         status = U_MALFORMED_SET;
         return *this;
     }
-    pat = rebuiltPat;
+    setPattern(rebuiltPat);
     return *this;
 }
 
@@ -1165,109 +1268,6 @@ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
     rebuiltPat.append(pattern, 0, pos.getIndex());
 }
 
-//----------------------------------------------------------------
-// Inclusions list
-//----------------------------------------------------------------
-
-U_CDECL_BEGIN
-
-// USetAdder implementation
-// Does not use uset.h to reduce code dependencies
-static void U_CALLCONV
-_set_add(USet *set, UChar32 c) {
-    ((UnicodeSet *)set)->add(c);
-}
-
-static void U_CALLCONV
-_set_addRange(USet *set, UChar32 start, UChar32 end) {
-    ((UnicodeSet *)set)->add(start, end);
-}
-
-static void U_CALLCONV
-_set_addString(USet *set, const UChar *str, int32_t length) {
-    ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));
-}
-
-/**
- * Cleanup function for UnicodeSet
- */
-static UBool U_CALLCONV uset_cleanup(void) {
-    int32_t i;
-
-    for(i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) {
-        if (INCLUSIONS[i] != NULL) {
-            delete INCLUSIONS[i];
-            INCLUSIONS[i] = NULL;
-        }
-    }
-
-    return TRUE;
-}
-
-U_CDECL_END
-
-const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
-    UBool needInit;
-    UMTX_CHECK(NULL, (INCLUSIONS[src] == NULL), needInit);
-    if (needInit) {
-        UnicodeSet* incl = new UnicodeSet();
-        USetAdder sa = {
-            (USet *)incl,
-            _set_add,
-            _set_addRange,
-            _set_addString,
-            NULL // don't need remove()
-        };
-
-        if (incl != NULL) {
-            switch(src) {
-            case UPROPS_SRC_CHAR:
-                uchar_addPropertyStarts(&sa, &status);
-                break;
-            case UPROPS_SRC_PROPSVEC:
-                upropsvec_addPropertyStarts(&sa, &status);
-                break;
-            case UPROPS_SRC_CHAR_AND_PROPSVEC:
-                uchar_addPropertyStarts(&sa, &status);
-                upropsvec_addPropertyStarts(&sa, &status);
-                break;
-            case UPROPS_SRC_HST:
-                uhst_addPropertyStarts(&sa, &status);
-                break;
-#if !UCONFIG_NO_NORMALIZATION
-            case UPROPS_SRC_NORM:
-                unorm_addPropertyStarts(&sa, &status);
-                break;
-#endif
-            case UPROPS_SRC_CASE:
-                ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
-                break;
-            case UPROPS_SRC_BIDI:
-                ubidi_addPropertyStarts(ubidi_getSingleton(&status), &sa, &status);
-                break;
-            default:
-                status = U_INTERNAL_PROGRAM_ERROR;
-                break;
-            }
-            if (U_SUCCESS(status)) {
-                // Compact for caching
-                incl->compact();
-                umtx_lock(NULL);
-                if (INCLUSIONS[src] == NULL) {
-                    INCLUSIONS[src] = incl;
-                    incl = NULL;
-                    ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
-                }
-                umtx_unlock(NULL);
-            }
-            delete incl;
-        } else {
-            status = U_MEMORY_ALLOCATION_ERROR;
-        }
-    }
-    return INCLUSIONS[src];
-}
-
 //----------------------------------------------------------------
 // Case folding API
 //----------------------------------------------------------------