mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-5426 Reduce the amount of unused memory in caches by at least 500KB.
X-SVN-Rev: 20476
This commit is contained in:
parent
70cb51cb3b
commit
1c37b55ae0
3 changed files with 173 additions and 141 deletions
|
@ -262,11 +262,10 @@ class U_COMMON_API UnicodeSet : public UnicodeFilter {
|
|||
|
||||
int32_t len; // length of list used; 0 <= len <= capacity
|
||||
int32_t capacity; // capacity of list
|
||||
int32_t bufferCapacity; // capacity of buffer
|
||||
UChar32* list; // MUST be terminated with HIGH
|
||||
UChar32* buffer; // internal buffer, may be NULL
|
||||
|
||||
UVector* strings; // maintained in sorted order
|
||||
int32_t bufferCapacity; // capacity of buffer
|
||||
int32_t patLen;
|
||||
|
||||
/**
|
||||
* The pattern representation of this set. This may not be the
|
||||
|
@ -277,7 +276,8 @@ class U_COMMON_API UnicodeSet : public UnicodeFilter {
|
|||
* indicating that toPattern() must generate a pattern
|
||||
* representation from the inversion list.
|
||||
*/
|
||||
UnicodeString pat;
|
||||
UChar *pat;
|
||||
UVector* strings; // maintained in sorted order
|
||||
|
||||
public:
|
||||
|
||||
|
@ -1309,9 +1309,13 @@ private:
|
|||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Return a cached copy of the inclusions list for the property source.
|
||||
* Set the new pattern to cache.
|
||||
*/
|
||||
static const UnicodeSet* getInclusions(int32_t src, UErrorCode &errorCode);
|
||||
void setPattern(const UnicodeString& newPat);
|
||||
/**
|
||||
* Release existing cached pattern.
|
||||
*/
|
||||
void releasePattern();
|
||||
|
||||
friend class UnicodeSetIterator;
|
||||
};
|
||||
|
|
|
@ -138,8 +138,8 @@ static int8_t U_CALLCONV compareUnicodeString(UHashTok t1, UHashTok t2) {
|
|||
* Constructs an empty set.
|
||||
*/
|
||||
UnicodeSet::UnicodeSet() :
|
||||
len(1), capacity(1 + START_EXTRA), bufferCapacity(0),
|
||||
list(0), buffer(0), strings(NULL)
|
||||
len(1), capacity(1 + START_EXTRA), list(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
|
||||
{
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
if(list!=NULL){
|
||||
|
@ -158,8 +158,8 @@ UnicodeSet::UnicodeSet() :
|
|||
* @param end last character, inclusive, of range
|
||||
*/
|
||||
UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
|
||||
len(1), capacity(1 + START_EXTRA), bufferCapacity(0),
|
||||
list(0), buffer(0), strings(NULL)
|
||||
len(1), capacity(1 + START_EXTRA), list(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
|
||||
{
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
if(list!=NULL){
|
||||
|
@ -177,8 +177,8 @@ UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
|
|||
*/
|
||||
UnicodeSet::UnicodeSet(const UnicodeSet& o) :
|
||||
UnicodeFilter(o),
|
||||
len(0), capacity(o.len + GROW_EXTRA), bufferCapacity(0),
|
||||
list(0), buffer(0), strings(NULL)
|
||||
len(0), capacity(o.len + GROW_EXTRA), list(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
|
||||
{
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
if(list!=NULL){
|
||||
|
@ -199,6 +199,7 @@ UnicodeSet::~UnicodeSet() {
|
|||
uprv_free(buffer);
|
||||
}
|
||||
delete strings;
|
||||
releasePattern();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -210,7 +211,10 @@ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
|
|||
uprv_memcpy(list, o.list, len*sizeof(UChar32));
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
strings->assign(*o.strings, cloneUnicodeString, ec);
|
||||
pat = o.pat;
|
||||
releasePattern();
|
||||
if (o.pat) {
|
||||
setPattern(UnicodeString(o.pat, o.patLen));
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -869,7 +873,7 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
|
|||
}
|
||||
#endif
|
||||
|
||||
pat.truncate(0);
|
||||
releasePattern();
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -888,7 +892,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
|
|||
if (cp < 0) {
|
||||
if (!strings->contains((void*) &s)) {
|
||||
_add(s);
|
||||
pat.truncate(0);
|
||||
releasePattern();
|
||||
}
|
||||
} else {
|
||||
add((UChar32)cp, (UChar32)cp);
|
||||
|
@ -1069,7 +1073,7 @@ UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
|
|||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
strings->removeElement((void*) &s);
|
||||
pat.truncate(0);
|
||||
releasePattern();
|
||||
} else {
|
||||
remove((UChar32)cp, (UChar32)cp);
|
||||
}
|
||||
|
@ -1092,7 +1096,7 @@ UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) {
|
|||
UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
|
||||
exclusiveOr(range, 2, 0);
|
||||
}
|
||||
pat.truncate(0);
|
||||
releasePattern();
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -1116,7 +1120,7 @@ UnicodeSet& UnicodeSet::complement(void) {
|
|||
++len;
|
||||
}
|
||||
swapBuffers();
|
||||
pat.truncate(0);
|
||||
releasePattern();
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -1137,7 +1141,7 @@ UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
|
|||
} else {
|
||||
_add(s);
|
||||
}
|
||||
pat.truncate(0);
|
||||
releasePattern();
|
||||
} else {
|
||||
complement((UChar32)cp, (UChar32)cp);
|
||||
}
|
||||
|
@ -1224,7 +1228,7 @@ UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
|
|||
UnicodeSet& UnicodeSet::clear(void) {
|
||||
list[0] = UNICODESET_HIGH;
|
||||
len = 1;
|
||||
pat.truncate(0);
|
||||
releasePattern();
|
||||
strings->removeAllElements();
|
||||
return *this;
|
||||
}
|
||||
|
@ -1465,7 +1469,7 @@ void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t pola
|
|||
}
|
||||
}
|
||||
swapBuffers();
|
||||
pat.truncate(0);
|
||||
releasePattern();
|
||||
}
|
||||
|
||||
// polarity = 0 is normal: x union y
|
||||
|
@ -1570,7 +1574,7 @@ void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
|
|||
buffer[k++] = UNICODESET_HIGH; // terminate
|
||||
len = k;
|
||||
swapBuffers();
|
||||
pat.truncate(0);
|
||||
releasePattern();
|
||||
}
|
||||
|
||||
// polarity = 0 is normal: x intersect y
|
||||
|
@ -1659,7 +1663,7 @@ void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity)
|
|||
buffer[k++] = UNICODESET_HIGH; // terminate
|
||||
len = k;
|
||||
swapBuffers();
|
||||
pat.truncate(0);
|
||||
releasePattern();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1717,13 +1721,14 @@ escapeUnprintable) {
|
|||
* is one. Otherwise it will be generated.
|
||||
*/
|
||||
UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const {
|
||||
if (pat.length() > 0) {
|
||||
UBool escapeUnprintable) const
|
||||
{
|
||||
if (pat != NULL) {
|
||||
int32_t i;
|
||||
int32_t backslashCount = 0;
|
||||
for (i=0; i<pat.length(); ) {
|
||||
UChar32 c = pat.char32At(i);
|
||||
i += UTF_CHAR_LENGTH(c);
|
||||
for (i=0; i<patLen; ) {
|
||||
UChar32 c;
|
||||
U16_NEXT(pat, i, patLen, c);
|
||||
if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
|
||||
// If the unprintable character is preceded by an odd
|
||||
// number of backslashes, then it has been escaped.
|
||||
|
@ -1755,7 +1760,8 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
|
|||
* will produce another set that is equal to this one.
|
||||
*/
|
||||
UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const {
|
||||
UBool escapeUnprintable) const
|
||||
{
|
||||
result.truncate(0);
|
||||
return _toPattern(result, escapeUnprintable);
|
||||
}
|
||||
|
@ -1766,7 +1772,8 @@ UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
|
|||
* passed to applyPattern().
|
||||
*/
|
||||
UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const {
|
||||
UBool escapeUnprintable) const
|
||||
{
|
||||
result.append(SET_OPEN);
|
||||
|
||||
// // Check against the predefined categories. We implicitly build
|
||||
|
@ -1829,5 +1836,26 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|||
return result.append(SET_CLOSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Release existing cached pattern
|
||||
*/
|
||||
void UnicodeSet::releasePattern() {
|
||||
if (pat) {
|
||||
uprv_free(pat);
|
||||
pat = NULL;
|
||||
patLen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the new pattern to cache.
|
||||
*/
|
||||
void UnicodeSet::setPattern(const UnicodeString& newPat) {
|
||||
releasePattern();
|
||||
patLen = newPat.length();
|
||||
pat = (UChar *)uprv_malloc((patLen + 1) * sizeof(UChar));
|
||||
newPat.extractBetween(0, patLen, pat);
|
||||
pat[patLen] = 0;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -89,10 +89,113 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
|
|||
*/
|
||||
//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
U_CDECL_BEGIN
|
||||
|
||||
static UnicodeSet *INCLUSIONS[UPROPS_SRC_COUNT] = { NULL }; // cached getInclusions()
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Inclusions list
|
||||
//----------------------------------------------------------------
|
||||
|
||||
// USetAdder implementation
|
||||
// Does not use uset.h to reduce code dependencies
|
||||
static void U_CALLCONV
|
||||
_set_add(USet *set, UChar32 c) {
|
||||
((UnicodeSet *)set)->add(c);
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
_set_addRange(USet *set, UChar32 start, UChar32 end) {
|
||||
((UnicodeSet *)set)->add(start, end);
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
_set_addString(USet *set, const UChar *str, int32_t length) {
|
||||
((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup function for UnicodeSet
|
||||
*/
|
||||
static UBool U_CALLCONV uset_cleanup(void) {
|
||||
int32_t i;
|
||||
|
||||
for(i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) {
|
||||
if (INCLUSIONS[i] != NULL) {
|
||||
delete INCLUSIONS[i];
|
||||
INCLUSIONS[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Return a cached copy of the inclusions list for the property source,
 * building and caching it on first use.
 *
 * @param src    property source selector (one of the UPROPS_SRC_* values);
 *               it is used as an index into INCLUSIONS
 * @param status set to U_INTERNAL_PROGRAM_ERROR when src is out of range or
 *               has no collector in the switch below, to
 *               U_MEMORY_ALLOCATION_ERROR when the new set cannot be
 *               allocated, or to whatever the *_addPropertyStarts functions
 *               report
 * @return the set cached for src, or NULL if nothing has been cached for it
 */
static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status) {
    // Validate the selector before it is used as an array index; the
    // switch's default case below would otherwise be reached only after
    // INCLUSIONS[src] had already been read out of bounds.
    if (src < UPROPS_SRC_NONE || src >= UPROPS_SRC_COUNT) {
        status = U_INTERNAL_PROGRAM_ERROR;
        return NULL;
    }

    UBool needInit;
    UMTX_CHECK(NULL, (INCLUSIONS[src] == NULL), needInit);
    if (needInit) {
        UnicodeSet* incl = new UnicodeSet();
        // Callback table handed to the property-start collectors; each
        // callback forwards to the matching UnicodeSet::add overload on incl.
        USetAdder sa = {
            (USet *)incl,
            _set_add,
            _set_addRange,
            _set_addString,
            NULL // don't need remove()
        };

        if (incl != NULL) {
            switch(src) {
            case UPROPS_SRC_CHAR:
                uchar_addPropertyStarts(&sa, &status);
                break;
            case UPROPS_SRC_PROPSVEC:
                upropsvec_addPropertyStarts(&sa, &status);
                break;
            case UPROPS_SRC_CHAR_AND_PROPSVEC:
                uchar_addPropertyStarts(&sa, &status);
                upropsvec_addPropertyStarts(&sa, &status);
                break;
            case UPROPS_SRC_HST:
                uhst_addPropertyStarts(&sa, &status);
                break;
#if !UCONFIG_NO_NORMALIZATION
            case UPROPS_SRC_NORM:
                unorm_addPropertyStarts(&sa, &status);
                break;
#endif
            case UPROPS_SRC_CASE:
                ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
                break;
            case UPROPS_SRC_BIDI:
                ubidi_addPropertyStarts(ubidi_getSingleton(&status), &sa, &status);
                break;
            default:
                status = U_INTERNAL_PROGRAM_ERROR;
                break;
            }
            if (U_SUCCESS(status)) {
                // Compact for caching
                incl->compact();
                umtx_lock(NULL);
                // Re-check under the lock: if the slot has been filled in the
                // meantime, keep the existing set and discard ours below.
                if (INCLUSIONS[src] == NULL) {
                    INCLUSIONS[src] = incl;
                    incl = NULL;
                    ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
                }
                umtx_unlock(NULL);
            }
            // incl is NULL here if it was installed in the cache; otherwise
            // this frees the set that was not cached.
            delete incl;
        } else {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    return INCLUSIONS[src];
}
|
||||
|
||||
// helper functions for matching of pattern syntax pieces ------------------ ***
|
||||
// these functions are parallel to the PERL_OPEN etc. strings above
|
||||
|
||||
|
@ -143,8 +246,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
|
|||
*/
|
||||
UnicodeSet::UnicodeSet(const UnicodeString& pattern,
|
||||
UErrorCode& status) :
|
||||
len(0), capacity(START_EXTRA), bufferCapacity(0),
|
||||
list(0), buffer(0), strings(NULL)
|
||||
len(0), capacity(START_EXTRA), list(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
|
||||
{
|
||||
if(U_SUCCESS(status)){
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
|
@ -171,8 +274,8 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern,
|
|||
uint32_t options,
|
||||
const SymbolTable* symbols,
|
||||
UErrorCode& status) :
|
||||
len(0), capacity(START_EXTRA), bufferCapacity(0),
|
||||
list(0), buffer(0), strings(NULL)
|
||||
len(0), capacity(START_EXTRA), list(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
|
||||
{
|
||||
if(U_SUCCESS(status)){
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
|
@ -191,8 +294,8 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
|
|||
uint32_t options,
|
||||
const SymbolTable* symbols,
|
||||
UErrorCode& status) :
|
||||
len(0), capacity(START_EXTRA), bufferCapacity(0),
|
||||
list(0), buffer(0), strings(NULL)
|
||||
len(0), capacity(START_EXTRA), list(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), strings(NULL), pat(NULL)
|
||||
{
|
||||
if(U_SUCCESS(status)){
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
|
@ -283,7 +386,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
|
|||
status = U_MALFORMED_SET;
|
||||
return *this;
|
||||
}
|
||||
pat = rebuiltPat;
|
||||
setPattern(rebuiltPat);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -1165,109 +1268,6 @@ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
|
|||
rebuiltPat.append(pattern, 0, pos.getIndex());
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Inclusions list
|
||||
//----------------------------------------------------------------
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
// USetAdder implementation
|
||||
// Does not use uset.h to reduce code dependencies
|
||||
static void U_CALLCONV
|
||||
_set_add(USet *set, UChar32 c) {
|
||||
((UnicodeSet *)set)->add(c);
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
_set_addRange(USet *set, UChar32 start, UChar32 end) {
|
||||
((UnicodeSet *)set)->add(start, end);
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
_set_addString(USet *set, const UChar *str, int32_t length) {
|
||||
((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup function for UnicodeSet
|
||||
*/
|
||||
static UBool U_CALLCONV uset_cleanup(void) {
|
||||
int32_t i;
|
||||
|
||||
for(i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) {
|
||||
if (INCLUSIONS[i] != NULL) {
|
||||
delete INCLUSIONS[i];
|
||||
INCLUSIONS[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
|
||||
UBool needInit;
|
||||
UMTX_CHECK(NULL, (INCLUSIONS[src] == NULL), needInit);
|
||||
if (needInit) {
|
||||
UnicodeSet* incl = new UnicodeSet();
|
||||
USetAdder sa = {
|
||||
(USet *)incl,
|
||||
_set_add,
|
||||
_set_addRange,
|
||||
_set_addString,
|
||||
NULL // don't need remove()
|
||||
};
|
||||
|
||||
if (incl != NULL) {
|
||||
switch(src) {
|
||||
case UPROPS_SRC_CHAR:
|
||||
uchar_addPropertyStarts(&sa, &status);
|
||||
break;
|
||||
case UPROPS_SRC_PROPSVEC:
|
||||
upropsvec_addPropertyStarts(&sa, &status);
|
||||
break;
|
||||
case UPROPS_SRC_CHAR_AND_PROPSVEC:
|
||||
uchar_addPropertyStarts(&sa, &status);
|
||||
upropsvec_addPropertyStarts(&sa, &status);
|
||||
break;
|
||||
case UPROPS_SRC_HST:
|
||||
uhst_addPropertyStarts(&sa, &status);
|
||||
break;
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
case UPROPS_SRC_NORM:
|
||||
unorm_addPropertyStarts(&sa, &status);
|
||||
break;
|
||||
#endif
|
||||
case UPROPS_SRC_CASE:
|
||||
ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
|
||||
break;
|
||||
case UPROPS_SRC_BIDI:
|
||||
ubidi_addPropertyStarts(ubidi_getSingleton(&status), &sa, &status);
|
||||
break;
|
||||
default:
|
||||
status = U_INTERNAL_PROGRAM_ERROR;
|
||||
break;
|
||||
}
|
||||
if (U_SUCCESS(status)) {
|
||||
// Compact for caching
|
||||
incl->compact();
|
||||
umtx_lock(NULL);
|
||||
if (INCLUSIONS[src] == NULL) {
|
||||
INCLUSIONS[src] = incl;
|
||||
incl = NULL;
|
||||
ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
}
|
||||
delete incl;
|
||||
} else {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
return INCLUSIONS[src];
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Case folding API
|
||||
//----------------------------------------------------------------
|
||||
|
|
Loading…
Add table
Reference in a new issue