From 8b18741503b8a18790baeb39534db4e57ea73871 Mon Sep 17 00:00:00 2001 From: Travis Keep Date: Wed, 5 Aug 2015 20:21:14 +0000 Subject: [PATCH] ICU-11767 Bound the ICU UnifiedCache. X-SVN-Rev: 37723 --- icu4c/source/common/sharedobject.cpp | 55 +++-- icu4c/source/common/sharedobject.h | 127 ++++++++-- icu4c/source/common/uhash.h | 13 +- icu4c/source/common/unifiedcache.cpp | 228 +++++++++++++++-- icu4c/source/common/unifiedcache.h | 94 ++++++- icu4c/source/test/intltest/tsmthred.cpp | 127 ++++++++-- .../source/test/intltest/unifiedcachetest.cpp | 233 +++++++++++++++++- 7 files changed, 774 insertions(+), 103 deletions(-) diff --git a/icu4c/source/common/sharedobject.cpp b/icu4c/source/common/sharedobject.cpp index 6affcd09cd5..e5e034bec13 100644 --- a/icu4c/source/common/sharedobject.cpp +++ b/icu4c/source/common/sharedobject.cpp @@ -1,42 +1,67 @@ /* ****************************************************************************** -* Copyright (C) 2014, International Business Machines +* Copyright (C) 2015, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * sharedobject.cpp */ #include "sharedobject.h" +#include "uassert.h" U_NAMESPACE_BEGIN + SharedObject::~SharedObject() {} +UnifiedCacheBase::~UnifiedCacheBase() {} + void -SharedObject::addRef() const { +SharedObject::addRef(UBool fromWithinCache) const { umtx_atomic_inc(&totalRefCount); + + // Although items in use may not be correct immediately, it + // will be correct eventually. + if (umtx_atomic_inc(&hardRefCount) == 1 && cachePtr != NULL) { + // If this object is cached, and the hardRefCount goes from 0 to 1, + // then the increment must happen from within the cache while the + // cache global mutex is locked. In this way, we can be rest assured + // that data races can't happen if the cache performs some task if + // the hardRefCount is zero while the global cache mutex is locked. + U_ASSERT(fromWithinCache); + cachePtr->incrementItemsInUse(); + } } void -SharedObject::removeRef() const { - if(umtx_atomic_dec(&totalRefCount) == 0) { +SharedObject::removeRef(UBool fromWithinCache) const { + UBool decrementItemsInUse = (umtx_atomic_dec(&hardRefCount) == 0); + UBool allReferencesGone = (umtx_atomic_dec(&totalRefCount) == 0); + + // Although items in use may not be correct immediately, it + // will be correct eventually. + if (decrementItemsInUse && cachePtr != NULL) { + if (fromWithinCache) { + cachePtr->decrementItemsInUse(); + } else { + cachePtr->decrementItemsInUseWithLockingAndEviction(); + } + } + if (allReferencesGone) { delete this; } } void SharedObject::addSoftRef() const { - addRef(); - umtx_atomic_inc(&softRefCount); + umtx_atomic_inc(&totalRefCount); + ++softRefCount; } void SharedObject::removeSoftRef() const { - umtx_atomic_dec(&softRefCount); - removeRef(); -} - -UBool -SharedObject::allSoftReferences() const { - return umtx_loadAcquire(totalRefCount) == umtx_loadAcquire(softRefCount); + --softRefCount; + if (umtx_atomic_dec(&totalRefCount) == 0) { + delete this; + } } int32_t @@ -45,8 +70,8 @@ SharedObject::getRefCount() const { } int32_t -SharedObject::getSoftRefCount() const { - return umtx_loadAcquire(softRefCount); +SharedObject::getHardRefCount() const { + return umtx_loadAcquire(hardRefCount); } void diff --git a/icu4c/source/common/sharedobject.h b/icu4c/source/common/sharedobject.h index 432c79ba0b9..44028699975 100644 --- a/icu4c/source/common/sharedobject.h +++ b/icu4c/source/common/sharedobject.h @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 2014, International Business Machines +* Copyright (C) 2015, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * sharedobject.h @@ -15,6 +15,39 @@ U_NAMESPACE_BEGIN +/** + * Base class for unified cache exposing enough methods to SharedObject + * instances to allow their addRef() and removeRef() methods to + * update cache metrics. No other part of ICU, except for SharedObject, + * should directly call the methods of this base class. + */ +class UnifiedCacheBase : public UObject { +public: + UnifiedCacheBase() { } + + /** + * Called by addRefWhileHoldingCacheLock() when the hard reference count + * of its instance goes from 0 to 1. + */ + virtual void incrementItemsInUse() const = 0; + + /** + * Called by removeRef() when the hard reference count of its instance + * drops from 1 to 0. + */ + virtual void decrementItemsInUseWithLockingAndEviction() const = 0; + + /** + * Called by removeRefWhileHoldingCacheLock() when the hard reference + * count of its instance drops from 1 to 0. + */ + virtual void decrementItemsInUse() const = 0; + virtual ~UnifiedCacheBase(); +private: + UnifiedCacheBase(const UnifiedCacheBase &); + UnifiedCacheBase &operator=(const UnifiedCacheBase &); +}; + /** * Base class for shared, reference-counted, auto-deleted objects. * Subclasses can be immutable. @@ -27,33 +60,57 @@ U_NAMESPACE_BEGIN class U_COMMON_API SharedObject : public UObject { public: /** Initializes totalRefCount, softRefCount to 0. */ - SharedObject() : totalRefCount(0), softRefCount(0) {} + SharedObject() : + totalRefCount(0), + softRefCount(0), + hardRefCount(0), + cachePtr(NULL) {} /** Initializes totalRefCount, softRefCount to 0. */ - SharedObject(const SharedObject &other) - : UObject(other), - totalRefCount(0), - softRefCount(0) {} + SharedObject(const SharedObject &other) : + UObject(other), + totalRefCount(0), + softRefCount(0), + hardRefCount(0), + cachePtr(NULL) {} virtual ~SharedObject(); /** * Increments the number of references to this object. Thread-safe. */ - void addRef() const; + void addRef() const { addRef(FALSE); } /** - * Increments the number of soft references to this object. Thread-safe. + * Increments the number of references to this object. + * Must be called only from within the internals of UnifiedCache and + * only while the cache global mutex is held. + */ + void addRefWhileHoldingCacheLock() const { addRef(TRUE); } + + /** + * Increments the number of soft references to this object. + * Must be called only from within the internals of UnifiedCache and + * only while the cache global mutex is held. */ void addSoftRef() const; /** * Decrements the number of references to this object. Thread-safe. */ - void removeRef() const; + void removeRef() const { removeRef(FALSE); } /** - * Decrements the number of soft references to this object. Thread-safe. + * Decrements the number of references to this object. + * Must be called only from within the internals of UnifiedCache and + * only while the cache global mutex is held. + */ + void removeRefWhileHoldingCacheLock() const { removeRef(TRUE); } + + /** + * Decrements the number of soft references to this object. + * Must be called only from within the internals of UnifiedCache and + * only while the cache global mutex is held. */ void removeSoftRef() const; @@ -64,22 +121,50 @@ public: int32_t getRefCount() const; /** - * Returns the count of soft references only. Uses a memory barrier. - * Used for testing the cache. Regular clients won't need this. + * Returns the count of soft references only. + * Must be called only from within the internals of UnifiedCache and + * only while the cache global mutex is held. */ - int32_t getSoftRefCount() const; + int32_t getSoftRefCount() const { return softRefCount; } /** - * If allSoftReferences() == TRUE then this object has only soft - * references. The converse is not necessarily true. + * Returns the count of hard references only. Uses a memory barrier. + * Used for testing the cache. Regular clients won't need this. */ - UBool allSoftReferences() const; + int32_t getHardRefCount() const; + + /** + * If noHardReferences() == TRUE then this object has no hard references. + * Must be called only from within the internals of UnifiedCache. + */ + inline UBool noHardReferences() const { return getHardRefCount() == 0; } + + /** + * If hasHardReferences() == TRUE then this object has hard references. + * Must be called only from within the internals of UnifiedCache. + */ + inline UBool hasHardReferences() const { return getHardRefCount() != 0; } + + /** + * If noSoftReferences() == TRUE then this object has no soft references. + * Must be called only from within the internals of UnifiedCache and + * only while the cache global mutex is held. + */ + UBool noSoftReferences() const { return (softRefCount == 0); } /** * Deletes this object if it has no references or soft references. */ void deleteIfZeroRefCount() const; + /** + * @internal For UnifedCache use only to register this object with itself. + * Must be called before this object is exposed to multiple threads. + */ + void registerWithCache(const UnifiedCacheBase *ptr) const { + cachePtr = ptr; + } + /** * Returns a writable version of ptr. * If there is exactly one owner, then ptr itself is returned as a @@ -133,7 +218,15 @@ public: private: mutable u_atomic_int32_t totalRefCount; - mutable u_atomic_int32_t softRefCount; + + // Any thread modifying softRefCount must hold the global cache mutex + mutable int32_t softRefCount; + + mutable u_atomic_int32_t hardRefCount; + mutable const UnifiedCacheBase *cachePtr; + void addRef(UBool withCacheLock) const; + void removeRef(UBool withCacheLock) const; + }; U_NAMESPACE_END diff --git a/icu4c/source/common/uhash.h b/icu4c/source/common/uhash.h index e217f5bbbb2..1761dd0a2e9 100644 --- a/icu4c/source/common/uhash.h +++ b/icu4c/source/common/uhash.h @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2014, International Business Machines +* Copyright (C) 1997-2015, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * Date Name Description @@ -58,10 +58,13 @@ * hashcode. During iteration an element may be deleted by calling * uhash_removeElement(); iteration may safely continue thereafter. * The uhash_remove() function may also be safely called in - * mid-iteration. However, if uhash_put() is called during iteration - * then the iteration will be out of sync. Under no circumstances - * should the UHashElement returned by uhash_nextElement be modified - * directly. + * mid-iteration. If uhash_put() is called during iteration, + * the iteration is still guaranteed to terminate reasonably, but + * there is no guarantee that every element will be returned or that + * some won't be returned more than once. + * + * Under no circumstances should the UHashElement returned by + * uhash_nextElement be modified directly. * * By default, the hashtable grows when necessary, but never shrinks, * even if all items are removed. For most applications this is diff --git a/icu4c/source/common/unifiedcache.cpp b/icu4c/source/common/unifiedcache.cpp index 32899af73e9..5b429790c2d 100644 --- a/icu4c/source/common/unifiedcache.cpp +++ b/icu4c/source/common/unifiedcache.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 2014, International Business Machines Corporation and +* Copyright (C) 2015, International Business Machines Corporation and * others. All Rights Reserved. ****************************************************************************** * @@ -20,6 +20,11 @@ static icu::SharedObject *gNoValue = NULL; static UMutex gCacheMutex = U_MUTEX_INITIALIZER; static UConditionVar gInProgressValueAddedCond = U_CONDITION_INITIALIZER; static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER; +static const int32_t MAX_EVICT_ITERATIONS = 10; + +static int32_t DEFAULT_MAX_UNUSED = 1000; +static int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100; + U_CDECL_BEGIN static UBool U_CALLCONV unifiedcache_cleanup() { @@ -85,7 +90,7 @@ static void U_CALLCONV cacheInit(UErrorCode &status) { gNoValue->addSoftRef(); } -const UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { +UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { umtx_initOnce(gCacheInitOnce, &cacheInit, status); if (U_FAILURE(status)) { return NULL; @@ -94,7 +99,13 @@ const UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { return gCache; } -UnifiedCache::UnifiedCache(UErrorCode &status) { +UnifiedCache::UnifiedCache(UErrorCode &status) : + fHashtable(NULL), + fEvictPos(UHASH_FIRST), + fItemsInUseCount(0), + fMaxUnused(DEFAULT_MAX_UNUSED), + fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE), + fAutoEvictedCount(0) { if (U_FAILURE(status)) { return; } @@ -110,6 +121,30 @@ UnifiedCache::UnifiedCache(UErrorCode &status) { uhash_setKeyDeleter(fHashtable, &ucache_deleteKey); } +void UnifiedCache::setEvictionPolicy( + int32_t count, int32_t percentageOfInUseItems, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + if (count < 0 || percentageOfInUseItems < 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + Mutex lock(&gCacheMutex); + fMaxUnused = count; + fMaxPercentageOfInUse = percentageOfInUseItems; +} + +int32_t UnifiedCache::unusedCount() const { + Mutex lock(&gCacheMutex); + return uhash_count(fHashtable) - fItemsInUseCount; +} + +int64_t UnifiedCache::autoEvictedCount() const { + Mutex lock(&gCacheMutex); + return fAutoEvictedCount; +} + int32_t UnifiedCache::keyCount() const { Mutex lock(&gCacheMutex); return uhash_count(fHashtable); @@ -122,7 +157,6 @@ void UnifiedCache::flush() const { // other cache items making those additional cache items eligible for // flushing. while (_flush(FALSE)); - umtx_condBroadcast(&gInProgressValueAddedCond); } #ifdef UNIFIED_CACHE_DEBUG @@ -156,7 +190,7 @@ void UnifiedCache::_dumpContents() const { (const SharedObject *) element->value.pointer; const CacheKeyBase *key = (const CacheKeyBase *) element->key.pointer; - if (!sharedObject->allSoftReferences()) { + if (sharedObject->hasHardReferences()) { ++cnt; fprintf( stderr, @@ -185,20 +219,32 @@ UnifiedCache::~UnifiedCache() { uhash_close(fHashtable); } +// Returns the next element in the cache round robin style. +// On entry, gCacheMutex must be held. +const UHashElement * +UnifiedCache::_nextElement() const { + const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos); + if (element == NULL) { + fEvictPos = UHASH_FIRST; + return uhash_nextElement(fHashtable, &fEvictPos); + } + return element; +} + // Flushes the contents of the cache. If cache values hold references to other // cache values then _flush should be called in a loop until it returns FALSE. // On entry, gCacheMutex must be held. -// On exit, those values with only soft references are flushed. If all is true -// then every value is flushed even if hard references are held. +// On exit, those values with are evictable are flushed. If all is true +// then every value is flushed even if it is not evictable. // Returns TRUE if any value in cache was flushed or FALSE otherwise. UBool UnifiedCache::_flush(UBool all) const { UBool result = FALSE; - int32_t pos = UHASH_FIRST; - const UHashElement *element = uhash_nextElement(fHashtable, &pos); - for (; element != NULL; element = uhash_nextElement(fHashtable, &pos)) { - const SharedObject *sharedObject = - (const SharedObject *) element->value.pointer; - if (all || sharedObject->allSoftReferences()) { + int32_t origSize = uhash_count(fHashtable); + for (int32_t i = 0; i < origSize; ++i) { + const UHashElement *element = _nextElement(); + if (all || _isEvictable(element)) { + const SharedObject *sharedObject = + (const SharedObject *) element->value.pointer; uhash_removeElement(fHashtable, element); sharedObject->removeSoftRef(); result = TRUE; @@ -207,6 +253,45 @@ UBool UnifiedCache::_flush(UBool all) const { return result; } +// Computes how many items should be evicted. +// On entry, gCacheMutex must be held. +// Returns number of items that should be evicted or a value <= 0 if no +// items need to be evicted. +int32_t UnifiedCache::_computeCountOfItemsToEvict() const { + int32_t maxPercentageOfInUseCount = + fItemsInUseCount * fMaxPercentageOfInUse / 100; + int32_t maxUnusedCount = fMaxUnused; + if (maxUnusedCount < maxPercentageOfInUseCount) { + maxUnusedCount = maxPercentageOfInUseCount; + } + return uhash_count(fHashtable) - fItemsInUseCount - maxUnusedCount; +} + +// Run an eviction slice. +// On entry, gCacheMutex must be held. +// _runEvictionSlice runs a slice of the evict pipeline by examining the next +// 10 entries in the cache round robin style evicting them if they are eligible. +void UnifiedCache::_runEvictionSlice() const { + int32_t maxItemsToEvict = _computeCountOfItemsToEvict(); + if (maxItemsToEvict <= 0) { + return; + } + for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) { + const UHashElement *element = _nextElement(); + if (_isEvictable(element)) { + const SharedObject *sharedObject = + (const SharedObject *) element->value.pointer; + uhash_removeElement(fHashtable, element); + sharedObject->removeSoftRef(); + ++fAutoEvictedCount; + if (--maxItemsToEvict == 0) { + break; + } + } + } +} + + // Places a new value and creationStatus in the cache for the given key. // On entry, gCacheMutex must be held. key must not exist in the cache. // On exit, value and creation status placed under key. Soft reference added @@ -224,7 +309,10 @@ void UnifiedCache::_putNew( status = U_MEMORY_ALLOCATION_ERROR; return; } - keyToAdopt->creationStatus = creationStatus; + keyToAdopt->fCreationStatus = creationStatus; + if (value->noSoftReferences()) { + _registerMaster(keyToAdopt, value); + } uhash_put(fHashtable, keyToAdopt, (void *) value, &status); if (U_SUCCESS(status)) { value->addSoftRef(); @@ -254,9 +342,12 @@ void UnifiedCache::_putIfAbsentAndGet( UErrorCode putError = U_ZERO_ERROR; // best-effort basis only. _putNew(key, value, status, putError); - return; + } else { + _put(element, value, status); } - _put(element, value, status); + // Run an eviction slice. This will run even if we added a master entry + // which doesn't increase the unused count, but that is still o.k + _runEvictionSlice(); } // Attempts to fetch value and status for key from cache. @@ -294,8 +385,9 @@ UBool UnifiedCache::_poll( // On exit. value and status set to what is in cache at key or on cache // miss the key's createObject() is called and value and status are set to // the result of that. In this latter case, best effort is made to add the -// value and status to the cache. value will be set to NULL instead of -// gNoValue. Caller must call removeRef on value if non NULL. +// value and status to the cache. If createObject() fails to create a value, +// gNoValue is stored in cache, and value is set to NULL. Caller must call +// removeRef on value if non NULL. void UnifiedCache::_get( const CacheKeyBase &key, const SharedObject *&value, @@ -313,7 +405,7 @@ void UnifiedCache::_get( return; } value = key.createObject(creationContext, status); - U_ASSERT(value == NULL || !value->allSoftReferences()); + U_ASSERT(value == NULL || value->hasHardReferences()); U_ASSERT(value != NULL || status != U_ZERO_ERROR); if (value == NULL) { SharedObject::copyPtr(gNoValue, value); @@ -324,6 +416,32 @@ void UnifiedCache::_get( } } +void UnifiedCache::decrementItemsInUseWithLockingAndEviction() const { + Mutex mutex(&gCacheMutex); + decrementItemsInUse(); + _runEvictionSlice(); +} + +void UnifiedCache::incrementItemsInUse() const { + ++fItemsInUseCount; +} + +void UnifiedCache::decrementItemsInUse() const { + --fItemsInUseCount; +} + +// Register a master cache entry. +// On entry, gCacheMutex must be held. +// On exit, items in use count incremented, entry is marked as a master +// entry, and value registered with cache so that subsequent calls to +// addRef() and removeRef() on it correctly updates items in use count +void UnifiedCache::_registerMaster( + const CacheKeyBase *theKey, const SharedObject *value) const { + theKey->fIsMaster = TRUE; + ++fItemsInUseCount; + value->registerWithCache(this); +} + // Store a value and error in given hash entry. // On entry, gCacheMutex must be held. Hash entry element must be in progress. // value must be non NULL. @@ -333,11 +451,14 @@ void UnifiedCache::_get( void UnifiedCache::_put( const UHashElement *element, const SharedObject *value, - const UErrorCode status) { + const UErrorCode status) const { U_ASSERT(_inProgress(element)); const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; const SharedObject *oldValue = (const SharedObject *) element->value.pointer; - theKey->creationStatus = status; + theKey->fCreationStatus = status; + if (value->noSoftReferences()) { + _registerMaster(theKey, value); + } value->addSoftRef(); UHashElement *ptr = const_cast(element); ptr->value.pointer = (void *) value; @@ -348,6 +469,28 @@ void UnifiedCache::_put( umtx_condBroadcast(&gInProgressValueAddedCond); } +void +UnifiedCache::copyPtr(const SharedObject *src, const SharedObject *&dest) { + if(src != dest) { + if(dest != NULL) { + dest->removeRefWhileHoldingCacheLock(); + } + dest = src; + if(src != NULL) { + src->addRefWhileHoldingCacheLock(); + } + } +} + +void +UnifiedCache::clearPtr(const SharedObject *&ptr) { + if (ptr != NULL) { + ptr->removeRefWhileHoldingCacheLock(); + ptr = NULL; + } +} + + // Fetch value and error code from a particular hash entry. // On entry, gCacheMutex must be held. value must be either NULL or must be // included in the ref count of the object to which it points. @@ -360,20 +503,51 @@ void UnifiedCache::_fetch( const SharedObject *&value, UErrorCode &status) { const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; - status = theKey->creationStatus; - SharedObject::copyPtr( - (const SharedObject *) element->value.pointer, value); + status = theKey->fCreationStatus; + + // Since we have the cache lock, calling regular SharedObject methods + // could cause us to deadlock on ourselves since they may need to lock + // the cache mutex. + UnifiedCache::copyPtr((const SharedObject *) element->value.pointer, value); } - + // Determine if given hash entry is in progress. // On entry, gCacheMutex must be held. UBool UnifiedCache::_inProgress(const UHashElement *element) { const SharedObject *value = NULL; UErrorCode status = U_ZERO_ERROR; _fetch(element, value, status); - UBool result = (value == gNoValue && status == U_ZERO_ERROR); - SharedObject::clearPtr(value); + UBool result = _inProgress(value, status); + + // Since we have the cache lock, calling regular SharedObject methods + // could cause us to deadlock on ourselves since they may need to lock + // the cache mutex. + UnifiedCache::clearPtr(value); return result; } +// Determine if given hash entry is in progress. +// On entry, gCacheMutex must be held. +UBool UnifiedCache::_inProgress( + const SharedObject *theValue, UErrorCode creationStatus) { + return (theValue == gNoValue && creationStatus == U_ZERO_ERROR); +} + +// Determine if given hash entry is eligible for eviction. +// On entry, gCacheMutex must be held. +UBool UnifiedCache::_isEvictable(const UHashElement *element) { + const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; + const SharedObject *theValue = + (const SharedObject *) element->value.pointer; + + // Entries that are under construction are never evictable + if (_inProgress(theValue, theKey->fCreationStatus)) { + return FALSE; + } + + // We can evict entries that are either not a master or have just + // one reference (The one reference being from the cache itself). + return (!theKey->fIsMaster || (theValue->getSoftRefCount() == 1 && theValue->noHardReferences())); +} + U_NAMESPACE_END diff --git a/icu4c/source/common/unifiedcache.h b/icu4c/source/common/unifiedcache.h index 1bab61c5d12..35dabbb535c 100644 --- a/icu4c/source/common/unifiedcache.h +++ b/icu4c/source/common/unifiedcache.h @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 2014, International Business Machines Corporation and +* Copyright (C) 2015, International Business Machines Corporation and * others. All Rights Reserved. ****************************************************************************** * @@ -28,17 +28,17 @@ U_NAMESPACE_BEGIN class UnifiedCache; /** - * A base class for all cache keys + * A base class for all cache keys. */ class U_COMMON_API CacheKeyBase : public UObject { public: - CacheKeyBase() : creationStatus(U_ZERO_ERROR) {} + CacheKeyBase() : fCreationStatus(U_ZERO_ERROR), fIsMaster(FALSE) {} /** * Copy constructor. Needed to support cloning. */ CacheKeyBase(const CacheKeyBase &other) - : UObject(other), creationStatus(other.creationStatus) { } + : UObject(other), fCreationStatus(other.fCreationStatus), fIsMaster(FALSE) { } virtual ~CacheKeyBase(); /** @@ -85,7 +85,8 @@ class U_COMMON_API CacheKeyBase : public UObject { return !(*this == other); } private: - mutable UErrorCode creationStatus; + mutable UErrorCode fCreationStatus; + mutable UBool fIsMaster; friend class UnifiedCache; }; @@ -174,18 +175,22 @@ class LocaleCacheKey : public CacheKey { /** * The unified cache. A singleton type. + * Design doc here: + * https://docs.google.com/document/d/1RwGQJs4N4tawNbf809iYDRCvXoMKqDJihxzYt1ysmd8/edit?usp=sharing */ -class U_COMMON_API UnifiedCache : public UObject { +class U_COMMON_API UnifiedCache : public UnifiedCacheBase { public: /** * @internal + * Do not call directly. Instead use UnifiedCache::getInstance() as + * there should be only one UnifiedCache in an application. */ UnifiedCache(UErrorCode &status); /** * Returns the cache instance. */ - static const UnifiedCache *getInstance(UErrorCode &status); + static UnifiedCache *getInstance(UErrorCode &status); /** * Fetches a value from the cache by key. Equivalent to @@ -285,9 +290,66 @@ class U_COMMON_API UnifiedCache : public UObject { */ void flush() const; + /** + * Configures at what point evcition of unused entries will begin. + * Eviction is triggered whenever the number of unused entries exeeds + * BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100). + * Once the number of unused entries drops below one of these, + * eviction ceases. Because eviction happens incrementally, + * the actual unused entry count may exceed both these numbers + * from time to time. + * + * A cache entry is defined as unused if it is not essential to guarantee + * that for a given key X, the cache returns the same reference to the + * same value as long as the client already holds a reference to that + * value. + * + * If this method is never called, the default settings are 1000 and 100%. + * + * Although this method is thread-safe, it is designed to be called at + * application startup. If it is called in the middle of execution, it + * will have no immediate effect on the cache. However over time, the + * cache will perform eviction slices in an attempt to honor the new + * settings. + * + * If a client already holds references to many different unique values + * in the cache such that the number of those unique values far exeeds + * "count" then the cache may not be able to maintain this maximum. + * However, if this happens, the cache still guarantees that the number of + * unused entries will remain only a small percentage of the total cache + * size. + * + * If the parameters passed are negative, setEvctionPolicy sets status to + * U_ILLEGAL_ARGUMENT_ERROR. + */ + void setEvictionPolicy( + int32_t count, int32_t percentageOfInUseItems, UErrorCode &status); + + + /** + * Returns how many entries have been auto evicted during the lifetime + * of this cache. This only includes auto evicted entries, not + * entries evicted because of a call to flush(). + */ + int64_t autoEvictedCount() const; + + /** + * Returns the unused entry count in this cache. For testing only, + * Regular clients will not need this. + */ + int32_t unusedCount() const; + + virtual void incrementItemsInUse() const; + virtual void decrementItemsInUseWithLockingAndEviction() const; + virtual void decrementItemsInUse() const; virtual ~UnifiedCache(); private: UHashtable *fHashtable; + mutable int32_t fEvictPos; + mutable int32_t fItemsInUseCount; + int32_t fMaxUnused; + int32_t fMaxPercentageOfInUse; + mutable int64_t fAutoEvictedCount; UnifiedCache(const UnifiedCache &other); UnifiedCache &operator=(const UnifiedCache &other); UBool _flush(UBool all) const; @@ -309,18 +371,28 @@ class U_COMMON_API UnifiedCache : public UObject { const CacheKeyBase &key, const SharedObject *&value, UErrorCode &status) const; + const UHashElement *_nextElement() const; + int32_t _computeCountOfItemsToEvict() const; + void _runEvictionSlice() const; + void _registerMaster( + const CacheKeyBase *theKey, const SharedObject *value) const; + void _put( + const UHashElement *element, + const SharedObject *value, + const UErrorCode status) const; #ifdef UNIFIED_CACHE_DEBUG void _dumpContents() const; #endif - static void _put( - const UHashElement *element, - const SharedObject *value, - const UErrorCode status); + static void copyPtr(const SharedObject *src, const SharedObject *&dest); + static void clearPtr(const SharedObject *&ptr); static void _fetch( const UHashElement *element, const SharedObject *&value, UErrorCode &status); static UBool _inProgress(const UHashElement *element); + static UBool _inProgress( + const SharedObject *theValue, UErrorCode creationStatus); + static UBool _isEvictable(const UHashElement *element); }; U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/tsmthred.cpp b/icu4c/source/test/intltest/tsmthred.cpp index 1a5fdc62f5a..bed6f83c6b0 100644 --- a/icu4c/source/test/intltest/tsmthred.cpp +++ b/icu4c/source/test/intltest/tsmthred.cpp @@ -1319,7 +1319,19 @@ void MultithreadTest::TestConditionVariables() { // Unified Cache Test // -static const char *gCacheLocales[] = {"en_US", "en_GB", "fr_FR", "fr"}; +// Each thread fetches a pair of objects. There are 8 distinct pairs: +// ("en_US", "bs"), ("en_GB", "ca"), ("fr_FR", "ca_AD") etc. +// These pairs represent 8 distinct languages + +// Note that only one value per language gets created in the cache. +// In particular each cached value can have multiple keys. +static const char *gCacheLocales[] = { + "en_US", "en_GB", "fr_FR", "fr", + "de", "sr_ME", "sr_BA", "sr_CS"}; +static const char *gCacheLocales2[] = { + "bs", "ca", "ca_AD", "ca_ES", + "en_US", "fi", "ff_CM", "ff_GN"}; + static int32_t gObjectsCreated = 0; // protected by gCTMutex static const int32_t CACHE_LOAD = 3; @@ -1338,7 +1350,19 @@ U_NAMESPACE_BEGIN template<> U_EXPORT const UCTMultiThreadItem *LocaleCacheKey::createObject( - const void * /*unused*/, UErrorCode & /* status */) const { + const void *context, UErrorCode &status) const { + const UnifiedCache *cacheContext = (const UnifiedCache *) context; + + if (uprv_strcmp(fLoc.getLanguage(), fLoc.getName()) != 0) { + const UCTMultiThreadItem *result = NULL; + if (cacheContext == NULL) { + UnifiedCache::getByLocale(fLoc.getLanguage(), result, status); + return result; + } + cacheContext->get(LocaleCacheKey(fLoc.getLanguage()), result, status); + return result; + } + umtx_lock(&gCTMutex); bool firstObject = (gObjectsCreated == 0); if (firstObject) { @@ -1355,9 +1379,14 @@ const UCTMultiThreadItem *LocaleCacheKey::createObject( } umtx_unlock(&gCTMutex); - UCTMultiThreadItem *result = new UCTMultiThreadItem(fLoc.getName()); - result->addRef(); - + const UCTMultiThreadItem *result = + new UCTMultiThreadItem(fLoc.getLanguage()); + if (result == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + result->addRef(); + } + // Log that we created an object. The first object was already counted, // don't do it again. umtx_lock(&gCTMutex); @@ -1366,6 +1395,7 @@ const UCTMultiThreadItem *LocaleCacheKey::createObject( } umtx_condBroadcast(&gCTConditionVar); umtx_unlock(&gCTMutex); + return result; } @@ -1373,37 +1403,78 @@ U_NAMESPACE_END class UnifiedCacheThread: public SimpleThread { public: - UnifiedCacheThread(const char *loc) : fLoc(loc) {}; + UnifiedCacheThread( + const UnifiedCache *cache, + const char *loc, + const char *loc2) : fCache(cache), fLoc(loc), fLoc2(loc2) {}; ~UnifiedCacheThread() {}; void run(); - const char *fLoc; + void exerciseByLocale(const Locale &); + const UnifiedCache *fCache; + Locale fLoc; + Locale fLoc2; }; -void UnifiedCacheThread::run() { +void UnifiedCacheThread::exerciseByLocale(const Locale &locale) { UErrorCode status = U_ZERO_ERROR; - const UnifiedCache *cache = UnifiedCache::getInstance(status); - U_ASSERT(status == U_ZERO_ERROR); - const UCTMultiThreadItem *item = NULL; - cache->get(LocaleCacheKey(fLoc), item, status); - U_ASSERT(item != NULL); - if (uprv_strcmp(fLoc, item->value)) { - IntlTest::gTest->errln("Expected %s, got %s", fLoc, item->value); + const UCTMultiThreadItem *origItem = NULL; + fCache->get( + LocaleCacheKey(locale), fCache, origItem, status); + U_ASSERT(U_SUCCESS(status)); + if (uprv_strcmp(locale.getLanguage(), origItem->value)) { + IntlTest::gTest->errln( + "%s:%d Expected %s, got %s", __FILE__, __LINE__, + locale.getLanguage(), + origItem->value); } - item->removeRef(); + + // Fetch the same item again many times. We should always get the same + // pointer since this client is already holding onto it + for (int32_t i = 0; i < 1000; ++i) { + const UCTMultiThreadItem *item = NULL; + fCache->get( + LocaleCacheKey(locale), fCache, item, status); + if (item != origItem) { + IntlTest::gTest->errln( + "%s:%d Expected to get the same pointer", + __FILE__, + __LINE__); + } + if (item != NULL) { + item->removeRef(); + } + } + origItem->removeRef(); +} + +void UnifiedCacheThread::run() { + // Run the exercise with 2 different locales so that we can exercise + // eviction more. If each thread exerices just one locale, then + // eviction can't start until the threads end. + exerciseByLocale(fLoc); + exerciseByLocale(fLoc2); } void MultithreadTest::TestUnifiedCache() { + + // Start with our own local cache so that we have complete control + // and set the eviction policy to evict starting with 2 unused + // values UErrorCode status = U_ZERO_ERROR; - const UnifiedCache *cache = UnifiedCache::getInstance(status); - U_ASSERT(cache != NULL); - cache->flush(); + UnifiedCache::getInstance(status); + UnifiedCache cache(status); + cache.setEvictionPolicy(2, 0, status); + U_ASSERT(U_SUCCESS(status)); + gFinishedThreads = 0; gObjectsCreated = 0; UnifiedCacheThread *threads[CACHE_LOAD][UPRV_LENGTHOF(gCacheLocales)]; for (int32_t i=0; istart(); } } @@ -1413,7 +1484,21 @@ void MultithreadTest::TestUnifiedCache() { threads[i][j]->join(); } } - assertEquals("Objects created", UPRV_LENGTHOF(gCacheLocales), gObjectsCreated); + // Because of cache eviction, we can't assert exactly how many + // distinct objects get created over the course of this run. + // However we know that at least 8 objects get created because that + // is how many distinct languages we have in our test. + if (gObjectsCreated < 8) { + errln("%s:%d Too few objects created.", __FILE__, __LINE__); + } + // We know that each thread cannot create more than 2 objects in + // the cache, and there are UPRV_LENGTHOF(gCacheLocales) pairs of + // objects fetched from the cache + if (gObjectsCreated > 2 * UPRV_LENGTHOF(gCacheLocales)) { + errln("Too many objects created.", __FILE__, __LINE__); + } + + assertEquals("unused values", 2, cache.unusedCount()); // clean up threads for (int32_t i=0; i U_EXPORT const UCTItem *LocaleCacheKey::createObject( - const void * /*unused*/, UErrorCode &status) const { + const void *context, UErrorCode &status) const { + const UnifiedCache *cacheContext = (const UnifiedCache *) context; if (uprv_strcmp(fLoc.getName(), "zh") == 0) { status = U_MISSING_RESOURCE_ERROR; return NULL; } if (uprv_strcmp(fLoc.getLanguage(), fLoc.getName()) != 0) { const UCTItem *item = NULL; - UnifiedCache::getByLocale(fLoc.getLanguage(), item, status); + if (cacheContext == NULL) { + UnifiedCache::getByLocale(fLoc.getLanguage(), item, status); + } else { + cacheContext->get(LocaleCacheKey(fLoc.getLanguage()), item, status); + } if (U_FAILURE(status)) { return NULL; } @@ -63,19 +69,227 @@ public: } void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0); private: + void TestEvictionPolicy(); + void TestBounded(); void TestBasic(); void TestError(); void TestHashEquals(); + void TestEvictionUnderStress(); }; void UnifiedCacheTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) { TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(TestEvictionPolicy); + TESTCASE_AUTO(TestBounded); TESTCASE_AUTO(TestBasic); TESTCASE_AUTO(TestError); TESTCASE_AUTO(TestHashEquals); + TESTCASE_AUTO(TestEvictionUnderStress); TESTCASE_AUTO_END; } +void UnifiedCacheTest::TestEvictionUnderStress() { + int32_t localeCount; + const Locale *locales = DateFormat::getAvailableLocales(localeCount); + UErrorCode status = U_ZERO_ERROR; + const UnifiedCache *cache = UnifiedCache::getInstance(status); + int64_t evictedCountBefore = cache->autoEvictedCount(); + for (int32_t i = 0; i < localeCount; ++i) { + LocalPointer ptr(DateFormat::createInstanceForSkeleton("yMd", locales[i], status)); + } + int64_t evictedCountAfter = cache->autoEvictedCount(); + if (evictedCountBefore == evictedCountAfter) { + errln("%s:%d Items should have been evicted from cache", + __FILE__, __LINE__); + } +} + +void UnifiedCacheTest::TestEvictionPolicy() { + UErrorCode status = U_ZERO_ERROR; + + // We have to call this first or else calling the UnifiedCache + // ctor will fail. This is by design to deter clients from using the + // cache API incorrectly by creating their own cache instances. + UnifiedCache::getInstance(status); + + // We create our own local UnifiedCache instance to ensure we have + // complete control over it. Real clients should never ever create + // their own cache! + UnifiedCache cache(status); + assertSuccess("", status); + + // Don't allow unused entries to exeed more than 100% of in use entries. + cache.setEvictionPolicy(0, 100, status); + + static const char *locales[] = { + "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", + "11", "12", "13", "14", "15", "16", "17", "18", "19", "20"}; + + const UCTItem *usedReferences[] = {NULL, NULL, NULL, NULL, NULL}; + const UCTItem *unusedReference = NULL; + + // Add 5 in-use entries + for (int32_t i = 0; i < UPRV_LENGTHOF(usedReferences); i++) { + cache.get( + LocaleCacheKey(locales[i]), + &cache, + usedReferences[i], + status); + } + + // Add 10 not in use entries. + for (int32_t i = 0; i < 10; ++i) { + cache.get( + LocaleCacheKey( + locales[i + UPRV_LENGTHOF(usedReferences)]), + &cache, + unusedReference, + status); + } + unusedReference->removeRef(); + + // unused count not to exeed in use count + assertEquals("", UPRV_LENGTHOF(usedReferences), cache.unusedCount()); + assertEquals("", 2*UPRV_LENGTHOF(usedReferences), cache.keyCount()); + + // Free up those used entries. + for (int32_t i = 0; i < UPRV_LENGTHOF(usedReferences); i++) { + usedReferences[i]->removeRef(); + } + + // This should free up all cache items + assertEquals("", 0, cache.keyCount()); + + assertSuccess("", status); +} + + + +void UnifiedCacheTest::TestBounded() { + UErrorCode status = U_ZERO_ERROR; + + // We have to call this first or else calling the UnifiedCache + // ctor will fail. This is by design to deter clients from using the + // cache API incorrectly by creating their own cache instances. + UnifiedCache::getInstance(status); + + // We create our own local UnifiedCache instance to ensure we have + // complete control over it. Real clients should never ever create + // their own cache! + UnifiedCache cache(status); + assertSuccess("", status); + + // Maximum unused count is 3. + cache.setEvictionPolicy(3, 0, status); + + // Our cache will hold up to 3 unused key-value pairs + // We test the following invariants: + // 1. unusedCount <= 3 + // 2. cache->get(X) always returns the same reference as long as caller + // already holds references to that same object. + + // We first add 5 key-value pairs with two distinct values, "en" and "fr" + // keeping all those references. + + const UCTItem *en = NULL; + const UCTItem *enGb = NULL; + const UCTItem *enUs = NULL; + const UCTItem *fr = NULL; + const UCTItem *frFr = NULL; + cache.get(LocaleCacheKey("en_US"), &cache, enUs, status); + cache.get(LocaleCacheKey("en"), &cache, en, status); + assertEquals("", 1, cache.unusedCount()); + cache.get(LocaleCacheKey("en_GB"), &cache, enGb, status); + cache.get(LocaleCacheKey("fr_FR"), &cache, frFr, status); + cache.get(LocaleCacheKey("fr"), &cache, fr, status); + + // Client holds two unique references, "en" and "fr" the other three + // entries are eligible for eviction. + assertEquals("", 3, cache.unusedCount()); + assertEquals("", 5, cache.keyCount()); + + // Exercise cache more but don't hold the references except for + // the last one. At the end of this, we will hold references to one + // additional distinct value, so we will have references to 3 distinct + // values. + const UCTItem *throwAway = NULL; + cache.get(LocaleCacheKey("zn_AA"), &cache, throwAway, status); + cache.get(LocaleCacheKey("sr_AA"), &cache, throwAway, status); + cache.get(LocaleCacheKey("de_AU"), &cache, throwAway, status); + + const UCTItem *deAu(throwAway); + deAu->addRef(); + + // Client holds three unique references, "en", "fr", "de" although we + // could have a total of 8 entries in the cache maxUnusedCount == 3 + // so we have only 6 entries. + assertEquals("", 3, cache.unusedCount()); + assertEquals("", 6, cache.keyCount()); + + // For all the references we have, cache must continue to return + // those same references (#2) + + cache.get(LocaleCacheKey("en"), &cache, throwAway, status); + if (throwAway != en) { + errln("Expected en to resolve to the same object."); + } + cache.get(LocaleCacheKey("en_US"), &cache, throwAway, status); + if (throwAway != enUs) { + errln("Expected enUs to resolve to the same object."); + } + cache.get(LocaleCacheKey("en_GB"), &cache, throwAway, status); + if (throwAway != enGb) { + errln("Expected enGb to resolve to the same object."); + } + cache.get(LocaleCacheKey("fr_FR"), &cache, throwAway, status); + if (throwAway != frFr) { + errln("Expected frFr to resolve to the same object."); + } + cache.get(LocaleCacheKey("fr_FR"), &cache, throwAway, status); + cache.get(LocaleCacheKey("fr"), &cache, throwAway, status); + if (throwAway != fr) { + errln("Expected fr to resolve to the same object."); + } + cache.get(LocaleCacheKey("de_AU"), &cache, throwAway, status); + if (throwAway != deAu) { + errln("Expected deAu to resolve to the same object."); + } + + assertEquals("", 3, cache.unusedCount()); + assertEquals("", 6, cache.keyCount()); + + // Now we hold a references to two more distinct values. Cache size + // should grow to 8. + const UCTItem *es = NULL; + const UCTItem *ru = NULL; + cache.get(LocaleCacheKey("es"), &cache, es, status); + cache.get(LocaleCacheKey("ru"), &cache, ru, status); + assertEquals("", 3, cache.unusedCount()); + assertEquals("", 8, cache.keyCount()); + + // Now release all the references we hold except for + // es, ru, and en + SharedObject::clearPtr(enGb); + SharedObject::clearPtr(enUs); + SharedObject::clearPtr(fr); + SharedObject::clearPtr(frFr); + SharedObject::clearPtr(deAu); + SharedObject::clearPtr(es); + SharedObject::clearPtr(ru); + SharedObject::clearPtr(en); + SharedObject::clearPtr(throwAway); + + // Size of cache should magically drop to 3. + assertEquals("", 3, cache.unusedCount()); + assertEquals("", 3, cache.keyCount()); + + // Be sure nothing happens setting the eviction policy in the middle of + // a run. + cache.setEvictionPolicy(3, 0, status); + assertSuccess("", status); + +} + void UnifiedCacheTest::TestBasic() { UErrorCode status = U_ZERO_ERROR; const UnifiedCache *cache = UnifiedCache::getInstance(status); @@ -110,20 +324,25 @@ void UnifiedCacheTest::TestBasic() { assertEquals("", baseCount + 5, cache->keyCount()); SharedObject::clearPtr(enGb); cache->flush(); - assertEquals("", baseCount + 5, cache->keyCount()); + + // Only 2 unique values in the cache. flushing trims cache down + // to this minimum size. + assertEquals("", baseCount + 2, cache->keyCount()); SharedObject::clearPtr(enUs); SharedObject::clearPtr(en); cache->flush(); // With en_GB and en_US and en cleared there are no more hard references to // the "en" object, so it gets flushed and the keys that refer to it - // get removed from the cache. - assertEquals("", baseCount + 2, cache->keyCount()); + // get removed from the cache. Now we have just one unique value, fr, in + // the cache + assertEquals("", baseCount + 1, cache->keyCount()); SharedObject::clearPtr(fr); cache->flush(); - assertEquals("", baseCount + 2, cache->keyCount()); + assertEquals("", baseCount + 1, cache->keyCount()); SharedObject::clearPtr(frFr); cache->flush(); assertEquals("", baseCount + 0, cache->keyCount()); + assertSuccess("", status); } void UnifiedCacheTest::TestError() {