ICU-12380 Size reductions for transliterator registry (and some speedup)

X-SVN-Rev: 40434
This commit is contained in:
Peter Edberg 2017-09-20 00:39:40 +00:00
parent 4e1c4096a6
commit 9075f9cb5a
7 changed files with 308 additions and 129 deletions

View file

@ -33,6 +33,8 @@ class U_COMMON_API Hashtable : public UMemory {
inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);
public:
/**
* Construct a hashtable
@ -41,6 +43,14 @@ public:
*/
Hashtable(UBool ignoreKeyCase, UErrorCode& status);
/**
* Construct a hashtable
* @param ignoreKeyCase If true, keys are case insensitive.
* @param size initial size allocation
* @param status Error code
*/
Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
/**
* Construct a hashtable
* @param keyComp Comparator for comparing the keys
@ -76,9 +86,9 @@ public:
int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
void* get(const UnicodeString& key) const;
int32_t geti(const UnicodeString& key) const;
void* remove(const UnicodeString& key);
int32_t removei(const UnicodeString& key);
@ -92,9 +102,9 @@ public:
* @see uhash_nextElement
*/
const UHashElement* nextElement(int32_t& pos) const;
UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
UValueComparator* setValueComparator(UValueComparator* valueComp);
UBool equals(const Hashtable& that) const;
@ -107,7 +117,7 @@ private:
* Implementation
********************************************************************/
inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
UValueComparator *valueComp, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
@ -119,10 +129,23 @@ inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
}
}
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
/**
 * Initialize the embedded hash object with a caller-requested initial size.
 * Does nothing when status already reports a failure on entry. On success,
 * `hash` is pointed at the embedded `hashObj` and uprv_deleteUObject is
 * installed as the key deleter; on failure `hash` is left untouched.
 * @param keyHash   key hashing function, forwarded to uhash_initSize
 * @param keyComp   key comparison function, forwarded to uhash_initSize
 * @param valueComp value comparison function, forwarded to uhash_initSize
 * @param size      requested initial size, forwarded to uhash_initSize
 * @param status    in/out error code
 */
inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
                                UValueComparator *valueComp, int32_t size, UErrorCode& status) {
    if (U_SUCCESS(status)) {
        uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
        if (U_FAILURE(status)) {
            return;
        }
        // Publish the table only after it has been initialized successfully.
        hash = &hashObj;
        uhash_setKeyDeleter(hash, uprv_deleteUObject);
    }
}
/**
 * Construct a hashtable that hashes its keys with uhash_hashUnicodeString
 * and uses the supplied key and value comparators.
 * `hash` starts out null; init() is responsible for setting it.
 */
inline Hashtable::Hashtable(UKeyComparator *keyComp,
                            UValueComparator *valueComp,
                            UErrorCode& status)
    : hash(NULL)
{
    init(uhash_hashUnicodeString, keyComp, valueComp, status);
}
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
: hash(0)
{
@ -134,6 +157,17 @@ inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
status);
}
/**
 * Construct a hashtable with a requested initial size.
 * When ignoreKeyCase is true the caseless UnicodeString hasher and
 * comparator are used, otherwise the case-sensitive ones. No value
 * comparator is installed (NULL is passed).
 */
inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
    : hash(NULL)
{
    if (ignoreKeyCase) {
        initSize(uhash_hashCaselessUnicodeString,
                 uhash_compareCaselessUnicodeString,
                 NULL, size, status);
    } else {
        initSize(uhash_hashUnicodeString,
                 uhash_compareUnicodeString,
                 NULL, size, status);
    }
}
inline Hashtable::Hashtable(UErrorCode& status)
: hash(0)
{
@ -200,7 +234,7 @@ inline void Hashtable::removeAll(void) {
/**
 * Install a new key comparator on the underlying table.
 * @return the value returned by uhash_setKeyComparator
 */
inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
    UKeyComparator* previous = uhash_setKeyComparator(hash, keyComp);
    return previous;
}
/**
 * Install a new value comparator on the underlying table.
 * @return the value returned by uhash_setValueComparator
 */
inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
    UValueComparator* previous = uhash_setValueComparator(hash, valueComp);
    return previous;
}

View file

@ -79,14 +79,14 @@
* prime number while being less than a power of two.
*/
static const int32_t PRIMES[] = {
13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
1073741789, 2147483647 /*, 4294967291 */
};
#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
#define DEFAULT_PRIME_INDEX 3
#define DEFAULT_PRIME_INDEX 4
/* These ratios are tuned to the PRIMES array such that a resize
* places the table back into the zone of non-resizing. That is,
@ -231,7 +231,7 @@ _uhash_allocate(UHashtable *hash,
emptytok.pointer = NULL; /* Only one of these two is needed */
emptytok.integer = 0; /* but we don't know which one. */
limit = p + hash->length;
while (p < limit) {
p->key = emptytok;
@ -247,7 +247,7 @@ _uhash_allocate(UHashtable *hash,
static UHashtable*
_uhash_init(UHashtable *result,
UHashFunction *keyHash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t primeIndex,
@ -275,7 +275,7 @@ _uhash_init(UHashtable *result,
}
static UHashtable*
_uhash_create(UHashFunction *keyHash,
_uhash_create(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t primeIndex,
@ -415,7 +415,7 @@ _uhash_rehash(UHashtable *hash, UErrorCode *status) {
if (U_FAILURE(*status)) {
hash->elements = old;
hash->length = oldLength;
hash->length = oldLength;
return;
}
@ -536,7 +536,7 @@ _uhash_put(UHashtable *hash,
********************************************************************/
U_CAPI UHashtable* U_EXPORT2
uhash_open(UHashFunction *keyHash,
uhash_open(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
UErrorCode *status) {
@ -545,7 +545,7 @@ uhash_open(UHashFunction *keyHash,
}
U_CAPI UHashtable* U_EXPORT2
uhash_openSize(UHashFunction *keyHash,
uhash_openSize(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t size,
@ -562,7 +562,7 @@ uhash_openSize(UHashFunction *keyHash,
U_CAPI UHashtable* U_EXPORT2
uhash_init(UHashtable *fillinResult,
UHashFunction *keyHash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
UErrorCode *status) {
@ -570,6 +570,22 @@ uhash_init(UHashtable *fillinResult,
return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
}
/*
 * Initialize a caller-provided UHashtable, choosing the table length from
 * the requested size: the first PRIMES entry that is >= size is used, or
 * the last entry when size exceeds every prime in the table.
 */
U_CAPI UHashtable* U_EXPORT2
uhash_initSize(UHashtable *fillinResult,
               UHashFunction *keyHash,
               UKeyComparator *keyComp,
               UValueComparator *valueComp,
               int32_t size,
               UErrorCode *status) {
    int32_t primeIndex;

    /* Stop at the first prime large enough; never step past the last slot. */
    for (primeIndex = 0; primeIndex < (PRIMES_LENGTH-1); ++primeIndex) {
        if (PRIMES[primeIndex] >= size) {
            break;
        }
    }

    return _uhash_init(fillinResult, keyHash, keyComp, valueComp, primeIndex, status);
}
U_CAPI void U_EXPORT2
uhash_close(UHashtable *hash) {
if (hash == NULL) {
@ -604,7 +620,7 @@ uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
hash->keyComparator = fn;
return result;
}
U_CAPI UValueComparator *U_EXPORT2
U_CAPI UValueComparator *U_EXPORT2
uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
UValueComparator *result = hash->valueComparator;
hash->valueComparator = fn;
@ -630,7 +646,7 @@ uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
UErrorCode status = U_ZERO_ERROR;
_uhash_internalSetResizePolicy(hash, policy);
hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
_uhash_rehash(hash, &status);
}
@ -853,7 +869,7 @@ uhash_hashIChars(const UHashTok key) {
return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
}
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
int32_t count1, count2, pos, i;
@ -886,14 +902,14 @@ uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
if(count1!=count2){
return FALSE;
}
pos=UHASH_FIRST;
for(i=0; i<count1; i++){
const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
const UHashTok key1 = elem1->key;
const UHashTok val1 = elem1->value;
/* here the keys are not compared, instead the key from hash1 is used to fetch
* value from hash2. If the hashes are equal then both hashes should
* value from hash2. If the hashes are equal then both hashes should
* contain equal values for the same key!
*/
const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));

View file

@ -154,7 +154,7 @@ struct UHashtable {
* If NULL won't do anything */
/* Size parameters */
int32_t count; /* The number of key-value pairs in this table.
* 0 <= count <= length. In practice we
* never let count == length (see code). */
@ -162,12 +162,12 @@ struct UHashtable {
* and values. Must be prime. */
/* Rehashing thresholds */
int32_t highWaterMark; /* If count > highWaterMark, rehash */
int32_t lowWaterMark; /* If count < lowWaterMark, rehash */
float highWaterRatio; /* 0..1; high water as a fraction of length */
float lowWaterRatio; /* 0..1; low water as a fraction of length */
int8_t primeIndex; /* Index into our prime table for length.
* length == PRIMES[primeIndex] */
UBool allocated; /* Was this UHashtable allocated? */
@ -190,7 +190,7 @@ U_CDECL_END
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize
*/
U_CAPI UHashtable* U_EXPORT2
U_CAPI UHashtable* U_EXPORT2
uhash_open(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
@ -207,7 +207,7 @@ uhash_open(UHashFunction *keyHash,
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_open
*/
U_CAPI UHashtable* U_EXPORT2
U_CAPI UHashtable* U_EXPORT2
uhash_openSize(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
@ -224,18 +224,37 @@ uhash_openSize(UHashFunction *keyHash,
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize
*/
U_CAPI UHashtable* U_EXPORT2
U_CAPI UHashtable* U_EXPORT2
uhash_init(UHashtable *hash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
UErrorCode *status);
/**
 * Initialize an existing, caller-provided UHashtable, sizing it for a
 * requested initial capacity.
 * @param hash The UHashtable to initialize.
 * @param keyHash A pointer to the key hashing function. Must not be
 * NULL.
 * @param keyComp A pointer to the function that compares keys. Must
 * not be NULL.
 * @param valueComp A pointer to the function that compares values.
 * @param size The initial capacity of this hash table.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return A pointer to a UHashtable, or 0 if an error occurred.
 * @see uhash_init
 * @see uhash_openSize
 */
U_CAPI UHashtable* U_EXPORT2
uhash_initSize(UHashtable *hash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t size,
UErrorCode *status);
/**
* Close a UHashtable, releasing the memory used.
* @param hash The UHashtable to close. If hash is NULL no operation is performed.
*/
U_CAPI void U_EXPORT2
U_CAPI void U_EXPORT2
uhash_close(UHashtable *hash);
@ -246,7 +265,7 @@ uhash_close(UHashtable *hash);
* @param fn the function to be used hash keys; must not be NULL
* @return the previous key hasher; non-NULL
*/
U_CAPI UHashFunction *U_EXPORT2
U_CAPI UHashFunction *U_EXPORT2
uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
/**
@ -256,7 +275,7 @@ uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
* @param fn the function to be used compare keys; must not be NULL
* @return the previous key comparator; non-NULL
*/
U_CAPI UKeyComparator *U_EXPORT2
U_CAPI UKeyComparator *U_EXPORT2
uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
/**
@ -266,7 +285,7 @@ uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
* @param fn the function to be used to compare values; must not be NULL
* @return the previous value comparator; non-NULL
*/
U_CAPI UValueComparator *U_EXPORT2
U_CAPI UValueComparator *U_EXPORT2
uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
/**
@ -279,7 +298,7 @@ uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
* @param fn the function to be used delete keys, or NULL
* @return the previous key deleter; may be NULL
*/
U_CAPI UObjectDeleter *U_EXPORT2
U_CAPI UObjectDeleter *U_EXPORT2
uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
/**
@ -292,7 +311,7 @@ uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
* @param fn the function to be used delete values, or NULL
* @return the previous value deleter; may be NULL
*/
U_CAPI UObjectDeleter *U_EXPORT2
U_CAPI UObjectDeleter *U_EXPORT2
uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
/**
@ -302,7 +321,7 @@ uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
* @param hash The UHashtable to set
* @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED}
*/
U_CAPI void U_EXPORT2
U_CAPI void U_EXPORT2
uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
/**
@ -310,7 +329,7 @@ uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
* @param hash The UHashtable to query.
* @return The number of key-value pairs stored in hash.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_count(const UHashtable *hash);
/**
@ -326,7 +345,7 @@ uhash_count(const UHashtable *hash);
* @return The previous value, or NULL if none.
* @see uhash_get
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_put(UHashtable *hash,
void *key,
void *value,
@ -344,7 +363,7 @@ uhash_put(UHashtable *hash,
* @return The previous value, or NULL if none.
* @see uhash_get
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_iput(UHashtable *hash,
int32_t key,
void* value,
@ -362,7 +381,7 @@ uhash_iput(UHashtable *hash,
* @return The previous value, or 0 if none.
* @see uhash_get
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_puti(UHashtable *hash,
void* key,
int32_t value,
@ -380,7 +399,7 @@ uhash_puti(UHashtable *hash,
* @return The previous value, or 0 if none.
* @see uhash_get
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_iputi(UHashtable *hash,
int32_t key,
int32_t value,
@ -393,8 +412,8 @@ uhash_iputi(UHashtable *hash,
* @param key A pointer key stored in a hashtable
* @return The requested item, or NULL if not found.
*/
U_CAPI void* U_EXPORT2
uhash_get(const UHashtable *hash,
U_CAPI void* U_EXPORT2
uhash_get(const UHashtable *hash,
const void *key);
/**
@ -404,7 +423,7 @@ uhash_get(const UHashtable *hash,
* @param key An integer key stored in a hashtable
* @return The requested item, or NULL if not found.
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_iget(const UHashtable *hash,
int32_t key);
@ -415,7 +434,7 @@ uhash_iget(const UHashtable *hash,
* @param key A pointer key stored in a hashtable
* @return The requested item, or 0 if not found.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_geti(const UHashtable *hash,
const void* key);
/**
@ -425,7 +444,7 @@ uhash_geti(const UHashtable *hash,
* @param key An integer key stored in a hashtable
* @return The requested item, or 0 if not found.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_igeti(const UHashtable *hash,
int32_t key);
@ -435,7 +454,7 @@ uhash_igeti(const UHashtable *hash,
* @param key A key stored in a hashtable
* @return The item removed, or NULL if not found.
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_remove(UHashtable *hash,
const void *key);
@ -445,7 +464,7 @@ uhash_remove(UHashtable *hash,
* @param key An integer key stored in a hashtable
* @return The item removed, or NULL if not found.
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_iremove(UHashtable *hash,
int32_t key);
@ -455,7 +474,7 @@ uhash_iremove(UHashtable *hash,
* @param key An key stored in a hashtable
* @return The item removed, or 0 if not found.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_removei(UHashtable *hash,
const void* key);
@ -465,7 +484,7 @@ uhash_removei(UHashtable *hash,
* @param key An integer key stored in a hashtable
* @return The item removed, or 0 if not found.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_iremovei(UHashtable *hash,
int32_t key);
@ -473,7 +492,7 @@ uhash_iremovei(UHashtable *hash,
* Remove all items from a UHashtable.
* @param hash The target UHashtable.
*/
U_CAPI void U_EXPORT2
U_CAPI void U_EXPORT2
uhash_removeAll(UHashtable *hash);
/**
@ -487,7 +506,7 @@ uhash_removeAll(UHashtable *hash);
* @param key A key stored in a hashtable
* @return a hash element, or NULL if the key is not found.
*/
U_CAPI const UHashElement* U_EXPORT2
U_CAPI const UHashElement* U_EXPORT2
uhash_find(const UHashtable *hash, const void* key);
/**
@ -510,7 +529,7 @@ uhash_find(const UHashtable *hash, const void* key);
* @return a hash element, or NULL if no further key-value pairs
* exist in the table.
*/
U_CAPI const UHashElement* U_EXPORT2
U_CAPI const UHashElement* U_EXPORT2
uhash_nextElement(const UHashtable *hash,
int32_t *pos);
@ -525,7 +544,7 @@ uhash_nextElement(const UHashtable *hash,
* modified.
* @return the value that was removed.
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_removeElement(UHashtable *hash, const UHashElement* e);
/********************************************************************
@ -537,7 +556,7 @@ uhash_removeElement(UHashtable *hash, const UHashElement* e);
* @param i The given integer
* @return a UHashTok for an integer.
*/
/*U_CAPI UHashTok U_EXPORT2
/*U_CAPI UHashTok U_EXPORT2
uhash_toki(int32_t i);*/
/**
@ -545,7 +564,7 @@ uhash_toki(int32_t i);*/
* @param p The given pointer
* @return a UHashTok for a pointer.
*/
/*U_CAPI UHashTok U_EXPORT2
/*U_CAPI UHashTok U_EXPORT2
uhash_tokp(void* p);*/
/********************************************************************
@ -559,7 +578,7 @@ uhash_tokp(void* p);*/
* @param key The string (const UChar*) to hash.
* @return A hash code for the key.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_hashUChars(const UHashTok key);
/**
@ -569,7 +588,7 @@ uhash_hashUChars(const UHashTok key);
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key);
/**
@ -589,7 +608,7 @@ uhash_hashIChars(const UHashTok key);
* @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise.
*/
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_compareUChars(const UHashTok key1, const UHashTok key2);
/**
@ -599,7 +618,7 @@ uhash_compareUChars(const UHashTok key1, const UHashTok key2);
* @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise.
*/
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_compareChars(const UHashTok key1, const UHashTok key2);
/**
@ -609,7 +628,7 @@ uhash_compareChars(const UHashTok key1, const UHashTok key2);
* @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise.
*/
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_compareIChars(const UHashTok key1, const UHashTok key2);
/********************************************************************
@ -621,7 +640,7 @@ uhash_compareIChars(const UHashTok key1, const UHashTok key2);
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key);
/**
@ -630,7 +649,7 @@ uhash_hashUnicodeString(const UElement key);
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_hashCaselessUnicodeString(const UElement key);
/********************************************************************
@ -642,7 +661,7 @@ uhash_hashCaselessUnicodeString(const UElement key);
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_hashLong(const UHashTok key);
/**
@ -651,7 +670,7 @@ uhash_hashLong(const UHashTok key);
* @param Key2 The integer for comparison
* @return true if key1 and key2 are equal, return false otherwise
*/
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_compareLong(const UHashTok key1, const UHashTok key2);
/********************************************************************
@ -662,7 +681,7 @@ uhash_compareLong(const UHashTok key1, const UHashTok key2);
* Deleter for Hashtable objects.
* @param obj The object to be deleted
*/
U_CAPI void U_EXPORT2
U_CAPI void U_EXPORT2
uhash_deleteHashtable(void *obj);
/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
@ -673,7 +692,7 @@ uhash_deleteHashtable(void *obj);
* @param hash2
* @return true if the hashtables are equal and false if not.
*/
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2);

View file

@ -31,9 +31,13 @@
static const UChar TARGET_SEP = 45; // '-'
static const UChar VARIANT_SEP = 47; // '/'
static const UChar ANY[] = {65,110,121,0}; // "Any"
static const UChar ANY[] = {0x41,0x6E,0x79,0}; // "Any"
static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null"
static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45,0}; // "-Latin;Latin-"
static const UChar LATIN_PIVOT[] = {0x2D,0x4C,0x61,0x74,0x6E,0x3B,0x4C,0x61,0x74,0x6E,0x2D,0}; // "-Latn;Latn-"
// initial size for an Any-XXXX transform's cache of script-XXXX transforms
// (will grow as necessary, but we don't expect to have source text with more than 7 scripts)
#define ANY_TRANS_CACHE_INIT_SIZE 7
//------------------------------------------------------------
@ -186,7 +190,7 @@ AnyTransliterator::AnyTransliterator(const UnicodeString& id,
Transliterator(id, NULL),
targetScript(theTargetScript)
{
cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
cache = uhash_openSize(uhash_hashLong, uhash_compareLong, NULL, ANY_TRANS_CACHE_INIT_SIZE, &ec);
if (U_FAILURE(ec)) {
return;
}
@ -212,7 +216,7 @@ AnyTransliterator::AnyTransliterator(const AnyTransliterator& o) :
{
// Don't copy the cache contents
UErrorCode ec = U_ZERO_ERROR;
cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
cache = uhash_openSize(uhash_hashLong, uhash_compareLong, NULL, ANY_TRANS_CACHE_INIT_SIZE, &ec);
if (U_FAILURE(ec)) {
return;
}
@ -286,7 +290,7 @@ Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {
}
if (t == NULL) {
UErrorCode ec = U_ZERO_ERROR;
UnicodeString sourceName(uscript_getName(source), -1, US_INV);
UnicodeString sourceName(uscript_getShortName(source), -1, US_INV);
UnicodeString id(sourceName);
id.append(TARGET_SEP).append(target);

View file

@ -46,11 +46,29 @@ static const UChar LOCALE_SEP = 95; // '_'
//static const UChar VARIANT_SEP = 0x002F; // '/'
// String constants
static const UChar ANY[] = { 65, 110, 121, 0 }; // Any
static const UChar ANY[] = { 0x41, 0x6E, 0x79, 0 }; // Any
static const UChar LAT[] = { 0x4C, 0x61, 0x74, 0 }; // Lat
// empty string
#define NO_VARIANT UnicodeString()
// initial estimate for specDAG size
// ICU 60 Transliterator::countAvailableSources()
#define SPECDAG_INIT_SIZE 149
// initial estimate for number of variant names
#define VARIANT_LIST_INIT_SIZE 11
#define VARIANT_LIST_MAX_SIZE 31
// initial estimate for availableIDs count (default estimate is 8 => multiple reallocs)
// ICU 60 Transliterator::countAvailableIDs()
#define AVAILABLE_IDS_INIT_SIZE 641
// initial estimate for number of targets for source "Any", "Lat"
// ICU 60 Transliterator::countAvailableTargets("Any")/("Latn")
#define ANY_TARGETS_INIT_SIZE 125
#define LAT_TARGETS_INIT_SIZE 23
/**
* Resource bundle key for the RuleBasedTransliterator rule.
*/
@ -517,10 +535,17 @@ U_CDECL_END
TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
registry(TRUE, status),
specDAG(TRUE, status),
availableIDs(status)
specDAG(TRUE, SPECDAG_INIT_SIZE, status),
variantList(VARIANT_LIST_INIT_SIZE, status),
availableIDs(AVAILABLE_IDS_INIT_SIZE, status)
{
registry.setValueDeleter(deleteEntry);
variantList.setDeleter(uprv_deleteUObject);
variantList.setComparer(uhash_compareCaselessUnicodeString);
UnicodeString *emptyString = new UnicodeString();
if (emptyString != NULL) {
variantList.addElement(emptyString, status);
}
availableIDs.setDeleter(uprv_deleteUObject);
availableIDs.setComparer(uhash_compareCaselessUnicodeString);
specDAG.setValueDeleter(uhash_deleteHashtable);
@ -781,9 +806,15 @@ int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& sour
if (targets == 0) {
return 0;
}
UVector *variants = (UVector*) targets->get(target);
// variants may be 0 if the source/target are invalid
return (variants == 0) ? 0 : variants->size();
int32_t varMask = targets->geti(target);
int32_t varCount = 0;
while (varMask > 0) {
if (varMask & 1) {
varCount++;
}
varMask >>= 1;
}
return varCount;
}
UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
@ -795,17 +826,25 @@ UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
result.truncate(0); // invalid source
return result;
}
UVector *variants = (UVector*) targets->get(target);
if (variants == 0) {
result.truncate(0); // invalid target
return result;
}
UnicodeString *v = (UnicodeString*) variants->elementAt(index);
if (v == 0) {
result.truncate(0); // invalid index
} else {
result = *v;
int32_t varMask = targets->geti(target);
int32_t varCount = 0;
int32_t varListIndex = 0;
while (varMask > 0) {
if (varMask & 1) {
if (varCount == index) {
UnicodeString *v = (UnicodeString*) variantList.elementAt(varListIndex);
if (v != NULL) {
result = *v;
return result;
}
break;
}
varCount++;
}
varMask >>= 1;
varListIndex++;
}
result.truncate(0); // invalid target or index
return result;
}
@ -911,9 +950,9 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
UnicodeString *newID = (UnicodeString *)ID.clone();
// Check to make sure newID was created.
if (newID != NULL) {
// NUL-terminate the ID string
newID->getTerminatedBuffer();
availableIDs.addElement(newID, status);
// NUL-terminate the ID string
newID->getTerminatedBuffer();
availableIDs.addElement(newID, status);
}
}
} else {
@ -924,9 +963,7 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
/**
* Register a source-target/variant in the specDAG. Variant may be
* empty, but source and target must not be. If variant is empty then
* the special variant NO_VARIANT is stored in slot zero of the
* UVector of variants.
* empty, but source and target must not be.
*/
void TransliteratorRegistry::registerSTV(const UnicodeString& source,
const UnicodeString& target,
@ -936,39 +973,38 @@ void TransliteratorRegistry::registerSTV(const UnicodeString& source,
UErrorCode status = U_ZERO_ERROR;
Hashtable *targets = (Hashtable*) specDAG.get(source);
if (targets == 0) {
targets = new Hashtable(TRUE, status);
if (U_FAILURE(status) || targets == 0) {
int32_t size = 3;
if (source.compare(ANY,3) == 0) {
size = ANY_TARGETS_INIT_SIZE;
} else if (source.compare(LAT,3) == 0) {
size = LAT_TARGETS_INIT_SIZE;
}
targets = new Hashtable(TRUE, size, status);
if (U_FAILURE(status) || targets == NULL) {
return;
}
targets->setValueDeleter(uprv_deleteUObject);
specDAG.put(source, targets, status);
}
UVector *variants = (UVector*) targets->get(target);
if (variants == 0) {
variants = new UVector(uprv_deleteUObject,
uhash_compareCaselessUnicodeString, status);
if (variants == 0) {
int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
if (variantListIndex < 0) {
if (variantList.size() >= VARIANT_LIST_MAX_SIZE) {
// can't handle any more variants
return;
}
targets->put(target, variants, status);
}
// assert(NO_VARIANT == "");
// We add the variant string. If it is the special "no variant"
// string, that is, the empty string, we add it at position zero.
if (!variants->contains((void*) &variant)) {
UnicodeString *tempus; // Used for null pointer check.
if (variant.length() > 0) {
tempus = new UnicodeString(variant);
if (tempus != NULL) {
variants->addElement(tempus, status);
}
} else {
tempus = new UnicodeString(); // = NO_VARIANT
if (tempus != NULL) {
variants->insertElementAt(tempus, 0, status);
}
UnicodeString *variantEntry = new UnicodeString(variant);
if (variantEntry != NULL) {
variantList.addElement(variantEntry, status);
if (U_SUCCESS(status)) {
variantListIndex = variantList.size() - 1;
}
}
if (variantListIndex < 0) {
return;
}
}
int32_t addMask = 1 << variantListIndex;
int32_t varMask = targets->geti(target);
targets->puti(target, varMask | addMask, status);
}
/**
@ -979,17 +1015,24 @@ void TransliteratorRegistry::removeSTV(const UnicodeString& source,
const UnicodeString& variant) {
// assert(source.length() > 0);
// assert(target.length() > 0);
// UErrorCode status = U_ZERO_ERROR;
UErrorCode status = U_ZERO_ERROR;
Hashtable *targets = (Hashtable*) specDAG.get(source);
if (targets == 0) {
if (targets == NULL) {
return; // should never happen for valid s-t/v
}
UVector *variants = (UVector*) targets->get(target);
if (variants == 0) {
int32_t varMask = targets->geti(target);
if (varMask == 0) {
return; // should never happen for valid s-t/v
}
variants->removeElement((void*) &variant);
if (variants->size() == 0) {
int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
if (variantListIndex < 0) {
return; // should never happen for valid s-t/v
}
int32_t remMask = 1 << variantListIndex;
varMask &= (~remMask);
if (varMask != 0) {
targets->puti(target, varMask, status);
} else {
targets->remove(target); // should delete variants
if (targets->count() == 0) {
specDAG.remove(source); // should delete targets
@ -1281,8 +1324,8 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
UVector* rbts = new UVector(entry->u.dataVector->size(), status);
// Check for null pointer
if (rbts == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
int32_t passNumber = 1;
for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {

View file

@ -440,13 +440,15 @@ class TransliteratorRegistry : public UMemory {
/**
* DAG of visible IDs by spec. Hashtable: source => (Hashtable:
* target => (UVector: variant)) The UVector of variants is never
* empty. For a source-target with no variant, the special
* variant NO_VARIANT (the empty string) is stored in slot zero of
* the UVector.
* target => variant bitmask)
*/
Hashtable specDAG;
/**
* Vector of all variant names
*/
UVector variantList;
/**
* Vector of public full IDs.
*/

View file

@ -33,6 +33,7 @@ static void TestRegisterUnregister(void);
static void TestExtractBetween(void);
static void TestUnicodeIDs(void);
static void TestGetRulesAndSourceSet(void);
static void TestDataVariantsCompounds(void);
static void _expectRules(const char*, const char*, const char*);
static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
@ -51,6 +52,7 @@ addUTransTest(TestNode** root) {
TEST(TestExtractBetween);
TEST(TestUnicodeIDs);
TEST(TestGetRulesAndSourceSet);
TEST(TestDataVariantsCompounds);
}
/*------------------------------------------------------------------
@ -638,6 +640,65 @@ static void TestGetRulesAndSourceSet() {
}
}
/*
 * One data-driven case for TestDataVariantsCompounds. All three strings are
 * run through u_unescape() by the test before use, so they may contain
 * \uXXXX escapes.
 */
typedef struct {
const char * transID;    /* ID of the transliterator to open */
const char * sourceText; /* text fed to the transliterator */
const char * targetText; /* text the transliterator is expected to produce */
} TransIDSourceTarg;
/*
 * Test cases for TestDataVariantsCompounds: each entry is
 * { transliterator ID, source text, expected result }.
 * The list is terminated by an entry whose transID is NULL.
 */
static const TransIDSourceTarg dataVarCompItems[] = {
{ "Simplified-Traditional",
"\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u4ECE\\u7B80\\u4F53\\u8F6C\\u6362\\u4E3A\\u7E41\\u4F53\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002",
"\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u5F9E\\u7C21\\u9AD4\\u8F49\\u63DB\\u70BA\\u7E41\\u9AD4\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002" },
{ "Halfwidth-Fullwidth",
"Sample text, \\uFF7B\\uFF9D\\uFF8C\\uFF9F\\uFF99\\uFF83\\uFF77\\uFF7D\\uFF84.",
"\\uFF33\\uFF41\\uFF4D\\uFF50\\uFF4C\\uFF45\\u3000\\uFF54\\uFF45\\uFF58\\uFF54\\uFF0C\\u3000\\u30B5\\u30F3\\u30D7\\u30EB\\u30C6\\u30AD\\u30B9\\u30C8\\uFF0E" },
{ "Han-Latin/Names; Latin-Bopomofo",
"\\u4E07\\u4FDF\\u919C\\u5974\\u3001\\u533A\\u695A\\u826F\\u3001\\u4EFB\\u70E8\\u3001\\u5CB3\\u98DB",
"\\u3107\\u311B\\u02CB \\u3111\\u3127\\u02CA \\u3114\\u3121\\u02C7 \\u310B\\u3128\\u02CA\\u3001 \\u3121 \\u3114\\u3128\\u02C7 \\u310C\\u3127\\u3124\\u02CA\\u3001 \\u3116\\u3123\\u02CA \\u3127\\u311D\\u02CB\\u3001 \\u3129\\u311D\\u02CB \\u3108\\u311F" },
{ "Greek-Latin",
"\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
"A \\u0100I H\\u0100I RH" },
{ "Greek-Latin/BGN",
"\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
"A\\u0313 A\\u0345 A\\u0314\\u0345 \\u1FEC" },
{ "Greek-Latin/UNGEGN",
"\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
"A A A R" },
{ NULL, NULL, NULL }
};
enum { kBBufMax = 384 };
static void TestDataVariantsCompounds() {
const TransIDSourceTarg* itemsPtr;
for (itemsPtr = dataVarCompItems; itemsPtr->transID != NULL; itemsPtr++) {
UErrorCode status = U_ZERO_ERROR;
UChar utrid[kUBufMax];
int32_t utridlen = u_unescape(itemsPtr->transID, utrid, kUBufMax);
UTransliterator* utrans = utrans_openU(utrid, utridlen, UTRANS_FORWARD, NULL, 0, NULL, &status);
if (U_FAILURE(status)) {
log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n", itemsPtr->transID, u_errorName(status));
continue;
}
UChar text[kUBufMax];
int32_t textLen = u_unescape(itemsPtr->sourceText, text, kUBufMax);
int32_t textLim = textLen;
utrans_transUChars(utrans, text, &textLen, kUBufMax, 0, &textLim, &status);
if (U_FAILURE(status)) {
log_err("FAIL: utrans_transUChars(%s) failed, error=%s\n", itemsPtr->transID, u_errorName(status));
} else {
UChar expect[kUBufMax];
int32_t expectLen = u_unescape(itemsPtr->targetText, expect, kUBufMax);
if (textLen != expectLen || u_strncmp(text, expect, textLen) != 0) {
char btext[kBBufMax], bexpect[kBBufMax];
u_austrncpy(btext, text, textLen);
u_austrncpy(bexpect, expect, expectLen);
log_err("FAIL: utrans_transUChars(%s),\n expect %s\n get %s\n", itemsPtr->transID, bexpect, btext);
}
}
utrans_close(utrans);
}
}
static void _expectRules(const char* crules,
const char* cfrom,