From 1863bea230a50abd3e72fa0fe86b004568075a7b Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Mon, 28 Dec 2020 20:22:48 -0800 Subject: [PATCH] ICU-20698 UHashtable allow integer value zero --- icu4c/source/common/localematcher.cpp | 12 +-- icu4c/source/common/uhash.cpp | 81 ++++++++++++++- icu4c/source/common/uhash.h | 93 ++++++++++++++++++ icu4c/source/common/unicode/localematcher.h | 2 +- icu4c/source/test/cintltst/chashtst.c | 103 +++++++++++++++++--- icu4c/source/tools/toolutil/xmlparser.cpp | 2 +- 6 files changed, 266 insertions(+), 27 deletions(-) diff --git a/icu4c/source/common/localematcher.cpp b/icu4c/source/common/localematcher.cpp index 5795cbf87e6..132aee290e8 100644 --- a/icu4c/source/common/localematcher.cpp +++ b/icu4c/source/common/localematcher.cpp @@ -345,9 +345,8 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) { int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode) { if (U_FAILURE(errorCode)) { return suppLength; } - int32_t index = uhash_geti(supportedLsrToIndex, &lsr); - if (index == 0) { - uhash_puti(supportedLsrToIndex, const_cast(&lsr), i + 1, &errorCode); + if (!uhash_containsKey(supportedLsrToIndex, &lsr)) { + uhash_putiAllowZero(supportedLsrToIndex, const_cast(&lsr), i, &errorCode); if (U_SUCCESS(errorCode)) { supportedLSRs[suppLength] = &lsr; supportedIndexes[suppLength++] = i; @@ -685,12 +684,11 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai int32_t bestSupportedLsrIndex = -1; for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) { // Quick check for exact maximized LSR. - // Returns suppIndex+1 where 0 means not found. if (supportedLsrToIndex != nullptr) { desiredLSR.setHashCode(); - int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR); - if (index != 0) { - int32_t suppIndex = index - 1; + UBool found = false; + int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found); + if (found) { if (remainingIter != nullptr) { remainingIter->rememberCurrent(desiredIndex, errorCode); } diff --git a/icu4c/source/common/uhash.cpp b/icu4c/source/common/uhash.cpp index 86311ceb0b2..67c7c363540 100644 --- a/icu4c/source/common/uhash.cpp +++ b/icu4c/source/common/uhash.cpp @@ -133,8 +133,10 @@ static const float RESIZE_POLICY_RATIO_TABLE[6] = { * or a pointer. If a hint bit is zero, then the associated * token is assumed to be an integer. */ +#define HINT_BOTH_INTEGERS (0) #define HINT_KEY_POINTER (1) #define HINT_VALUE_POINTER (2) +#define HINT_ALLOW_ZERO (4) /******************************************************************** * PRIVATE Implementation @@ -479,8 +481,9 @@ _uhash_put(UHashtable *hash, goto err; } U_ASSERT(hash != NULL); - /* Cannot always check pointer here or iSeries sees NULL every time. */ - if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) { + if ((hint & HINT_VALUE_POINTER) ? + value.pointer == NULL : + value.integer == 0 && (hint & HINT_ALLOW_ZERO) == 0) { /* Disallow storage of NULL values, since NULL is returned by * get() to indicate an absent key. Storing NULL == removing. */ @@ -687,6 +690,28 @@ uhash_igeti(const UHashtable *hash, return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; } +U_CAPI int32_t U_EXPORT2 +uhash_getiAndFound(const UHashtable *hash, + const void *key, + UBool *found) { + UHashTok keyholder; + keyholder.pointer = (void *)key; + const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); + *found = !IS_EMPTY_OR_DELETED(e->hashcode); + return e->value.integer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_igetiAndFound(const UHashtable *hash, + int32_t key, + UBool *found) { + UHashTok keyholder; + keyholder.integer = key; + const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); + *found = !IS_EMPTY_OR_DELETED(e->hashcode); + return e->value.integer; +} + U_CAPI void* U_EXPORT2 uhash_put(UHashtable *hash, void* key, @@ -736,7 +761,34 @@ uhash_iputi(UHashtable *hash, keyholder.integer = key; valueholder.integer = value; return _uhash_put(hash, keyholder, valueholder, - 0, /* neither is a ptr */ + HINT_BOTH_INTEGERS, + status).integer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_putiAllowZero(UHashtable *hash, + void *key, + int32_t value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.pointer = key; + valueholder.integer = value; + return _uhash_put(hash, keyholder, valueholder, + HINT_KEY_POINTER | HINT_ALLOW_ZERO, + status).integer; +} + + +U_CAPI int32_t U_EXPORT2 +uhash_iputiAllowZero(UHashtable *hash, + int32_t key, + int32_t value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.integer = key; + valueholder.integer = value; + return _uhash_put(hash, keyholder, valueholder, + HINT_BOTH_INTEGERS | HINT_ALLOW_ZERO, status).integer; } @@ -785,6 +837,29 @@ uhash_removeAll(UHashtable *hash) { U_ASSERT(hash->count == 0); } +U_CAPI UBool U_EXPORT2 +uhash_containsKey(const UHashtable *hash, const void *key) { + UHashTok keyholder; + keyholder.pointer = (void *)key; + const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); + return !IS_EMPTY_OR_DELETED(e->hashcode); +} + +/** + * Returns true if the UHashtable contains an item with this integer key. + * + * @param hash The target UHashtable. + * @param key An integer key stored in a hashtable + * @return true if the key is found. + */ +U_CAPI UBool U_EXPORT2 +uhash_icontainsKey(const UHashtable *hash, int32_t key) { + UHashTok keyholder; + keyholder.integer = key; + const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); + return !IS_EMPTY_OR_DELETED(e->hashcode); +} + U_CAPI const UHashElement* U_EXPORT2 uhash_find(const UHashtable *hash, const void* key) { UHashTok keyholder; diff --git a/icu4c/source/common/uhash.h b/icu4c/source/common/uhash.h index b59d2711bb2..3b3f8c50440 100644 --- a/icu4c/source/common/uhash.h +++ b/icu4c/source/common/uhash.h @@ -54,6 +54,13 @@ * uhash_remove() on that key. This keeps uhash_get(), uhash_count(), * and uhash_nextElement() consistent with one another. * + * Keys and values can be integers. + * Functions that work with an integer key have an "i" prefix. + * Functions that work with an integer value have an "i" suffix. + * As with putting a NULL value pointer, putting a zero value integer removes the item. + * Except, there are pairs of functions that allow setting zero values + * and fetching (value, found) pairs. + * * To see everything in a hashtable, use uhash_nextElement() to * iterate through its contents. Each call to this function returns a * UHashElement pointer. A hash element contains a key, value, and @@ -405,6 +412,44 @@ uhash_iputi(UHashtable *hash, int32_t value, UErrorCode *status); +/** + * Put a (key=pointer, value=integer) item in a UHashtable. If the + * keyDeleter is non-NULL, then the hashtable owns 'key' after this + * call. valueDeleter must be NULL. + * Storing a 0 value is possible; call uhash_igetiAndFound() to retrieve values including zero. + * + * @param hash The target UHashtable. + * @param key The key to store. + * @param value The integer value to store. + * @param status A pointer to an UErrorCode to receive any errors. + * @return The previous value, or 0 if none. + * @see uhash_getiAndFound + */ +U_CAPI int32_t U_EXPORT2 +uhash_putiAllowZero(UHashtable *hash, + void *key, + int32_t value, + UErrorCode *status); + +/** + * Put a (key=integer, value=integer) item in a UHashtable. If the + * keyDeleter is non-NULL, then the hashtable owns 'key' after this + * call. valueDeleter must be NULL. + * Storing a 0 value is possible; call uhash_igetiAndFound() to retrieve values including zero. + * + * @param hash The target UHashtable. + * @param key The key to store. + * @param value The integer value to store. + * @param status A pointer to an UErrorCode to receive any errors. + * @return The previous value, or 0 if none. + * @see uhash_igetiAndFound + */ +U_CAPI int32_t U_EXPORT2 +uhash_iputiAllowZero(UHashtable *hash, + int32_t key, + int32_t value, + UErrorCode *status); + /** * Retrieve a pointer value from a UHashtable using a pointer key, * as previously stored by uhash_put(). @@ -448,6 +493,34 @@ U_CAPI int32_t U_EXPORT2 uhash_igeti(const UHashtable *hash, int32_t key); +/** + * Retrieves an integer value from a UHashtable using a pointer key, + * as previously stored by uhash_putiAllowZero() or uhash_puti(). + * + * @param hash The target UHashtable. + * @param key A pointer key stored in a hashtable + * @param found A pointer to a boolean which will be set for whether the key was found. + * @return The requested item, or 0 if not found. + */ +U_CAPI int32_t U_EXPORT2 +uhash_getiAndFound(const UHashtable *hash, + const void *key, + UBool *found); + +/** + * Retrieves an integer value from a UHashtable using an integer key, + * as previously stored by uhash_iputiAllowZero() or uhash_iputi(). + * + * @param hash The target UHashtable. + * @param key An integer key stored in a hashtable + * @param found A pointer to a boolean which will be set for whether the key was found. + * @return The requested item, or 0 if not found. + */ +U_CAPI int32_t U_EXPORT2 +uhash_igetiAndFound(const UHashtable *hash, + int32_t key, + UBool *found); + /** * Remove an item from a UHashtable stored by uhash_put(). * @param hash The target UHashtable. @@ -495,6 +568,26 @@ uhash_iremovei(UHashtable *hash, U_CAPI void U_EXPORT2 uhash_removeAll(UHashtable *hash); +/** + * Returns true if the UHashtable contains an item with this pointer key. + * + * @param hash The target UHashtable. + * @param key A pointer key stored in a hashtable + * @return true if the key is found. + */ +U_CAPI UBool U_EXPORT2 +uhash_containsKey(const UHashtable *hash, const void *key); + +/** + * Returns true if the UHashtable contains an item with this integer key. + * + * @param hash The target UHashtable. + * @param key An integer key stored in a hashtable + * @return true if the key is found. + */ +U_CAPI UBool U_EXPORT2 +uhash_icontainsKey(const UHashtable *hash, int32_t key); + /** * Locate an element of a UHashtable. The caller must not modify the * returned object. The primary use of this function is to obtain the diff --git a/icu4c/source/common/unicode/localematcher.h b/icu4c/source/common/unicode/localematcher.h index 63a68b0b7fb..63d20f26860 100644 --- a/icu4c/source/common/unicode/localematcher.h +++ b/icu4c/source/common/unicode/localematcher.h @@ -704,7 +704,7 @@ private: LSR *lsrs; int32_t supportedLocalesLength; // These are in preference order: 1. Default locale 2. paradigm locales 3. others. - UHashtable *supportedLsrToIndex; // Map stores index+1 because 0 is "not found" + UHashtable *supportedLsrToIndex; // Map // Array versions of the supportedLsrToIndex keys and values. // The distance lookup loops over the supportedLSRs and returns the index of the best match. const LSR **supportedLSRs; diff --git a/icu4c/source/test/cintltst/chashtst.c b/icu4c/source/test/cintltst/chashtst.c index 5d1a4a8718f..405d56d4e5d 100644 --- a/icu4c/source/test/cintltst/chashtst.c +++ b/icu4c/source/test/cintltst/chashtst.c @@ -11,6 +11,7 @@ ******************************************************************************* */ +#include #include "cintltst.h" #include "uhash.h" #include "unicode/ctest.h" @@ -22,6 +23,7 @@ *********************************************************************/ static void TestBasic(void); +static void TestAllowZero(void); static void TestOtherAPI(void); static void hashIChars(void); @@ -87,6 +89,7 @@ _compareLong(int32_t a, int32_t b) { void addHashtableTest(TestNode** root) { addTest(root, &TestBasic, "tsutil/chashtst/TestBasic"); + addTest(root, &TestAllowZero, "tsutil/chashtst/TestAllowZero"); addTest(root, &TestOtherAPI, "tsutil/chashtst/TestOtherAPI"); addTest(root, &hashIChars, "tsutil/chashtst/hashIChars"); @@ -133,6 +136,9 @@ static void TestBasic(void) { _get(hash, omega, 48); _get(hash, two, 200); + // puti(key, value==0) removes the key's element. + _put(hash, two, 0, 200); + if(_compareChars((void*)one, (void*)three) == TRUE || _compareChars((void*)one, (void*)one2) != TRUE || _compareChars((void*)one, (void*)one) != TRUE || @@ -145,9 +151,56 @@ static void TestBasic(void) { _compareIChars((void*)one, NULL) == TRUE ) { log_err("FAIL: compareIChars failed\n"); } - - uhash_close(hash); + uhash_close(hash); +} + +static void TestAllowZero() { + UErrorCode status = U_ZERO_ERROR; + UHashtable *hash = uhash_open(hashChars, isEqualChars, NULL, &status); + if (U_FAILURE(status)) { + log_err("FAIL: uhash_open failed with %s and returned 0x%08x\n", + u_errorName(status), hash); + return; + } + if (hash == NULL) { + log_err("FAIL: uhash_open returned NULL\n"); + return; + } + log_verbose("Ok: uhash_open returned 0x%08X\n", hash); + + int32_t oldValue = uhash_putiAllowZero(hash, (char *)"one", 1, &status); + UBool found = false; + if (U_FAILURE(status) || oldValue != 0 || !uhash_containsKey(hash, "one") || + uhash_geti(hash, "one") != 1 || + uhash_getiAndFound(hash, "one", &found) != 1 || !found) { + log_err("FAIL: uhash_putiAllowZero(one, 1)"); + } + oldValue = uhash_putiAllowZero(hash, (char *)"zero", 0, &status); + found = false; + if (U_FAILURE(status) || oldValue != 0 || !uhash_containsKey(hash, "zero") || + uhash_geti(hash, "zero") != 0 || + uhash_getiAndFound(hash, "zero", &found) != 0 || !found) { + log_err("FAIL: uhash_putiAllowZero(zero, 0)"); + } + // Overwrite "one" to 0. + oldValue = uhash_putiAllowZero(hash, (char *)"one", 0, &status); + found = false; + if (U_FAILURE(status) || oldValue != 1 || !uhash_containsKey(hash, "one") || + uhash_geti(hash, "one") != 0 || + uhash_getiAndFound(hash, "one", &found) != 0 || !found) { + log_err("FAIL: uhash_putiAllowZero(one, 0)"); + } + // Remove "zero" using puti(zero, 0). + oldValue = uhash_puti(hash, (char *)"zero", 0, &status); + found = true; + if (U_FAILURE(status) || oldValue != 0 || uhash_containsKey(hash, "zero") || + uhash_geti(hash, "zero") != 0 || + uhash_getiAndFound(hash, "zero", &found) != 0 || found) { + log_err("FAIL: uhash_puti(zero, 0)"); + } + + uhash_close(hash); } static void TestOtherAPI(void){ @@ -343,30 +396,46 @@ static void _put(UHashtable* hash, int32_t oldValue = uhash_puti(hash, (void*) key, value, &status); if (U_FAILURE(status)) { - log_err("FAIL: uhash_put(%s) failed with %s and returned %ld\n", + log_err("FAIL: uhash_puti(%s) failed with %s and returned %ld\n", key, u_errorName(status), oldValue); } else if (oldValue != expectedOldValue) { - log_err("FAIL: uhash_put(%s) returned old value %ld; expected %ld\n", + log_err("FAIL: uhash_puti(%s) returned old value %ld; expected %ld\n", key, oldValue, expectedOldValue); } else { - log_verbose("Ok: uhash_put(%s, %d) returned old value %ld\n", + log_verbose("Ok: uhash_puti(%s, %d) returned old value %ld\n", key, value, oldValue); } + int32_t newValue = uhash_geti(hash, key); + if (newValue != value) { + log_err("FAIL: uhash_puti(%s) failed to set the intended value %ld: " + "uhash_geti() returns %ld\n", + key, value, newValue); + } + UBool contained = uhash_containsKey(hash, key); + if (value == 0) { + if (contained) { + log_err("FAIL: uhash_puti(%s, zero) failed to remove the key item: " + "uhash_containsKey() returns true\n", + key); + } + } else { + if (!contained) { + log_err("FAIL: uhash_puti(%s, not zero) appears to have removed the key item: " + "uhash_containsKey() returns false\n", + key); + } + } } static void _get(UHashtable* hash, const char* key, int32_t expectedValue) { - UErrorCode status = U_ZERO_ERROR; int32_t value = uhash_geti(hash, key); - if (U_FAILURE(status)) { - log_err("FAIL: uhash_get(%s) failed with %s and returned %ld\n", - key, u_errorName(status), value); - } else if (value != expectedValue) { - log_err("FAIL: uhash_get(%s) returned %ld; expected %ld\n", + if (value != expectedValue) { + log_err("FAIL: uhash_geti(%s) returned %ld; expected %ld\n", key, value, expectedValue); } else { - log_verbose("Ok: uhash_get(%s) returned value %ld\n", + log_verbose("Ok: uhash_geti(%s) returned value %ld\n", key, value); } } @@ -376,11 +445,15 @@ static void _remove(UHashtable* hash, int32_t expectedValue) { int32_t value = uhash_removei(hash, key); if (value != expectedValue) { - log_err("FAIL: uhash_remove(%s) returned %ld; expected %ld\n", + log_err("FAIL: uhash_removei(%s) returned %ld; expected %ld\n", key, value, expectedValue); } else { - log_verbose("Ok: uhash_remove(%s) returned old value %ld\n", + log_verbose("Ok: uhash_removei(%s) returned old value %ld\n", key, value); } + if (uhash_containsKey(hash, key)) { + log_err("FAIL: uhash_removei(%s) failed to remove the key item: " + "uhash_containsKey() returns false\n", + key); + } } - diff --git a/icu4c/source/tools/toolutil/xmlparser.cpp b/icu4c/source/tools/toolutil/xmlparser.cpp index 60cd49ca8b3..482d6f65aa7 100644 --- a/icu4c/source/tools/toolutil/xmlparser.cpp +++ b/icu4c/source/tools/toolutil/xmlparser.cpp @@ -658,7 +658,7 @@ UXMLParser::intern(const UnicodeString &s, UErrorCode &errorCode) { return (const UnicodeString *)he->key.pointer; } else { // add this new name and return its hashed key pointer - fNames.puti(s, 0, errorCode); + fNames.puti(s, 1, errorCode); he=fNames.find(s); return (const UnicodeString *)he->key.pointer; }