ICU-22390 Transliterator.<clinit> can be optimized - part 2

The WIDTH_FIX instance can be lazily initialized because it is used
only when transliterate() is called on an AnyTransliterator instance;
it is apparently not used by other Transliterator instances.
This commit is contained in:
Victor Chang 2023-06-03 01:03:38 +01:00 committed by Frank Yung-Fong Tang
parent a9f7570892
commit 83ee7e662f
3 changed files with 60 additions and 41 deletions

View file

@ -11,6 +11,7 @@
*/
#include "unicode/utypes.h"
#include <unicode/rep.h>
#if !UCONFIG_NO_TRANSLITERATION
@ -531,7 +532,7 @@ TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
registry(true, status),
specDAG(true, SPECDAG_INIT_SIZE, status),
variantList(VARIANT_LIST_INIT_SIZE, status),
availableIDs(AVAILABLE_IDS_INIT_SIZE, status)
availableIDs(true, AVAILABLE_IDS_INIT_SIZE, status)
{
registry.setValueDeleter(deleteEntry);
variantList.setDeleter(uprv_deleteUObject);
@ -540,8 +541,6 @@ TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
if (emptyString != nullptr) {
variantList.adoptElement(emptyString, status);
}
availableIDs.setDeleter(uprv_deleteUObject);
availableIDs.setComparer(uhash_compareCaselessUnicodeString);
specDAG.setValueDeleter(uhash_deleteHashtable);
}
@ -714,7 +713,7 @@ void TransliteratorRegistry::remove(const UnicodeString& ID) {
TransliteratorIDParser::STVtoID(source, target, variant, id);
registry.remove(id);
removeSTV(source, target, variant);
availableIDs.removeElement((void*) &id);
availableIDs.remove(id);
}
//----------------------------------------------------------------------
@ -728,7 +727,7 @@ void TransliteratorRegistry::remove(const UnicodeString& ID) {
* i from 0 to countAvailableIDs() - 1.
*/
int32_t TransliteratorRegistry::countAvailableIDs() const {
return availableIDs.size();
return availableIDs.count();
}
/**
@ -738,10 +737,27 @@ int32_t TransliteratorRegistry::countAvailableIDs() const {
* range, the result of getAvailableID(0) is returned.
*/
const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const {
if (index < 0 || index >= availableIDs.size()) {
if (index < 0 || index >= availableIDs.count()) {
index = 0;
}
return *(const UnicodeString*) availableIDs[index];
int32_t pos = UHASH_FIRST;
const UHashElement *e = nullptr;
while (index-- >= 0) {
e = availableIDs.nextElement(pos);
if (e == nullptr) {
break;
}
}
if (e != nullptr) {
return *(UnicodeString*) e->key.pointer;
}
// If the code reaches here, the hash table was likely modified during iteration.
// Return a statically initialized empty string because the return type is a reference.
static UnicodeString empty;
return empty;
}
StringEnumeration* TransliteratorRegistry::getAvailableIDs() const {
@ -852,14 +868,14 @@ UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
//----------------------------------------------------------------------
TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) :
index(0), reg(_reg) {
pos(UHASH_FIRST), size(_reg.availableIDs.count()), reg(_reg) {
}
TransliteratorRegistry::Enumeration::~Enumeration() {
}
int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const {
return reg.availableIDs.size();
return size;
}
const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) {
@ -875,22 +891,27 @@ const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& stat
if (U_FAILURE(status)) {
return nullptr;
}
int32_t n = reg.availableIDs.size();
if (index > n) {
int32_t n = reg.availableIDs.count();
if (n != size) {
status = U_ENUM_OUT_OF_SYNC_ERROR;
}
// index == n is okay -- this means we've reached the end
if (index < n) {
// Copy the string! This avoids lifetime problems.
unistr = *(const UnicodeString*)reg.availableIDs[index++];
return &unistr;
} else {
return nullptr;
}
const UHashElement* element = reg.availableIDs.nextElement(pos);
if (element == nullptr) {
// If the code reaches this point, it means that it's out of sync
// or the caller keeps asking for snext().
return nullptr;
}
// Copy the string! This avoids lifetime problems.
unistr = *(const UnicodeString*) element->key.pointer;
return &unistr;
}
void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) {
index = 0;
pos = UHASH_FIRST;
size = reg.availableIDs.count();
}
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration)
@ -945,18 +966,12 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
registry.put(ID, adopted, status);
if (visible) {
registerSTV(source, target, variant);
if (!availableIDs.contains((void*) &ID)) {
UnicodeString *newID = ID.clone();
// Check to make sure newID was created.
if (newID != nullptr) {
// NUL-terminate the ID string
newID->getTerminatedBuffer();
availableIDs.adoptElement(newID, status);
}
if (!availableIDs.containsKey(ID)) {
availableIDs.puti(ID, /* unused value */ 1, status);
}
} else {
removeSTV(source, target, variant);
availableIDs.removeElement((void*) &ID);
availableIDs.remove(ID);
}
}

View file

@ -423,7 +423,8 @@ class TransliteratorRegistry : public UMemory {
static UClassID U_EXPORT2 getStaticClassID();
virtual UClassID getDynamicClassID() const override;
private:
int32_t index;
int32_t pos;
int32_t size;
const TransliteratorRegistry& reg;
};
friend class Enumeration;
@ -452,7 +453,7 @@ class TransliteratorRegistry : public UMemory {
/**
* Vector of public full IDs.
*/
UVector availableIDs;
Hashtable availableIDs;
TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class

View file

@ -52,12 +52,6 @@ class AnyTransliterator extends Transliterator {
static final String NULL_ID = "Null";
static final String LATIN_PIVOT = "-Latin;Latin-";
/**
* Special code for handling width characters
*/
private static final Transliterator WIDTH_FIX =
Transliterator.getInstance("[[:dt=Nar:][:dt=Wide:]] nfkd");
/**
* Cache mapping UScriptCode values to Transliterator*.
*/
@ -73,6 +67,15 @@ class AnyTransliterator extends Transliterator {
*/
private int targetScript;
/**
* Lazily initialize a special Transliterator for handling width characters.
*/
private static class WidthFix {
private static final String ID = "[[:dt=Nar:][:dt=Wide:]] nfkd";
static final Transliterator INSTANCE = Transliterator.getInstance(ID);
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
@ -153,7 +156,7 @@ class AnyTransliterator extends Transliterator {
* @param filter The Unicode filter.
* @param target2 the target name.
* @param targetScript2 the script code corresponding to theTarget.
* @param widthFix2 The Transliterator width fix.
* @param widthFix2 Not used. This parameter is deprecated.
* @param cache2 The Map object for cache.
*/
public AnyTransliterator(String id, UnicodeFilter filter, String target2,
@ -177,7 +180,7 @@ class AnyTransliterator extends Transliterator {
if (isWide(targetScript)) {
return null;
} else {
return WIDTH_FIX;
return WidthFix.INSTANCE;
}
}
@ -202,7 +205,7 @@ class AnyTransliterator extends Transliterator {
if (t != null) {
if (!isWide(targetScript)) {
List<Transliterator> v = new ArrayList<Transliterator>();
v.add(WIDTH_FIX);
v.add(WidthFix.INSTANCE);
v.add(t);
t = new CompoundTransliterator(v);
}
@ -211,7 +214,7 @@ class AnyTransliterator extends Transliterator {
t = prevCachedT;
}
} else if (!isWide(targetScript)) {
return WIDTH_FIX;
return WidthFix.INSTANCE;
}
}
@ -409,7 +412,7 @@ class AnyTransliterator extends Transliterator {
if (filter != null && filter instanceof UnicodeSet) {
filter = new UnicodeSet((UnicodeSet)filter);
}
return new AnyTransliterator(getID(), filter, target, targetScript, WIDTH_FIX, cache);
return new AnyTransliterator(getID(), filter, target, targetScript, null, cache);
}
/* (non-Javadoc)