mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-22390 Transliterator.<clinit> can be optimized - part 2
The WIDTH_FIX instance can be lazily initialized, because it is used only when transliterate() is called on an AnyTransliterator instance and is apparently not used by any other Transliterator instances.
This commit is contained in:
parent
a9f7570892
commit
83ee7e662f
3 changed files with 60 additions and 41 deletions
|
@ -11,6 +11,7 @@
|
|||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include <unicode/rep.h>
|
||||
|
||||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
|
@ -531,7 +532,7 @@ TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
|
|||
registry(true, status),
|
||||
specDAG(true, SPECDAG_INIT_SIZE, status),
|
||||
variantList(VARIANT_LIST_INIT_SIZE, status),
|
||||
availableIDs(AVAILABLE_IDS_INIT_SIZE, status)
|
||||
availableIDs(true, AVAILABLE_IDS_INIT_SIZE, status)
|
||||
{
|
||||
registry.setValueDeleter(deleteEntry);
|
||||
variantList.setDeleter(uprv_deleteUObject);
|
||||
|
@ -540,8 +541,6 @@ TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
|
|||
if (emptyString != nullptr) {
|
||||
variantList.adoptElement(emptyString, status);
|
||||
}
|
||||
availableIDs.setDeleter(uprv_deleteUObject);
|
||||
availableIDs.setComparer(uhash_compareCaselessUnicodeString);
|
||||
specDAG.setValueDeleter(uhash_deleteHashtable);
|
||||
}
|
||||
|
||||
|
@ -714,7 +713,7 @@ void TransliteratorRegistry::remove(const UnicodeString& ID) {
|
|||
TransliteratorIDParser::STVtoID(source, target, variant, id);
|
||||
registry.remove(id);
|
||||
removeSTV(source, target, variant);
|
||||
availableIDs.removeElement((void*) &id);
|
||||
availableIDs.remove(id);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
@ -728,7 +727,7 @@ void TransliteratorRegistry::remove(const UnicodeString& ID) {
|
|||
* i from 0 to countAvailableIDs() - 1.
|
||||
*/
|
||||
int32_t TransliteratorRegistry::countAvailableIDs() const {
|
||||
return availableIDs.size();
|
||||
return availableIDs.count();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -738,10 +737,27 @@ int32_t TransliteratorRegistry::countAvailableIDs() const {
|
|||
* range, the result of getAvailableID(0) is returned.
|
||||
*/
|
||||
const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const {
|
||||
if (index < 0 || index >= availableIDs.size()) {
|
||||
if (index < 0 || index >= availableIDs.count()) {
|
||||
index = 0;
|
||||
}
|
||||
return *(const UnicodeString*) availableIDs[index];
|
||||
|
||||
int32_t pos = UHASH_FIRST;
|
||||
const UHashElement *e = nullptr;
|
||||
while (index-- >= 0) {
|
||||
e = availableIDs.nextElement(pos);
|
||||
if (e == nullptr) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (e != nullptr) {
|
||||
return *(UnicodeString*) e->key.pointer;
|
||||
}
|
||||
|
||||
// If the code reaches here, the hash table was likely modified during iteration.
|
||||
// Return a statically initialized empty string, because the return type is a reference.
|
||||
static UnicodeString empty;
|
||||
return empty;
|
||||
}
|
||||
|
||||
StringEnumeration* TransliteratorRegistry::getAvailableIDs() const {
|
||||
|
@ -852,14 +868,14 @@ UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
|
|||
//----------------------------------------------------------------------
|
||||
|
||||
TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) :
|
||||
index(0), reg(_reg) {
|
||||
pos(UHASH_FIRST), size(_reg.availableIDs.count()), reg(_reg) {
|
||||
}
|
||||
|
||||
TransliteratorRegistry::Enumeration::~Enumeration() {
|
||||
}
|
||||
|
||||
int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const {
|
||||
return reg.availableIDs.size();
|
||||
return size;
|
||||
}
|
||||
|
||||
const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) {
|
||||
|
@ -875,22 +891,27 @@ const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& stat
|
|||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
int32_t n = reg.availableIDs.size();
|
||||
if (index > n) {
|
||||
int32_t n = reg.availableIDs.count();
|
||||
if (n != size) {
|
||||
status = U_ENUM_OUT_OF_SYNC_ERROR;
|
||||
}
|
||||
// index == n is okay -- this means we've reached the end
|
||||
if (index < n) {
|
||||
// Copy the string! This avoids lifetime problems.
|
||||
unistr = *(const UnicodeString*)reg.availableIDs[index++];
|
||||
return &unistr;
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const UHashElement* element = reg.availableIDs.nextElement(pos);
|
||||
if (element == nullptr) {
|
||||
// If the code reaches this point, it means that it's out of sync
|
||||
// or the caller keeps asking for snext().
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Copy the string! This avoids lifetime problems.
|
||||
unistr = *(const UnicodeString*) element->key.pointer;
|
||||
return &unistr;
|
||||
}
|
||||
|
||||
void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) {
|
||||
index = 0;
|
||||
pos = UHASH_FIRST;
|
||||
size = reg.availableIDs.count();
|
||||
}
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration)
|
||||
|
@ -945,18 +966,12 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
|
|||
registry.put(ID, adopted, status);
|
||||
if (visible) {
|
||||
registerSTV(source, target, variant);
|
||||
if (!availableIDs.contains((void*) &ID)) {
|
||||
UnicodeString *newID = ID.clone();
|
||||
// Check to make sure newID was created.
|
||||
if (newID != nullptr) {
|
||||
// NUL-terminate the ID string
|
||||
newID->getTerminatedBuffer();
|
||||
availableIDs.adoptElement(newID, status);
|
||||
}
|
||||
if (!availableIDs.containsKey(ID)) {
|
||||
availableIDs.puti(ID, /* unused value */ 1, status);
|
||||
}
|
||||
} else {
|
||||
removeSTV(source, target, variant);
|
||||
availableIDs.removeElement((void*) &ID);
|
||||
availableIDs.remove(ID);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -423,7 +423,8 @@ class TransliteratorRegistry : public UMemory {
|
|||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
virtual UClassID getDynamicClassID() const override;
|
||||
private:
|
||||
int32_t index;
|
||||
int32_t pos;
|
||||
int32_t size;
|
||||
const TransliteratorRegistry& reg;
|
||||
};
|
||||
friend class Enumeration;
|
||||
|
@ -452,7 +453,7 @@ class TransliteratorRegistry : public UMemory {
|
|||
/**
|
||||
* Vector of public full IDs.
|
||||
*/
|
||||
UVector availableIDs;
|
||||
Hashtable availableIDs;
|
||||
|
||||
TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
|
||||
TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
|
||||
|
|
|
@ -52,12 +52,6 @@ class AnyTransliterator extends Transliterator {
|
|||
static final String NULL_ID = "Null";
|
||||
static final String LATIN_PIVOT = "-Latin;Latin-";
|
||||
|
||||
/**
|
||||
* Special code for handling width characters
|
||||
*/
|
||||
private static final Transliterator WIDTH_FIX =
|
||||
Transliterator.getInstance("[[:dt=Nar:][:dt=Wide:]] nfkd");
|
||||
|
||||
/**
|
||||
* Cache mapping UScriptCode values to Transliterator*.
|
||||
*/
|
||||
|
@ -73,6 +67,15 @@ class AnyTransliterator extends Transliterator {
|
|||
*/
|
||||
private int targetScript;
|
||||
|
||||
/**
|
||||
* Lazily initialize a special Transliterator for handling width characters.
|
||||
*/
|
||||
private static class WidthFix {
|
||||
private static final String ID = "[[:dt=Nar:][:dt=Wide:]] nfkd";
|
||||
|
||||
static final Transliterator INSTANCE = Transliterator.getInstance(ID);
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
|
@ -153,7 +156,7 @@ class AnyTransliterator extends Transliterator {
|
|||
* @param filter The Unicode filter.
|
||||
* @param target2 the target name.
|
||||
* @param targetScript2 the script code corresponding to theTarget.
|
||||
* @param widthFix2 The Transliterator width fix.
|
||||
* @param widthFix2 Not used. This parameter is deprecated.
|
||||
* @param cache2 The Map object for cache.
|
||||
*/
|
||||
public AnyTransliterator(String id, UnicodeFilter filter, String target2,
|
||||
|
@ -177,7 +180,7 @@ class AnyTransliterator extends Transliterator {
|
|||
if (isWide(targetScript)) {
|
||||
return null;
|
||||
} else {
|
||||
return WIDTH_FIX;
|
||||
return WidthFix.INSTANCE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -202,7 +205,7 @@ class AnyTransliterator extends Transliterator {
|
|||
if (t != null) {
|
||||
if (!isWide(targetScript)) {
|
||||
List<Transliterator> v = new ArrayList<Transliterator>();
|
||||
v.add(WIDTH_FIX);
|
||||
v.add(WidthFix.INSTANCE);
|
||||
v.add(t);
|
||||
t = new CompoundTransliterator(v);
|
||||
}
|
||||
|
@ -211,7 +214,7 @@ class AnyTransliterator extends Transliterator {
|
|||
t = prevCachedT;
|
||||
}
|
||||
} else if (!isWide(targetScript)) {
|
||||
return WIDTH_FIX;
|
||||
return WidthFix.INSTANCE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -409,7 +412,7 @@ class AnyTransliterator extends Transliterator {
|
|||
if (filter != null && filter instanceof UnicodeSet) {
|
||||
filter = new UnicodeSet((UnicodeSet)filter);
|
||||
}
|
||||
return new AnyTransliterator(getID(), filter, target, targetScript, WIDTH_FIX, cache);
|
||||
return new AnyTransliterator(getID(), filter, target, targetScript, null, cache);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
|
|
Loading…
Add table
Reference in a new issue