ICU-1532 move STVtoID to TransliteratorIDParser

X-SVN-Rev: 8845
This commit is contained in:
Alan Liu 2002-06-11 22:36:11 +00:00
parent 0c224a31ef
commit c18249ca2b
6 changed files with 364 additions and 337 deletions

View file

@ -4,7 +4,7 @@
* and others. All Rights Reserved.
*****************************************************************
* $Source: /xsrl/Nsvn/icu/icu/source/i18n/anytrans.cpp,v $
* $Revision: 1.1 $
* $Revision: 1.2 $
*****************************************************************
* Date Name Description
* 06/06/2002 aliu Creation.
@ -12,195 +12,186 @@
*/
#include "anytrans.h"
#include "uvector.h"
#include "tridpars.h"
#include "hash.h"
#include "unicode/nultrans.h"
#include "unicode/uscript.h"
//------------------------------------------------------------
// Constants
static const UChar HYPHEN = 45; // '-'
static const UChar ANY[] = {65,110,121,45,0}; // "Any-"
static const UChar TARGET_SEP = 45; // '-'
static const UChar VARIANT_SEP = 47; // '/'
static const UChar ANY[] = {65,110,121,0}; // "Any"
static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null"
static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45,0}; // "-Latin;Latin-"
//------------------------------------------------------------
/**
* Deleter function for Transliterator*.
*/
static void _deleteTransliterator(void *obj) {
delete (Transliterator*) obj;
}
//------------------------------------------------------------
U_NAMESPACE_BEGIN
//------------------------------------------------------------
// ScriptRunIterator
/**
* Returns a series of ranges corresponding to scripts. They will be
* of the form:
*
* ccccSScSSccccTTcTcccc   - c = common, S = first script, T = second
* |            |          - first run (start, limit)
*          |           |  - second run (start, limit)
*
* That is, the runs will overlap. The reason for this is so that a
* transliterator can consider common characters both before and after
* the scripts.
*/
class ScriptRunIterator {
private:
// The text being scanned. Held by reference, not copied.
const Replaceable& text;
// Inclusive lower bound of the range being iterated; runs never extend
// below this index.
int32_t textStart;
// Exclusive upper bound of the range being iterated; shifted by
// adjustLimit() when the text changes length.
int32_t textLimit;
public:
/**
* The script of the current run, valid after next() returns TRUE.
* It is USCRIPT_INVALID_CODE when the run contains only
* COMMON/INHERITED characters.
*/
UScriptCode scriptCode;
/**
* The start of the run, inclusive, valid after next() returns.
*/
int32_t start;
/**
* The end of the run, exclusive, valid after next() returns. The
* next call to next() resumes scanning from this index.
*/
int32_t limit;
/**
* Constructs a run iterator over the given text from start
* (inclusive) to limit (exclusive). No run is available until
* next() has been called.
*/
ScriptRunIterator(const Replaceable& text, int32_t start, int32_t limit);
/**
* Advances to the next run. Returns FALSE when the previous run
* already ended at the text limit (this includes the first call on
* an empty range); otherwise returns TRUE, and the caller should
* examine scriptCode, start, and limit.
*/
UBool next();
/**
* Adjusts internal indices for a change in the limit index of the
* given delta. A positive delta means the limit has increased.
* Both the current run limit and the overall text limit are moved.
*/
void adjustLimit(int32_t delta);
};
/**
 * Sets up iteration over theText[start, limit).
 *
 * Every member is given a defined value here, so that scriptCode and
 * start do not hold indeterminate values if they are read before the
 * first call to next(). The initial state is an empty run positioned
 * at the beginning of the range with an unknown script; limit is set
 * to start because next() resumes scanning from limit.
 *
 * @param theText the text to scan; held by reference
 * @param start   inclusive start of the range to iterate
 * @param limit   exclusive end of the range to iterate
 */
ScriptRunIterator::ScriptRunIterator(const Replaceable& theText,
                                     int32_t start, int32_t limit) :
    text(theText),
    textStart(start),
    textLimit(limit),
    scriptCode(USCRIPT_INVALID_CODE),
    start(start),
    limit(start)
{
}
/**
 * Computes the next script run, storing it in scriptCode, start and
 * limit. Returns FALSE if the previous run already reached the end of
 * the text, TRUE otherwise.
 */
UBool ScriptRunIterator::next() {
    UErrorCode status = U_ZERO_ERROR;

    // The script is unknown until a non-neutral character is seen.
    scriptCode = USCRIPT_INVALID_CODE;

    // Resume where the previous run stopped.
    start = limit;
    if (start == textLimit) {
        return FALSE;
    }

    // Extend the run backwards over any COMMON or INHERITED
    // characters that immediately precede it.
    for (; start > textStart; --start) {
        UChar32 before = text.char32At(start - 1);
        UScriptCode beforeScript = uscript_getScript(before, &status);
        if (beforeScript != USCRIPT_COMMON && beforeScript != USCRIPT_INHERITED) {
            break;
        }
    }

    // Extend the run forwards over COMMON, INHERITED, and characters
    // of the first real script encountered; stop at a second script.
    for (; limit < textLimit; ++limit) {
        UChar32 ahead = text.char32At(limit);
        UScriptCode aheadScript = uscript_getScript(ahead, &status);
        if (aheadScript == USCRIPT_COMMON || aheadScript == USCRIPT_INHERITED) {
            continue;
        }
        if (scriptCode == USCRIPT_INVALID_CODE) {
            scriptCode = aheadScript;
        } else if (aheadScript != scriptCode) {
            break;
        }
    }

    // A run made up solely of COMMON / INHERITED characters is still
    // reported; scriptCode remains USCRIPT_INVALID_CODE in that case.
    return TRUE;
}
/**
 * Shifts both the overall text limit and the current run limit by
 * delta, to track a change in text length.
 */
void ScriptRunIterator::adjustLimit(int32_t delta) {
    textLimit += delta;
    limit += delta;
}
//------------------------------------------------------------
// AnyTransliterator
U_NAMESPACE_BEGIN
/**
 * Registry factory callback. Given an ID expected to look like
 * "Any-X", strips everything up to and including the first hyphen and
 * hands the remainder ("X") to createInstance(). An ID without a
 * hyphen is passed through whole.
 */
Transliterator* AnyTransliterator::_create(const UnicodeString& ID, Token /*context*/) {
    const int32_t sep = ID.indexOf(HYPHEN);
    if (sep < 0) {
        return AnyTransliterator::createInstance(ID, TRUE, TRUE);
    }
    UnicodeString target(ID);
    target.remove(0, sep + 1);
    return AnyTransliterator::createInstance(target, TRUE, TRUE);
}
/**
* Registers standard variants with the system. Called by
* Transliterator during initialization.
*/
void AnyTransliterator::registerIDs() {
Token t = integerToken(0);
// Register Any-Latin and make its inverse Null
Transliterator::_registerFactory("Any-Latin", _create, t);
Transliterator::_registerSpecialInverse("Latin", "Null", FALSE);
}
/**
 * Return the script code for a given name, or -1 if not found.
 *
 * The name is converted into a fixed 128-byte buffer. A name that
 * does not leave room for the terminating NUL is rejected up front;
 * otherwise extract() would fill the buffer without terminating it
 * and uscript_getCode() would read past its end.
 *
 * @param name a script name or abbreviation
 * @return the script code as an int32_t, or -1 if the name is too
 * long, is not recognized, or does not map to exactly one script
 */
int32_t AnyTransliterator::scriptNameToCode(const UnicodeString& name) {
    char buf[128];
    if (name.length() >= (int32_t) sizeof(buf)) {
        return -1;
    }
    UScriptCode code;
    UErrorCode ec = U_ZERO_ERROR;
    name.extract(0, 128, buf, 128, "");
    // Exactly one code must come back; anything else is "not found".
    if (uscript_getCode(buf, &code, 1, &ec) != 1 ||
        U_FAILURE(ec)) {
        code = (UScriptCode) -1;
    }
    return (int32_t) code;
}
/**
 * Factory method to create an Any-X transliterator. Relies on
 * registered transliterators at the time of the call to build the
 * Any-X transliterator. If there are no registered transliterators
 * of the form Y-X, then the logical result is Any-Null. If there is
 * exactly one transliterator of the form Y-X, then the logical result
 * is Y-X, a degenerate result. If there are 2 or more
 * transliterators of the form Y-X, then an AnyTransliterator is
 * instantiated and returned.
 * @param toTarget the target name X
 * @param allowNull if true, then return Any-Null if there is no
 * transliterator to the given target; otherwise return NULL
 * @param allowDegenerate if true, then return the single Y-X
 * transliterator if there is only one to the given target; otherwise
 * return NULL
 * @return a new transliterator owned by the caller, or NULL
 */
Transliterator* AnyTransliterator::createInstance(const UnicodeString& toTarget,
                                                  UBool allowNull,
                                                  UBool allowDegenerate) {
    UErrorCode ec = U_ZERO_ERROR;
    UVector translits(ec);
    if (U_FAILURE(ec)) {
        return NULL;
    }

    // Collect transliterators _to_ the given target. This is
    // inconvenient since we have to iterate over all sources.
    int32_t sourceCount = Transliterator::countAvailableSources();
    for (int32_t s=0; s<sourceCount; ++s) {
        UnicodeString source;
        Transliterator::getAvailableSource(s, source);

        int32_t targetCount = Transliterator::countAvailableTargets(source);
        for (int32_t t=0; t<targetCount; ++t) {
            UnicodeString target;
            Transliterator::getAvailableTarget(t, source, target);
            if (target.caseCompare(toTarget, 0 /*U_FOLD_CASE_DEFAULT*/) != 0) {
                continue;
            }

            // The target matches. The source must also be a script
            // or we can't use it.
            int32_t code = scriptNameToCode(source);
            if (code < 0) {
                continue;
            }

            // Try to instantiate the given transliterator. Reset the
            // error code first so that one failed candidate does not
            // cause every later candidate to be rejected as well.
            UnicodeString id(source);
            id.append(HYPHEN).append(toTarget);
            ec = U_ZERO_ERROR;
            Transliterator* translit = Transliterator::createInstance(
                id, UTRANS_FORWARD, ec);
            if (U_FAILURE(ec) || translit == NULL) {
                delete translit;
                continue;
            }

            // We have a script code and a transliterator; save
            // them. The vector has no deleter, so clean up here if
            // the pair cannot be stored.
            Elem* elem = new Elem((UScriptCode) code, translit);
            if (elem == NULL) {
                delete translit;
                continue;
            }
            translits.addElement(elem, ec);
            if (U_FAILURE(ec)) {
                delete elem;
                delete translit;
            }
        }
    }

    switch (translits.size()) {
    case 0:
        // There is nothing registered going to the requested target,
        // so return Any-Null, if allowed
        return allowNull ? new NullTransliterator() : NULL;
    case 1:
        // Exactly one transliterator goes to the requested target, so
        // return it, if allowed. Either way the element is taken out
        // of the vector so that nothing is leaked.
        {
            Elem* e = (Elem*) translits.orphanElementAt(0);
            Transliterator* only = e->translit;
            delete e;
            if (!allowDegenerate) {
                delete only;
                only = NULL;
            }
            return only;
        }
    }

    // We have 2 or more script-toTarget transliterators. Assemble an
    // AnyTransliterator and return it.
    UnicodeString id(ANY);
    id.append(toTarget);
    return new AnyTransliterator(id, translits);
}
//|/**
//| * Factory method to create an Any-X transliterator. Convenience
//| * function that takes a script code.
//| */
//|Transliterator* AnyTransliterator::createInstance(UScriptCode target,
//| UBool allowNull,
//| UBool allowDegenerate) {
//| UnicodeString name(uscript_getName(target), "");
//| return createInstance(name, allowNull, allowDegenerate);
//|}
/**
* Constructs a transliterator with the given ID. The vector should
* contain Elem objects. Each will be removed from the vector and
* ownership taken of its storage, including the contained
* transliterator. Upon return the vector will be empty.
*/
AnyTransliterator::AnyTransliterator(const UnicodeString& id, UVector& vec) :
Transliterator(id, NULL)
AnyTransliterator::AnyTransliterator(const UnicodeString& id,
const UnicodeString& theTarget,
const UnicodeString& theVariant,
UScriptCode theTargetScript,
UErrorCode& ec) :
Transliterator(id, NULL),
targetScript(theTargetScript)
{
count = vec.size();
elems = new Elem[count];
for (int32_t i=count-1; i>=0; --i) {
Elem* e = (Elem*) vec.orphanElementAt(i);
elems[i] = *e;
delete e;
cache = uhash_open(uhash_hashLong, uhash_compareLong, &ec);
uhash_setValueDeleter(cache, _deleteTransliterator);
target = theTarget;
if (theVariant.length() > 0) {
target.append(VARIANT_SEP).append(theVariant);
}
}
AnyTransliterator::~AnyTransliterator() {
for (int32_t i=0; i<count; ++i) {
delete elems[i].translit;
}
delete[] elems;
uhash_close(cache);
}
/**
* Copy constructor.
*/
AnyTransliterator::AnyTransliterator(const AnyTransliterator& o) :
Transliterator(o)
Transliterator(o),
target(o.target),
targetScript(o.targetScript)
{
count = o.count;
elems = new Elem[count];
for (int32_t i=0; i<count; ++i) {
elems[i] = o.elems[i];
elems[i].translit = elems[i].translit->clone();
}
// Don't copy the cache contents
UErrorCode ec = U_ZERO_ERROR;
cache = uhash_open(uhash_hashLong, uhash_compareLong, &ec);
uhash_setValueDeleter(cache, _deleteTransliterator);
}
/**
@ -215,85 +206,151 @@ Transliterator* AnyTransliterator::clone() const {
*/
void AnyTransliterator::handleTransliterate(Replaceable& text, UTransPosition& pos,
UBool isIncremental) const {
int32_t allStart = pos.start;
int32_t allLimit = pos.limit;
// Compute indices relative to contextStart
int32_t start = pos.start - pos.contextStart;
int32_t limit = pos.limit - pos.contextStart;
int32_t contextLimit = pos.contextLimit - pos.contextStart;
ScriptRunIterator it(text, pos.contextStart, pos.contextLimit);
if (start == limit) return; // Short circuit
while (it.next()) {
// Ignore runs in the ante context
if (it.limit <= allStart) continue;
// Extract contextStart..contextLimit
UnicodeString ustext;
text.extractBetween(pos.contextStart, pos.contextLimit, ustext);
// Try to instantiate transliterator from it.scriptCode to
// our target or target/variant
Transliterator* t = getTransliterator(it.scriptCode);
if (t == NULL) {
// We have no transliterator. Do nothing, but keep
// pos.start up to date.
pos.start = it.limit;
continue;
}
// Work directly on the buffer. We don't need to release the
// buffer since the UnicodeString is automatic scope.
UChar* utext = ustext.getBuffer(-1);
// If the run end is before the transliteration limit, do
// a non-incremental transliteration. Otherwise do an
// incremental one.
UBool incremental = isIncremental && (it.limit >= allLimit);
pos.start = uprv_max(allStart, it.start);
pos.limit = uprv_min(allLimit, it.limit);
int32_t limit = pos.limit;
t->filteredTransliterate(text, pos, incremental);
int32_t delta = pos.limit - limit;
allLimit += delta;
it.adjustLimit(delta);
UErrorCode ec = U_ZERO_ERROR;
UScriptRun* run = uscript_openRun(utext, contextLimit, &ec);
if (U_FAILURE(ec)) {
pos.start = pos.limit; // we're done
uscript_closeRun(run);
return;
// We're done if we enter the post context
if (it.limit >= allLimit) break;
}
int32_t origLimit = pos.limit; // save original limit
int32_t delta = 0; // cumulative change in length
// Restore limit. pos.start is fine where the last transliterator
// left it, or at the end of the last run.
pos.limit = allLimit;
}
// Iterate over runs
int32_t runStart, runLimit;
UScriptCode runScript;
Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {
// We're done if we've entered the post context or when there are
// no more script runs (which should only happen when we call
// nextRun _after_ runLimit has been returned at contextLimit).
runLimit = 0;
while (runLimit < limit &&
uscript_nextRun(run, &runStart, &runLimit, &runScript)) {
if (source == targetScript || source == USCRIPT_INVALID_CODE) {
return NULL;
}
// Do nothing if we're still in the ante context
if (runLimit <= start) continue;
// See if we have a transliterator for this run
Transliterator* t = NULL;
for (int32_t i=0; i<count; ++i) {
if (elems[i].script == runScript) {
t = elems[i].translit;
break;
Transliterator* t = (Transliterator*) uhash_iget(cache, (int32_t) source);
if (t == NULL) {
UErrorCode ec = U_ZERO_ERROR;
UnicodeString sourceName(uscript_getName(source), "");
UnicodeString id(sourceName);
id.append(TARGET_SEP).append(target);
t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
if (U_FAILURE(ec) || t == NULL) {
delete t;
// Try to pivot around Latin, our most common script
id = sourceName;
id.append(LATIN_PIVOT).append(target);
t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
if (U_FAILURE(ec) || t == NULL) {
delete t;
t = NULL;
}
}
// Transliterate max(start, runStart) to min(limit, runLimit).
// Adjust indices to text-relative ones
pos.start = uprv_max(start, runStart) + pos.contextStart + delta;
pos.limit = uprv_min(limit, runLimit) + pos.contextStart + delta;
// If we don't have a transliterator for this script, then
// leave the text unchanged.
if (t == NULL) {
pos.start = pos.limit;
}
else {
// If the run end is before the transliteration limit, do
// a non-incremental transliteration. Otherwise do an
// incremental one.
UBool incremental = isIncremental && (runLimit >= limit);
// Transliterate and record change in length
int32_t l = pos.limit;
t->filteredTransliterate(text, pos, incremental);
delta += pos.limit - l;
if (t != NULL) {
uhash_iput(cache, (int32_t) source, t, &ec);
}
}
uscript_closeRun(run);
return t;
}
// pos.start can stay where the last transliterator left it. pos.limit
// needs to be adjusted for changes in length.
pos.limit = origLimit + delta;
/**
 * Return the script code for a given name, or USCRIPT_INVALID_CODE
 * if not found.
 *
 * The name is converted into a fixed 128-byte buffer. A name that
 * does not leave room for the terminating NUL is rejected up front;
 * otherwise extract() would fill the buffer without terminating it
 * and uscript_getCode() would read past its end.
 *
 * @param name a script name or abbreviation
 * @return the script code, or USCRIPT_INVALID_CODE if the name is too
 * long, is not recognized, or does not map to exactly one script
 */
UScriptCode AnyTransliterator::scriptNameToCode(const UnicodeString& name) {
    char buf[128];
    if (name.length() >= (int32_t) sizeof(buf)) {
        return USCRIPT_INVALID_CODE;
    }
    UScriptCode code;
    UErrorCode ec = U_ZERO_ERROR;
    name.extract(0, 128, buf, 128, "");
    // Exactly one code must come back; anything else is "not found".
    if (uscript_getCode(buf, &code, 1, &ec) != 1 ||
        U_FAILURE(ec)) {
        code = USCRIPT_INVALID_CODE;
    }
    return code;
}
/**
* Registers standard transliterators with the system. Called by
* Transliterator during initialization. Scan all current targets and
* register those that are scripts T as Any-T/V.
*/
void AnyTransliterator::registerIDs() {
UErrorCode ec;
Hashtable seen(TRUE);
int32_t sourceCount = Transliterator::countAvailableSources();
for (int32_t s=0; s<sourceCount; ++s) {
UnicodeString source;
Transliterator::getAvailableSource(s, source);
// Ignore the "Any" source
if (source.caseCompare(ANY, 0 /*U_FOLD_CASE_DEFAULT*/) == 0) continue;
int32_t targetCount = Transliterator::countAvailableTargets(source);
for (int32_t t=0; t<targetCount; ++t) {
UnicodeString target;
Transliterator::getAvailableTarget(t, source, target);
// Only process each target once
if (seen.geti(target) != 0) continue;
ec = U_ZERO_ERROR;
seen.puti(target, 1, ec);
// Get the script code for the target. If not a script, ignore.
UScriptCode targetScript = scriptNameToCode(target);
if (targetScript == USCRIPT_INVALID_CODE) continue;
int32_t variantCount = Transliterator::countAvailableVariants(source, target);
// assert(variantCount >= 1);
for (int32_t v=0; v<variantCount; ++v) {
UnicodeString variant;
Transliterator::getAvailableVariant(v, source, target, variant);
UnicodeString id;
TransliteratorIDParser::STVtoID(ANY, target, variant, id);
ec = U_ZERO_ERROR;
AnyTransliterator* t = new AnyTransliterator(id, target, variant,
targetScript, ec);
if (U_FAILURE(ec)) {
delete t;
} else {
Transliterator::_registerInstance(t);
Transliterator::_registerSpecialInverse(target, NULL_ID, FALSE);
}
}
}
}
}
U_NAMESPACE_END

View file

@ -4,7 +4,7 @@
* and others. All Rights Reserved.
*****************************************************************
* $Source: /xsrl/Nsvn/icu/icu/source/i18n/anytrans.h,v $
* $Revision: 1.1 $
* $Revision: 1.2 $
*****************************************************************
* Date Name Description
* 06/06/2002 aliu Creation.
@ -15,94 +15,40 @@
#include "unicode/translit.h"
#include "unicode/uscript.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
/**
* A transliterator named Any-X, where X is the target, that contains
* multiple transliterators, all going to X, all with script sources.
* The target need not be a script. It uses the script run API
* (uscript.h) to partition text into runs of the same script, and
* then based on the script of each run, transliterates from that
* script to the given target.
*
* <p>For example, "Any-Latin" might contain two transliterators,
* "Greek-Latin" and "Hiragana-Latin". It would then transliterate
* runs of Greek with Greek-Latin, runs of Hiragana with
* Hiragana-Latin, and pass other runs through unchanged.
*
* <p>There is no inverse of an Any-X transliterator. Although it
* would be possible to tag the output text with script markers to
* make inversion possible, this is not currently implemented.
* A transliterator named Any-T or Any-T/V, where T is the target
* script and V is the optional variant, that uses multiple
* transliterators, all going to T or T/V, all with script sources.
* The target must be a script. It partitions text into runs of the
* same script, and then based on the script of each run,
* transliterates from that script to the given target or
* target/variant. Adjacent COMMON or INHERITED script characters are
* included in each run.
*
* @author Alan Liu
*/
class U_I18N_API AnyTransliterator : public Transliterator {
/**
* A script code and associated transliterator. It does _not_ own
* the transliterator.
* Cache mapping UScriptCode values to Transliterator*.
*/
class Elem {
public:
UScriptCode script;
Transliterator* translit;
Elem(UScriptCode s=(UScriptCode)0, Transliterator* t=NULL) {
script = s;
translit = t;
}
Elem& operator=(const Elem& o) {
script = o.script;
translit = o.translit;
return *this;
}
};
UHashtable* cache;
/**
* Array of script codes and associated transliterators. We
* own the transliterators.
* The target or target/variant string.
*/
Elem* elems;
UnicodeString target;
/**
* Length of elems, always at least 2.
* The target script code. Never USCRIPT_INVALID_CODE.
*/
int32_t count;
UScriptCode targetScript;
public:
/**
* Factory method to create an Any-X transliterator. Relies on
* registered transliterators at the time of the call to build the
* Any-X transliterator. If there are no registered transliterators
* of the form Y-X, then the logical result is Any-Null. If there is
* exactly one transliterator of the form Y-X, then the logical result
* is Y-X, a degenerate result. If there are 2 or more
* transliterators of the form Y-X, then an AnyTransliterator is
* instantiated and returned.
* @param target the target, which need not be a script. This must
* be a string such as "Latin", <em>not</em> "Any-Latin".
* @param allowNull if true, then return Any-Null if there are no
* transliterator to the given script; otherwise return NULL
* @param allowDegenerate if true, then return a transliterator of the
* form X-Y if there is only one such transliterator
* the given script; otherwise return NULL
* @return a new Transliterator, or NULL. If allowNull or
* allowDegenerate is TRUE, the result may not be an
* AnyTransliterator. If they are both false, the result will be
* an AnyTransliterator.
*/
static Transliterator* createInstance(const UnicodeString& target,
UBool allowNull,
UBool allowDegenerate);
//| /**
//| * Factory method to create an Any-X transliterator. Convenience
//| * function that takes a script code.
//| */
//| static Transliterator* createInstance(UScriptCode target,
//| UBool allowNull,
//| UBool allowDegenerate);
/**
* Destructor.
@ -128,18 +74,36 @@ public:
private:
/**
* Private constructor for Transliterator.
* Private constructor
* @param id the ID of the form S-T or S-T/V, where T is theTarget
* and V is theVariant. Must not be empty.
* @param theTarget the target name. Must not be empty, and must
* name a script corresponding to theTargetScript.
* @param theVariant the variant name, or the empty string if
* there is no variant
* @param theTargetScript the script code corresponding to
* theTarget.
* @param ec error code, fails if the internal hashtable cannot be
* allocated
*/
AnyTransliterator(const UnicodeString& id, UVector& vec);
AnyTransliterator(const UnicodeString& id,
const UnicodeString& theTarget,
const UnicodeString& theVariant,
UScriptCode theTargetScript,
UErrorCode& ec);
/**
* Try to create a transliterator with the given ID, which should
* be of the form "Any-X".
* Returns a transliterator from the given source to our target or
* target/variant. Returns NULL if the source is the same as our
* target script, or if the source is USCRIPT_INVALID_CODE.
* Caches the result and returns the same transliterator the next
* time. The caller does NOT own the result and must not delete
* it.
*/
static Transliterator* _create(const UnicodeString& ID, Token /*context*/);
Transliterator* getTransliterator(UScriptCode source) const;
/**
* Registers standard variants with the system. Called by
* Registers standard transliterators with the system. Called by
* Transliterator during initialization.
*/
static void registerIDs();
@ -147,9 +111,10 @@ private:
friend class Transliterator; // for registerIDs()
/**
* Return the script code for a given name, or -1 if not found.
* Return the script code for a given name, or
* USCRIPT_INVALID_CODE if not found.
*/
static int32_t scriptNameToCode(const UnicodeString& name);
static UScriptCode scriptNameToCode(const UnicodeString& name);
};
U_NAMESPACE_END

View file

@ -465,7 +465,7 @@ void TransliteratorRegistry::remove(const UnicodeString& ID) {
TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
// Only need to do this if ID.indexOf('-') < 0
UnicodeString id;
STVtoID(source, target, variant, id);
TransliteratorIDParser::STVtoID(source, target, variant, id);
registry.remove(id);
removeSTV(source, target, variant);
availableIDs.removeElement((void*) &id);
@ -585,25 +585,6 @@ UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
// class TransliteratorRegistry: internal
//----------------------------------------------------------------------
/**
 * Builds a full ID of the form s-t or s-t/v from its parts. An empty
 * source is replaced by "Any"; the variant, with its separator, is
 * appended only when it is non-empty.
 */
void TransliteratorRegistry::STVtoID(const UnicodeString& source,
                                     const UnicodeString& target,
                                     const UnicodeString& variant,
                                     UnicodeString& id) {
    id = source;
    if (id.length() == 0) {
        id = ANY;
    }
    id.append(ID_SEP);
    id.append(target);
    if (variant.length() == 0) {
        return;
    }
    id.append(VARIANT_SEP);
    id.append(variant);
}
/**
* Convenience method. Calls 6-arg registerEntry().
*/
@ -617,7 +598,7 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& source,
if (s.length() == 0) {
s = ANY;
}
STVtoID(source, target, variant, ID);
TransliteratorIDParser::STVtoID(source, target, variant, ID);
registerEntry(ID, s, target, variant, adopted, visible);
}
@ -632,7 +613,7 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
// Only need to do this if ID.indexOf('-') < 0
UnicodeString id;
STVtoID(source, target, variant, id);
TransliteratorIDParser::STVtoID(source, target, variant, id);
registerEntry(id, source, target, variant, adopted, visible);
}
@ -737,7 +718,7 @@ Entry* TransliteratorRegistry::findInDynamicStore(const Spec& src,
const Spec& trg,
const UnicodeString& variant) {
UnicodeString ID;
STVtoID(src, trg, variant, ID);
TransliteratorIDParser::STVtoID(src, trg, variant, ID);
Entry *e = (Entry*) registry.get(ID);
DEBUG_useEntry(e);
return e;

View file

@ -307,11 +307,6 @@ class TransliteratorRegistry {
UParseError& parseError,
UErrorCode& status);
static void STVtoID(const UnicodeString& source,
const UnicodeString& target,
const UnicodeString& variant,
UnicodeString& id);
private:
/**

View file

@ -567,6 +567,25 @@ void TransliteratorIDParser::IDtoSTV(const UnicodeString& id,
}
}
/**
 * Builds a full ID of the form s-t or s-t/v from its parts. An empty
 * source is replaced by "Any"; the variant, with its separator, is
 * appended only when it is non-empty.
 */
void TransliteratorIDParser::STVtoID(const UnicodeString& source,
                                     const UnicodeString& target,
                                     const UnicodeString& variant,
                                     UnicodeString& id) {
    id = source;
    if (id.length() == 0) {
        id = ANY;
    }
    id.append(TARGET_SEP);
    id.append(target);
    if (variant.length() == 0) {
        return;
    }
    id.append(VARIANT_SEP);
    id.append(variant);
}
/**
* Register two targets as being inverses of one another. For
* example, calling registerSpecialInverse("NFC", "NFD", TRUE) causes

View file

@ -222,6 +222,16 @@ class TransliteratorIDParser {
UnicodeString& variant,
UBool& isSourcePresent);
/**
* Given source, target, and variant strings, concatenate them into a
* full ID. If the source is empty, then "Any" will be used for the
* source, so the ID will always be of the form s-t/v or s-t.
*/
static void STVtoID(const UnicodeString& source,
const UnicodeString& target,
const UnicodeString& variant,
UnicodeString& id);
/**
* Register two targets as being inverses of one another. For
* example, calling registerSpecialInverse("NFC", "NFD", true) causes