ICU-474 fix UTransPosition handling

X-SVN-Rev: 1688
This commit is contained in:
Alan Liu 2000-06-29 00:18:43 +00:00
parent d7b44985af
commit eff9454c76
5 changed files with 37 additions and 52 deletions

View file

@ -88,10 +88,6 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
* exzd| done
*/
int32_t contextStart = index.contextStart;
int32_t limit = index.limit;
int32_t cursor = index.start;
/* A rule like
* a>b|a
* creates an infinite loop. To prevent that, we put an arbitrary
@ -102,7 +98,7 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
* uint32_t.
*/
uint32_t loopCount = 0;
uint32_t loopLimit = limit - cursor;
uint32_t loopLimit = index.limit - index.start;
if (loopLimit >= 0x10000000) {
loopLimit = 0xFFFFFFFF;
} else {
@ -111,13 +107,11 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
UBool isPartial = FALSE;
while (cursor < limit && loopCount <= loopLimit) {
while (index.start < index.limit && loopCount <= loopLimit) {
TransliterationRule* r = isIncremental ?
data->ruleSet.findIncrementalMatch(text, contextStart, limit, cursor,
*data, isPartial,
data->ruleSet.findIncrementalMatch(text, index, *data, isPartial,
getFilter()) :
data->ruleSet.findMatch(text, contextStart, limit,
cursor, *data,
data->ruleSet.findMatch(text, index, *data,
getFilter());
/* If we match a rule then apply it by replacing the key
@ -132,17 +126,15 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
if (isPartial) { // always FALSE unless isIncremental
break;
} else {
++cursor;
++index.start;
}
} else {
// Delegate replacement to TransliterationRule object
limit += r->replace(text, cursor, *data);
cursor += r->getCursorPos();
int32_t lenDelta = r->replace(text, index.start, *data);
index.limit += lenDelta;
index.contextLimit += lenDelta;
index.start += r->getCursorPos();
++loopCount;
}
}
index.contextLimit += limit - index.limit;
index.limit = limit;
index.start = cursor;
}

View file

@ -297,13 +297,13 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
* <tt>null</tt> then no filtering is applied.
*/
UBool TransliterationRule::matches(const Replaceable& text,
int32_t start, int32_t limit,
int32_t cursor,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const {
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const {
// Match anteContext, key, and postContext
cursor -= anteContextLength;
if (cursor < start || (cursor + pattern.length()) > limit) {
int32_t cursor = pos.start - anteContextLength;
if (cursor < pos.contextStart ||
(cursor + pattern.length()) > pos.contextLimit) {
return FALSE;
}
for (int32_t i=0; i<pattern.length(); ++i, ++cursor) {
@ -341,12 +341,10 @@ UBool TransliterationRule::matches(const Replaceable& text,
* @see #FULL_MATCH
*/
int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
int32_t start, int32_t limit,
int32_t cursor,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const {
// NOTE(review): this listing carries two parameter lists and two
// declarations of `len` -- one using start/limit/cursor, one using a single
// UTransPosition. Presumably these are the before/after forms of the same
// change; only one of each can be live in the compiled file.
//
// Both calls ask getRegionMatchLength how much of `pattern` matches the text.
// The start/limit/cursor form backs the cursor up by anteContextLength
// explicitly; the UTransPosition form passes `pos` through unchanged.
int len = getRegionMatchLength(text, start, limit, cursor - anteContextLength,
pattern, data, filter);
int len = getRegionMatchLength(text, pos, pattern, data, filter);
// Classify by how far the match got:
//   len < anteContextLength  -> MISMATCH (this also catches a -1 result,
//                               assuming anteContextLength is non-negative)
//   len < pattern.length()   -> PARTIAL_MATCH (some, but not all, of the
//                               pattern matched)
//   otherwise                -> FULL_MATCH
return len < anteContextLength ? MISMATCH :
(len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
}
@ -375,16 +373,16 @@ int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
* match this rule.
*/
int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
int32_t start,
int32_t limit, int32_t cursor,
const UTransPosition& pos,
const UnicodeString& templ,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const {
if (cursor < start) {
int32_t cursor = pos.start - anteContextLength;
if (cursor < pos.contextStart) {
return -1;
}
int32_t i;
for (i=0; i<templ.length() && cursor<limit; ++i, ++cursor) {
for (i=0; i<templ.length() && cursor<pos.contextLimit; ++i, ++cursor) {
if (!charMatches(templ.charAt(i), text.charAt(cursor),
data, filter)) {
return -1;

View file

@ -9,6 +9,7 @@
#define RBT_RULE_H
#include "unicode/unistr.h"
#include "unicode/utrans.h"
class Replaceable;
class TransliterationRuleData;
@ -256,10 +257,9 @@ public:
* <tt>null</tt> then no filtering is applied.
*/
virtual UBool matches(const Replaceable& text,
int32_t start, int32_t limit,
int32_t cursor,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const;
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const;
/**
* Return the degree of match between this rule and the given text. The
@ -287,8 +287,7 @@ public:
* @see #FULL_MATCH
*/
virtual int32_t getMatchDegree(const Replaceable& text,
int32_t start, int32_t limit,
int32_t cursor,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const;
@ -315,8 +314,8 @@ public:
* match any characters, otherwise the number of characters of text that
* match this rule.
*/
virtual int32_t getRegionMatchLength(const Replaceable& text, int32_t start,
int32_t limit, int32_t cursor,
virtual int32_t getRegionMatchLength(const Replaceable& text,
const UTransPosition& pos,
const UnicodeString& templ,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const;

View file

@ -203,16 +203,15 @@ void TransliterationRuleSet::freeze(const TransliterationRuleData& data,
*/
TransliterationRule*
TransliterationRuleSet::findMatch(const Replaceable& text,
int32_t start, int32_t limit,
int32_t cursor,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const {
/* We only need to check our indexed bin of the rule table,
* based on the low byte of the first key character.
*/
int16_t x = text.charAt(cursor) & 0xFF;
int16_t x = text.charAt(pos.start) & 0xFF;
for (int32_t i=index[x]; i<index[x+1]; ++i) {
if (rules[i]->matches(text, start, limit, cursor, data, filter)) {
if (rules[i]->matches(text, pos, data, filter)) {
return rules[i];
}
}
@ -248,8 +247,7 @@ TransliterationRuleSet::findMatch(const Replaceable& text,
*/
TransliterationRule*
TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
int32_t start,
int32_t limit, int32_t cursor,
const UTransPosition& pos,
const TransliterationRuleData& data,
UBool& isPartial,
const UnicodeFilter* filter) const {
@ -258,10 +256,9 @@ TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
* based on the low byte of the first key character.
*/
isPartial = FALSE;
int16_t x = text.charAt(cursor) & 0xFF;
int16_t x = text.charAt(pos.start) & 0xFF;
for (int32_t i=index[x]; i<index[x+1]; ++i) {
int32_t match = rules[i]->getMatchDegree(text, start, limit, cursor,
data, filter);
int32_t match = rules[i]->getMatchDegree(text, pos, data, filter);
switch (match) {
case TransliterationRule::FULL_MATCH:
return rules[i];

View file

@ -9,6 +9,7 @@
#define RBT_SET_H
#include "uvector.h"
#include "unicode/utrans.h"
class Replaceable;
class TransliterationRule;
@ -110,8 +111,7 @@ public:
* @return the matching rule, or null if none found.
*/
virtual TransliterationRule* findMatch(const Replaceable& text,
int32_t start, int32_t limit,
int32_t cursor,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const;
@ -143,8 +143,7 @@ public:
* does not have enough text yet to unambiguously match a rule.
*/
virtual TransliterationRule* findIncrementalMatch(const Replaceable& text,
int32_t start,
int32_t limit, int32_t cursor,
const UTransPosition& pos,
const TransliterationRuleData& data,
UBool& isPartial,
const UnicodeFilter* filter) const;