mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 14:31:31 +00:00
ICU-474 fix UTransPosition handling
X-SVN-Rev: 1688
This commit is contained in:
parent
d7b44985af
commit
eff9454c76
5 changed files with 37 additions and 52 deletions
|
@ -88,10 +88,6 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
|
|||
* exzd| done
|
||||
*/
|
||||
|
||||
int32_t contextStart = index.contextStart;
|
||||
int32_t limit = index.limit;
|
||||
int32_t cursor = index.start;
|
||||
|
||||
/* A rule like
|
||||
* a>b|a
|
||||
* creates an infinite loop. To prevent that, we put an arbitrary
|
||||
|
@ -102,7 +98,7 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
|
|||
* uint32_t.
|
||||
*/
|
||||
uint32_t loopCount = 0;
|
||||
uint32_t loopLimit = limit - cursor;
|
||||
uint32_t loopLimit = index.limit - index.start;
|
||||
if (loopLimit >= 0x10000000) {
|
||||
loopLimit = 0xFFFFFFFF;
|
||||
} else {
|
||||
|
@ -111,13 +107,11 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
|
|||
|
||||
UBool isPartial = FALSE;
|
||||
|
||||
while (cursor < limit && loopCount <= loopLimit) {
|
||||
while (index.start < index.limit && loopCount <= loopLimit) {
|
||||
TransliterationRule* r = isIncremental ?
|
||||
data->ruleSet.findIncrementalMatch(text, contextStart, limit, cursor,
|
||||
*data, isPartial,
|
||||
data->ruleSet.findIncrementalMatch(text, index, *data, isPartial,
|
||||
getFilter()) :
|
||||
data->ruleSet.findMatch(text, contextStart, limit,
|
||||
cursor, *data,
|
||||
data->ruleSet.findMatch(text, index, *data,
|
||||
getFilter());
|
||||
|
||||
/* If we match a rule then apply it by replacing the key
|
||||
|
@ -132,17 +126,15 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
|
|||
if (isPartial) { // always FALSE unless isIncremental
|
||||
break;
|
||||
} else {
|
||||
++cursor;
|
||||
++index.start;
|
||||
}
|
||||
} else {
|
||||
// Delegate replacement to TransliterationRule object
|
||||
limit += r->replace(text, cursor, *data);
|
||||
cursor += r->getCursorPos();
|
||||
int32_t lenDelta = r->replace(text, index.start, *data);
|
||||
index.limit += lenDelta;
|
||||
index.contextLimit += lenDelta;
|
||||
index.start += r->getCursorPos();
|
||||
++loopCount;
|
||||
}
|
||||
}
|
||||
|
||||
index.contextLimit += limit - index.limit;
|
||||
index.limit = limit;
|
||||
index.start = cursor;
|
||||
}
|
||||
|
|
|
@ -297,13 +297,13 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
|
|||
* <tt>null</tt> then no filtering is applied.
|
||||
*/
|
||||
UBool TransliterationRule::matches(const Replaceable& text,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t cursor,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const {
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const {
|
||||
// Match anteContext, key, and postContext
|
||||
cursor -= anteContextLength;
|
||||
if (cursor < start || (cursor + pattern.length()) > limit) {
|
||||
int32_t cursor = pos.start - anteContextLength;
|
||||
if (cursor < pos.contextStart ||
|
||||
(cursor + pattern.length()) > pos.contextLimit) {
|
||||
return FALSE;
|
||||
}
|
||||
for (int32_t i=0; i<pattern.length(); ++i, ++cursor) {
|
||||
|
@ -341,12 +341,10 @@ UBool TransliterationRule::matches(const Replaceable& text,
|
|||
* @see #FULL_MATCH
|
||||
*/
|
||||
int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t cursor,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const {
|
||||
int len = getRegionMatchLength(text, start, limit, cursor - anteContextLength,
|
||||
pattern, data, filter);
|
||||
int len = getRegionMatchLength(text, pos, pattern, data, filter);
|
||||
return len < anteContextLength ? MISMATCH :
|
||||
(len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
|
||||
}
|
||||
|
@ -375,16 +373,16 @@ int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
|
|||
* match this rule.
|
||||
*/
|
||||
int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
|
||||
int32_t start,
|
||||
int32_t limit, int32_t cursor,
|
||||
const UTransPosition& pos,
|
||||
const UnicodeString& templ,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const {
|
||||
if (cursor < start) {
|
||||
int32_t cursor = pos.start - anteContextLength;
|
||||
if (cursor < pos.contextStart) {
|
||||
return -1;
|
||||
}
|
||||
int32_t i;
|
||||
for (i=0; i<templ.length() && cursor<limit; ++i, ++cursor) {
|
||||
for (i=0; i<templ.length() && cursor<pos.contextLimit; ++i, ++cursor) {
|
||||
if (!charMatches(templ.charAt(i), text.charAt(cursor),
|
||||
data, filter)) {
|
||||
return -1;
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#define RBT_RULE_H
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/utrans.h"
|
||||
|
||||
class Replaceable;
|
||||
class TransliterationRuleData;
|
||||
|
@ -256,10 +257,9 @@ public:
|
|||
* <tt>null</tt> then no filtering is applied.
|
||||
*/
|
||||
virtual UBool matches(const Replaceable& text,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t cursor,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const;
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const;
|
||||
|
||||
/**
|
||||
* Return the degree of match between this rule and the given text. The
|
||||
|
@ -287,8 +287,7 @@ public:
|
|||
* @see #FULL_MATCH
|
||||
*/
|
||||
virtual int32_t getMatchDegree(const Replaceable& text,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t cursor,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const;
|
||||
|
||||
|
@ -315,8 +314,8 @@ public:
|
|||
* match any characters, otherwise the number of characters of text that
|
||||
* match this rule.
|
||||
*/
|
||||
virtual int32_t getRegionMatchLength(const Replaceable& text, int32_t start,
|
||||
int32_t limit, int32_t cursor,
|
||||
virtual int32_t getRegionMatchLength(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const UnicodeString& templ,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const;
|
||||
|
|
|
@ -203,16 +203,15 @@ void TransliterationRuleSet::freeze(const TransliterationRuleData& data,
|
|||
*/
|
||||
TransliterationRule*
|
||||
TransliterationRuleSet::findMatch(const Replaceable& text,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t cursor,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const {
|
||||
/* We only need to check our indexed bin of the rule table,
|
||||
* based on the low byte of the first key character.
|
||||
*/
|
||||
int16_t x = text.charAt(cursor) & 0xFF;
|
||||
int16_t x = text.charAt(pos.start) & 0xFF;
|
||||
for (int32_t i=index[x]; i<index[x+1]; ++i) {
|
||||
if (rules[i]->matches(text, start, limit, cursor, data, filter)) {
|
||||
if (rules[i]->matches(text, pos, data, filter)) {
|
||||
return rules[i];
|
||||
}
|
||||
}
|
||||
|
@ -248,8 +247,7 @@ TransliterationRuleSet::findMatch(const Replaceable& text,
|
|||
*/
|
||||
TransliterationRule*
|
||||
TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
|
||||
int32_t start,
|
||||
int32_t limit, int32_t cursor,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
UBool& isPartial,
|
||||
const UnicodeFilter* filter) const {
|
||||
|
@ -258,10 +256,9 @@ TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
|
|||
* based on the low byte of the first key character.
|
||||
*/
|
||||
isPartial = FALSE;
|
||||
int16_t x = text.charAt(cursor) & 0xFF;
|
||||
int16_t x = text.charAt(pos.start) & 0xFF;
|
||||
for (int32_t i=index[x]; i<index[x+1]; ++i) {
|
||||
int32_t match = rules[i]->getMatchDegree(text, start, limit, cursor,
|
||||
data, filter);
|
||||
int32_t match = rules[i]->getMatchDegree(text, pos, data, filter);
|
||||
switch (match) {
|
||||
case TransliterationRule::FULL_MATCH:
|
||||
return rules[i];
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#define RBT_SET_H
|
||||
|
||||
#include "uvector.h"
|
||||
#include "unicode/utrans.h"
|
||||
|
||||
class Replaceable;
|
||||
class TransliterationRule;
|
||||
|
@ -110,8 +111,7 @@ public:
|
|||
* @return the matching rule, or null if none found.
|
||||
*/
|
||||
virtual TransliterationRule* findMatch(const Replaceable& text,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t cursor,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const;
|
||||
|
||||
|
@ -143,8 +143,7 @@ public:
|
|||
* does not have enough text yet to unambiguously match a rule.
|
||||
*/
|
||||
virtual TransliterationRule* findIncrementalMatch(const Replaceable& text,
|
||||
int32_t start,
|
||||
int32_t limit, int32_t cursor,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
UBool& isPartial,
|
||||
const UnicodeFilter* filter) const;
|
||||
|
|
Loading…
Add table
Reference in a new issue