mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 23:10:40 +00:00
ICU-1261 initial implementation of compound filters in IDs and ::ID blocks
X-SVN-Rev: 6154
This commit is contained in:
parent
c968b1ea77
commit
267a914bc3
7 changed files with 413 additions and 302 deletions
|
@ -10,6 +10,7 @@
|
|||
#include "unicode/cpdtrans.h"
|
||||
#include "unicode/unifilt.h"
|
||||
#include "unicode/unifltlg.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "uvector.h"
|
||||
|
||||
// keep in sync with Transliterator
|
||||
|
@ -129,13 +130,18 @@ void CompoundTransliterator::init(const UnicodeString& id,
|
|||
}
|
||||
|
||||
UVector list(status);
|
||||
UnicodeSet* compoundFilter = NULL;
|
||||
UnicodeString regenID;
|
||||
Transliterator::parseCompoundID(id, regenID, direction,
|
||||
idSplitPoint, adoptedSplitTrans,
|
||||
list, compoundRBTIndex,
|
||||
list, compoundRBTIndex, compoundFilter,
|
||||
parseError, status);
|
||||
|
||||
init(list, direction, fixReverseID, status);
|
||||
|
||||
if (compoundFilter != NULL) {
|
||||
adoptFilter(compoundFilter);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -23,12 +23,24 @@ void RuleBasedTransliterator::_construct(const UnicodeString& rules,
|
|||
UErrorCode& status) {
|
||||
data = 0;
|
||||
isDataOwned = TRUE;
|
||||
if (U_SUCCESS(status)) {
|
||||
data = TransliteratorParser::parse(rules, direction, parseError,status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setMaximumContextLength(data->ruleSet.getMaximumContextLength());
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TransliteratorParser parser;
|
||||
parser.parse(rules, direction, parseError, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (parser.idBlock.length() != 0 ||
|
||||
parser.compoundFilter != NULL) {
|
||||
status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
|
||||
return;
|
||||
}
|
||||
|
||||
data = parser.orphanData();
|
||||
setMaximumContextLength(data->ruleSet.getMaximumContextLength());
|
||||
}
|
||||
|
||||
RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "strmatch.h"
|
||||
#include "symtable.h"
|
||||
#include "unirange.h"
|
||||
#include "uvector.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/parsepos.h"
|
||||
#include "unicode/putil.h"
|
||||
|
@ -795,89 +796,63 @@ int32_t* RuleHalf::createSegments(UErrorCode& status) const {
|
|||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// END RuleHalf
|
||||
// PUBLIC API
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
TransliterationRuleData*
|
||||
TransliteratorParser::parse(const UnicodeString& rules,
|
||||
UTransDirection direction,
|
||||
UParseError& parseError,
|
||||
UErrorCode& ec) {
|
||||
TransliteratorParser parser(rules, direction, parseError);
|
||||
UnicodeString idBlock;
|
||||
int32_t idSplitPoint, count;
|
||||
parser.parseRules(idBlock, idSplitPoint, count);
|
||||
if (U_FAILURE(parser.status) || idBlock.length() != 0) {
|
||||
delete parser.data;
|
||||
parser.data = 0;
|
||||
ec = U_FAILURE(parser.status) ? parser.status : U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
return parser.data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a given set of rules. Return up to three pieces of
|
||||
* parsed data. These are the header ::id block, the rule block,
|
||||
* and the footer ::id block. Any or all of these may be empty.
|
||||
* If the ::id blocks are empty, their corresponding parameters
|
||||
* are returned as the empty string. If there are no rules, the
|
||||
* TransliterationRuleData result is 0.
|
||||
* @param ruleDataResult caller owns the pointer stored here.
|
||||
* May be NULL.
|
||||
* @param headerRule string including semicolons for the header
|
||||
* ::id block. May be empty.
|
||||
* @param footerRule string including semicolons for the footer
|
||||
* ::id block. May be empty.
|
||||
* Constructor.
|
||||
*/
|
||||
void TransliteratorParser::parse(const UnicodeString& rules,
|
||||
UTransDirection direction,
|
||||
TransliterationRuleData*& ruleDataResult,
|
||||
UnicodeString& idBlockResult,
|
||||
int32_t& idSplitPointResult,
|
||||
UParseError& parseError,
|
||||
UErrorCode& ec) {
|
||||
if (U_FAILURE(ec)) {
|
||||
ruleDataResult = 0;
|
||||
return;
|
||||
}
|
||||
TransliteratorParser parser(rules, direction, parseError);
|
||||
int32_t count;
|
||||
parser.parseRules(idBlockResult, idSplitPointResult, count);
|
||||
if (U_FAILURE(parser.status) || count == 0) {
|
||||
delete parser.data;
|
||||
parser.data = 0;
|
||||
}
|
||||
ruleDataResult = parser.data;
|
||||
ec = parser.status;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param rules list of rules, separated by newline characters
|
||||
* @exception IllegalArgumentException if there is a syntax error in the
|
||||
* rules
|
||||
*/
|
||||
|
||||
/* Ram: Reordered member initializers to match declaration order and make GCC happy */
|
||||
TransliteratorParser::TransliteratorParser(
|
||||
const UnicodeString& theRules,
|
||||
UTransDirection theDirection,
|
||||
UParseError& theParseError)
|
||||
:
|
||||
rules(theRules), direction(theDirection),data(0),parseError(theParseError), variablesVector(status)
|
||||
{
|
||||
parseData = new ParseData(0, &variablesVector);
|
||||
if (parseData == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
// Default constructor. Sets every owned pointer member to NULL; the
// objects themselves are allocated later by parseRules().
// NOTE(review): only these four pointer members are initialized here.
// status, direction, ruleCount, idSplitPoint and parseError are assigned
// by parseRules(), so they should not be read before parse() is called.
TransliteratorParser::TransliteratorParser() {
|
||||
data = NULL;
|
||||
compoundFilter = NULL;
|
||||
parseData = NULL;
|
||||
variablesVector = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
// Releases whatever the parser still owns. Members handed out through
// orphanData() / orphanCompoundFilter() have been reset to NULL by those
// calls, so deleting them here is a no-op.
TransliteratorParser::~TransliteratorParser() {
|
||||
delete data;
|
||||
delete compoundFilter;
|
||||
delete parseData;
|
||||
delete variablesVector;
|
||||
}
|
||||
|
||||
// Parse the given rules in the given direction. The parse results are
// left in this object's members (data, idBlock, idSplitPoint,
// compoundFilter) for the caller to query afterwards.
//
// @param rules     the rule source text to parse
// @param direction the direction to parse the rules in
// @param pe        receives a copy of the parser's parse-error record
// @param ec        in/out error code. If it already indicates failure on
//                  entry, nothing is parsed and neither pe nor ec is
//                  modified; otherwise it receives the parser's status.
//
// NOTE(review): parseRules() appears to allocate a new ParseData on every
// call without freeing the previous one -- confirm before calling parse()
// more than once on the same object.
void
|
||||
TransliteratorParser::parse(const UnicodeString& rules,
|
||||
UTransDirection direction,
|
||||
UParseError& pe,
|
||||
UErrorCode& ec) {
|
||||
if (U_SUCCESS(ec)) {
|
||||
parseRules(rules, direction);
|
||||
// Copy the internal error state out to the caller's variables.
pe = parseError;
|
||||
ec = status;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the compound filter parsed by parse(). Caller owns result.
|
||||
*/
|
||||
// Transfers ownership of the parsed compound filter to the caller.
// Returns NULL if no compound filter is currently held.
UnicodeSet* TransliteratorParser::orphanCompoundFilter() {
|
||||
UnicodeSet* f = compoundFilter;
|
||||
// Clear the member so the destructor does not delete the object that
// the caller now owns.
compoundFilter = NULL;
|
||||
return f;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the data object parsed by parse(). Caller owns result.
|
||||
*/
|
||||
// Transfers ownership of the parsed rule data to the caller.
// Returns NULL if no rule data is currently held.
TransliterationRuleData* TransliteratorParser::orphanData() {
|
||||
TransliterationRuleData* d = data;
|
||||
// Clear the member so the destructor does not delete the object that
// the caller now owns.
data = NULL;
|
||||
return d;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Private implementation
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Parse the given string as a sequence of rules, separated by newline
|
||||
* characters ('\n'), and cause this object to implement those rules. Any
|
||||
|
@ -886,18 +861,12 @@ TransliteratorParser::~TransliteratorParser() {
|
|||
* @exception IllegalArgumentException if there is a syntax error in the
|
||||
* rules
|
||||
*/
|
||||
void TransliteratorParser::parseRules(UnicodeString& idBlockResult,
|
||||
int32_t& idSplitPointResult,
|
||||
int32_t& ruleCount) {
|
||||
status = U_ZERO_ERROR;
|
||||
ruleCount = 0;
|
||||
|
||||
void TransliteratorParser::parseRules(const UnicodeString& rules,
|
||||
UTransDirection theDirection) {
|
||||
// Clear error struct
|
||||
//if (parseError != 0) {
|
||||
//parseError->code = parseError->line = 0;
|
||||
parseError.offset = 0;
|
||||
parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
|
||||
//}
|
||||
parseError.line = parseError.offset = 0;
|
||||
parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
delete data;
|
||||
data = new TransliterationRuleData(status);
|
||||
|
@ -905,17 +874,28 @@ void TransliteratorParser::parseRules(UnicodeString& idBlockResult,
|
|||
return;
|
||||
}
|
||||
|
||||
parseData->data = data;
|
||||
variablesVector.removeAllElements();
|
||||
/* if (parseError != 0) {
|
||||
parseError->code = 0;
|
||||
direction = theDirection;
|
||||
ruleCount = 0;
|
||||
|
||||
delete compoundFilter;
|
||||
compoundFilter = NULL;
|
||||
|
||||
if (variablesVector == NULL) {
|
||||
variablesVector = new UVector(status);
|
||||
} else {
|
||||
variablesVector->removeAllElements();
|
||||
}
|
||||
*/
|
||||
determineVariableRange();
|
||||
parseData = new ParseData(0, variablesVector);
|
||||
if (parseData == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
parseData->data = data;
|
||||
determineVariableRange(rules);
|
||||
|
||||
UnicodeString str; // scratch
|
||||
idBlockResult.truncate(0);
|
||||
idSplitPointResult = -1;
|
||||
idBlock.truncate(0);
|
||||
idSplitPoint = -1;
|
||||
int32_t pos = 0;
|
||||
int32_t limit = rules.length();
|
||||
// The mode marks whether we are in the header ::id block, the
|
||||
|
@ -924,6 +904,15 @@ void TransliteratorParser::parseRules(UnicodeString& idBlockResult,
|
|||
// mode == 1: in rules: rule->1, ::id->2
|
||||
// mode == 2: in footer rule block: rule->ERROR, ::id->2
|
||||
int32_t mode = 0;
|
||||
|
||||
// The compound filter offset is an index into idBlock.
|
||||
// If it is 0, then the compound filter occurred at the start,
|
||||
// and it is the offset to the _start_ of the compound filter
|
||||
// pattern. Otherwise it is the offset to the _limit_ of the
|
||||
// compound filter pattern within idBlock.
|
||||
compoundFilter = NULL;
|
||||
int32_t compoundFilterOffset = -1;
|
||||
|
||||
while (pos < limit && U_SUCCESS(status)) {
|
||||
UChar c = rules.charAt(pos++);
|
||||
if (u_isWhitespace(c)) {
|
||||
|
@ -954,25 +943,39 @@ void TransliteratorParser::parseRules(UnicodeString& idBlockResult,
|
|||
int32_t p = pos;
|
||||
UBool sawDelim;
|
||||
UnicodeString regenID;
|
||||
Transliterator::parseID(rules, regenID, p, sawDelim, direction,parseError, FALSE,status);
|
||||
UnicodeSet* cpdFilter = NULL;
|
||||
Transliterator::parseID(rules, regenID, p, sawDelim, cpdFilter, direction,parseError, FALSE,status);
|
||||
if (p == pos || !sawDelim) {
|
||||
// Invalid ::id
|
||||
delete cpdFilter;
|
||||
syntaxError(U_ILLEGAL_ARGUMENT_ERROR, rules, pos);
|
||||
} else {
|
||||
if (mode == 1) {
|
||||
mode = 2;
|
||||
idSplitPointResult = idBlockResult.length();
|
||||
idSplitPoint = idBlock.length();
|
||||
}
|
||||
if (cpdFilter != NULL) {
|
||||
if (compoundFilter != NULL) {
|
||||
syntaxError(U_MULTIPLE_COMPOUND_FILTERS, rules, pos);
|
||||
}
|
||||
compoundFilter = cpdFilter;
|
||||
if (idBlock.length() == 0) {
|
||||
compoundFilterOffset = 0;
|
||||
}
|
||||
}
|
||||
rules.extractBetween(pos, p, str);
|
||||
idBlockResult.append(str);
|
||||
idBlock.append(str);
|
||||
if (!sawDelim) {
|
||||
idBlockResult.append((UChar)0x003B /*;*/);
|
||||
idBlock.append((UChar)0x003B /*;*/);
|
||||
}
|
||||
if (cpdFilter != NULL && compoundFilterOffset < 0) {
|
||||
compoundFilterOffset = idBlock.length();
|
||||
}
|
||||
pos = p;
|
||||
}
|
||||
} else {
|
||||
// Parse a rule
|
||||
pos = parseRule(pos, limit);
|
||||
pos = parseRule(rules, pos, limit);
|
||||
if (U_SUCCESS(status)) {
|
||||
++ruleCount;
|
||||
if (mode == 2) {
|
||||
|
@ -988,7 +991,7 @@ void TransliteratorParser::parseRules(UnicodeString& idBlockResult,
|
|||
}
|
||||
|
||||
// Convert the set vector to an array
|
||||
data->variablesLength = variablesVector.size();
|
||||
data->variablesLength = variablesVector->size();
|
||||
data->variables = data->variablesLength == 0 ? 0 : new UnicodeMatcher*[data->variablesLength];
|
||||
// orphanElement removes the given element and shifts all other
|
||||
// elements down. For performance (and code clarity) we work from
|
||||
|
@ -997,14 +1000,29 @@ void TransliteratorParser::parseRules(UnicodeString& idBlockResult,
|
|||
for (i=data->variablesLength; i>0; ) {
|
||||
--i;
|
||||
data->variables[i] =
|
||||
(UnicodeSet*) variablesVector.orphanElementAt(i);
|
||||
(UnicodeSet*) variablesVector->orphanElementAt(i);
|
||||
}
|
||||
|
||||
// Index the rules
|
||||
if (U_SUCCESS(status)) {
|
||||
if (compoundFilter != NULL) {
|
||||
if ((direction == UTRANS_FORWARD &&
|
||||
compoundFilterOffset != 0) ||
|
||||
(direction == UTRANS_REVERSE &&
|
||||
compoundFilterOffset != idBlock.length())) {
|
||||
status = U_MISPLACED_COMPOUND_FILTER;
|
||||
}
|
||||
}
|
||||
|
||||
data->ruleSet.freeze(parseError,status);
|
||||
if (idSplitPointResult < 0) {
|
||||
idSplitPointResult = idBlockResult.length();
|
||||
|
||||
if (idSplitPoint < 0) {
|
||||
idSplitPoint = idBlock.length();
|
||||
}
|
||||
|
||||
if (ruleCount == 0) {
|
||||
delete data;
|
||||
data = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1022,11 +1040,10 @@ void TransliteratorParser::parseRules(UnicodeString& idBlockResult,
|
|||
* indicators. Once it does a lexical breakdown of the rule at pos, it
|
||||
* creates a rule object and adds it to our rule list.
|
||||
*/
|
||||
int32_t TransliteratorParser::parseRule(int32_t pos, int32_t limit) {
|
||||
int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, int32_t limit) {
|
||||
// Locate the left side, operator, and right side
|
||||
int32_t start = pos;
|
||||
UChar op = 0;
|
||||
const UnicodeString& rule = rules; // TEMPORARY: FIX LATER
|
||||
|
||||
// Use pointers to automatics to make swapping possible.
|
||||
RuleHalf _left(*this), _right(*this);
|
||||
|
@ -1188,41 +1205,26 @@ int32_t TransliteratorParser::parseRule(int32_t pos, int32_t limit) {
|
|||
int32_t TransliteratorParser::syntaxError(UErrorCode parseErrorCode,
|
||||
const UnicodeString& rule,
|
||||
int32_t pos) {
|
||||
// if (parseError != 0) {
|
||||
/* parseError->line = 0; // We don't return a line #
|
||||
parseError->offset = start; // Character offset from rule start
|
||||
int32_t end = quotedIndexOf(rule, start, rule.length(), END_OF_RULE);
|
||||
if (end < 0) {
|
||||
end = rule.length();
|
||||
}
|
||||
int32_t len = uprv_min(end - start, U_PARSE_CONTEXT_LEN-1);
|
||||
// Extract everything into the preContext and leave the postContext
|
||||
// blank, since we don't have precise error position.
|
||||
// TODO: Fix this.
|
||||
rule.extract(start, len, parseError->preContext); // Current rule
|
||||
parseError->preContext[len] = 0;
|
||||
parseError->postContext[0] = 0;
|
||||
*/
|
||||
parseError.offset = pos;
|
||||
parseError.line = 0 ; /* we are not using line numbers */
|
||||
parseError.offset = pos;
|
||||
parseError.line = 0 ; /* we are not using line numbers */
|
||||
|
||||
// for pre-context
|
||||
int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
|
||||
int32_t stop = pos;
|
||||
// for pre-context
|
||||
int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
|
||||
int32_t stop = pos;
|
||||
|
||||
rule.extract(start,stop-start,parseError.preContext);
|
||||
//null terminate the buffer
|
||||
parseError.preContext[stop-start] = 0;
|
||||
rule.extract(start,stop-start,parseError.preContext);
|
||||
//null terminate the buffer
|
||||
parseError.preContext[stop-start] = 0;
|
||||
|
||||
//for post-context
|
||||
start = pos+1;
|
||||
stop = ((pos+U_PARSE_CONTEXT_LEN)<= rule.length() )? (pos+(U_PARSE_CONTEXT_LEN-1)) :
|
||||
rule.length();
|
||||
//for post-context
|
||||
start = pos+1;
|
||||
stop = ((pos+U_PARSE_CONTEXT_LEN)<= rule.length() )? (pos+(U_PARSE_CONTEXT_LEN-1)) :
|
||||
rule.length();
|
||||
|
||||
rule.extract(start,stop-start,parseError.postContext);
|
||||
//null terminate the buffer
|
||||
parseError.postContext[stop-start]= 0;
|
||||
|
||||
rule.extract(start,stop-start,parseError.postContext);
|
||||
//null terminate the buffer
|
||||
parseError.postContext[stop-start]= 0;
|
||||
// }
|
||||
status = (UErrorCode)parseErrorCode;
|
||||
return pos;
|
||||
|
||||
|
@ -1251,7 +1253,7 @@ UChar TransliteratorParser::generateStandInFor(UnicodeMatcher* adopted) {
|
|||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
variablesVector.addElement(adopted, status);
|
||||
variablesVector->addElement(adopted, status);
|
||||
return variableNext++;
|
||||
}
|
||||
|
||||
|
@ -1306,7 +1308,7 @@ UChar TransliteratorParser::getSegmentStandin(int32_t r) {
|
|||
* When done, everything not in the hash is available for use. In practice,
|
||||
* this method may employ some other algorithm for improved speed.
|
||||
*/
|
||||
void TransliteratorParser::determineVariableRange(void) {
|
||||
void TransliteratorParser::determineVariableRange(const UnicodeString& rules) {
|
||||
UnicodeRange privateUse(0xE000, 0x1900); // Private use area
|
||||
|
||||
UnicodeRange* r = privateUse.largestUnusedSubrange(rules, status);
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#define RBT_PARS_H
|
||||
|
||||
#include "unicode/rbt.h"
|
||||
#include "uvector.h"
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -19,19 +18,49 @@ class UnicodeMatcher;
|
|||
class ParseData;
|
||||
class RuleHalf;
|
||||
class ParsePosition;
|
||||
class UVector;
|
||||
|
||||
class TransliteratorParser {
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* This is a reference to external data we don't own. This works because
|
||||
* we only hold this for the duration of the call to parse().
|
||||
* PUBLIC data member containing the parsed data object, or null if
|
||||
* there were no rules.
|
||||
*/
|
||||
const UnicodeString& rules;
|
||||
TransliterationRuleData* data;
|
||||
|
||||
/**
|
||||
* PUBLIC data member.
|
||||
* The block of ::IDs, both at the top and at the bottom.
|
||||
* Inserted into these may be additional rules at the
|
||||
* idSplitPoint.
|
||||
*/
|
||||
UnicodeString idBlock;
|
||||
|
||||
/**
|
||||
* PUBLIC data member.
|
||||
* In a compound RBT, the index at which the RBT rules are
|
||||
* inserted into the ID block. Index 0 means before any IDs
|
||||
* in the block. Index idBlock.length() means after all IDs
|
||||
* in the block. Index is a string index.
|
||||
*/
|
||||
int32_t idSplitPoint;
|
||||
|
||||
/**
|
||||
* PUBLIC data member containing the parsed compound filter, if any.
|
||||
*/
|
||||
UnicodeSet* compoundFilter;
|
||||
|
||||
private:
|
||||
|
||||
// The number of rules parsed. This tells us if there were
|
||||
// any actual transliterator rules, or if there were just ::ID
|
||||
// block IDs.
|
||||
int32_t ruleCount;
|
||||
|
||||
UTransDirection direction;
|
||||
|
||||
TransliterationRuleData* data;
|
||||
|
||||
/**
|
||||
* We use a single error code during parsing. Rather than pass it
|
||||
* through each API, we keep it here.
|
||||
|
@ -39,10 +68,9 @@ class TransliteratorParser {
|
|||
UErrorCode status;
|
||||
|
||||
/**
|
||||
* Pointer to user structure in which to return parse error information.
|
||||
* May be NULL.
|
||||
* Parse error information.
|
||||
*/
|
||||
UParseError& parseError;
|
||||
UParseError parseError;
|
||||
|
||||
/**
|
||||
* Temporary symbol table used during parsing.
|
||||
|
@ -54,7 +82,7 @@ class TransliteratorParser {
|
|||
* is copied into the array data.variables. As with data.variables,
|
||||
* element 0 corresponds to character data.variablesBase.
|
||||
*/
|
||||
UVector variablesVector;
|
||||
UVector* variablesVector;
|
||||
|
||||
/**
|
||||
* The next available stand-in for variables. This starts at some point in
|
||||
|
@ -82,44 +110,10 @@ class TransliteratorParser {
|
|||
|
||||
public:
|
||||
|
||||
static TransliterationRuleData*
|
||||
parse(const UnicodeString& rules,
|
||||
UTransDirection direction,
|
||||
UParseError& parseError,
|
||||
UErrorCode& ec);
|
||||
|
||||
/**
|
||||
* Parse a given set of rules. Return up to three pieces of
|
||||
* parsed data. These are the header ::id block, the rule block,
|
||||
* and the footer ::id block. Any or all of these may be empty.
|
||||
* If the ::id blocks are empty, their corresponding parameters
|
||||
* are returned as the empty string. If there are no rules, the
|
||||
* TransliterationRuleData result is 0.
|
||||
* @param ruleDataResult caller owns the pointer stored here.
|
||||
* May be NULL.
|
||||
* @param headerRule string including semicolons for the header
|
||||
* ::id block. May be empty.
|
||||
* @param footerRule string including semicolons for the footer
|
||||
* ::id block. May be empty.
|
||||
* Constructor.
|
||||
*/
|
||||
static void parse(const UnicodeString& rules,
|
||||
UTransDirection direction,
|
||||
TransliterationRuleData*& ruleDataResult,
|
||||
UnicodeString& idBlockResult,
|
||||
int32_t& idSplitPointResult,
|
||||
UParseError& parseError,
|
||||
UErrorCode& ec);
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* @param rules list of rules, separated by newline characters
|
||||
* @exception IllegalArgumentException if there is a syntax error in the
|
||||
* rules
|
||||
*/
|
||||
TransliteratorParser(const UnicodeString& rules,
|
||||
UTransDirection direction,
|
||||
UParseError& parseError);
|
||||
TransliteratorParser();
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
|
@ -130,12 +124,32 @@ private:
|
|||
* Parse the given string as a sequence of rules, separated by newline
|
||||
* characters ('\n'), and cause this object to implement those rules. Any
|
||||
* previous rules are discarded. Typically this method is called exactly
|
||||
* once, during construction.
|
||||
* @exception IllegalArgumentException if there is a syntax error in the
|
||||
* rules
|
||||
* once after construction.
|
||||
*
|
||||
* Parse the given rules, in the given direction. After this call
|
||||
* returns, query the public data members for results. The caller
|
||||
* owns the 'data' and 'compoundFilter' data members after this
|
||||
* call returns.
|
||||
*/
|
||||
void parseRules(UnicodeString& idBlockResult, int32_t& idSplitPointResult,
|
||||
int32_t& ruleCount);
|
||||
void parse(const UnicodeString& rules,
|
||||
UTransDirection direction,
|
||||
UParseError& pe,
|
||||
UErrorCode& ec);
|
||||
|
||||
/**
|
||||
* Return the compound filter parsed by parse(). Caller owns result.
|
||||
*/
|
||||
UnicodeSet* orphanCompoundFilter();
|
||||
|
||||
/**
|
||||
* Return the data object parsed by parse(). Caller owns result.
|
||||
*/
|
||||
TransliterationRuleData* orphanData();
|
||||
|
||||
private:
|
||||
|
||||
void parseRules(const UnicodeString& rules,
|
||||
UTransDirection direction);
|
||||
|
||||
/**
|
||||
* MAIN PARSER. Parse the next rule in the given rule string, starting
|
||||
|
@ -150,7 +164,7 @@ private:
|
|||
* indicators. Once it does a lexical breakdown of the rule at pos, it
|
||||
* creates a rule object and adds it to our rule list.
|
||||
*/
|
||||
int32_t parseRule(int32_t pos, int32_t limit);
|
||||
int32_t parseRule(const UnicodeString& rule, int32_t pos, int32_t limit);
|
||||
|
||||
/**
|
||||
* Called by main parser upon syntax error. Search the rule string
|
||||
|
@ -198,7 +212,7 @@ private:
|
|||
* When done, everything not in the hash is available for use. In practice,
|
||||
* this method may employ some other algorithm for improved speed.
|
||||
*/
|
||||
void determineVariableRange(void);
|
||||
void determineVariableRange(const UnicodeString&);
|
||||
|
||||
/**
|
||||
* Returns the index of a character, ignoring quoted text.
|
||||
|
|
|
@ -43,6 +43,7 @@ static const UChar ID_DELIM = 0x003B; /*;*/
|
|||
static const UChar VARIANT_SEP = 0x002F; // '/'
|
||||
static const UChar OPEN_PAREN = 40;
|
||||
static const UChar CLOSE_PAREN = 41;
|
||||
|
||||
/**
|
||||
* Prefix for resource bundle key for the display name for a
|
||||
* transliterator. The ID is appended to this to form the key.
|
||||
|
@ -688,8 +689,9 @@ Transliterator* Transliterator::createInstance(const UnicodeString& ID,
|
|||
UVector list(status);
|
||||
int32_t ignored;
|
||||
UnicodeString regenID;
|
||||
UnicodeSet* compoundFilter = 0;
|
||||
parseCompoundID(ID, regenID, dir, idSplitPoint, adoptedSplitTrans,
|
||||
list, ignored, parseError, status);
|
||||
list, ignored, compoundFilter, parseError, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return 0;
|
||||
|
@ -708,6 +710,9 @@ Transliterator* Transliterator::createInstance(const UnicodeString& ID,
|
|||
break;
|
||||
}
|
||||
t->setID(regenID);
|
||||
if (compoundFilter != NULL) {
|
||||
t->adoptFilter(compoundFilter);
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
|
@ -724,52 +729,52 @@ Transliterator* Transliterator::createFromRules(const UnicodeString& ID,
|
|||
UTransDirection dir,
|
||||
UParseError& parseError,
|
||||
UErrorCode& status) {
|
||||
UnicodeString idBlock;
|
||||
int32_t idSplitPoint = -1;
|
||||
TransliterationRuleData *data = 0;
|
||||
Transliterator* t = NULL;
|
||||
|
||||
TransliteratorParser::parse(rules, dir, data,
|
||||
idBlock, idSplitPoint,
|
||||
parseError, status);
|
||||
TransliteratorParser parser;
|
||||
parser.parse(rules, dir, parseError, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
delete data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// NOTE: The logic here matches that in TransliteratorRegistry.
|
||||
if (idBlock.length() == 0) {
|
||||
if (data == 0) {
|
||||
if (parser.idBlock.length() == 0) {
|
||||
if (parser.data == NULL) {
|
||||
// No idBlock, no data -- this is just an
|
||||
// alias for Null
|
||||
return new NullTransliterator();
|
||||
t = new NullTransliterator();
|
||||
} else {
|
||||
// No idBlock, data != 0 -- this is an
|
||||
// ordinary RBT_DATA.
|
||||
return new RuleBasedTransliterator(ID, data, TRUE); // TRUE == adopt data object
|
||||
t = new RuleBasedTransliterator(ID, parser.orphanData(), TRUE); // TRUE == adopt data object
|
||||
}
|
||||
} else {
|
||||
if (data == 0) {
|
||||
if (parser.data == NULL) {
|
||||
// idBlock, no data -- this is an alias
|
||||
Transliterator *t = createInstance(idBlock, dir, parseError,status);
|
||||
if (t != 0) {
|
||||
t = createInstance(parser.idBlock, dir, parseError, status);
|
||||
if (t != NULL) {
|
||||
t->setID(ID);
|
||||
}
|
||||
return t;
|
||||
} else {
|
||||
// idBlock and data -- this is a compound
|
||||
// RBT
|
||||
UnicodeString id("_", "");
|
||||
Transliterator *t = new RuleBasedTransliterator(id, data, TRUE); // TRUE == adopt data object
|
||||
t = new CompoundTransliterator(ID, idBlock, idSplitPoint,
|
||||
t,parseError,status);
|
||||
t = new RuleBasedTransliterator(id, parser.orphanData(), TRUE); // TRUE == adopt data object
|
||||
t = new CompoundTransliterator(ID, parser.idBlock, parser.idSplitPoint,
|
||||
t, parseError, status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete t;
|
||||
t = 0;
|
||||
}
|
||||
if (parser.compoundFilter != NULL) {
|
||||
t->adoptFilter(parser.orphanCompoundFilter());
|
||||
}
|
||||
return t;
|
||||
}
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
|
||||
|
@ -806,6 +811,7 @@ void Transliterator::parseCompoundID(const UnicodeString& id,
|
|||
Transliterator *adoptedSplitTrans,
|
||||
UVector& result,
|
||||
int32_t& splitTransIndex,
|
||||
UnicodeSet*& compoundFilter,
|
||||
UParseError& parseError,
|
||||
UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
|
@ -816,6 +822,15 @@ void Transliterator::parseCompoundID(const UnicodeString& id,
|
|||
splitTransIndex = -1;
|
||||
int32_t pos = 0;
|
||||
int32_t i;
|
||||
|
||||
// A compound filter is a filter on an entire compound
|
||||
// transliterator. It is indicated by the syntax [abc]; A-B;
|
||||
// B-C or in the reverse direction A-B; B-C; ([abc]). We
|
||||
// record the filter and its index (in terms of the result
|
||||
// vector).
|
||||
compoundFilter = NULL;
|
||||
int32_t compoundFilterIndex = -1;
|
||||
|
||||
while (pos < id.length()) {
|
||||
// We compare (pos >= split), not (pos == split), so we can
|
||||
// skip over whitespace (see below).
|
||||
|
@ -826,13 +841,25 @@ void Transliterator::parseCompoundID(const UnicodeString& id,
|
|||
}
|
||||
int32_t p = pos;
|
||||
UBool sawDelimiter; // We ignore this
|
||||
UnicodeSet* cpdFilter = NULL;
|
||||
Transliterator *t =
|
||||
parseID(id, regenID, p, sawDelimiter, dir, parseError, TRUE,status);
|
||||
parseID(id, regenID, p, sawDelimiter, cpdFilter, dir, parseError, TRUE,status);
|
||||
|
||||
if(U_FAILURE(status)){
|
||||
delete t;
|
||||
delete cpdFilter;
|
||||
break;
|
||||
}
|
||||
if (cpdFilter != NULL) {
|
||||
if (compoundFilter != NULL) {
|
||||
status = U_MULTIPLE_COMPOUND_FILTERS;
|
||||
delete t;
|
||||
delete cpdFilter;
|
||||
break;
|
||||
}
|
||||
compoundFilter = cpdFilter;
|
||||
compoundFilterIndex = result.size();
|
||||
}
|
||||
|
||||
if (p == pos || (p < id.length() && !sawDelimiter)) {
|
||||
delete t;
|
||||
|
@ -848,18 +875,28 @@ void Transliterator::parseCompoundID(const UnicodeString& id,
|
|||
}
|
||||
|
||||
// Handle case of idSplitPoint == id.length()
|
||||
if (pos >= idSplitPoint && adoptedSplitTrans != 0) {
|
||||
if (U_SUCCESS(status) && pos >= idSplitPoint && adoptedSplitTrans != 0) {
|
||||
splitTransIndex = result.size();
|
||||
result.addElement(adoptedSplitTrans, status);
|
||||
adoptedSplitTrans = 0;
|
||||
}
|
||||
|
||||
// Check validity of compound filter position
|
||||
if (compoundFilter != NULL) {
|
||||
if ((dir == UTRANS_FORWARD && compoundFilterIndex != 0) ||
|
||||
(dir == UTRANS_REVERSE && compoundFilterIndex != result.size())) {
|
||||
status = U_MISPLACED_COMPOUND_FILTER;
|
||||
}
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
for (i=0; i<result.size(); ++i) {
|
||||
delete (Transliterator*)result.elementAt(i);
|
||||
}
|
||||
result.removeAllElements();
|
||||
delete adoptedSplitTrans;
|
||||
delete compoundFilter;
|
||||
compoundFilter = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -885,6 +922,9 @@ void Transliterator::parseCompoundID(const UnicodeString& id,
|
|||
* first character to parse. On output, the position after the last
|
||||
* character parsed. This will be a semicolon or ID.length(). In the
|
||||
* case of an error this value will be unchanged.
|
||||
* @param compoundFilter OUTPUT parameter to receive a compound
|
||||
* filter, if one is parsed. When a non-null compound filter is
|
||||
* returned then a null Transliterator pointer is returned.
|
||||
* @param create if TRUE, create and return the result. If FALSE,
|
||||
* only scan the ID, and return NULL.
|
||||
* @return a newly created transliterator, or NULL. NULL is returned
|
||||
|
@ -898,6 +938,7 @@ Transliterator* Transliterator::parseID(const UnicodeString& ID,
|
|||
UnicodeString& regenID,
|
||||
int32_t& pos,
|
||||
UBool& sawDelimiter,
|
||||
UnicodeSet*& compoundFilter,
|
||||
UTransDirection dir,
|
||||
UParseError& parseError,
|
||||
UBool create,
|
||||
|
@ -907,19 +948,22 @@ Transliterator* Transliterator::parseID(const UnicodeString& ID,
|
|||
idStart, idLimit,
|
||||
setStart, setLimit;
|
||||
|
||||
UnicodeSet* fwdFilter = NULL;
|
||||
UnicodeSet* revFilter = NULL;
|
||||
UnicodeSet* filter = 0;
|
||||
|
||||
if (!parseIDBounds(ID, pos, FALSE, limit,
|
||||
setStart, setLimit, revStart, filter)) {
|
||||
delete filter;
|
||||
setStart, setLimit, revStart, fwdFilter)) {
|
||||
delete fwdFilter;
|
||||
return 0;
|
||||
}
|
||||
filter = fwdFilter;
|
||||
|
||||
idStart = pos;
|
||||
idLimit = limit;
|
||||
|
||||
if (revStart >= 0 && revStart < limit) {
|
||||
int32_t revSetStart, revSetLimit, dummy;
|
||||
UnicodeSet* revFilter = 0;
|
||||
if (!parseIDBounds(ID, revStart+1, TRUE, revLimit,
|
||||
revSetStart, revSetLimit, dummy, revFilter)) {
|
||||
delete filter;
|
||||
|
@ -981,83 +1025,103 @@ Transliterator* Transliterator::parseID(const UnicodeString& ID,
|
|||
}
|
||||
}
|
||||
|
||||
// Fix the id, if necessary, by reversing it (A-B => B-A). This
|
||||
// is only done if the id is NOT of the form Foo(Bar). Record the
|
||||
// position of the separator.
|
||||
//
|
||||
// For both A-B and Foo(Bar) ids, detect the special case of Null,
|
||||
// whose inverse is itself. Given an ID with no separator "Foo",
|
||||
// an abbreviation for "Any-Foo", consider the inverse to be
|
||||
// "Foo-Any".
|
||||
int32_t sep = id.indexOf(ID_SEP);
|
||||
if (sep < 0 && id.caseCompare(NullTransliterator::SHORT_ID,
|
||||
U_FOLD_CASE_DEFAULT) == 0) {
|
||||
// Handle "Null"
|
||||
sep = id.length();
|
||||
} else if (dir == UTRANS_REVERSE &&
|
||||
id.caseCompare(NullTransliterator::ID,
|
||||
U_FOLD_CASE_DEFAULT) == 0) {
|
||||
// Reverse of "Any-Null" => "Null"
|
||||
id.removeBetween(0, sep+1);
|
||||
sep = id.length();
|
||||
} else if (dir == UTRANS_REVERSE && revStart < 0) {
|
||||
if (sep >= 0) {
|
||||
id.extractBetween(0, sep, str);
|
||||
id.removeBetween(0, sep+1);
|
||||
Transliterator* t = NULL;
|
||||
int32_t sep = 0; // index of the separator ('-') in id
|
||||
|
||||
// If id is empty, then we have either an empty specifier,
|
||||
// which is illegal, or a compound filter, which is legal
|
||||
// as long as it's in the right place -- we let the caller
|
||||
// decide that.
|
||||
UBool isCompoundFilter = (id.length() == 0 && filter != NULL);
|
||||
if (isCompoundFilter) {
|
||||
if (dir == UTRANS_FORWARD) {
|
||||
compoundFilter = fwdFilter;
|
||||
delete revFilter;
|
||||
revFilter = NULL;
|
||||
} else {
|
||||
str = UnicodeString("Any", "");
|
||||
compoundFilter = revFilter;
|
||||
delete fwdFilter;
|
||||
fwdFilter = NULL;
|
||||
}
|
||||
sep = id.length();
|
||||
id.append(ID_SEP).append(str);
|
||||
} else if (sep < 0 && id.length() > 0) {
|
||||
// Don't do anything for empty IDs -- we handle these specially below
|
||||
str = UnicodeString("Any-", "");
|
||||
sep = str.length() - 1;
|
||||
id.insert(0, str);
|
||||
}
|
||||
|
||||
Transliterator *t = 0;
|
||||
|
||||
// If we have a reverse part of the ID, e.g., Foo(Bar), then we
|
||||
// need to check for an empty part, which represents a Null
|
||||
// transliterator. We return 0 (not a NullTransliterator). If we
|
||||
// are not of the form Foo(Bar) then an empty string is illegal.
|
||||
if (revStart >= 0 && id.length() == 0) {
|
||||
// Ignore any filters; filters on Null are meaningless (and we
|
||||
// can't attach them to 0 anyway)
|
||||
delete filter;
|
||||
}
|
||||
|
||||
|
||||
else {
|
||||
// Create the actual transliterator from the registry
|
||||
if (registry == 0) {
|
||||
initializeRegistry();
|
||||
}
|
||||
parseError.line = parseError.offset = 0;
|
||||
parseError.preContext[0] = parseError.postContext[0] = 0;
|
||||
TransliteratorAlias* alias = 0;
|
||||
{
|
||||
Mutex lock(&registryMutex);
|
||||
t = registry->get(id, alias, parseError,status);
|
||||
// Need to enclose this in a block to prevent deadlock when
|
||||
// instantiating aliases (below).
|
||||
}
|
||||
|
||||
if (alias != 0) {
|
||||
// assert(t==0);
|
||||
// Instantiate an alias
|
||||
t = alias->create(parseError, status);
|
||||
delete alias;
|
||||
// Fix the id, if necessary, by reversing it (A-B => B-A). This
|
||||
// is only done if the id is NOT of the form Foo(Bar). Record the
|
||||
// position of the separator.
|
||||
//
|
||||
// For both A-B and Foo(Bar) ids, detect the special case of Null,
|
||||
// whose inverse is itself. Given an ID with no separator "Foo",
|
||||
// an abbreviation for "Any-Foo", consider the inverse to be
|
||||
// "Foo-Any".
|
||||
sep = id.indexOf(ID_SEP);
|
||||
if (sep < 0 && id.caseCompare(NullTransliterator::SHORT_ID,
|
||||
U_FOLD_CASE_DEFAULT) == 0) {
|
||||
// Handle "Null"
|
||||
sep = id.length();
|
||||
} else if (dir == UTRANS_REVERSE &&
|
||||
id.caseCompare(NullTransliterator::ID,
|
||||
U_FOLD_CASE_DEFAULT) == 0) {
|
||||
// Reverse of "Any-Null" => "Null"
|
||||
id.removeBetween(0, sep+1);
|
||||
sep = id.length();
|
||||
} else if (dir == UTRANS_REVERSE && revStart < 0) {
|
||||
if (sep >= 0) {
|
||||
id.extractBetween(0, sep, str);
|
||||
id.removeBetween(0, sep+1);
|
||||
} else {
|
||||
str = UnicodeString("Any", "");
|
||||
}
|
||||
sep = id.length();
|
||||
id.append(ID_SEP).append(str);
|
||||
} else if (sep < 0 && id.length() > 0) {
|
||||
// Don't do anything for empty IDs -- we handle these specially below
|
||||
str = UnicodeString("Any-", "");
|
||||
sep = str.length() - 1;
|
||||
id.insert(0, str);
|
||||
}
|
||||
|
||||
if (t == 0) {
|
||||
// Creation failed; the ID is invalid
|
||||
// If we have a reverse part of the ID, e.g., Foo(Bar), then we
|
||||
// need to check for an empty part, which represents a Null
|
||||
// transliterator. We return 0 (not a NullTransliterator). If we
|
||||
// are not of the form Foo(Bar) then an empty string is illegal.
|
||||
if (revStart >= 0 && id.length() == 0) {
|
||||
// Ignore any filters; filters on Null are meaningless (and we
|
||||
// can't attach them to 0 anyway)
|
||||
delete filter;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Set the filter, if any
|
||||
t->adoptFilter(filter);
|
||||
else {
|
||||
// Create the actual transliterator from the registry
|
||||
if (registry == 0) {
|
||||
initializeRegistry();
|
||||
}
|
||||
parseError.line = parseError.offset = 0;
|
||||
parseError.preContext[0] = parseError.postContext[0] = 0;
|
||||
TransliteratorAlias* alias = 0;
|
||||
{
|
||||
Mutex lock(&registryMutex);
|
||||
t = registry->get(id, alias, parseError,status);
|
||||
// Need to enclose this in a block to prevent deadlock when
|
||||
// instantiating aliases (below).
|
||||
}
|
||||
|
||||
if (alias != 0) {
|
||||
// assert(t==0);
|
||||
// Instantiate an alias
|
||||
t = alias->create(parseError, status);
|
||||
delete alias;
|
||||
}
|
||||
|
||||
if (t == 0) {
|
||||
// Creation failed; the ID is invalid
|
||||
delete filter;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Set the filter, if any
|
||||
t->adoptFilter(filter);
|
||||
}
|
||||
}
|
||||
|
||||
// Set the ID. This is normally just a substring of the input
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "unicode/rbt.h"
|
||||
#include "unicode/resbund.h"
|
||||
#include "unicode/translit.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/uscript.h"
|
||||
|
||||
// UChar constants
|
||||
|
@ -77,11 +78,13 @@ TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID) :
|
|||
TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
|
||||
const UnicodeString& idBlock,
|
||||
Transliterator* adopted,
|
||||
int32_t theIDSplitPoint) :
|
||||
int32_t theIDSplitPoint,
|
||||
const UnicodeSet* cpdFilter) :
|
||||
ID(theID),
|
||||
aliasID(idBlock),
|
||||
trans(adopted),
|
||||
idSplitPoint(theIDSplitPoint) {
|
||||
idSplitPoint(theIDSplitPoint),
|
||||
compoundFilter(cpdFilter) {
|
||||
}
|
||||
|
||||
TransliteratorAlias::~TransliteratorAlias() {
|
||||
|
@ -90,16 +93,19 @@ TransliteratorAlias::~TransliteratorAlias() {
|
|||
|
||||
|
||||
Transliterator* TransliteratorAlias::create(UParseError& pe,
|
||||
UErrorCode& ec) {
|
||||
UErrorCode& ec) {
|
||||
Transliterator *t;
|
||||
if (trans == 0) {
|
||||
return Transliterator::createInstance(aliasID, UTRANS_FORWARD, pe, ec);
|
||||
t = Transliterator::createInstance(aliasID, UTRANS_FORWARD, pe, ec);
|
||||
} else {
|
||||
Transliterator *t = trans;
|
||||
t = new CompoundTransliterator(ID, aliasID, idSplitPoint,
|
||||
trans, pe, ec);
|
||||
trans = 0; // so we don't delete it later
|
||||
return new CompoundTransliterator(ID, aliasID, idSplitPoint,
|
||||
t, pe, ec);
|
||||
|
||||
if (compoundFilter) {
|
||||
t->adoptFilter((UnicodeSet*) compoundFilter->clone());
|
||||
}
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
@ -277,6 +283,7 @@ public:
|
|||
// it has a copy constructor
|
||||
UnicodeString stringArg; // For RULES_*, ALIAS, COMPOUND_RBT
|
||||
int32_t intArg; // For COMPOUND_RBT
|
||||
UnicodeSet* compoundFilter; // For COMPOUND_RBT
|
||||
union {
|
||||
Transliterator* prototype; // For PROTOTYPE
|
||||
TransliterationRuleData* data; // For RBT_DATA, COMPOUND_RBT
|
||||
|
@ -290,6 +297,7 @@ public:
|
|||
|
||||
Entry::Entry() {
|
||||
u.prototype = 0;
|
||||
compoundFilter = NULL;
|
||||
entryType = NONE;
|
||||
}
|
||||
|
||||
|
@ -303,6 +311,7 @@ Entry::~Entry() {
|
|||
// invalidates any RBTs that the user has instantiated.
|
||||
delete u.data;
|
||||
}
|
||||
delete compoundFilter;
|
||||
}
|
||||
|
||||
void Entry::adoptPrototype(Transliterator* adopted) {
|
||||
|
@ -906,7 +915,7 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
|
|||
} else if (entry->entryType == Entry::COMPOUND_RBT) {
|
||||
UnicodeString id("_", "");
|
||||
Transliterator *t = new RuleBasedTransliterator(id, entry->u.data);
|
||||
aliasReturn = new TransliteratorAlias(ID, entry->stringArg, t, entry->intArg);
|
||||
aliasReturn = new TransliteratorAlias(ID, entry->stringArg, t, entry->intArg, entry->compoundFilter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -935,13 +944,9 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
|
|||
// transliterators; if it lists something that's not
|
||||
// installed, we'll get an error from ResourceBundle.
|
||||
|
||||
TransliteratorParser::parse(rules, isReverse ?
|
||||
UTRANS_REVERSE : UTRANS_FORWARD,
|
||||
entry->u.data,
|
||||
entry->stringArg,
|
||||
entry->intArg,
|
||||
parseError,
|
||||
status);
|
||||
TransliteratorParser parser;
|
||||
parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD,
|
||||
parseError, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
// We have a failure of some kind. Remove the ID from the
|
||||
|
@ -954,6 +959,11 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
|
|||
break;
|
||||
}
|
||||
|
||||
entry->u.data = parser.orphanData();
|
||||
entry->stringArg = parser.idBlock;
|
||||
entry->intArg = parser.idSplitPoint;
|
||||
entry->compoundFilter = parser.orphanCompoundFilter();
|
||||
|
||||
// Reset entry->entryType to something that we process at the
|
||||
// top of the loop, then loop back to the top. As long as we
|
||||
// do this, we only loop through twice at most.
|
||||
|
|
|
@ -45,7 +45,8 @@ class TransliteratorAlias {
|
|||
* Construct a compound RBT alias.
|
||||
*/
|
||||
TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlock,
|
||||
Transliterator* adopted, int32_t idSplitPoint);
|
||||
Transliterator* adopted, int32_t idSplitPoint,
|
||||
const UnicodeSet* compoundFilter);
|
||||
|
||||
~TransliteratorAlias();
|
||||
|
||||
|
@ -64,10 +65,12 @@ class TransliteratorAlias {
|
|||
// 2. CompoundRBT
|
||||
// Here ID is the ID, aliasID is the idBlock, trans is the
|
||||
// contained RBT, and idSplitPoint is the offset in aliasID
|
||||
// where the contained RBT goes.
|
||||
// where the contained RBT goes. compoundFilter is the
|
||||
// compound filter, and it is _not_ owned.
|
||||
UnicodeString ID;
|
||||
UnicodeString aliasID;
|
||||
Transliterator* trans; // owned
|
||||
const UnicodeSet* compoundFilter; // alias
|
||||
int32_t idSplitPoint;
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue