ICU-1232 disallow UnicodeSets (and other standins) in translit output

X-SVN-Rev: 6702
This commit is contained in:
Alan Liu 2001-11-09 01:22:33 +00:00
parent 9a9e9fc74e
commit 51f23530aa
6 changed files with 67 additions and 8 deletions

View file

@ -93,7 +93,7 @@ public:
virtual const UnicodeString* lookup(const UnicodeString& s) const;
virtual const UnicodeSet* lookupSet(UChar32 ch) const;
virtual const UnicodeMatcher* lookupMatcher(UChar32 ch) const;
virtual UnicodeString parseReference(const UnicodeString& text,
ParsePosition& pos, int32_t limit) const;
@ -113,15 +113,15 @@ const UnicodeString* ParseData::lookup(const UnicodeString& name) const {
/**
* Implement SymbolTable API.
*/
// NOTE(review): this hunk is rendered without +/- markers, so each replaced
// line (lookupSet / UnicodeSet) appears directly above the line that
// replaces it (lookupMatcher / UnicodeMatcher) -- confirm against the file.
const UnicodeSet* ParseData::lookupSet(UChar32 ch) const {
const UnicodeMatcher* ParseData::lookupMatcher(UChar32 ch) const {
// Note that we cannot use data.lookupSet() because the
// set array has not been constructed yet.
const UnicodeSet* set = NULL;
const UnicodeMatcher* set = NULL;
// Index into variablesVector: the offset of ch from data->variablesBase.
int32_t i = ch - data->variablesBase;
// Only an in-range index has an entry; for any other ch the result stays NULL.
if (i >= 0 && i < variablesVector->size()) {
int32_t i = ch - data->variablesBase;
set = (i < variablesVector->size()) ?
(UnicodeSet*) variablesVector->elementAt(i) : 0;
(UnicodeMatcher*) variablesVector->elementAt(i) : 0;
}
return set;
}
@ -1137,7 +1137,8 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
// - allow arbitrary cursor offsets and do runtime checking.
//(right->cursorOffset > (left->text.length() - left->post)) ||
//(-right->cursorOffset > left->ante) ||
right->anchorStart || right->anchorEnd) {
right->anchorStart || right->anchorEnd ||
!isValidOutput(right->text)) {
return syntaxError(U_MALFORMED_RULE, rule, start);
}
@ -1161,6 +1162,21 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
return pos;
}
/**
 * Return true if the given string looks like valid output, that is,
 * does not contain quantifiers or other special input-only elements.
 *
 * Every code point of the string is looked up through
 * parseData->lookupMatcher(); a non-NULL result means the code point
 * is a stand-in for a matcher, which is not allowed on output.
 *
 * @param output the text to check
 * @return FALSE if any code point of output has an associated
 * matcher, TRUE otherwise (including for an empty string)
 */
UBool TransliteratorParser::isValidOutput(const UnicodeString& output) const {
    // The index is advanced only in the loop body, by the UTF-16 length
    // of the code point just read.  Incrementing it in the for-header as
    // well would skip the code unit that follows each code point, letting
    // stand-ins at those positions go unchecked.
    for (int32_t i=0; i<output.length(); ) {
        UChar32 c = output.char32At(i);
        i += UTF_CHAR_LENGTH(c);
        if (parseData->lookupMatcher(c) != NULL) {
            return FALSE;
        }
    }
    return TRUE;
}
/**
* Called by main parser upon syntax error. Search the rule string
* for the probable end of the rule. Of course, if the error is that

View file

@ -210,6 +210,12 @@ private:
*/
int32_t parsePragma(const UnicodeString& rule, int32_t pos, int32_t limit);
/**
* Return true if the given string looks like valid output, that is,
* does not contain quantifiers or other special input-only elements.
* The rule parser calls this on the text of the right-hand side of a
* rule and reports a malformed rule when it returns false.
* @param output the text to check
* @return FALSE if output contains a character for which the parse
* data's lookupMatcher() returns a matcher; TRUE otherwise
*/
UBool isValidOutput(const UnicodeString& output) const;
/**
* Called by main parser upon syntax error. Search the rule string
* for the probable end of the rule. Of course, if the error is that

View file

@ -44,10 +44,10 @@ public:
virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
/**
* Lookup the UnicodeSet associated with the given character, and
* Lookup the UnicodeMatcher associated with the given character, and
* return it. Return <tt>null</tt> if not found.
*/
virtual const UnicodeSet* lookupSet(UChar32 ch) const = 0;
virtual const UnicodeMatcher* lookupMatcher(UChar32 ch) const = 0;
/**
* Parse a symbol reference name from the given string, starting

View file

@ -106,6 +106,13 @@ const UChar32 UnicodeSet::MIN_VALUE = UNICODESET_LOW;
*/
const UChar32 UnicodeSet::MAX_VALUE = UNICODESET_HIGH - 1;
// NOTE: UnicodeMatcher::fgClassID is defined in this file, rather than in
// its own unimatch.cpp, until we have sufficient reason to add an entire
// separate source file just for one line.
const char UnicodeMatcher::fgClassID = 0;
const char UnicodeSet::fgClassID = 0;
//----------------------------------------------------------------
// Constructors &c
//----------------------------------------------------------------
@ -978,7 +985,12 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
if (ivarValueBuffer < varValueBuffer->length()) {
c = varValueBuffer->char32At(ivarValueBuffer);
ivarValueBuffer += UTF_CHAR_LENGTH(c);
nestedSet = symbols->lookupSet(c); // may be NULL
const UnicodeMatcher *m = symbols->lookupMatcher(c); // may be NULL
if (m != NULL && m->getDynamicClassID() != UnicodeSet::getStaticClassID()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
nestedSet = (UnicodeSet*) m;
nestedPatDone = FALSE;
} else {
varValueBuffer = NULL;

View file

@ -135,6 +135,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE(53,TestTitleAccents);
TESTCASE(54,TestLocaleResource);
TESTCASE(55,TestParseError);
TESTCASE(56,TestOutputSet);
default: name = ""; break;
}
}
@ -2612,6 +2613,7 @@ void TransliteratorTest::TestParseError() {
UErrorCode ec = U_ZERO_ERROR;
UParseError pe;
Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
delete t;
if (U_FAILURE(ec)) {
UnicodeString err(pe.preContext);
err.append((UChar)124/*|*/).append(pe.postContext);
@ -2625,6 +2627,24 @@ void TransliteratorTest::TestParseError() {
errln("FAIL: no syntax error");
}
/**
* Make sure sets on output are disallowed.
*/
void TransliteratorTest::TestOutputSet() {
UnicodeString rule = "$set = [a-cm-n]; b > $set;";
UErrorCode ec = U_ZERO_ERROR;
UParseError pe;
Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
delete t;
if (U_FAILURE(ec)) {
UnicodeString err(pe.preContext);
err.append((UChar)124/*|*/).append(pe.postContext);
logln("Ok: " + err);
return;
}
errln("FAIL: No syntax error");
}
//======================================================================
// icu4c ONLY
// These tests are not mirrored (yet) in icu4j at

View file

@ -261,6 +261,11 @@ class TransliteratorTest : public IntlTest {
*/
void TestParseError();
/**
* Make sure sets on output are disallowed.
*/
void TestOutputSet();
//======================================================================
// Support methods
//======================================================================