mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-1232 disallow UnicodeSets (and other standins) in translit output
X-SVN-Rev: 6702
This commit is contained in:
parent
9a9e9fc74e
commit
51f23530aa
6 changed files with 67 additions and 8 deletions
|
@ -93,7 +93,7 @@ public:
|
|||
|
||||
virtual const UnicodeString* lookup(const UnicodeString& s) const;
|
||||
|
||||
virtual const UnicodeSet* lookupSet(UChar32 ch) const;
|
||||
virtual const UnicodeMatcher* lookupMatcher(UChar32 ch) const;
|
||||
|
||||
virtual UnicodeString parseReference(const UnicodeString& text,
|
||||
ParsePosition& pos, int32_t limit) const;
|
||||
|
@ -113,15 +113,15 @@ const UnicodeString* ParseData::lookup(const UnicodeString& name) const {
|
|||
/**
|
||||
* Implement SymbolTable API.
|
||||
*/
|
||||
const UnicodeSet* ParseData::lookupSet(UChar32 ch) const {
|
||||
const UnicodeMatcher* ParseData::lookupMatcher(UChar32 ch) const {
|
||||
// Note that we cannot use data.lookupSet() because the
|
||||
// set array has not been constructed yet.
|
||||
const UnicodeSet* set = NULL;
|
||||
const UnicodeMatcher* set = NULL;
|
||||
int32_t i = ch - data->variablesBase;
|
||||
if (i >= 0 && i < variablesVector->size()) {
|
||||
int32_t i = ch - data->variablesBase;
|
||||
set = (i < variablesVector->size()) ?
|
||||
(UnicodeSet*) variablesVector->elementAt(i) : 0;
|
||||
(UnicodeMatcher*) variablesVector->elementAt(i) : 0;
|
||||
}
|
||||
return set;
|
||||
}
|
||||
|
@ -1137,7 +1137,8 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
|
|||
// - allow arbitrary cursor offsets and do runtime checking.
|
||||
//(right->cursorOffset > (left->text.length() - left->post)) ||
|
||||
//(-right->cursorOffset > left->ante) ||
|
||||
right->anchorStart || right->anchorEnd) {
|
||||
right->anchorStart || right->anchorEnd ||
|
||||
!isValidOutput(right->text)) {
|
||||
|
||||
return syntaxError(U_MALFORMED_RULE, rule, start);
|
||||
}
|
||||
|
@ -1161,6 +1162,21 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
|
|||
return pos;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the given string looks like valid output, that is,
|
||||
* does not contain quantifiers or other special input-only elements.
|
||||
*/
|
||||
UBool TransliteratorParser::isValidOutput(const UnicodeString& output) const {
    // Walk the string one code point at a time.  The index is advanced
    // only in the loop body, by the UTF-16 length of the code point just
    // read; incrementing it again in the loop header would skip the code
    // unit following every code point and leave it unchecked.
    for (int32_t i=0; i<output.length(); ) {
        UChar32 c = output.char32At(i);
        i += UTF_CHAR_LENGTH(c);
        // Any character for which the symbol table returns a matcher
        // is rejected as output.
        if (parseData->lookupMatcher(c) != NULL) {
            return FALSE;
        }
    }
    return TRUE;
}
|
||||
|
||||
/**
|
||||
* Called by main parser upon syntax error. Search the rule string
|
||||
* for the probable end of the rule. Of course, if the error is that
|
||||
|
|
|
@ -210,6 +210,12 @@ private:
|
|||
*/
|
||||
int32_t parsePragma(const UnicodeString& rule, int32_t pos, int32_t limit);
|
||||
|
||||
/**
|
||||
* Return true if the given string looks like valid output, that is,
|
||||
* does not contain quantifiers or other special input-only elements.
|
||||
*/
|
||||
UBool isValidOutput(const UnicodeString& output) const;
|
||||
|
||||
/**
|
||||
* Called by main parser upon syntax error. Search the rule string
|
||||
* for the probable end of the rule. Of course, if the error is that
|
||||
|
|
|
@ -44,10 +44,10 @@ public:
|
|||
virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
|
||||
|
||||
/**
|
||||
* Lookup the UnicodeSet associated with the given character, and
|
||||
* Lookup the UnicodeMatcher associated with the given character, and
|
||||
* return it. Return <tt>null</tt> if not found.
|
||||
*/
|
||||
virtual const UnicodeSet* lookupSet(UChar32 ch) const = 0;
|
||||
virtual const UnicodeMatcher* lookupMatcher(UChar32 ch) const = 0;
|
||||
|
||||
/**
|
||||
* Parse a symbol reference name from the given string, starting
|
||||
|
|
|
@ -106,6 +106,13 @@ const UChar32 UnicodeSet::MIN_VALUE = UNICODESET_LOW;
|
|||
*/
|
||||
const UChar32 UnicodeSet::MAX_VALUE = UNICODESET_HIGH - 1;
|
||||
|
||||
// HEY WHAT'S THIS DOING HERE?
|
||||
// This is here until we have sufficient reason to add an entire
|
||||
// separate unimatch.cpp source file just for one line.
|
||||
const char UnicodeMatcher::fgClassID = 0;
|
||||
|
||||
const char UnicodeSet::fgClassID = 0;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Constructors &c
|
||||
//----------------------------------------------------------------
|
||||
|
@ -978,7 +985,12 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
|
|||
if (ivarValueBuffer < varValueBuffer->length()) {
|
||||
c = varValueBuffer->char32At(ivarValueBuffer);
|
||||
ivarValueBuffer += UTF_CHAR_LENGTH(c);
|
||||
nestedSet = symbols->lookupSet(c); // may be NULL
|
||||
const UnicodeMatcher *m = symbols->lookupMatcher(c); // may be NULL
|
||||
if (m != NULL && m->getDynamicClassID() != UnicodeSet::getStaticClassID()) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
nestedSet = (UnicodeSet*) m;
|
||||
nestedPatDone = FALSE;
|
||||
} else {
|
||||
varValueBuffer = NULL;
|
||||
|
|
|
@ -135,6 +135,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
|
|||
TESTCASE(53,TestTitleAccents);
|
||||
TESTCASE(54,TestLocaleResource);
|
||||
TESTCASE(55,TestParseError);
|
||||
TESTCASE(56,TestOutputSet);
|
||||
default: name = ""; break;
|
||||
}
|
||||
}
|
||||
|
@ -2612,6 +2613,7 @@ void TransliteratorTest::TestParseError() {
|
|||
UErrorCode ec = U_ZERO_ERROR;
|
||||
UParseError pe;
|
||||
Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
|
||||
delete t;
|
||||
if (U_FAILURE(ec)) {
|
||||
UnicodeString err(pe.preContext);
|
||||
err.append((UChar)124/*|*/).append(pe.postContext);
|
||||
|
@ -2625,6 +2627,24 @@ void TransliteratorTest::TestParseError() {
|
|||
errln("FAIL: no syntax error");
|
||||
}
|
||||
|
||||
/**
|
||||
* Make sure sets on output are disallowed.
|
||||
*/
|
||||
void TransliteratorTest::TestOutputSet() {
|
||||
UnicodeString rule = "$set = [a-cm-n]; b > $set;";
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
UParseError pe;
|
||||
Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
|
||||
delete t;
|
||||
if (U_FAILURE(ec)) {
|
||||
UnicodeString err(pe.preContext);
|
||||
err.append((UChar)124/*|*/).append(pe.postContext);
|
||||
logln("Ok: " + err);
|
||||
return;
|
||||
}
|
||||
errln("FAIL: No syntax error");
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// icu4c ONLY
|
||||
// These tests are not mirrored (yet) in icu4j at
|
||||
|
|
|
@ -261,6 +261,11 @@ class TransliteratorTest : public IntlTest {
|
|||
*/
|
||||
void TestParseError();
|
||||
|
||||
/**
|
||||
* Make sure sets on output are disallowed.
|
||||
*/
|
||||
void TestOutputSet();
|
||||
|
||||
//======================================================================
|
||||
// Support methods
|
||||
//======================================================================
|
||||
|
|
Loading…
Add table
Reference in a new issue