ICU-1232 disallow UnicodeSets (and other standins) in translit output

X-SVN-Rev: 6699
This commit is contained in:
Alan Liu 2001-11-09 00:51:54 +00:00
parent 1b8e587611
commit 6422d38661
8 changed files with 104 additions and 32 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
* $Date: 2001/11/09 00:11:01 $
* $Revision: 1.64 $
* $Date: 2001/11/09 00:49:49 $
* $Revision: 1.65 $
*
*****************************************************************************************
*/
@ -1957,6 +1957,21 @@ public class TransliteratorTest extends TestFmwk {
errln("FAIL: no syntax error");
}
/**
 * Verify that a rule whose output side refers to a set variable is
 * rejected when the transliterator is built from rules.  The test
 * passes when createFromRules() throws IllegalArgumentException and
 * reports a failure when construction succeeds.
 */
public void TestOutputSet() {
    final String rule = "$set = [a-cm-n]; b > $set;";
    boolean rejected = false;
    try {
        // Only the exception matters; the constructed object is not needed.
        Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
    } catch (IllegalArgumentException e) {
        rejected = true;
        logln("Ok: " + e.getMessage());
    }
    if (!rejected) {
        errln("FAIL: No syntax error");
    }
}
//======================================================================
// icu4j ONLY
// These tests are not mirrored (yet) in icu4c at

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SymbolTable.java,v $
* $Date: 2001/09/24 19:57:18 $
* $Revision: 1.7 $
* $Date: 2001/11/09 00:51:53 $
* $Revision: 1.8 $
*
*****************************************************************************************
*/
@ -39,11 +39,11 @@ public interface SymbolTable {
char[] lookup(String s);
/**
* Lookup the UnicodeSet associated with the given character, and
* Lookup the UnicodeMatcher associated with the given character, and
* return it. Return <tt>null</tt> if not found.
* @param ch a 32-bit code point from 0 to 0x10FFFF.
*/
UnicodeSet lookupSet(int ch);
UnicodeMatcher lookupMatcher(int ch);
/**
* Parse a symbol reference name from the given string, starting

View file

@ -4,8 +4,8 @@
* Corporation and others. All Rights Reserved.
**********************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $
* $Date: 2001/10/30 18:04:09 $
* $Revision: 1.8 $
* $Date: 2001/11/09 00:51:53 $
* $Revision: 1.9 $
**********************************************************************
*/
package com.ibm.text;
@ -169,12 +169,12 @@ class TransliteratorParser {
/**
* Implement SymbolTable API.
*/
public UnicodeSet lookupSet(int ch) {
// Note that we cannot use data.lookupSet() because the
public UnicodeMatcher lookupMatcher(int ch) {
// Note that we cannot use data.lookup() because the
// set array has not been constructed yet.
int i = ch - data.variablesBase;
if (i >= 0 && i < variablesVector.size()) {
return (UnicodeSet) variablesVector.elementAt(i);
return (UnicodeMatcher) variablesVector.elementAt(i);
}
return null;
}
@ -1091,7 +1091,8 @@ class TransliteratorParser {
// - allow arbitrary cursor offsets and do runtime checking.
//(right.cursorOffset > (left.text.length() - left.post)) ||
//(-right.cursorOffset > left.ante) ||
right.anchorStart || right.anchorEnd) {
right.anchorStart || right.anchorEnd ||
!isValidOutput(right.text)) {
syntaxError("Malformed rule", rule, start);
}
@ -1112,6 +1113,21 @@ class TransliteratorParser {
return pos;
}
/**
 * Return true if the given string looks like valid output, that is,
 * does not contain quantifiers or other special input-only elements.
 * Every code point of the string is looked up through
 * parseData.lookupMatcher(); if any code point maps to a matcher the
 * string is rejected.
 *
 * @param output the parsed output (right-hand side) text of a rule
 * @return false if any code point in output resolves to a
 * UnicodeMatcher, true otherwise (including for an empty string)
 */
private boolean isValidOutput(String output) {
    // No increment in the loop header: the index is advanced in the
    // body by the UTF-16 length of the code point just read.  Stepping
    // in both places would skip every other character and let a
    // stand-in at a skipped position go undetected.
    for (int i = 0; i < output.length(); ) {
        int c = UTF16.charAt(output, i);
        i += UTF16.getCharCount(c);
        if (parseData.lookupMatcher(c) != null) {
            return false;
        }
    }
    return true;
}
/**
* Set the variable range to [start, end] (inclusive).
*/

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
* $Date: 2001/11/01 16:53:04 $
* $Revision: 1.43 $
* $Date: 2001/11/09 00:51:54 $
* $Revision: 1.44 $
*
*****************************************************************************************
*/
@ -220,7 +220,7 @@ import com.ibm.util.Utility;
* added in the future.
*
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.43 $ $Date: 2001/11/01 16:53:04 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.44 $ $Date: 2001/11/09 00:51:54 $
*/
public class UnicodeSet extends UnicodeFilter {
@ -1231,7 +1231,12 @@ public class UnicodeSet extends UnicodeFilter {
if (ivarValueBuffer < varValueBuffer.length) {
c = UTF16.charAt(varValueBuffer, 0, varValueBuffer.length, ivarValueBuffer);
ivarValueBuffer += UTF16.getCharCount(c);
nestedSet = symbols.lookupSet(c); // may be NULL
UnicodeMatcher m = symbols.lookupMatcher(c); // may be NULL
try {
nestedSet = (UnicodeSet) m;
} catch (ClassCastException e) {
throw new IllegalArgumentException("Syntax error");
}
nestedPatDone = false;
} else {
varValueBuffer = null;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
* $Date: 2001/11/09 00:11:01 $
* $Revision: 1.64 $
* $Date: 2001/11/09 00:49:49 $
* $Revision: 1.65 $
*
*****************************************************************************************
*/
@ -1957,6 +1957,21 @@ public class TransliteratorTest extends TestFmwk {
errln("FAIL: no syntax error");
}
/**
 * Verify that a rule whose output side refers to a set variable is
 * rejected when the transliterator is built from rules.  The test
 * passes when createFromRules() throws IllegalArgumentException and
 * reports a failure when construction succeeds.
 */
public void TestOutputSet() {
    final String rule = "$set = [a-cm-n]; b > $set;";
    boolean rejected = false;
    try {
        // Only the exception matters; the constructed object is not needed.
        Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
    } catch (IllegalArgumentException e) {
        rejected = true;
        logln("Ok: " + e.getMessage());
    }
    if (!rejected) {
        errln("FAIL: No syntax error");
    }
}
//======================================================================
// icu4j ONLY
// These tests are not mirrored (yet) in icu4c at

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/SymbolTable.java,v $
* $Date: 2001/09/24 19:57:18 $
* $Revision: 1.7 $
* $Date: 2001/11/09 00:51:53 $
* $Revision: 1.8 $
*
*****************************************************************************************
*/
@ -39,11 +39,11 @@ public interface SymbolTable {
char[] lookup(String s);
/**
* Lookup the UnicodeSet associated with the given character, and
* Lookup the UnicodeMatcher associated with the given character, and
* return it. Return <tt>null</tt> if not found.
* @param ch a 32-bit code point from 0 to 0x10FFFF.
*/
UnicodeSet lookupSet(int ch);
UnicodeMatcher lookupMatcher(int ch);
/**
* Parse a symbol reference name from the given string, starting

View file

@ -4,8 +4,8 @@
* Corporation and others. All Rights Reserved.
**********************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliteratorParser.java,v $
* $Date: 2001/10/30 18:04:09 $
* $Revision: 1.8 $
* $Date: 2001/11/09 00:51:53 $
* $Revision: 1.9 $
**********************************************************************
*/
package com.ibm.text;
@ -169,12 +169,12 @@ class TransliteratorParser {
/**
* Implement SymbolTable API.
*/
public UnicodeSet lookupSet(int ch) {
// Note that we cannot use data.lookupSet() because the
public UnicodeMatcher lookupMatcher(int ch) {
// Note that we cannot use data.lookup() because the
// set array has not been constructed yet.
int i = ch - data.variablesBase;
if (i >= 0 && i < variablesVector.size()) {
return (UnicodeSet) variablesVector.elementAt(i);
return (UnicodeMatcher) variablesVector.elementAt(i);
}
return null;
}
@ -1091,7 +1091,8 @@ class TransliteratorParser {
// - allow arbitrary cursor offsets and do runtime checking.
//(right.cursorOffset > (left.text.length() - left.post)) ||
//(-right.cursorOffset > left.ante) ||
right.anchorStart || right.anchorEnd) {
right.anchorStart || right.anchorEnd ||
!isValidOutput(right.text)) {
syntaxError("Malformed rule", rule, start);
}
@ -1112,6 +1113,21 @@ class TransliteratorParser {
return pos;
}
/**
 * Return true if the given string looks like valid output, that is,
 * does not contain quantifiers or other special input-only elements.
 * Every code point of the string is looked up through
 * parseData.lookupMatcher(); if any code point maps to a matcher the
 * string is rejected.
 *
 * @param output the parsed output (right-hand side) text of a rule
 * @return false if any code point in output resolves to a
 * UnicodeMatcher, true otherwise (including for an empty string)
 */
private boolean isValidOutput(String output) {
    // No increment in the loop header: the index is advanced in the
    // body by the UTF-16 length of the code point just read.  Stepping
    // in both places would skip every other character and let a
    // stand-in at a skipped position go undetected.
    for (int i = 0; i < output.length(); ) {
        int c = UTF16.charAt(output, i);
        i += UTF16.getCharCount(c);
        if (parseData.lookupMatcher(c) != null) {
            return false;
        }
    }
    return true;
}
/**
* Set the variable range to [start, end] (inclusive).
*/

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
* $Date: 2001/11/01 16:53:04 $
* $Revision: 1.43 $
* $Date: 2001/11/09 00:51:54 $
* $Revision: 1.44 $
*
*****************************************************************************************
*/
@ -220,7 +220,7 @@ import com.ibm.util.Utility;
* added in the future.
*
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.43 $ $Date: 2001/11/01 16:53:04 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.44 $ $Date: 2001/11/09 00:51:54 $
*/
public class UnicodeSet extends UnicodeFilter {
@ -1231,7 +1231,12 @@ public class UnicodeSet extends UnicodeFilter {
if (ivarValueBuffer < varValueBuffer.length) {
c = UTF16.charAt(varValueBuffer, 0, varValueBuffer.length, ivarValueBuffer);
ivarValueBuffer += UTF16.getCharCount(c);
nestedSet = symbols.lookupSet(c); // may be NULL
UnicodeMatcher m = symbols.lookupMatcher(c); // may be NULL
try {
nestedSet = (UnicodeSet) m;
} catch (ClassCastException e) {
throw new IllegalArgumentException("Syntax error");
}
nestedPatDone = false;
} else {
varValueBuffer = null;