mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-1232 disallow UnicodeSets (and other standins) in translit output
X-SVN-Rev: 6699
This commit is contained in:
parent
1b8e587611
commit
6422d38661
8 changed files with 104 additions and 32 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
|
||||
* $Date: 2001/11/09 00:11:01 $
|
||||
* $Revision: 1.64 $
|
||||
* $Date: 2001/11/09 00:49:49 $
|
||||
* $Revision: 1.65 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -1957,6 +1957,21 @@ public class TransliteratorTest extends TestFmwk {
|
|||
errln("FAIL: no syntax error");
|
||||
}
|
||||
|
||||
/**
|
||||
* Make sure sets on output are disallowed.
|
||||
*/
|
||||
public void TestOutputSet() {
|
||||
String rule = "$set = [a-cm-n]; b > $set;";
|
||||
Transliterator t = null;
|
||||
try {
|
||||
t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
|
||||
} catch (IllegalArgumentException e) {
|
||||
logln("Ok: " + e.getMessage());
|
||||
return;
|
||||
}
|
||||
errln("FAIL: No syntax error");
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// icu4j ONLY
|
||||
// These tests are not mirrored (yet) in icu4c at
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SymbolTable.java,v $
|
||||
* $Date: 2001/09/24 19:57:18 $
|
||||
* $Revision: 1.7 $
|
||||
* $Date: 2001/11/09 00:51:53 $
|
||||
* $Revision: 1.8 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -39,11 +39,11 @@ public interface SymbolTable {
|
|||
char[] lookup(String s);
|
||||
|
||||
/**
|
||||
* Lookup the UnicodeSet associated with the given character, and
|
||||
* Lookup the UnicodeMatcher associated with the given character, and
|
||||
* return it. Return <tt>null</tt> if not found.
|
||||
* @param ch a 32-bit code point from 0 to 0x10FFFF.
|
||||
*/
|
||||
UnicodeSet lookupSet(int ch);
|
||||
UnicodeMatcher lookupMatcher(int ch);
|
||||
|
||||
/**
|
||||
* Parse a symbol reference name from the given string, starting
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $
|
||||
* $Date: 2001/10/30 18:04:09 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2001/11/09 00:51:53 $
|
||||
* $Revision: 1.9 $
|
||||
**********************************************************************
|
||||
*/
|
||||
package com.ibm.text;
|
||||
|
@ -169,12 +169,12 @@ class TransliteratorParser {
|
|||
/**
|
||||
* Implement SymbolTable API.
|
||||
*/
|
||||
public UnicodeSet lookupSet(int ch) {
|
||||
// Note that we cannot use data.lookupSet() because the
|
||||
public UnicodeMatcher lookupMatcher(int ch) {
|
||||
// Note that we cannot use data.lookup() because the
|
||||
// set array has not been constructed yet.
|
||||
int i = ch - data.variablesBase;
|
||||
if (i >= 0 && i < variablesVector.size()) {
|
||||
return (UnicodeSet) variablesVector.elementAt(i);
|
||||
return (UnicodeMatcher) variablesVector.elementAt(i);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
@ -1091,7 +1091,8 @@ class TransliteratorParser {
|
|||
// - allow arbitrary cursor offsets and do runtime checking.
|
||||
//(right.cursorOffset > (left.text.length() - left.post)) ||
|
||||
//(-right.cursorOffset > left.ante) ||
|
||||
right.anchorStart || right.anchorEnd) {
|
||||
right.anchorStart || right.anchorEnd ||
|
||||
!isValidOutput(right.text)) {
|
||||
syntaxError("Malformed rule", rule, start);
|
||||
}
|
||||
|
||||
|
@ -1112,6 +1113,21 @@ class TransliteratorParser {
|
|||
return pos;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the given string looks like valid output, that is,
|
||||
* does not contain quantifiers or other special input-only elements.
|
||||
*/
|
||||
private boolean isValidOutput(String output) {
|
||||
for (int i=0; i<output.length(); ++i) {
|
||||
int c = UTF16.charAt(output, i);
|
||||
i += UTF16.getCharCount(c);
|
||||
if (parseData.lookupMatcher(c) != null) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the variable range to [start, end] (inclusive).
|
||||
*/
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
|
||||
* $Date: 2001/11/01 16:53:04 $
|
||||
* $Revision: 1.43 $
|
||||
* $Date: 2001/11/09 00:51:54 $
|
||||
* $Revision: 1.44 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -220,7 +220,7 @@ import com.ibm.util.Utility;
|
|||
* added in the future.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.43 $ $Date: 2001/11/01 16:53:04 $
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.44 $ $Date: 2001/11/09 00:51:54 $
|
||||
*/
|
||||
public class UnicodeSet extends UnicodeFilter {
|
||||
|
||||
|
@ -1231,7 +1231,12 @@ public class UnicodeSet extends UnicodeFilter {
|
|||
if (ivarValueBuffer < varValueBuffer.length) {
|
||||
c = UTF16.charAt(varValueBuffer, 0, varValueBuffer.length, ivarValueBuffer);
|
||||
ivarValueBuffer += UTF16.getCharCount(c);
|
||||
nestedSet = symbols.lookupSet(c); // may be NULL
|
||||
UnicodeMatcher m = symbols.lookupMatcher(c); // may be NULL
|
||||
try {
|
||||
nestedSet = (UnicodeSet) m;
|
||||
} catch (ClassCastException e) {
|
||||
throw new IllegalArgumentException("Syntax error");
|
||||
}
|
||||
nestedPatDone = false;
|
||||
} else {
|
||||
varValueBuffer = null;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
|
||||
* $Date: 2001/11/09 00:11:01 $
|
||||
* $Revision: 1.64 $
|
||||
* $Date: 2001/11/09 00:49:49 $
|
||||
* $Revision: 1.65 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -1957,6 +1957,21 @@ public class TransliteratorTest extends TestFmwk {
|
|||
errln("FAIL: no syntax error");
|
||||
}
|
||||
|
||||
/**
|
||||
* Make sure sets on output are disallowed.
|
||||
*/
|
||||
public void TestOutputSet() {
|
||||
String rule = "$set = [a-cm-n]; b > $set;";
|
||||
Transliterator t = null;
|
||||
try {
|
||||
t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
|
||||
} catch (IllegalArgumentException e) {
|
||||
logln("Ok: " + e.getMessage());
|
||||
return;
|
||||
}
|
||||
errln("FAIL: No syntax error");
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// icu4j ONLY
|
||||
// These tests are not mirrored (yet) in icu4c at
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/SymbolTable.java,v $
|
||||
* $Date: 2001/09/24 19:57:18 $
|
||||
* $Revision: 1.7 $
|
||||
* $Date: 2001/11/09 00:51:53 $
|
||||
* $Revision: 1.8 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -39,11 +39,11 @@ public interface SymbolTable {
|
|||
char[] lookup(String s);
|
||||
|
||||
/**
|
||||
* Lookup the UnicodeSet associated with the given character, and
|
||||
* Lookup the UnicodeMatcher associated with the given character, and
|
||||
* return it. Return <tt>null</tt> if not found.
|
||||
* @param ch a 32-bit code point from 0 to 0x10FFFF.
|
||||
*/
|
||||
UnicodeSet lookupSet(int ch);
|
||||
UnicodeMatcher lookupMatcher(int ch);
|
||||
|
||||
/**
|
||||
* Parse a symbol reference name from the given string, starting
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliteratorParser.java,v $
|
||||
* $Date: 2001/10/30 18:04:09 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2001/11/09 00:51:53 $
|
||||
* $Revision: 1.9 $
|
||||
**********************************************************************
|
||||
*/
|
||||
package com.ibm.text;
|
||||
|
@ -169,12 +169,12 @@ class TransliteratorParser {
|
|||
/**
|
||||
* Implement SymbolTable API.
|
||||
*/
|
||||
public UnicodeSet lookupSet(int ch) {
|
||||
// Note that we cannot use data.lookupSet() because the
|
||||
public UnicodeMatcher lookupMatcher(int ch) {
|
||||
// Note that we cannot use data.lookup() because the
|
||||
// set array has not been constructed yet.
|
||||
int i = ch - data.variablesBase;
|
||||
if (i >= 0 && i < variablesVector.size()) {
|
||||
return (UnicodeSet) variablesVector.elementAt(i);
|
||||
return (UnicodeMatcher) variablesVector.elementAt(i);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
@ -1091,7 +1091,8 @@ class TransliteratorParser {
|
|||
// - allow arbitrary cursor offsets and do runtime checking.
|
||||
//(right.cursorOffset > (left.text.length() - left.post)) ||
|
||||
//(-right.cursorOffset > left.ante) ||
|
||||
right.anchorStart || right.anchorEnd) {
|
||||
right.anchorStart || right.anchorEnd ||
|
||||
!isValidOutput(right.text)) {
|
||||
syntaxError("Malformed rule", rule, start);
|
||||
}
|
||||
|
||||
|
@ -1112,6 +1113,21 @@ class TransliteratorParser {
|
|||
return pos;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the given string looks like valid output, that is,
|
||||
* does not contain quantifiers or other special input-only elements.
|
||||
*/
|
||||
private boolean isValidOutput(String output) {
|
||||
for (int i=0; i<output.length(); ++i) {
|
||||
int c = UTF16.charAt(output, i);
|
||||
i += UTF16.getCharCount(c);
|
||||
if (parseData.lookupMatcher(c) != null) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the variable range to [start, end] (inclusive).
|
||||
*/
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
|
||||
* $Date: 2001/11/01 16:53:04 $
|
||||
* $Revision: 1.43 $
|
||||
* $Date: 2001/11/09 00:51:54 $
|
||||
* $Revision: 1.44 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -220,7 +220,7 @@ import com.ibm.util.Utility;
|
|||
* added in the future.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.43 $ $Date: 2001/11/01 16:53:04 $
|
||||
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.44 $ $Date: 2001/11/09 00:51:54 $
|
||||
*/
|
||||
public class UnicodeSet extends UnicodeFilter {
|
||||
|
||||
|
@ -1231,7 +1231,12 @@ public class UnicodeSet extends UnicodeFilter {
|
|||
if (ivarValueBuffer < varValueBuffer.length) {
|
||||
c = UTF16.charAt(varValueBuffer, 0, varValueBuffer.length, ivarValueBuffer);
|
||||
ivarValueBuffer += UTF16.getCharCount(c);
|
||||
nestedSet = symbols.lookupSet(c); // may be NULL
|
||||
UnicodeMatcher m = symbols.lookupMatcher(c); // may be NULL
|
||||
try {
|
||||
nestedSet = (UnicodeSet) m;
|
||||
} catch (ClassCastException e) {
|
||||
throw new IllegalArgumentException("Syntax error");
|
||||
}
|
||||
nestedPatDone = false;
|
||||
} else {
|
||||
varValueBuffer = null;
|
||||
|
|
Loading…
Add table
Reference in a new issue