ICU-1232 disallow UnicodeSets (and other standins) in translit output

X-SVN-Rev: 6699
2025-04-14 17:24:01 +00:00 · 2001-11-09 00:51:54 +00:00 · 2001-11-09 00:51:54 +00:00 · 6422d38661
commit 6422d38661
parent 1b8e587611
8 changed files with 104 additions and 32 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
- * $Date: 2001/11/09 00:11:01 $
- * $Revision: 1.64 $
+ * $Date: 2001/11/09 00:49:49 $
+ * $Revision: 1.65 $
 *
 *****************************************************************************************
 */
@ -1957,6 +1957,21 @@ public class TransliteratorTest extends TestFmwk {
        errln("FAIL: no syntax error");
    }

+    /** Verify that a rule which emits a set variable as its output is rejected. */
+    public void TestOutputSet() {
+        String rule = "$set = [a-cm-n]; b > $set;";
+        boolean rejected = false;
+        try {
+            Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
+        } catch (IllegalArgumentException expected) {
+            rejected = true;
+            logln("Ok: " + expected.getMessage());
+        }
+        if (!rejected) {
+            errln("FAIL: No syntax error");
+        }
+    }
+
    //======================================================================
    // icu4j ONLY
    // These tests are not mirrored (yet) in icu4c at
--- a/icu4j/src/com/ibm/icu/text/SymbolTable.java
+++ b/icu4j/src/com/ibm/icu/text/SymbolTable.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SymbolTable.java,v $ 
- * $Date: 2001/09/24 19:57:18 $ 
- * $Revision: 1.7 $
+ * $Date: 2001/11/09 00:51:53 $ 
+ * $Revision: 1.8 $
 *
 *****************************************************************************************
 */
@ -39,11 +39,11 @@ public interface SymbolTable {
    char[] lookup(String s);

    /**
-     * Lookup the UnicodeSet associated with the given character, and
+     * Lookup the UnicodeMatcher associated with the given character, and
     * return it.  Return <tt>null</tt> if not found.
     * @param ch a 32-bit code point from 0 to 0x10FFFF.
     */
-    UnicodeSet lookupSet(int ch);
+    UnicodeMatcher lookupMatcher(int ch);

    /**
     * Parse a symbol reference name from the given string, starting
--- a/icu4j/src/com/ibm/icu/text/TransliteratorParser.java
+++ b/icu4j/src/com/ibm/icu/text/TransliteratorParser.java
@ -4,8 +4,8 @@
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $
-* $Date: 2001/10/30 18:04:09 $
-* $Revision: 1.8 $
+* $Date: 2001/11/09 00:51:53 $
+* $Revision: 1.9 $
 **********************************************************************
 */
 package com.ibm.text;
@ -169,12 +169,12 @@ class TransliteratorParser {
        /**
         * Implement SymbolTable API.
         */
-        public UnicodeSet lookupSet(int ch) {
-            // Note that we cannot use data.lookupSet() because the
+        public UnicodeMatcher lookupMatcher(int ch) {
+            // Note that we cannot use data.lookup() because the
            // set array has not been constructed yet.
            int i = ch - data.variablesBase;
            if (i >= 0 && i < variablesVector.size()) {
-                return (UnicodeSet) variablesVector.elementAt(i);
+                return (UnicodeMatcher) variablesVector.elementAt(i);
            }
            return null;
        }
@ -1091,7 +1091,8 @@ class TransliteratorParser {
            // - allow arbitrary cursor offsets and do runtime checking.
            //(right.cursorOffset > (left.text.length() - left.post)) ||
            //(-right.cursorOffset > left.ante) ||
-            right.anchorStart || right.anchorEnd) {
+            right.anchorStart || right.anchorEnd ||
+            !isValidOutput(right.text)) {
            syntaxError("Malformed rule", rule, start);
        }

@ -1112,6 +1113,21 @@ class TransliteratorParser {
        return pos;
    }

+    /**
+     * Return true if the given string looks like valid output, that is, no
+     * code point in it is a stand-in that parseData maps to a UnicodeMatcher.
+     */
+    private boolean isValidOutput(String output) {
+        for (int i=0; i<output.length(); ) {
+            int c = UTF16.charAt(output, i);
+            i += UTF16.getCharCount(c); // sole advance; an extra ++i skips chars
+            if (parseData.lookupMatcher(c) != null) {
+                return false;
+            }
+        }
+        return true;
+    }
+
    /**
     * Set the variable range to [start, end] (inclusive).
     */
--- a/icu4j/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
- * $Date: 2001/11/01 16:53:04 $
- * $Revision: 1.43 $
+ * $Date: 2001/11/09 00:51:54 $
+ * $Revision: 1.44 $
 *
 *****************************************************************************************
 */
@ -220,7 +220,7 @@ import com.ibm.util.Utility;
 * added in the future.
 *
 * @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.43 $ $Date: 2001/11/01 16:53:04 $
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.44 $ $Date: 2001/11/09 00:51:54 $
 */
 public class UnicodeSet extends UnicodeFilter {

@ -1231,7 +1231,12 @@ public class UnicodeSet extends UnicodeFilter {
                if (ivarValueBuffer < varValueBuffer.length) {
                    c = UTF16.charAt(varValueBuffer, 0, varValueBuffer.length, ivarValueBuffer);
                    ivarValueBuffer += UTF16.getCharCount(c);
-                    nestedSet = symbols.lookupSet(c); // may be NULL
+                    UnicodeMatcher m = symbols.lookupMatcher(c); // may be NULL
+                    try {
+                        nestedSet = (UnicodeSet) m;
+                    } catch (ClassCastException e) {
+                        throw new IllegalArgumentException("Syntax error");
+                    }
                    nestedPatDone = false;
                } else {
                    varValueBuffer = null;
--- a/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
- * $Date: 2001/11/09 00:11:01 $
- * $Revision: 1.64 $
+ * $Date: 2001/11/09 00:49:49 $
+ * $Revision: 1.65 $
 *
 *****************************************************************************************
 */
@ -1957,6 +1957,21 @@ public class TransliteratorTest extends TestFmwk {
        errln("FAIL: no syntax error");
    }

+    /** Verify that a rule which emits a set variable as its output is rejected. */
+    public void TestOutputSet() {
+        String rule = "$set = [a-cm-n]; b > $set;";
+        boolean rejected = false;
+        try {
+            Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
+        } catch (IllegalArgumentException expected) {
+            rejected = true;
+            logln("Ok: " + expected.getMessage());
+        }
+        if (!rejected) {
+            errln("FAIL: No syntax error");
+        }
+    }
+
    //======================================================================
    // icu4j ONLY
    // These tests are not mirrored (yet) in icu4c at
--- a/icu4j/src/com/ibm/text/SymbolTable.java
+++ b/icu4j/src/com/ibm/text/SymbolTable.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/SymbolTable.java,v $ 
- * $Date: 2001/09/24 19:57:18 $ 
- * $Revision: 1.7 $
+ * $Date: 2001/11/09 00:51:53 $ 
+ * $Revision: 1.8 $
 *
 *****************************************************************************************
 */
@ -39,11 +39,11 @@ public interface SymbolTable {
    char[] lookup(String s);

    /**
-     * Lookup the UnicodeSet associated with the given character, and
+     * Lookup the UnicodeMatcher associated with the given character, and
     * return it.  Return <tt>null</tt> if not found.
     * @param ch a 32-bit code point from 0 to 0x10FFFF.
     */
-    UnicodeSet lookupSet(int ch);
+    UnicodeMatcher lookupMatcher(int ch);

    /**
     * Parse a symbol reference name from the given string, starting
--- a/icu4j/src/com/ibm/text/TransliteratorParser.java
+++ b/icu4j/src/com/ibm/text/TransliteratorParser.java
@ -4,8 +4,8 @@
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliteratorParser.java,v $
-* $Date: 2001/10/30 18:04:09 $
-* $Revision: 1.8 $
+* $Date: 2001/11/09 00:51:53 $
+* $Revision: 1.9 $
 **********************************************************************
 */
 package com.ibm.text;
@ -169,12 +169,12 @@ class TransliteratorParser {
        /**
         * Implement SymbolTable API.
         */
-        public UnicodeSet lookupSet(int ch) {
-            // Note that we cannot use data.lookupSet() because the
+        public UnicodeMatcher lookupMatcher(int ch) {
+            // Note that we cannot use data.lookup() because the
            // set array has not been constructed yet.
            int i = ch - data.variablesBase;
            if (i >= 0 && i < variablesVector.size()) {
-                return (UnicodeSet) variablesVector.elementAt(i);
+                return (UnicodeMatcher) variablesVector.elementAt(i);
            }
            return null;
        }
@ -1091,7 +1091,8 @@ class TransliteratorParser {
            // - allow arbitrary cursor offsets and do runtime checking.
            //(right.cursorOffset > (left.text.length() - left.post)) ||
            //(-right.cursorOffset > left.ante) ||
-            right.anchorStart || right.anchorEnd) {
+            right.anchorStart || right.anchorEnd ||
+            !isValidOutput(right.text)) {
            syntaxError("Malformed rule", rule, start);
        }

@ -1112,6 +1113,21 @@ class TransliteratorParser {
        return pos;
    }

+    /**
+     * Return true if the given string looks like valid output, that is, no
+     * code point in it is a stand-in that parseData maps to a UnicodeMatcher.
+     */
+    private boolean isValidOutput(String output) {
+        for (int i=0; i<output.length(); ) {
+            int c = UTF16.charAt(output, i);
+            i += UTF16.getCharCount(c); // sole advance; an extra ++i skips chars
+            if (parseData.lookupMatcher(c) != null) {
+                return false;
+            }
+        }
+        return true;
+    }
+
    /**
     * Set the variable range to [start, end] (inclusive).
     */
--- a/icu4j/src/com/ibm/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/text/UnicodeSet.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
- * $Date: 2001/11/01 16:53:04 $
- * $Revision: 1.43 $
+ * $Date: 2001/11/09 00:51:54 $
+ * $Revision: 1.44 $
 *
 *****************************************************************************************
 */
@ -220,7 +220,7 @@ import com.ibm.util.Utility;
 * added in the future.
 *
 * @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.43 $ $Date: 2001/11/01 16:53:04 $
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.44 $ $Date: 2001/11/09 00:51:54 $
 */
 public class UnicodeSet extends UnicodeFilter {

@ -1231,7 +1231,12 @@ public class UnicodeSet extends UnicodeFilter {
                if (ivarValueBuffer < varValueBuffer.length) {
                    c = UTF16.charAt(varValueBuffer, 0, varValueBuffer.length, ivarValueBuffer);
                    ivarValueBuffer += UTF16.getCharCount(c);
-                    nestedSet = symbols.lookupSet(c); // may be NULL
+                    UnicodeMatcher m = symbols.lookupMatcher(c); // may be NULL
+                    try {
+                        nestedSet = (UnicodeSet) m;
+                    } catch (ClassCastException e) {
+                        throw new IllegalArgumentException("Syntax error");
+                    }
                    nestedPatDone = false;
                } else {
                    varValueBuffer = null;