Delegate variable name parsing to SymbolTable interface to consolidate parsing code.

X-SVN-Rev: 1212
2025-04-13 08:53:20 +00:00 · 2000-04-21 22:16:29 +00:00 · 2000-04-21 22:16:29 +00:00 · aa61987272
commit aa61987272
parent ffaec2e342
6 changed files with 168 additions and 150 deletions
--- a/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $ 
- * $Date: 2000/04/21 21:16:40 $ 
- * $Revision: 1.21 $
+ * $Date: 2000/04/21 22:16:29 $ 
+ * $Revision: 1.22 $
 *
 *****************************************************************************************
 */
@ -274,7 +274,7 @@ import com.ibm.util.Utility;
 * <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
 *
 * @author Alan Liu
- * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.21 $ $Date: 2000/04/21 21:16:40 $
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.22 $ $Date: 2000/04/21 22:16:29 $
 */
 public class RuleBasedTransliterator extends Transliterator {

@ -534,6 +534,31 @@ public class RuleBasedTransliterator extends Transliterator {
                }
                return ch;
            }
+
+            /**
+             * Implement SymbolTable API.  Parse out a symbol reference
+             * name.
+             */
+            public String parseReference(String text, ParsePosition pos, int limit) {
+                int start = pos.getIndex();
+                int i = start;
+                if (i < limit) {
+                    char c = text.charAt(i);
+                    if (Character.isUnicodeIdentifierStart(c)) {
+                        ++i;
+                        while (i < limit &&
+                               Character.isUnicodeIdentifierPart(text.charAt(i))) {
+                            ++i;
+                        }
+                    }
+                }
+                if (i == start) { // No valid name chars
+                    throw new IllegalArgumentException("Illegal variable reference " +
+                                                       text.substring(start, limit));
+                }
+                pos.setIndex(i);
+                return text.substring(start, i);
+            }
        }

        /**
@ -587,7 +612,6 @@ public class RuleBasedTransliterator extends Transliterator {
        private static final char END_OF_RULE         = ';';
        private static final char RULE_COMMENT_CHAR   = '#';

-        private static final char VARIABLE_REF        = '$'; // also segment refs
        private static final char CONTEXT_ANTE        = '{'; // ante{key
        private static final char CONTEXT_POST        = '}'; // key}post
        private static final char SET_OPEN            = '[';
@ -711,6 +735,7 @@ public class RuleBasedTransliterator extends Transliterator {
                             RuleBasedTransliterator.Parser parser) {
                int start = pos;
                StringBuffer buf = new StringBuffer();
+                ParsePosition pp = null;

            main:
                while (pos < limit) {
@ -781,7 +806,7 @@ public class RuleBasedTransliterator extends Transliterator {
                    case END_OF_RULE:
                        --pos; // Backup to point to END_OF_RULE
                        break main;
-                    case VARIABLE_REF:
+                    case SymbolTable.SYMBOL_REF:
                        // Handle variable references and segment references "$1" .. "$9"
                        {
                            // A variable reference must be followed immediately
@ -792,28 +817,26 @@ public class RuleBasedTransliterator extends Transliterator {
                                syntaxError("Trailing " + c, rule, start);
                            }
                            // Parse "$1" "$2" .. "$9"
-                            c = rule.charAt(pos++);
+                            c = rule.charAt(pos);
                            int r = Character.digit(c, 10);
                            if (r >= 1 && r <= 9) {
                                if (r > maxRef) {
                                    maxRef = r;
                                }
                                buf.append((char) (parser.data.segmentBase + r - 1));
-                            } else if (Character.isUnicodeIdentifierStart(c)) {
-                                int j = pos;
-                                while (j < limit &&
-                                       Character.isUnicodeIdentifierPart(rule.charAt(j))) {
-                                    ++j;
+                                ++pos;
+                            } else {
+                                if (pp == null) { // Lazy create
+                                    pp = new ParsePosition(0);
                                }
-                                String name = rule.substring(pos-1, j);
-                                pos = j;
+                                pp.setIndex(pos);
+                                String name = parser.parseData.
+                                                parseReference(rule, pp, limit);
+                                pos = pp.getIndex();
                                // If this is a variable definition statement, then the LHS
                                // variable will be undefined.  In that case getVariableName()
                                // will return the special placeholder variableLimit-1.
                                buf.append(parser.getVariableDef(name));
-                            } else {
-                                syntaxError("Illegal char after " + VARIABLE_REF,
-                                            rule, start);
                            }
                        }
                        break;
@ -830,7 +853,10 @@ public class RuleBasedTransliterator extends Transliterator {
                        post = buf.length();
                        break;
                    case SET_OPEN:
-                        ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
+                        if (pp == null) {
+                            pp = new ParsePosition(0);
+                        }
+                        pp.setIndex(pos-1); // Backup to opening '['
                        buf.append(parser.registerSet(new UnicodeSet(rule, pp, parser.parseData)));
                        pos = pp.getIndex();
                        break;
@ -1231,6 +1257,9 @@ public class RuleBasedTransliterator extends Transliterator {

 /**
 * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.22  2000/04/21 22:16:29  alan
+ * Delegate variable name parsing to SymbolTable interface to consolidate parsing code.
+ *
 * Revision 1.21  2000/04/21 21:16:40  alan
 * Modify rule syntax
 *
--- a/icu4j/src/com/ibm/icu/text/SymbolTable.java
+++ b/icu4j/src/com/ibm/icu/text/SymbolTable.java
@ -5,21 +5,49 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SymbolTable.java,v $ 
- * $Date: 2000/03/10 04:07:24 $ 
- * $Revision: 1.2 $
+ * $Date: 2000/04/21 22:16:29 $ 
+ * $Revision: 1.3 $
 *
 *****************************************************************************************
 */
 package com.ibm.text;
+import java.text.ParsePosition;

 /**
- * An interface that maps strings to objects.
+ * An interface that maps strings to objects.  This interface defines
+ * both lookup protocol and parsing.  This allows different components
+ * to share a symbol table and to handle name parsing uniformly.  It
+ * is expected that client parse code look for the SYMBOL_REF
+ * character and, when seen, attempt to parse the characters after it
+ * using parseReference().
+ *
+ * <p>Currently, RuleBasedTransliterator and UnicodeSet use this
+ * interface to share variable definitions.
 */
 public interface SymbolTable {

+    /**
+     * The character preceding a symbol reference name.
+     */
+    final char SYMBOL_REF = '$';
+
    /**
     * Lookup the object associated with this string and return it.
     * Return <tt>null</tt> if no such name exists.
     */
    Object lookup(String s);
+
+    /**
+     * Parse a symbol reference name from the given string, starting
+     * at the given position.  If no valid symbol reference name is
+     * found, throw an exception.
+     * @param text the text to parse for the name
+     * @param pos on entry, the index of the first character to parse.
+     * This is the character following the SYMBOL_REF character.  On
+     * exit, the index after the last parsed character.
+     * @param limit the index after the last character to be parsed.
+     * @return the parsed name.
+     * @exception IllegalArgumentException if no valid name is found.
+     */
+    String parseReference(String text, ParsePosition pos, int limit);
 }
--- a/icu4j/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $ 
- * $Date: 2000/04/21 21:16:40 $ 
- * $Revision: 1.17 $
+ * $Date: 2000/04/21 22:16:29 $ 
+ * $Revision: 1.18 $
 *
 *****************************************************************************************
 */
@ -241,7 +241,7 @@ import java.text.*;
 * *Unsupported by Java (and hence unsupported by UnicodeSet).
 *
 * @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.18 $ $Date: 2000/04/21 22:16:29 $
 */
 public class UnicodeSet implements UnicodeFilter {
    /**
@ -268,9 +268,6 @@ public class UnicodeSet implements UnicodeFilter {

    private static final int UNSUPPORTED_CATEGORY = 17;

-    private static final char VARIABLE_REF_OPEN = '{';
-    private static final char VARIABLE_REF_CLOSE = '}';
-
    private static final int CATEGORY_COUNT = 29;

    /**
@ -866,13 +863,9 @@ public class UnicodeSet implements UnicodeFilter {
             * Variable names are only parsed if varNameToChar is not null.
             * Set variables are only looked up if varCharToSet is not null.
             */
-            else if (symbols != null && !isLiteral && c == VARIABLE_REF_OPEN) {
-                ++i;
-                int j = pattern.indexOf(VARIABLE_REF_CLOSE, i);
-                if (i == j || j < 0) { // empty or unterminated
-                    throw new IllegalArgumentException("Illegal variable reference");
-                }
-                String name = pattern.substring(i, j);
+            else if (symbols != null && !isLiteral && c == SymbolTable.SYMBOL_REF) {
+                pos.setIndex(++i);
+                String name = symbols.parseReference(pattern, pos, limit);
                Object obj = symbols.lookup(name);
                if (obj == null) {
                    throw new IllegalArgumentException("Undefined variable: "
@ -884,50 +877,9 @@ public class UnicodeSet implements UnicodeFilter {
                } else {
                    nestedPairs = ((UnicodeSet) obj).pairs.toString();
                }
-                i = j; // Make i point at closing '}'
+                i = pos.getIndex()-1; // Make i point at last char of var name
            }

-            /* Parse variable references.  These are treated as literals.  If a
-             * variable refers to a UnicodeSet, nestedPairs is assigned here.
-             * Variable names are only parsed if varNameToChar is not null.
-             * Set variables are only looked up if varCharToSet is not null.
-             */
-            // TEMPORARY
-            // TEMPORARY
-            // TEMPORARY
-            else if (symbols != null && !isLiteral && c == '$') {
-                ++i;
-                c = pattern.charAt(i);
-                int j = i;
-                if (Character.isUnicodeIdentifierStart(c)) {
-                    ++j;
-                    while (j < limit &&
-                           Character.isUnicodeIdentifierPart(pattern.charAt(j))) {
-                        ++j;
-                    }
-                }
-                if (i == j || j < 0) { // empty or unterminated
-                    throw new IllegalArgumentException("Illegal variable reference " +
-                                                       pattern.substring(i-1, limit));
-                }
-                String name = pattern.substring(i, j);
-                Object obj = symbols.lookup(name);
-                if (obj == null) {
-                    throw new IllegalArgumentException("Undefined variable: "
-                                                       + name);
-                }
-                isLiteral = true;
-                if (obj instanceof Character) {
-                    c = ((Character) obj).charValue();
-                } else {
-                    nestedPairs = ((UnicodeSet) obj).pairs.toString();
-                }
-                i = j-1; // Make i point at last char of var name
-            }
-            // TEMPORARY
-            // TEMPORARY
-            // TEMPORARY
-
            /* An opening bracket indicates the first bracket of a nested
             * subpattern, either a normal pattern or a category pattern.  We
             * recognize these here and set nestedPairs accordingly.
--- a/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
+++ b/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $ 
- * $Date: 2000/04/21 21:16:40 $ 
- * $Revision: 1.21 $
+ * $Date: 2000/04/21 22:16:29 $ 
+ * $Revision: 1.22 $
 *
 *****************************************************************************************
 */
@ -274,7 +274,7 @@ import com.ibm.util.Utility;
 * <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
 *
 * @author Alan Liu
- * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.21 $ $Date: 2000/04/21 21:16:40 $
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.22 $ $Date: 2000/04/21 22:16:29 $
 */
 public class RuleBasedTransliterator extends Transliterator {

@ -534,6 +534,31 @@ public class RuleBasedTransliterator extends Transliterator {
                }
                return ch;
            }
+
+            /**
+             * Implement SymbolTable API.  Parse out a symbol reference
+             * name.
+             */
+            public String parseReference(String text, ParsePosition pos, int limit) {
+                int start = pos.getIndex();
+                int i = start;
+                if (i < limit) {
+                    char c = text.charAt(i);
+                    if (Character.isUnicodeIdentifierStart(c)) {
+                        ++i;
+                        while (i < limit &&
+                               Character.isUnicodeIdentifierPart(text.charAt(i))) {
+                            ++i;
+                        }
+                    }
+                }
+                if (i == start) { // No valid name chars
+                    throw new IllegalArgumentException("Illegal variable reference " +
+                                                       text.substring(start, limit));
+                }
+                pos.setIndex(i);
+                return text.substring(start, i);
+            }
        }

        /**
@ -587,7 +612,6 @@ public class RuleBasedTransliterator extends Transliterator {
        private static final char END_OF_RULE         = ';';
        private static final char RULE_COMMENT_CHAR   = '#';

-        private static final char VARIABLE_REF        = '$'; // also segment refs
        private static final char CONTEXT_ANTE        = '{'; // ante{key
        private static final char CONTEXT_POST        = '}'; // key}post
        private static final char SET_OPEN            = '[';
@ -711,6 +735,7 @@ public class RuleBasedTransliterator extends Transliterator {
                             RuleBasedTransliterator.Parser parser) {
                int start = pos;
                StringBuffer buf = new StringBuffer();
+                ParsePosition pp = null;

            main:
                while (pos < limit) {
@ -781,7 +806,7 @@ public class RuleBasedTransliterator extends Transliterator {
                    case END_OF_RULE:
                        --pos; // Backup to point to END_OF_RULE
                        break main;
-                    case VARIABLE_REF:
+                    case SymbolTable.SYMBOL_REF:
                        // Handle variable references and segment references "$1" .. "$9"
                        {
                            // A variable reference must be followed immediately
@ -792,28 +817,26 @@ public class RuleBasedTransliterator extends Transliterator {
                                syntaxError("Trailing " + c, rule, start);
                            }
                            // Parse "$1" "$2" .. "$9"
-                            c = rule.charAt(pos++);
+                            c = rule.charAt(pos);
                            int r = Character.digit(c, 10);
                            if (r >= 1 && r <= 9) {
                                if (r > maxRef) {
                                    maxRef = r;
                                }
                                buf.append((char) (parser.data.segmentBase + r - 1));
-                            } else if (Character.isUnicodeIdentifierStart(c)) {
-                                int j = pos;
-                                while (j < limit &&
-                                       Character.isUnicodeIdentifierPart(rule.charAt(j))) {
-                                    ++j;
+                                ++pos;
+                            } else {
+                                if (pp == null) { // Lazy create
+                                    pp = new ParsePosition(0);
                                }
-                                String name = rule.substring(pos-1, j);
-                                pos = j;
+                                pp.setIndex(pos);
+                                String name = parser.parseData.
+                                                parseReference(rule, pp, limit);
+                                pos = pp.getIndex();
                                // If this is a variable definition statement, then the LHS
                                // variable will be undefined.  In that case getVariableName()
                                // will return the special placeholder variableLimit-1.
                                buf.append(parser.getVariableDef(name));
-                            } else {
-                                syntaxError("Illegal char after " + VARIABLE_REF,
-                                            rule, start);
                            }
                        }
                        break;
@ -830,7 +853,10 @@ public class RuleBasedTransliterator extends Transliterator {
                        post = buf.length();
                        break;
                    case SET_OPEN:
-                        ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
+                        if (pp == null) {
+                            pp = new ParsePosition(0);
+                        }
+                        pp.setIndex(pos-1); // Backup to opening '['
                        buf.append(parser.registerSet(new UnicodeSet(rule, pp, parser.parseData)));
                        pos = pp.getIndex();
                        break;
@ -1231,6 +1257,9 @@ public class RuleBasedTransliterator extends Transliterator {

 /**
 * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.22  2000/04/21 22:16:29  alan
+ * Delegate variable name parsing to SymbolTable interface to consolidate parsing code.
+ *
 * Revision 1.21  2000/04/21 21:16:40  alan
 * Modify rule syntax
 *
--- a/icu4j/src/com/ibm/text/SymbolTable.java
+++ b/icu4j/src/com/ibm/text/SymbolTable.java
@ -5,21 +5,49 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/SymbolTable.java,v $ 
- * $Date: 2000/03/10 04:07:24 $ 
- * $Revision: 1.2 $
+ * $Date: 2000/04/21 22:16:29 $ 
+ * $Revision: 1.3 $
 *
 *****************************************************************************************
 */
 package com.ibm.text;
+import java.text.ParsePosition;

 /**
- * An interface that maps strings to objects.
+ * An interface that maps strings to objects.  This interface defines
+ * both lookup protocol and parsing.  This allows different components
+ * to share a symbol table and to handle name parsing uniformly.  It
+ * is expected that client parse code look for the SYMBOL_REF
+ * character and, when seen, attempt to parse the characters after it
+ * using parseReference().
+ *
+ * <p>Currently, RuleBasedTransliterator and UnicodeSet use this
+ * interface to share variable definitions.
 */
 public interface SymbolTable {

+    /**
+     * The character preceding a symbol reference name.
+     */
+    final char SYMBOL_REF = '$';
+
    /**
     * Lookup the object associated with this string and return it.
     * Return <tt>null</tt> if no such name exists.
     */
    Object lookup(String s);
+
+    /**
+     * Parse a symbol reference name from the given string, starting
+     * at the given position.  If no valid symbol reference name is
+     * found, throw an exception.
+     * @param text the text to parse for the name
+     * @param pos on entry, the index of the first character to parse.
+     * This is the character following the SYMBOL_REF character.  On
+     * exit, the index after the last parsed character.
+     * @param limit the index after the last character to be parsed.
+     * @return the parsed name.
+     * @exception IllegalArgumentException if no valid name is found.
+     */
+    String parseReference(String text, ParsePosition pos, int limit);
 }
--- a/icu4j/src/com/ibm/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/text/UnicodeSet.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $ 
- * $Date: 2000/04/21 21:16:40 $ 
- * $Revision: 1.17 $
+ * $Date: 2000/04/21 22:16:29 $ 
+ * $Revision: 1.18 $
 *
 *****************************************************************************************
 */
@ -241,7 +241,7 @@ import java.text.*;
 * *Unsupported by Java (and hence unsupported by UnicodeSet).
 *
 * @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.18 $ $Date: 2000/04/21 22:16:29 $
 */
 public class UnicodeSet implements UnicodeFilter {
    /**
@ -268,9 +268,6 @@ public class UnicodeSet implements UnicodeFilter {

    private static final int UNSUPPORTED_CATEGORY = 17;

-    private static final char VARIABLE_REF_OPEN = '{';
-    private static final char VARIABLE_REF_CLOSE = '}';
-
    private static final int CATEGORY_COUNT = 29;

    /**
@ -866,13 +863,9 @@ public class UnicodeSet implements UnicodeFilter {
             * Variable names are only parsed if varNameToChar is not null.
             * Set variables are only looked up if varCharToSet is not null.
             */
-            else if (symbols != null && !isLiteral && c == VARIABLE_REF_OPEN) {
-                ++i;
-                int j = pattern.indexOf(VARIABLE_REF_CLOSE, i);
-                if (i == j || j < 0) { // empty or unterminated
-                    throw new IllegalArgumentException("Illegal variable reference");
-                }
-                String name = pattern.substring(i, j);
+            else if (symbols != null && !isLiteral && c == SymbolTable.SYMBOL_REF) {
+                pos.setIndex(++i);
+                String name = symbols.parseReference(pattern, pos, limit);
                Object obj = symbols.lookup(name);
                if (obj == null) {
                    throw new IllegalArgumentException("Undefined variable: "
@ -884,50 +877,9 @@ public class UnicodeSet implements UnicodeFilter {
                } else {
                    nestedPairs = ((UnicodeSet) obj).pairs.toString();
                }
-                i = j; // Make i point at closing '}'
+                i = pos.getIndex()-1; // Make i point at last char of var name
            }

-            /* Parse variable references.  These are treated as literals.  If a
-             * variable refers to a UnicodeSet, nestedPairs is assigned here.
-             * Variable names are only parsed if varNameToChar is not null.
-             * Set variables are only looked up if varCharToSet is not null.
-             */
-            // TEMPORARY
-            // TEMPORARY
-            // TEMPORARY
-            else if (symbols != null && !isLiteral && c == '$') {
-                ++i;
-                c = pattern.charAt(i);
-                int j = i;
-                if (Character.isUnicodeIdentifierStart(c)) {
-                    ++j;
-                    while (j < limit &&
-                           Character.isUnicodeIdentifierPart(pattern.charAt(j))) {
-                        ++j;
-                    }
-                }
-                if (i == j || j < 0) { // empty or unterminated
-                    throw new IllegalArgumentException("Illegal variable reference " +
-                                                       pattern.substring(i-1, limit));
-                }
-                String name = pattern.substring(i, j);
-                Object obj = symbols.lookup(name);
-                if (obj == null) {
-                    throw new IllegalArgumentException("Undefined variable: "
-                                                       + name);
-                }
-                isLiteral = true;
-                if (obj instanceof Character) {
-                    c = ((Character) obj).charValue();
-                } else {
-                    nestedPairs = ((UnicodeSet) obj).pairs.toString();
-                }
-                i = j-1; // Make i point at last char of var name
-            }
-            // TEMPORARY
-            // TEMPORARY
-            // TEMPORARY
-
            /* An opening bracket indicates the first bracket of a nested
             * subpattern, either a normal pattern or a category pattern.  We
             * recognize these here and set nestedPairs accordingly.