Delegate variable name parsing to SymbolTable interface to consolidate parsing code.

X-SVN-Rev: 1212
This commit is contained in:
Alan Liu 2000-04-21 22:16:29 +00:00
parent ffaec2e342
commit aa61987272
6 changed files with 168 additions and 150 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
* $Date: 2000/04/21 21:16:40 $
* $Revision: 1.21 $
* $Date: 2000/04/21 22:16:29 $
* $Revision: 1.22 $
*
*****************************************************************************************
*/
@ -274,7 +274,7 @@ import com.ibm.util.Utility;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.21 $ $Date: 2000/04/21 21:16:40 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.22 $ $Date: 2000/04/21 22:16:29 $
*/
public class RuleBasedTransliterator extends Transliterator {
@ -534,6 +534,31 @@ public class RuleBasedTransliterator extends Transliterator {
}
return ch;
}
/**
 * Implement SymbolTable API. Parse out a symbol reference name: one
 * Unicode identifier-start character followed by zero or more Unicode
 * identifier-part characters.
 *
 * @param text the text to parse for the name
 * @param pos on entry, the index of the first character of the name
 * (the character following the SYMBOL_REF character). On successful
 * exit, the index after the last character of the name. Left
 * unchanged if an exception is thrown.
 * @param limit the index after the last character to be parsed
 * @return the parsed name
 * @exception IllegalArgumentException if no valid name starts at pos
 */
public String parseReference(String text, ParsePosition pos, int limit) {
    int start = pos.getIndex();
    // Never scan past the end of the text, even if the caller hands us
    // a limit larger than the string.
    int end = Math.min(limit, text.length());
    int i = start;
    if (start >= 0 && i < end &&
        Character.isUnicodeIdentifierStart(text.charAt(i))) {
        ++i;
        while (i < end &&
               Character.isUnicodeIdentifierPart(text.charAt(i))) {
            ++i;
        }
    }
    if (i == start) { // No valid name chars
        // Guard the substring so that building the message cannot itself
        // throw StringIndexOutOfBoundsException (start at or beyond the
        // limit) and hide the IllegalArgumentException callers expect.
        String rest = (start >= 0 && start < end)
            ? text.substring(start, end) : "";
        throw new IllegalArgumentException("Illegal variable reference " +
                                           rest);
    }
    pos.setIndex(i);
    return text.substring(start, i);
}
}
/**
@ -587,7 +612,6 @@ public class RuleBasedTransliterator extends Transliterator {
private static final char END_OF_RULE = ';';
private static final char RULE_COMMENT_CHAR = '#';
private static final char VARIABLE_REF = '$'; // also segment refs
private static final char CONTEXT_ANTE = '{'; // ante{key
private static final char CONTEXT_POST = '}'; // key}post
private static final char SET_OPEN = '[';
@ -711,6 +735,7 @@ public class RuleBasedTransliterator extends Transliterator {
RuleBasedTransliterator.Parser parser) {
int start = pos;
StringBuffer buf = new StringBuffer();
ParsePosition pp = null;
main:
while (pos < limit) {
@ -781,7 +806,7 @@ public class RuleBasedTransliterator extends Transliterator {
case END_OF_RULE:
--pos; // Backup to point to END_OF_RULE
break main;
case VARIABLE_REF:
case SymbolTable.SYMBOL_REF:
// Handle variable references and segment references "$1" .. "$9"
{
// A variable reference must be followed immediately
@ -792,28 +817,26 @@ public class RuleBasedTransliterator extends Transliterator {
syntaxError("Trailing " + c, rule, start);
}
// Parse "$1" "$2" .. "$9"
c = rule.charAt(pos++);
c = rule.charAt(pos);
int r = Character.digit(c, 10);
if (r >= 1 && r <= 9) {
if (r > maxRef) {
maxRef = r;
}
buf.append((char) (parser.data.segmentBase + r - 1));
} else if (Character.isUnicodeIdentifierStart(c)) {
int j = pos;
while (j < limit &&
Character.isUnicodeIdentifierPart(rule.charAt(j))) {
++j;
++pos;
} else {
if (pp == null) { // Lazy create
pp = new ParsePosition(0);
}
String name = rule.substring(pos-1, j);
pos = j;
pp.setIndex(pos);
String name = parser.parseData.
parseReference(rule, pp, limit);
pos = pp.getIndex();
// If this is a variable definition statement, then the LHS
// variable will be undefined. In that case getVariableName()
// will return the special placeholder variableLimit-1.
buf.append(parser.getVariableDef(name));
} else {
syntaxError("Illegal char after " + VARIABLE_REF,
rule, start);
}
}
break;
@ -830,7 +853,10 @@ public class RuleBasedTransliterator extends Transliterator {
post = buf.length();
break;
case SET_OPEN:
ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
if (pp == null) {
pp = new ParsePosition(0);
}
pp.setIndex(pos-1); // Backup to opening '['
buf.append(parser.registerSet(new UnicodeSet(rule, pp, parser.parseData)));
pos = pp.getIndex();
break;
@ -1231,6 +1257,9 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.22 2000/04/21 22:16:29 alan
* Delegate variable name parsing to SymbolTable interface to consolidate parsing code.
*
* Revision 1.21 2000/04/21 21:16:40 alan
* Modify rule syntax
*

View file

@ -5,21 +5,49 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SymbolTable.java,v $
* $Date: 2000/03/10 04:07:24 $
* $Revision: 1.2 $
* $Date: 2000/04/21 22:16:29 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
package com.ibm.text;
import java.text.ParsePosition;
/**
* An interface that maps strings to objects.
* An interface that maps strings to objects. This interface defines
* both lookup protocol and parsing. This allows different components
* to share a symbol table and to handle name parsing uniformly. It
* is expected that client parse code look for the SYMBOL_REF
* character and, when seen, attempt to parse the characters after it
* using parseReference().
*
* <p>Currently, RuleBasedTransliterator and UnicodeSet use this
* interface to share variable definitions.
*/
public interface SymbolTable {

    /**
     * Prefix character that introduces a symbol reference; the name
     * itself begins at the character that follows it.
     */
    char SYMBOL_REF = '$'; // interface fields are implicitly public static final

    /**
     * Find the object bound to the given name.
     * @param s the symbol name to look up
     * @return the associated object, or <tt>null</tt> if the name is
     * not defined.
     */
    Object lookup(String s);

    /**
     * Extract a symbol reference name from the given string, beginning
     * at the given position. An exception is thrown when no valid
     * symbol reference name is present there.
     * @param text the text to parse for the name
     * @param pos on entry, the index of the first character to parse,
     * that is, the character following the SYMBOL_REF character. On
     * exit, the index after the last parsed character.
     * @param limit the index after the last character to be parsed.
     * @return the parsed name.
     * @exception IllegalArgumentException if no valid name is found.
     */
    String parseReference(String text, ParsePosition pos, int limit);
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
* $Date: 2000/04/21 21:16:40 $
* $Revision: 1.17 $
* $Date: 2000/04/21 22:16:29 $
* $Revision: 1.18 $
*
*****************************************************************************************
*/
@ -241,7 +241,7 @@ import java.text.*;
* *Unsupported by Java (and hence unsupported by UnicodeSet).
*
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.18 $ $Date: 2000/04/21 22:16:29 $
*/
public class UnicodeSet implements UnicodeFilter {
/**
@ -268,9 +268,6 @@ public class UnicodeSet implements UnicodeFilter {
private static final int UNSUPPORTED_CATEGORY = 17;
private static final char VARIABLE_REF_OPEN = '{';
private static final char VARIABLE_REF_CLOSE = '}';
private static final int CATEGORY_COUNT = 29;
/**
@ -866,13 +863,9 @@ public class UnicodeSet implements UnicodeFilter {
* Variable names are only parsed if varNameToChar is not null.
* Set variables are only looked up if varCharToSet is not null.
*/
else if (symbols != null && !isLiteral && c == VARIABLE_REF_OPEN) {
++i;
int j = pattern.indexOf(VARIABLE_REF_CLOSE, i);
if (i == j || j < 0) { // empty or unterminated
throw new IllegalArgumentException("Illegal variable reference");
}
String name = pattern.substring(i, j);
else if (symbols != null && !isLiteral && c == SymbolTable.SYMBOL_REF) {
pos.setIndex(++i);
String name = symbols.parseReference(pattern, pos, limit);
Object obj = symbols.lookup(name);
if (obj == null) {
throw new IllegalArgumentException("Undefined variable: "
@ -884,50 +877,9 @@ public class UnicodeSet implements UnicodeFilter {
} else {
nestedPairs = ((UnicodeSet) obj).pairs.toString();
}
i = j; // Make i point at closing '}'
i = pos.getIndex()-1; // Make i point at last char of var name
}
/* Parse variable references. These are treated as literals. If a
* variable refers to a UnicodeSet, nestedPairs is assigned here.
* Variable names are only parsed if varNameToChar is not null.
* Set variables are only looked up if varCharToSet is not null.
*/
// TEMPORARY
// TEMPORARY
// TEMPORARY
else if (symbols != null && !isLiteral && c == '$') {
++i;
c = pattern.charAt(i);
int j = i;
if (Character.isUnicodeIdentifierStart(c)) {
++j;
while (j < limit &&
Character.isUnicodeIdentifierPart(pattern.charAt(j))) {
++j;
}
}
if (i == j || j < 0) { // empty or unterminated
throw new IllegalArgumentException("Illegal variable reference " +
pattern.substring(i-1, limit));
}
String name = pattern.substring(i, j);
Object obj = symbols.lookup(name);
if (obj == null) {
throw new IllegalArgumentException("Undefined variable: "
+ name);
}
isLiteral = true;
if (obj instanceof Character) {
c = ((Character) obj).charValue();
} else {
nestedPairs = ((UnicodeSet) obj).pairs.toString();
}
i = j-1; // Make i point at last char of var name
}
// TEMPORARY
// TEMPORARY
// TEMPORARY
/* An opening bracket indicates the first bracket of a nested
* subpattern, either a normal pattern or a category pattern. We
* recognize these here and set nestedPairs accordingly.

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
* $Date: 2000/04/21 21:16:40 $
* $Revision: 1.21 $
* $Date: 2000/04/21 22:16:29 $
* $Revision: 1.22 $
*
*****************************************************************************************
*/
@ -274,7 +274,7 @@ import com.ibm.util.Utility;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.21 $ $Date: 2000/04/21 21:16:40 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.22 $ $Date: 2000/04/21 22:16:29 $
*/
public class RuleBasedTransliterator extends Transliterator {
@ -534,6 +534,31 @@ public class RuleBasedTransliterator extends Transliterator {
}
return ch;
}
/**
 * Implement SymbolTable API. Parse out a symbol reference name: one
 * Unicode identifier-start character followed by zero or more Unicode
 * identifier-part characters.
 *
 * @param text the text to parse for the name
 * @param pos on entry, the index of the first character of the name
 * (the character following the SYMBOL_REF character). On successful
 * exit, the index after the last character of the name. Left
 * unchanged if an exception is thrown.
 * @param limit the index after the last character to be parsed
 * @return the parsed name
 * @exception IllegalArgumentException if no valid name starts at pos
 */
public String parseReference(String text, ParsePosition pos, int limit) {
    int start = pos.getIndex();
    // Never scan past the end of the text, even if the caller hands us
    // a limit larger than the string.
    int end = Math.min(limit, text.length());
    int i = start;
    if (start >= 0 && i < end &&
        Character.isUnicodeIdentifierStart(text.charAt(i))) {
        ++i;
        while (i < end &&
               Character.isUnicodeIdentifierPart(text.charAt(i))) {
            ++i;
        }
    }
    if (i == start) { // No valid name chars
        // Guard the substring so that building the message cannot itself
        // throw StringIndexOutOfBoundsException (start at or beyond the
        // limit) and hide the IllegalArgumentException callers expect.
        String rest = (start >= 0 && start < end)
            ? text.substring(start, end) : "";
        throw new IllegalArgumentException("Illegal variable reference " +
                                           rest);
    }
    pos.setIndex(i);
    return text.substring(start, i);
}
}
/**
@ -587,7 +612,6 @@ public class RuleBasedTransliterator extends Transliterator {
private static final char END_OF_RULE = ';';
private static final char RULE_COMMENT_CHAR = '#';
private static final char VARIABLE_REF = '$'; // also segment refs
private static final char CONTEXT_ANTE = '{'; // ante{key
private static final char CONTEXT_POST = '}'; // key}post
private static final char SET_OPEN = '[';
@ -711,6 +735,7 @@ public class RuleBasedTransliterator extends Transliterator {
RuleBasedTransliterator.Parser parser) {
int start = pos;
StringBuffer buf = new StringBuffer();
ParsePosition pp = null;
main:
while (pos < limit) {
@ -781,7 +806,7 @@ public class RuleBasedTransliterator extends Transliterator {
case END_OF_RULE:
--pos; // Backup to point to END_OF_RULE
break main;
case VARIABLE_REF:
case SymbolTable.SYMBOL_REF:
// Handle variable references and segment references "$1" .. "$9"
{
// A variable reference must be followed immediately
@ -792,28 +817,26 @@ public class RuleBasedTransliterator extends Transliterator {
syntaxError("Trailing " + c, rule, start);
}
// Parse "$1" "$2" .. "$9"
c = rule.charAt(pos++);
c = rule.charAt(pos);
int r = Character.digit(c, 10);
if (r >= 1 && r <= 9) {
if (r > maxRef) {
maxRef = r;
}
buf.append((char) (parser.data.segmentBase + r - 1));
} else if (Character.isUnicodeIdentifierStart(c)) {
int j = pos;
while (j < limit &&
Character.isUnicodeIdentifierPart(rule.charAt(j))) {
++j;
++pos;
} else {
if (pp == null) { // Lazy create
pp = new ParsePosition(0);
}
String name = rule.substring(pos-1, j);
pos = j;
pp.setIndex(pos);
String name = parser.parseData.
parseReference(rule, pp, limit);
pos = pp.getIndex();
// If this is a variable definition statement, then the LHS
// variable will be undefined. In that case getVariableName()
// will return the special placeholder variableLimit-1.
buf.append(parser.getVariableDef(name));
} else {
syntaxError("Illegal char after " + VARIABLE_REF,
rule, start);
}
}
break;
@ -830,7 +853,10 @@ public class RuleBasedTransliterator extends Transliterator {
post = buf.length();
break;
case SET_OPEN:
ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
if (pp == null) {
pp = new ParsePosition(0);
}
pp.setIndex(pos-1); // Backup to opening '['
buf.append(parser.registerSet(new UnicodeSet(rule, pp, parser.parseData)));
pos = pp.getIndex();
break;
@ -1231,6 +1257,9 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.22 2000/04/21 22:16:29 alan
* Delegate variable name parsing to SymbolTable interface to consolidate parsing code.
*
* Revision 1.21 2000/04/21 21:16:40 alan
* Modify rule syntax
*

View file

@ -5,21 +5,49 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/SymbolTable.java,v $
* $Date: 2000/03/10 04:07:24 $
* $Revision: 1.2 $
* $Date: 2000/04/21 22:16:29 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
package com.ibm.text;
import java.text.ParsePosition;
/**
* An interface that maps strings to objects.
* An interface that maps strings to objects. This interface defines
* both lookup protocol and parsing. This allows different components
* to share a symbol table and to handle name parsing uniformly. It
* is expected that client parse code look for the SYMBOL_REF
* character and, when seen, attempt to parse the characters after it
* using parseReference().
*
* <p>Currently, RuleBasedTransliterator and UnicodeSet use this
* interface to share variable definitions.
*/
public interface SymbolTable {

    /**
     * Prefix character that introduces a symbol reference; the name
     * itself begins at the character that follows it.
     */
    char SYMBOL_REF = '$'; // interface fields are implicitly public static final

    /**
     * Find the object bound to the given name.
     * @param s the symbol name to look up
     * @return the associated object, or <tt>null</tt> if the name is
     * not defined.
     */
    Object lookup(String s);

    /**
     * Extract a symbol reference name from the given string, beginning
     * at the given position. An exception is thrown when no valid
     * symbol reference name is present there.
     * @param text the text to parse for the name
     * @param pos on entry, the index of the first character to parse,
     * that is, the character following the SYMBOL_REF character. On
     * exit, the index after the last parsed character.
     * @param limit the index after the last character to be parsed.
     * @return the parsed name.
     * @exception IllegalArgumentException if no valid name is found.
     */
    String parseReference(String text, ParsePosition pos, int limit);
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
* $Date: 2000/04/21 21:16:40 $
* $Revision: 1.17 $
* $Date: 2000/04/21 22:16:29 $
* $Revision: 1.18 $
*
*****************************************************************************************
*/
@ -241,7 +241,7 @@ import java.text.*;
* *Unsupported by Java (and hence unsupported by UnicodeSet).
*
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.17 $ $Date: 2000/04/21 21:16:40 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.18 $ $Date: 2000/04/21 22:16:29 $
*/
public class UnicodeSet implements UnicodeFilter {
/**
@ -268,9 +268,6 @@ public class UnicodeSet implements UnicodeFilter {
private static final int UNSUPPORTED_CATEGORY = 17;
private static final char VARIABLE_REF_OPEN = '{';
private static final char VARIABLE_REF_CLOSE = '}';
private static final int CATEGORY_COUNT = 29;
/**
@ -866,13 +863,9 @@ public class UnicodeSet implements UnicodeFilter {
* Variable names are only parsed if varNameToChar is not null.
* Set variables are only looked up if varCharToSet is not null.
*/
else if (symbols != null && !isLiteral && c == VARIABLE_REF_OPEN) {
++i;
int j = pattern.indexOf(VARIABLE_REF_CLOSE, i);
if (i == j || j < 0) { // empty or unterminated
throw new IllegalArgumentException("Illegal variable reference");
}
String name = pattern.substring(i, j);
else if (symbols != null && !isLiteral && c == SymbolTable.SYMBOL_REF) {
pos.setIndex(++i);
String name = symbols.parseReference(pattern, pos, limit);
Object obj = symbols.lookup(name);
if (obj == null) {
throw new IllegalArgumentException("Undefined variable: "
@ -884,50 +877,9 @@ public class UnicodeSet implements UnicodeFilter {
} else {
nestedPairs = ((UnicodeSet) obj).pairs.toString();
}
i = j; // Make i point at closing '}'
i = pos.getIndex()-1; // Make i point at last char of var name
}
/* Parse variable references. These are treated as literals. If a
* variable refers to a UnicodeSet, nestedPairs is assigned here.
* Variable names are only parsed if varNameToChar is not null.
* Set variables are only looked up if varCharToSet is not null.
*/
// TEMPORARY
// TEMPORARY
// TEMPORARY
else if (symbols != null && !isLiteral && c == '$') {
++i;
c = pattern.charAt(i);
int j = i;
if (Character.isUnicodeIdentifierStart(c)) {
++j;
while (j < limit &&
Character.isUnicodeIdentifierPart(pattern.charAt(j))) {
++j;
}
}
if (i == j || j < 0) { // empty or unterminated
throw new IllegalArgumentException("Illegal variable reference " +
pattern.substring(i-1, limit));
}
String name = pattern.substring(i, j);
Object obj = symbols.lookup(name);
if (obj == null) {
throw new IllegalArgumentException("Undefined variable: "
+ name);
}
isLiteral = true;
if (obj instanceof Character) {
c = ((Character) obj).charValue();
} else {
nestedPairs = ((UnicodeSet) obj).pairs.toString();
}
i = j-1; // Make i point at last char of var name
}
// TEMPORARY
// TEMPORARY
// TEMPORARY
/* An opening bracket indicates the first bracket of a nested
* subpattern, either a normal pattern or a category pattern. We
* recognize these here and set nestedPairs accordingly.