From 7a49adef39a9ac65d16d46b3c03cd59b3921b1f0 Mon Sep 17 00:00:00 2001
From: Alan Liu Character.isWhitespace()
, is ignored.
- * If the first non-blank character on a line is '#', the entire
- * line is ignored as a comment.
Character.isWhitespace()
,
+ * is ignored. If the first non-blank character on a line is '#',
+ * the entire line is ignored as a comment.
*
* Each set of rules consists of two groups, one forward, and one * reverse. This is a convention that is not enforced; rules for one * direction may be omitted, with the result that translations in - * that direction will not modify the source text. Alternatively, + * that direction will not modify the source text. In addition, * bidirectional forward-reverse rules may be specified for * symmetrical transformations.
* @@ -39,69 +39,27 @@ import com.ibm.util.Utility; *Rule statements take one of the following forms:
* *$alefmadda=\u0622
$alefmadda=\u0622;
$alefmadda
", will be replaced by
- * the Unicode character U+0622. The right hand side must be
- * exactly one character long (current limitation).$softvowel=[eiyEIY]
[abc] |
- * The set containing the - * characters 'a', 'b', and 'c'. | - *
[^abc] |
- * The set of all characters except - * 'a', 'b', and 'c'. | - *
[A-Z] |
- * The set of all characters from - * 'A' to 'Z' in Unicode order. | - *
[:Lu:] |
- * The set of Unicode uppercase - * letters. See www.unicode.org - * for a complete list of categories and their - * two-letter codes. | - *
[^a-z[:Lu:][:Ll:]] |
- * The set of all characters except - * 'a' through 'z' and uppercase or lowercase - * letters. | - *
Patterns may contain variable references, such as
- * "$a=[a-z];$not_a=[^$a]
". See
- * {@link UnicodeSet} for more documentation and examples.
ai>$alefmadda
$empty=;
").
+ * The right hand side may contain embedded UnicodeSet
+ * patterns, for example, "$softvowel=[eiyEIY]
".
+ * ai>$alefmadda;
ai<$alefmadda
ai<$alefmadda;
ai<>$alefmadda
ai<>$alefmadda;
In addition to being defined in variables, UnicodeSet
- * patterns may be embedded directly into rule strings. Thus, the
- * following two rules are equivalent:
UnicodeSet
+ * + *UnicodeSet
patterns may appear anywhere that
+ * makes sense. They may appear in variable definitions.
+ * Contrariwise, UnicodeSet
patterns may themselves
+ * contain variable references, such as "$a=[a-z];$not_a=[^$a]
",
+ * or "$range=a-z;$ll=[$range]
".
UnicodeSet
patterns may also be embedded directly
+ * into rule strings. Thus, the following two rules are equivalent:
** + **
$vowel=[aeiou]; $vowel>'*'; # One way to do this
@@ -162,6 +127,8 @@ import com.ibm.util.Utility; * Another way
See {@link UnicodeSet} for more documentation and examples.
+ * *Segments
* *Segments of the input string can be matched and copied to the @@ -169,7 +136,8 @@ import com.ibm.util.Utility; * general, and makes reordering possible. For example:
* *- *@@ -284,7 +252,7 @@ import com.ibm.util.Utility; *
([a-z]) > $1 $1; # + *
*
([a-z]) > $1 $1; + * # * double lowercase letters
* ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
Copyright (c) IBM Corporation 1999-2000. All rights reserved.
* * @author Alan Liu - * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.26 $ $Date: 2000/04/22 01:25:10 $ + * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.27 $ $Date: 2000/04/25 01:42:58 $ */ public class RuleBasedTransliterator extends Transliterator { @@ -455,15 +423,15 @@ public class RuleBasedTransliterator extends Transliterator { public TransliterationRuleSet ruleSet; /** - * Map variable name (String) to variable (Character). A variable - * name may correspond to a single literal character, in which - * case the character is stored in this hash. It may also - * correspond to a UnicodeSet, in which case a character is - * again stored in this hash, but the character is a stand-in: it - * is an index for a secondary lookup in data.setVariables. The stand-in - * also represents the UnicodeSet in the stored rules. + * Map variable name (String) to variable (char[]). A variable name + * corresponds to zero or more characters, stored in a char[] array in + * this hash. One or more of these chars may also correspond to a + * UnicodeSet, in which case the character in the char[] in this hash is + * a stand-in: it is an index for a secondary lookup in + * data.setVariables. The stand-in also represents the UnicodeSet in + * the stored rules. */ - public Hashtable variableNames; + private Hashtable variableNames; /** * Map category variable (Character) to set (UnicodeSet). @@ -474,30 +442,30 @@ public class RuleBasedTransliterator extends Transliterator { * stored in the rule text to represent the set of characters. * setVariables[i] represents character (setVariablesBase + i). */ - public UnicodeSet[] setVariables; + private UnicodeSet[] setVariables; /** * The character that represents setVariables[0]. Characters * setVariablesBase through setVariablesBase + * setVariables.length - 1 represent UnicodeSet objects. */ - public char setVariablesBase; - - /** - * Return the UnicodeSet represented by the given character, or - * null if none. - */ - public UnicodeSet lookup(char c) { - int i = c - setVariablesBase; - return (i >= 0 && i < setVariables.length) - ? setVariables[i] : null; - } + private char setVariablesBase; /** * The character that represents segment 1. Characters segmentBase * through segmentBase + 8 represent segments 1 through 9. */ - public char segmentBase; + private char segmentBase; + + /** + * Return the UnicodeSet represented by the given character, or + * null if none. + */ + public UnicodeSet lookupSet(char c) { + int i = c - setVariablesBase; + return (i >= 0 && i < setVariables.length) + ? setVariables[i] : null; + } /** * Return the zero-based index of the segment represented by the given @@ -531,18 +499,23 @@ public class RuleBasedTransliterator extends Transliterator { private class ParseData implements SymbolTable { /** - * Implement SymbolTable API. Lookup a variable, returning - * either a Character, a UnicodeSet, or null. + * Implement SymbolTable API. */ - public Object lookup(String name) { - Character ch = (Character) data.variableNames.get(name); - if (ch != null) { - int i = ch.charValue() - data.setVariablesBase; - if (i >= 0 && i < setVariablesVector.size()) { - return setVariablesVector.elementAt(i); - } + public char[] lookup(String name) { + return (char[]) data.variableNames.get(name); + } + + /** + * Implement SymbolTable API. + */ + public UnicodeSet lookupSet(char ch) { + // Note that we cannot use data.lookupSet() because the + // set array has not been constructed yet. + int i = ch - data.setVariablesBase; + if (i >= 0 && i < setVariablesVector.size()) { + return (UnicodeSet) setVariablesVector.elementAt(i); } - return ch; + return null; } /** @@ -869,10 +842,13 @@ public class RuleBasedTransliterator extends Transliterator { String name = parser.parseData. parseReference(rule, pp, limit); pos = pp.getIndex(); - // If this is a variable definition statement, then the LHS - // variable will be undefined. In that case getVariableName() - // will return the special placeholder variableLimit-1. - buf.append(parser.getVariableDef(name)); + // If this is a variable definition statement, + // then the LHS variable will be undefined. In + // that case appendVariableDef() will append the + // special placeholder char variableLimit-1. + + //buf.append(parser.getVariableDef(name)); + parser.appendVariableDef(name, buf); } } break; @@ -1035,11 +1011,12 @@ public class RuleBasedTransliterator extends Transliterator { if (left.text.length() != 1 || left.text.charAt(0) != variableLimit) { syntaxError("Malformed LHS", rule, start); } - if (right.text.length() != 1) { - syntaxError("Malformed RHS", rule, start); - } - data.variableNames.put(undefinedVariableName, - new Character(right.text.charAt(0))); + // We allow anything on the right, including an empty string. + int n = right.text.length(); + char[] value = new char[n]; + right.text.getChars(0, n, value, 0); + data.variableNames.put(undefinedVariableName, value); + ++variableLimit; return pos; } @@ -1157,12 +1134,12 @@ public class RuleBasedTransliterator extends Transliterator { } /** - * Returns the single character value of the given variable name. Defined - * names are recognized. + * Append the value of the given variable name to the given + * StringBuffer. * @exception IllegalArgumentException if the name is unknown. */ - private char getVariableDef(String name) { - Character ch = (Character) data.variableNames.get(name); + private void appendVariableDef(String name, StringBuffer buf) { + char[] ch = (char[]) data.variableNames.get(name); if (ch == null) { // We allow one undefined variable so that variable definition // statements work. For the first undefined variable we return @@ -1173,12 +1150,14 @@ public class RuleBasedTransliterator extends Transliterator { if (variableNext >= variableLimit) { throw new RuntimeException("Private use variables exhausted"); } - return --variableLimit; + buf.append((char) --variableLimit); + } else { + throw new IllegalArgumentException("Undefined variable $" + + name); } - throw new IllegalArgumentException("Undefined variable $" - + name); + } else { + buf.append(ch); } - return ch.charValue(); } /** @@ -1346,6 +1325,9 @@ public class RuleBasedTransliterator extends Transliterator { /** * $Log: RuleBasedTransliterator.java,v $ + * Revision 1.27 2000/04/25 01:42:58 alan + * Allow arbitrary length variable values. Clean up Data API. Update javadocs. + * * Revision 1.26 2000/04/22 01:25:10 alan * Add support for cursor positioner '@'; update javadoc * diff --git a/icu4j/src/com/ibm/icu/text/SymbolTable.java b/icu4j/src/com/ibm/icu/text/SymbolTable.java index c3f9a36f410..cf75c2334b9 100755 --- a/icu4j/src/com/ibm/icu/text/SymbolTable.java +++ b/icu4j/src/com/ibm/icu/text/SymbolTable.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SymbolTable.java,v $ - * $Date: 2000/04/21 22:16:29 $ - * $Revision: 1.3 $ + * $Date: 2000/04/25 01:42:58 $ + * $Revision: 1.4 $ * ***************************************************************************************** */ @@ -32,10 +32,17 @@ public interface SymbolTable { final char SYMBOL_REF = '$'; /** - * Lookup the object associated with this string and return it. - * Return null if no such name exists. + * Lookup the characters associated with this string and return it. + * Return null if no such name exists. The resultant + * array may have length zero. */ - Object lookup(String s); + char[] lookup(String s); + + /** + * Lookup the UnicodeSet associated with the given character, and + * return it. Return null if not found. + */ + UnicodeSet lookupSet(char ch); /** * Parse a symbol reference name from the given string, starting diff --git a/icu4j/src/com/ibm/icu/text/TransliterationRule.java b/icu4j/src/com/ibm/icu/text/TransliterationRule.java index f476594416f..3815b33dabf 100755 --- a/icu4j/src/com/ibm/icu/text/TransliterationRule.java +++ b/icu4j/src/com/ibm/icu/text/TransliterationRule.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $ - * $Date: 2000/04/22 01:25:10 $ - * $Revision: 1.18 $ + * $Date: 2000/04/25 01:42:58 $ + * $Revision: 1.19 $ * ***************************************************************************************** */ @@ -44,7 +44,7 @@ import com.ibm.util.Utility; *Copyright © IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.18 $ $Date: 2000/04/22 01:25:10 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.19 $ $Date: 2000/04/25 01:42:58 $
*/
class TransliterationRule {
/**
@@ -240,7 +240,7 @@ class TransliterationRule {
return -1;
}
char c = pattern.charAt(anteContextLength);
- return variables.lookup(c) == null ? (c & 0xFF) : -1;
+ return variables.lookupSet(c) == null ? (c & 0xFF) : -1;
}
/**
@@ -300,7 +300,7 @@ class TransliterationRule {
return true;
}
char c = pattern.charAt(anteContextLength);
- UnicodeSet set = variables.lookup(c);
+ UnicodeSet set = variables.lookupSet(c);
return set == null ? (c & 0xFF) == v : set.containsIndexValue(v);
}
@@ -486,13 +486,16 @@ class TransliterationRule {
UnicodeFilter filter) {
UnicodeSet set = null;
return (filter == null || filter.contains(textChar)) &&
- (((set = variables.lookup(keyChar)) == null) ?
+ (((set = variables.lookupSet(keyChar)) == null) ?
keyChar == textChar : set.contains(textChar));
}
}
/**
* $Log: TransliterationRule.java,v $
+ * Revision 1.19 2000/04/25 01:42:58 alan
+ * Allow arbitrary length variable values. Clean up Data API. Update javadocs.
+ *
* Revision 1.18 2000/04/22 01:25:10 alan
* Add support for cursor positioner '@'; update javadoc
*
diff --git a/icu4j/src/com/ibm/icu/text/UnicodeSet.java b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
index b5316a4d2ae..8e67ca27ba2 100755
--- a/icu4j/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
- * $Date: 2000/04/21 22:16:29 $
- * $Revision: 1.18 $
+ * $Date: 2000/04/25 01:42:58 $
+ * $Revision: 1.19 $
*
*****************************************************************************************
*/
@@ -241,7 +241,7 @@ import java.text.*;
* *Unsupported by Java (and hence unsupported by UnicodeSet).
*
* @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.18 $ $Date: 2000/04/21 22:16:29 $
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.19 $ $Date: 2000/04/25 01:42:58 $
*/
public class UnicodeSet implements UnicodeFilter {
/**
@@ -774,7 +774,13 @@ public class UnicodeSet implements UnicodeFilter {
int start = pos.getIndex();
int i = start;
int limit = pattern.length();
- for (; iCharacter.isWhitespace()
, is ignored.
- * If the first non-blank character on a line is '#', the entire
- * line is ignored as a comment.
Character.isWhitespace()
,
+ * is ignored. If the first non-blank character on a line is '#',
+ * the entire line is ignored as a comment.
*
* Each set of rules consists of two groups, one forward, and one * reverse. This is a convention that is not enforced; rules for one * direction may be omitted, with the result that translations in - * that direction will not modify the source text. Alternatively, + * that direction will not modify the source text. In addition, * bidirectional forward-reverse rules may be specified for * symmetrical transformations.
* @@ -39,69 +39,27 @@ import com.ibm.util.Utility; *Rule statements take one of the following forms:
* *$alefmadda=\u0622
$alefmadda=\u0622;
$alefmadda
", will be replaced by
- * the Unicode character U+0622. The right hand side must be
- * exactly one character long (current limitation).$softvowel=[eiyEIY]
[abc] |
- * The set containing the - * characters 'a', 'b', and 'c'. | - *
[^abc] |
- * The set of all characters except - * 'a', 'b', and 'c'. | - *
[A-Z] |
- * The set of all characters from - * 'A' to 'Z' in Unicode order. | - *
[:Lu:] |
- * The set of Unicode uppercase - * letters. See www.unicode.org - * for a complete list of categories and their - * two-letter codes. | - *
[^a-z[:Lu:][:Ll:]] |
- * The set of all characters except - * 'a' through 'z' and uppercase or lowercase - * letters. | - *
Patterns may contain variable references, such as
- * "$a=[a-z];$not_a=[^$a]
". See
- * {@link UnicodeSet} for more documentation and examples.
ai>$alefmadda
$empty=;
").
+ * The right hand side may contain embedded UnicodeSet
+ * patterns, for example, "$softvowel=[eiyEIY]
".ai>$alefmadda;
ai<$alefmadda
ai<$alefmadda;
ai<>$alefmadda
ai<>$alefmadda;
In addition to being defined in variables, UnicodeSet
- * patterns may be embedded directly into rule strings. Thus, the
- * following two rules are equivalent:
UnicodeSet
+ * + *UnicodeSet
patterns may appear anywhere that
+ * makes sense. They may appear in variable definitions.
+ * Contrariwise, UnicodeSet
patterns may themselves
+ * contain variable references, such as "$a=[a-z];$not_a=[^$a]
",
+ * or "$range=a-z;$ll=[$range]
".
UnicodeSet
patterns may also be embedded directly
+ * into rule strings. Thus, the following two rules are equivalent:
** + **
$vowel=[aeiou]; $vowel>'*'; # One way to do this
@@ -162,6 +127,8 @@ import com.ibm.util.Utility; * Another way
See {@link UnicodeSet} for more documentation and examples.
+ * *Segments
* *Segments of the input string can be matched and copied to the @@ -169,7 +136,8 @@ import com.ibm.util.Utility; * general, and makes reordering possible. For example:
* *- *@@ -284,7 +252,7 @@ import com.ibm.util.Utility; *
([a-z]) > $1 $1; # + *
*
([a-z]) > $1 $1; + * # * double lowercase letters
* ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
Copyright (c) IBM Corporation 1999-2000. All rights reserved.
* * @author Alan Liu - * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.26 $ $Date: 2000/04/22 01:25:10 $ + * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.27 $ $Date: 2000/04/25 01:42:58 $ */ public class RuleBasedTransliterator extends Transliterator { @@ -455,15 +423,15 @@ public class RuleBasedTransliterator extends Transliterator { public TransliterationRuleSet ruleSet; /** - * Map variable name (String) to variable (Character). A variable - * name may correspond to a single literal character, in which - * case the character is stored in this hash. It may also - * correspond to a UnicodeSet, in which case a character is - * again stored in this hash, but the character is a stand-in: it - * is an index for a secondary lookup in data.setVariables. The stand-in - * also represents the UnicodeSet in the stored rules. + * Map variable name (String) to variable (char[]). A variable name + * corresponds to zero or more characters, stored in a char[] array in + * this hash. One or more of these chars may also correspond to a + * UnicodeSet, in which case the character in the char[] in this hash is + * a stand-in: it is an index for a secondary lookup in + * data.setVariables. The stand-in also represents the UnicodeSet in + * the stored rules. */ - public Hashtable variableNames; + private Hashtable variableNames; /** * Map category variable (Character) to set (UnicodeSet). @@ -474,30 +442,30 @@ public class RuleBasedTransliterator extends Transliterator { * stored in the rule text to represent the set of characters. * setVariables[i] represents character (setVariablesBase + i). */ - public UnicodeSet[] setVariables; + private UnicodeSet[] setVariables; /** * The character that represents setVariables[0]. Characters * setVariablesBase through setVariablesBase + * setVariables.length - 1 represent UnicodeSet objects. */ - public char setVariablesBase; - - /** - * Return the UnicodeSet represented by the given character, or - * null if none. - */ - public UnicodeSet lookup(char c) { - int i = c - setVariablesBase; - return (i >= 0 && i < setVariables.length) - ? setVariables[i] : null; - } + private char setVariablesBase; /** * The character that represents segment 1. Characters segmentBase * through segmentBase + 8 represent segments 1 through 9. */ - public char segmentBase; + private char segmentBase; + + /** + * Return the UnicodeSet represented by the given character, or + * null if none. + */ + public UnicodeSet lookupSet(char c) { + int i = c - setVariablesBase; + return (i >= 0 && i < setVariables.length) + ? setVariables[i] : null; + } /** * Return the zero-based index of the segment represented by the given @@ -531,18 +499,23 @@ public class RuleBasedTransliterator extends Transliterator { private class ParseData implements SymbolTable { /** - * Implement SymbolTable API. Lookup a variable, returning - * either a Character, a UnicodeSet, or null. + * Implement SymbolTable API. */ - public Object lookup(String name) { - Character ch = (Character) data.variableNames.get(name); - if (ch != null) { - int i = ch.charValue() - data.setVariablesBase; - if (i >= 0 && i < setVariablesVector.size()) { - return setVariablesVector.elementAt(i); - } + public char[] lookup(String name) { + return (char[]) data.variableNames.get(name); + } + + /** + * Implement SymbolTable API. + */ + public UnicodeSet lookupSet(char ch) { + // Note that we cannot use data.lookupSet() because the + // set array has not been constructed yet. + int i = ch - data.setVariablesBase; + if (i >= 0 && i < setVariablesVector.size()) { + return (UnicodeSet) setVariablesVector.elementAt(i); } - return ch; + return null; } /** @@ -869,10 +842,13 @@ public class RuleBasedTransliterator extends Transliterator { String name = parser.parseData. parseReference(rule, pp, limit); pos = pp.getIndex(); - // If this is a variable definition statement, then the LHS - // variable will be undefined. In that case getVariableName() - // will return the special placeholder variableLimit-1. - buf.append(parser.getVariableDef(name)); + // If this is a variable definition statement, + // then the LHS variable will be undefined. In + // that case appendVariableDef() will append the + // special placeholder char variableLimit-1. + + //buf.append(parser.getVariableDef(name)); + parser.appendVariableDef(name, buf); } } break; @@ -1035,11 +1011,12 @@ public class RuleBasedTransliterator extends Transliterator { if (left.text.length() != 1 || left.text.charAt(0) != variableLimit) { syntaxError("Malformed LHS", rule, start); } - if (right.text.length() != 1) { - syntaxError("Malformed RHS", rule, start); - } - data.variableNames.put(undefinedVariableName, - new Character(right.text.charAt(0))); + // We allow anything on the right, including an empty string. + int n = right.text.length(); + char[] value = new char[n]; + right.text.getChars(0, n, value, 0); + data.variableNames.put(undefinedVariableName, value); + ++variableLimit; return pos; } @@ -1157,12 +1134,12 @@ public class RuleBasedTransliterator extends Transliterator { } /** - * Returns the single character value of the given variable name. Defined - * names are recognized. + * Append the value of the given variable name to the given + * StringBuffer. * @exception IllegalArgumentException if the name is unknown. */ - private char getVariableDef(String name) { - Character ch = (Character) data.variableNames.get(name); + private void appendVariableDef(String name, StringBuffer buf) { + char[] ch = (char[]) data.variableNames.get(name); if (ch == null) { // We allow one undefined variable so that variable definition // statements work. For the first undefined variable we return @@ -1173,12 +1150,14 @@ public class RuleBasedTransliterator extends Transliterator { if (variableNext >= variableLimit) { throw new RuntimeException("Private use variables exhausted"); } - return --variableLimit; + buf.append((char) --variableLimit); + } else { + throw new IllegalArgumentException("Undefined variable $" + + name); } - throw new IllegalArgumentException("Undefined variable $" - + name); + } else { + buf.append(ch); } - return ch.charValue(); } /** @@ -1346,6 +1325,9 @@ public class RuleBasedTransliterator extends Transliterator { /** * $Log: RuleBasedTransliterator.java,v $ + * Revision 1.27 2000/04/25 01:42:58 alan + * Allow arbitrary length variable values. Clean up Data API. Update javadocs. + * * Revision 1.26 2000/04/22 01:25:10 alan * Add support for cursor positioner '@'; update javadoc * diff --git a/icu4j/src/com/ibm/text/SymbolTable.java b/icu4j/src/com/ibm/text/SymbolTable.java index 714bae4b66d..60487048569 100755 --- a/icu4j/src/com/ibm/text/SymbolTable.java +++ b/icu4j/src/com/ibm/text/SymbolTable.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/SymbolTable.java,v $ - * $Date: 2000/04/21 22:16:29 $ - * $Revision: 1.3 $ + * $Date: 2000/04/25 01:42:58 $ + * $Revision: 1.4 $ * ***************************************************************************************** */ @@ -32,10 +32,17 @@ public interface SymbolTable { final char SYMBOL_REF = '$'; /** - * Lookup the object associated with this string and return it. - * Return null if no such name exists. + * Lookup the characters associated with this string and return it. + * Return null if no such name exists. The resultant + * array may have length zero. */ - Object lookup(String s); + char[] lookup(String s); + + /** + * Lookup the UnicodeSet associated with the given character, and + * return it. Return null if not found. + */ + UnicodeSet lookupSet(char ch); /** * Parse a symbol reference name from the given string, starting diff --git a/icu4j/src/com/ibm/text/TransliterationRule.java b/icu4j/src/com/ibm/text/TransliterationRule.java index 817aa334ccb..b78cfed5e86 100755 --- a/icu4j/src/com/ibm/text/TransliterationRule.java +++ b/icu4j/src/com/ibm/text/TransliterationRule.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRule.java,v $ - * $Date: 2000/04/22 01:25:10 $ - * $Revision: 1.18 $ + * $Date: 2000/04/25 01:42:58 $ + * $Revision: 1.19 $ * ***************************************************************************************** */ @@ -44,7 +44,7 @@ import com.ibm.util.Utility; *Copyright © IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.18 $ $Date: 2000/04/22 01:25:10 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.19 $ $Date: 2000/04/25 01:42:58 $
*/
class TransliterationRule {
/**
@@ -240,7 +240,7 @@ class TransliterationRule {
return -1;
}
char c = pattern.charAt(anteContextLength);
- return variables.lookup(c) == null ? (c & 0xFF) : -1;
+ return variables.lookupSet(c) == null ? (c & 0xFF) : -1;
}
/**
@@ -300,7 +300,7 @@ class TransliterationRule {
return true;
}
char c = pattern.charAt(anteContextLength);
- UnicodeSet set = variables.lookup(c);
+ UnicodeSet set = variables.lookupSet(c);
return set == null ? (c & 0xFF) == v : set.containsIndexValue(v);
}
@@ -486,13 +486,16 @@ class TransliterationRule {
UnicodeFilter filter) {
UnicodeSet set = null;
return (filter == null || filter.contains(textChar)) &&
- (((set = variables.lookup(keyChar)) == null) ?
+ (((set = variables.lookupSet(keyChar)) == null) ?
keyChar == textChar : set.contains(textChar));
}
}
/**
* $Log: TransliterationRule.java,v $
+ * Revision 1.19 2000/04/25 01:42:58 alan
+ * Allow arbitrary length variable values. Clean up Data API. Update javadocs.
+ *
* Revision 1.18 2000/04/22 01:25:10 alan
* Add support for cursor positioner '@'; update javadoc
*
diff --git a/icu4j/src/com/ibm/text/UnicodeSet.java b/icu4j/src/com/ibm/text/UnicodeSet.java
index 82ed7bbbe92..d0072e29702 100755
--- a/icu4j/src/com/ibm/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/text/UnicodeSet.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
- * $Date: 2000/04/21 22:16:29 $
- * $Revision: 1.18 $
+ * $Date: 2000/04/25 01:42:58 $
+ * $Revision: 1.19 $
*
*****************************************************************************************
*/
@@ -241,7 +241,7 @@ import java.text.*;
* *Unsupported by Java (and hence unsupported by UnicodeSet).
*
* @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.18 $ $Date: 2000/04/21 22:16:29 $
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.19 $ $Date: 2000/04/25 01:42:58 $
*/
public class UnicodeSet implements UnicodeFilter {
/**
@@ -774,7 +774,13 @@ public class UnicodeSet implements UnicodeFilter {
int start = pos.getIndex();
int i = start;
int limit = pattern.length();
- for (; i