diff --git a/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java b/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java index 2d19a61ae80..9bf1b97f876 100755 --- a/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java +++ b/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java @@ -181,9 +181,12 @@ import java.util.Vector; *
Copyright © IBM Corporation 1999. All rights reserved. * * @author Alan Liu - * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.4 $ $Date: 1999/12/22 01:05:54 $ + * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.5 $ $Date: 1999/12/22 01:40:54 $ * * $Log: RuleBasedTransliterator.java,v $ + * Revision 1.5 1999/12/22 01:40:54 Alan + * Consolidate rule pattern anteContext, key, and postContext into one string. + * * Revision 1.4 1999/12/22 01:05:54 Alan * Improve masking checking; turn it off by default, for better performance * @@ -205,7 +208,7 @@ public class RuleBasedTransliterator extends Transliterator { static final boolean DEBUG = false; - static final boolean CHECK_MASKING = false; + static final boolean CHECK_MASKING = true; private static final String COPYRIGHT = "\u00A9 IBM Corporation 1999. All rights reserved."; @@ -654,8 +657,6 @@ public class RuleBasedTransliterator extends Transliterator { if (errors != null) { throw new IllegalArgumentException(errors.toString()); } - - data.ruleSet.freeze(); } /** diff --git a/icu4j/src/com/ibm/icu/text/TransliterationRule.java b/icu4j/src/com/ibm/icu/text/TransliterationRule.java index 9b8623e539c..be2ef1c90dd 100755 --- a/icu4j/src/com/ibm/icu/text/TransliterationRule.java +++ b/icu4j/src/com/ibm/icu/text/TransliterationRule.java @@ -21,9 +21,12 @@ import java.util.Dictionary; *
Copyright © IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.3 $ $Date: 1999/12/22 01:05:54 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.4 $ $Date: 1999/12/22 01:40:54 $
*
* $Log: TransliterationRule.java,v $
+ * Revision 1.4 1999/12/22 01:40:54 Alan
+ * Consolidate rule pattern anteContext, key, and postContext into one string.
+ *
* Revision 1.3 1999/12/22 01:05:54 Alan
* Improve masking checking; turn it off by default, for better performance
*
@@ -59,9 +62,13 @@ class TransliterationRule {
public static final int FULL_MATCH = 2;
/**
- * The string that must be matched.
+ * The string that must be matched, consisting of the anteContext, key,
+ * and postContext, concatenated together, in that order. Some components
+ * may be empty (zero length).
+ * @see #anteContextLength
+ * @see #keyLength
*/
- private String key;
+ private String pattern;
/**
* The string that is emitted if the key, anteContext, and postContext
@@ -70,18 +77,17 @@ class TransliterationRule {
private String output;
/**
- * The string that must match before the key. Must not be the empty string.
- * May be null; if null, then there is no matching requirement before the
- * key.
+ * The length of the string that must match before the key. If
+ * zero, then there is no matching requirement before the key.
+ * Substring [0,anteContextLength) of pattern is the anteContext.
*/
- private String anteContext;
+ private int anteContextLength;
/**
- * The string that must match after the key. Must not be the empty string.
- * May be null; if null, then there is no matching requirement after the
- * key.
+ * The length of the key. Substring [anteContextLength,
+ * anteContextLength + keyLength) of pattern is the key.
*/
- private String postContext;
+ private int keyLength;
/**
* The position of the cursor after emitting the output string, from 0 to
@@ -90,12 +96,6 @@ class TransliterationRule {
*/
private int cursorPos;
- /**
- * A string used to implement masks(). It is the concatenated anteContext,
- * key, and postContext. See freeze() method.
- */
- private String maskKey;
-
private static final String COPYRIGHT =
"\u00A9 IBM Corporation 1999. All rights reserved.";
@@ -120,28 +120,15 @@ class TransliterationRule {
public TransliterationRule(String key, String output,
String anteContext, String postContext,
int cursorPos) {
- this.key = key;
+ keyLength = key.length();
+ anteContextLength = (anteContext != null) ? anteContext.length() : 0;
+ pattern = (anteContextLength > 0 ? (anteContext + key) : key) +
+ (postContext != null ? postContext : "");
this.output = output;
- this.anteContext = (anteContext != null && anteContext.length() > 0)
- ? anteContext : null;
- this.postContext = (postContext != null && postContext.length() > 0)
- ? postContext : null;
this.cursorPos = cursorPos < 0 ? output.length() : cursorPos;
if (this.cursorPos > output.length()) {
throw new IllegalArgumentException("Illegal cursor position");
}
-
- /* The mask key is needed when we are adding individual rules to a rule
- * set, for performance. Here are the numbers: Without mask key, 13.0
- * seconds. With mask key, 6.2 seconds. However, once the rules have
- * been added to the set, then they can be discarded to free up space.
- * This is what the freeze() method does. After freeze() has been
- * called, the method masks() must NOT be called.
- */
- maskKey = anteContext != null ? (anteContext + key) : key;
- if (postContext != null) {
- maskKey += postContext;
- }
}
/**
@@ -149,15 +136,7 @@ class TransliterationRule {
* @return the length of the match key.
*/
public int getKeyLength() {
- return key.length();
- }
-
- /**
- * Return the key.
- * @return the match key.
- */
- public String getKey() {
- return key;
+ return keyLength;
}
/**
@@ -182,7 +161,7 @@ class TransliterationRule {
* <code>getMaximumContextLength()</code>.
*/
public int getAnteContextLength() {
- return anteContext == null ? 0 : anteContext.length();
+ return anteContextLength;
}
/**
@@ -190,8 +169,6 @@ class TransliterationRule {
* r1 matches any input string that r2 matches. If r1 masks r2 and r2 masks
* r1 then r1 == r2. Examples: "a>x" masks "ab>y". "a>x" masks "a[b]>y".
* "[c]a>x" masks "[dc]a>y".
- *
- *
This method must not be called after freeze() is called. */ public boolean masks(TransliterationRule r2) { /* Rule r1 masks rule r2 if the string formed of the @@ -218,21 +195,12 @@ class TransliterationRule { * currently do not have. This can be added later. */ - // maskKey = anteContext + key + postContext - int left = getAnteContextLength(); - int left2 = r2.getAnteContextLength(); - int right = maskKey.length() - left; - int right2 = r2.maskKey.length() - left2; + int left = anteContextLength; + int left2 = r2.anteContextLength; + int right = pattern.length() - left; + int right2 = r2.pattern.length() - left2; return left <= left2 && right <= right2 && - r2.maskKey.substring(left2 - left).startsWith(maskKey); - } - - /** - * Free up space. Once this method is called, masks() must NOT be called. - * If it is called, an exception will be thrown. - */ - public void freeze() { - maskKey = null; + r2.pattern.substring(left2 - left).startsWith(pattern); } /** @@ -241,13 +209,15 @@ class TransliterationRule { */ public String toString() { return getClass().getName() + '{' - + escape((anteContext != null ? ("[" + anteContext + ']') : "") - + key - + (postContext != null ? ("[" + postContext + ']') : "") + + escape(anteContextLength > 0 ? ("[" + pattern.substring(0, anteContextLength) + + ']') : "") + + pattern.substring(anteContextLength, anteContextLength + keyLength) + + (anteContextLength + keyLength < pattern.length() ? + ("[" + pattern.substring(anteContextLength + keyLength) + ']') : "") + " -> " + (cursorPos < output.length() ? (output.substring(0, cursorPos) + '|' + output.substring(cursorPos)) - : output)) + : output) + '}'; } @@ -274,21 +244,14 @@ class TransliterationRule { * altered by this transliterator. If filter is * null then no filtering is applied. 
*/ - public boolean matches(String text, int start, int limit, - StringBuffer result, int cursor, - Dictionary variables, - UnicodeFilter filter) { - return - (anteContext == null - || regionMatches(text, start, limit, result, - cursor - anteContext.length(), - anteContext, variables, filter)) && - regionMatches(text, start, limit, result, cursor, - key, variables, filter) && - (postContext == null - || regionMatches(text, start, limit, result, - cursor + key.length(), - postContext, variables, filter)); + public final boolean matches(String text, int start, int limit, + StringBuffer result, int cursor, + Dictionary variables, + UnicodeFilter filter) { + // Match anteContext, key, and postContext + return regionMatches(text, start, limit, result, + cursor - anteContextLength, + pattern, variables, filter); } /** @@ -306,18 +269,13 @@ class TransliterationRule { * altered by this transliterator. If filter is * null then no filtering is applied. */ - public boolean matches(Replaceable text, int start, int limit, - int cursor, Dictionary variables, - UnicodeFilter filter) { - return - (anteContext == null - || regionMatches(text, start, limit, cursor - anteContext.length(), - anteContext, variables, filter)) && - regionMatches(text, start, limit, cursor, - key, variables, filter) && - (postContext == null - || regionMatches(text, start, limit, cursor + key.length(), - postContext, variables, filter)); + public final boolean matches(Replaceable text, int start, int limit, + int cursor, Dictionary variables, + UnicodeFilter filter) { + // Match anteContext, key, and postContext + return regionMatches(text, start, limit, + cursor - anteContextLength, + pattern, variables, filter); } /** @@ -348,28 +306,10 @@ class TransliterationRule { public int getMatchDegree(Replaceable text, int start, int limit, int cursor, Dictionary variables, UnicodeFilter filter) { - if (anteContext != null - && !regionMatches(text, start, limit, cursor - anteContext.length(), - anteContext, 
variables, filter)) { - return MISMATCH; - } - int len = getRegionMatchLength(text, start, limit, cursor, - key, variables, filter); - if (len < 0) { - return MISMATCH; - } - if (len < key.length()) { - return PARTIAL_MATCH; - } - if (postContext == null) { - return FULL_MATCH; - } - len = getRegionMatchLength(text, start, limit, - cursor + key.length(), - postContext, variables, filter); - return (len < 0) ? MISMATCH - : ((len == postContext.length()) ? FULL_MATCH - : PARTIAL_MATCH); + int len = getRegionMatchLength(text, start, limit, cursor - anteContextLength, + pattern, variables, filter); + return len < anteContextLength ? MISMATCH : + (len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH); } /** diff --git a/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java b/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java index de6905ea39c..ecbc8e7630d 100755 --- a/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java +++ b/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java @@ -15,9 +15,12 @@ import java.util.*; *
Copyright © IBM Corporation 1999. All rights reserved. * * @author Alan Liu - * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.3 $ $Date: 1999/12/22 01:05:54 $ + * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.4 $ $Date: 1999/12/22 01:40:54 $ * * $Log: TransliterationRuleSet.java,v $ + * Revision 1.4 1999/12/22 01:40:54 Alan + * Consolidate rule pattern anteContext, key, and postContext into one string. + * * Revision 1.3 1999/12/22 01:05:54 Alan * Improve masking checking; turn it off by default, for better performance * @@ -72,8 +75,6 @@ class TransliterationRuleSet { /** * Add a rule to this set. Rules are added in order, and order is * significant. - * - *
Once freeze() is called, this method must not be called.
* @param rule the rule to add
*/
public void addRule(TransliterationRule rule) {
@@ -92,16 +93,6 @@ class TransliterationRuleSet {
return (TransliterationRule) rules.elementAt(i);
}
- /**
- * Free up space. Once this method is called, the maskKey is
- * invalid.
- */
- public void freeze() {
- for (int i=0; i Copyright © IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.3 $ $Date: 1999/12/22 01:05:54 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.4 $ $Date: 1999/12/22 01:40:54 $
*
* $Log: TransliterationRule.java,v $
+ * Revision 1.4 1999/12/22 01:40:54 Alan
+ * Consolidate rule pattern anteContext, key, and postContext into one string.
+ *
* Revision 1.3 1999/12/22 01:05:54 Alan
* Improve masking checking; turn it off by default, for better performance
*
@@ -59,9 +62,13 @@ class TransliterationRule {
public static final int FULL_MATCH = 2;
/**
- * The string that must be matched.
+ * The string that must be matched, consisting of the anteContext, key,
+ * and postContext, concatenated together, in that order. Some components
+ * may be empty (zero length).
+ * @see #anteContextLength
+ * @see #keyLength
*/
- private String key;
+ private String pattern;
/**
* The string that is emitted if the key, anteContext, and postContext
@@ -70,18 +77,17 @@ class TransliterationRule {
private String output;
/**
- * The string that must match before the key. Must not be the empty string.
- * May be null; if null, then there is no matching requirement before the
- * key.
+ * The length of the string that must match before the key. If
+ * zero, then there is no matching requirement before the key.
+ * Substring [0,anteContextLength) of pattern is the anteContext.
*/
- private String anteContext;
+ private int anteContextLength;
/**
- * The string that must match after the key. Must not be the empty string.
- * May be null; if null, then there is no matching requirement after the
- * key.
+ * The length of the key. Substring [anteContextLength,
+ * anteContextLength + keyLength) of pattern is the key.
*/
- private String postContext;
+ private int keyLength;
/**
* The position of the cursor after emitting the output string, from 0 to
@@ -90,12 +96,6 @@ class TransliterationRule {
*/
private int cursorPos;
- /**
- * A string used to implement masks(). It is the concatenated anteContext,
- * key, and postContext. See freeze() method.
- */
- private String maskKey;
-
private static final String COPYRIGHT =
"\u00A9 IBM Corporation 1999. All rights reserved.";
@@ -120,28 +120,15 @@ class TransliterationRule {
public TransliterationRule(String key, String output,
String anteContext, String postContext,
int cursorPos) {
- this.key = key;
+ keyLength = key.length();
+ anteContextLength = (anteContext != null) ? anteContext.length() : 0;
+ pattern = (anteContextLength > 0 ? (anteContext + key) : key) +
+ (postContext != null ? postContext : "");
this.output = output;
- this.anteContext = (anteContext != null && anteContext.length() > 0)
- ? anteContext : null;
- this.postContext = (postContext != null && postContext.length() > 0)
- ? postContext : null;
this.cursorPos = cursorPos < 0 ? output.length() : cursorPos;
if (this.cursorPos > output.length()) {
throw new IllegalArgumentException("Illegal cursor position");
}
-
- /* The mask key is needed when we are adding individual rules to a rule
- * set, for performance. Here are the numbers: Without mask key, 13.0
- * seconds. With mask key, 6.2 seconds. However, once the rules have
- * been added to the set, then they can be discarded to free up space.
- * This is what the freeze() method does. After freeze() has been
- * called, the method masks() must NOT be called.
- */
- maskKey = anteContext != null ? (anteContext + key) : key;
- if (postContext != null) {
- maskKey += postContext;
- }
}
/**
@@ -149,15 +136,7 @@ class TransliterationRule {
* @return the length of the match key.
*/
public int getKeyLength() {
- return key.length();
- }
-
- /**
- * Return the key.
- * @return the match key.
- */
- public String getKey() {
- return key;
+ return keyLength;
}
/**
@@ -182,7 +161,7 @@ class TransliterationRule {
* This method must not be called after freeze() is called.
*/
public boolean masks(TransliterationRule r2) {
/* Rule r1 masks rule r2 if the string formed of the
@@ -218,21 +195,12 @@ class TransliterationRule {
* currently do not have. This can be added later.
*/
- // maskKey = anteContext + key + postContext
- int left = getAnteContextLength();
- int left2 = r2.getAnteContextLength();
- int right = maskKey.length() - left;
- int right2 = r2.maskKey.length() - left2;
+ int left = anteContextLength;
+ int left2 = r2.anteContextLength;
+ int right = pattern.length() - left;
+ int right2 = r2.pattern.length() - left2;
return left <= left2 && right <= right2 &&
- r2.maskKey.substring(left2 - left).startsWith(maskKey);
- }
-
- /**
- * Free up space. Once this method is called, masks() must NOT be called.
- * If it is called, an exception will be thrown.
- */
- public void freeze() {
- maskKey = null;
+ r2.pattern.substring(left2 - left).startsWith(pattern);
}
/**
@@ -241,13 +209,15 @@ class TransliterationRule {
*/
public String toString() {
return getClass().getName() + '{'
- + escape((anteContext != null ? ("[" + anteContext + ']') : "")
- + key
- + (postContext != null ? ("[" + postContext + ']') : "")
+ + escape((anteContextLength > 0 ? ("[" + pattern.substring(0, anteContextLength) +
+ ']') : "") // escape( stays open here: it must also wrap the key, postContext and output
+ + pattern.substring(anteContextLength, anteContextLength + keyLength)
+ + (anteContextLength + keyLength < pattern.length() ?
+ ("[" + pattern.substring(anteContextLength + keyLength) + ']') : "")
+ " -> "
+ (cursorPos < output.length()
? (output.substring(0, cursorPos) + '|' + output.substring(cursorPos))
: output))
+ '}';
}
@@ -274,21 +244,14 @@ class TransliterationRule {
* altered by this transliterator. If filter is
* null then no filtering is applied.
*/
- public boolean matches(String text, int start, int limit,
- StringBuffer result, int cursor,
- Dictionary variables,
- UnicodeFilter filter) {
- return
- (anteContext == null
- || regionMatches(text, start, limit, result,
- cursor - anteContext.length(),
- anteContext, variables, filter)) &&
- regionMatches(text, start, limit, result, cursor,
- key, variables, filter) &&
- (postContext == null
- || regionMatches(text, start, limit, result,
- cursor + key.length(),
- postContext, variables, filter));
+ public final boolean matches(String text, int start, int limit,
+ StringBuffer result, int cursor,
+ Dictionary variables,
+ UnicodeFilter filter) {
+ // Match anteContext, key, and postContext
+ return regionMatches(text, start, limit, result,
+ cursor - anteContextLength,
+ pattern, variables, filter);
}
/**
@@ -306,18 +269,13 @@ class TransliterationRule {
* altered by this transliterator. If filter is
* null then no filtering is applied.
*/
- public boolean matches(Replaceable text, int start, int limit,
- int cursor, Dictionary variables,
- UnicodeFilter filter) {
- return
- (anteContext == null
- || regionMatches(text, start, limit, cursor - anteContext.length(),
- anteContext, variables, filter)) &&
- regionMatches(text, start, limit, cursor,
- key, variables, filter) &&
- (postContext == null
- || regionMatches(text, start, limit, cursor + key.length(),
- postContext, variables, filter));
+ public final boolean matches(Replaceable text, int start, int limit,
+ int cursor, Dictionary variables,
+ UnicodeFilter filter) {
+ // Match anteContext, key, and postContext
+ return regionMatches(text, start, limit,
+ cursor - anteContextLength,
+ pattern, variables, filter);
}
/**
@@ -348,28 +306,10 @@ class TransliterationRule {
public int getMatchDegree(Replaceable text, int start, int limit,
int cursor, Dictionary variables,
UnicodeFilter filter) {
- if (anteContext != null
- && !regionMatches(text, start, limit, cursor - anteContext.length(),
- anteContext, variables, filter)) {
- return MISMATCH;
- }
- int len = getRegionMatchLength(text, start, limit, cursor,
- key, variables, filter);
- if (len < 0) {
- return MISMATCH;
- }
- if (len < key.length()) {
- return PARTIAL_MATCH;
- }
- if (postContext == null) {
- return FULL_MATCH;
- }
- len = getRegionMatchLength(text, start, limit,
- cursor + key.length(),
- postContext, variables, filter);
- return (len < 0) ? MISMATCH
- : ((len == postContext.length()) ? FULL_MATCH
- : PARTIAL_MATCH);
+ int len = getRegionMatchLength(text, start, limit, cursor - anteContextLength,
+ pattern, variables, filter);
+ return len < anteContextLength ? MISMATCH :
+ (len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
}
/**
diff --git a/icu4j/src/com/ibm/text/TransliterationRuleSet.java b/icu4j/src/com/ibm/text/TransliterationRuleSet.java
index de6905ea39c..ecbc8e7630d 100755
--- a/icu4j/src/com/ibm/text/TransliterationRuleSet.java
+++ b/icu4j/src/com/ibm/text/TransliterationRuleSet.java
@@ -15,9 +15,12 @@ import java.util.*;
* Copyright © IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
- * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.3 $ $Date: 1999/12/22 01:05:54 $
+ * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.4 $ $Date: 1999/12/22 01:40:54 $
*
* $Log: TransliterationRuleSet.java,v $
+ * Revision 1.4 1999/12/22 01:40:54 Alan
+ * Consolidate rule pattern anteContext, key, and postContext into one string.
+ *
* Revision 1.3 1999/12/22 01:05:54 Alan
* Improve masking checking; turn it off by default, for better performance
*
@@ -72,8 +75,6 @@ class TransliterationRuleSet {
/**
* Add a rule to this set. Rules are added in order, and order is
* significant.
- *
- * Once freeze() is called, this method must not be called.
* @param rule the rule to add
*/
public void addRule(TransliterationRule rule) {
@@ -92,16 +93,6 @@ class TransliterationRuleSet {
return (TransliterationRule) rules.elementAt(i);
}
- /**
- * Free up space. Once this method is called, the maskKey is
- * invalid.
- */
- public void freeze() {
- for (int i=0; igetMaximumContextLength()
.
*/
public int getAnteContextLength() {
- return anteContext == null ? 0 : anteContext.length();
+ return anteContextLength;
}
/**
@@ -190,8 +169,6 @@ class TransliterationRule {
* r1 matches any input string that r2 matches. If r1 masks r2 and r2 masks
* r1 then r1 == r2. Examples: "a>x" masks "ab>y". "a>x" masks "a[b]>y".
* "[c]a>x" masks "[dc]a>y".
- *
- *