mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-64 allow ::ID blocks in rules
X-SVN-Rev: 5878
This commit is contained in:
parent
e186f84db0
commit
e39cca2e96
8 changed files with 712 additions and 102 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
|
||||
* $Date: 2001/09/20 21:21:10 $
|
||||
* $Revision: 1.44 $
|
||||
* $Date: 2001/09/21 21:23:34 $
|
||||
* $Revision: 1.45 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -357,8 +357,8 @@ public class TransliteratorTest extends TestFmwk {
|
|||
* Compose the hex transliterators forward and reverse.
|
||||
*/
|
||||
public void TestCompoundHex() {
|
||||
Transliterator a = Transliterator.getInstance("Unicode-Hex");
|
||||
Transliterator b = Transliterator.getInstance("Hex-Unicode");
|
||||
Transliterator a = Transliterator.getInstance("Any-Hex");
|
||||
Transliterator b = Transliterator.getInstance("Hex-Any");
|
||||
Transliterator[] trans = { a, b };
|
||||
Transliterator ab = new CompoundTransliterator(trans);
|
||||
|
||||
|
@ -379,7 +379,7 @@ public class TransliteratorTest extends TestFmwk {
|
|||
* Do some basic tests of filtering.
|
||||
*/
|
||||
public void TestFiltering() {
|
||||
Transliterator hex = Transliterator.getInstance("Unicode-Hex");
|
||||
Transliterator hex = Transliterator.getInstance("Any-Hex");
|
||||
hex.setFilter(new UnicodeFilter() {
|
||||
public boolean contains(char c) {
|
||||
return c != 'c';
|
||||
|
@ -510,18 +510,18 @@ public class TransliteratorTest extends TestFmwk {
|
|||
* Prefix, suffix support in hex transliterators
|
||||
*/
|
||||
public void TestJ243() {
|
||||
// Test default Hex-Unicode, which should handle
|
||||
// Test default Hex-Any, which should handle
|
||||
// \\u, \\U, u+, and U+
|
||||
HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
|
||||
expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
|
||||
|
||||
// Try a custom Hex-Unicode
|
||||
// Try a custom Hex-Any
|
||||
// \\uXXXX and &#xXXXX;
|
||||
HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
|
||||
expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123",
|
||||
"abcd5fx0123");
|
||||
|
||||
// Try custom Unicode-Hex (default is tested elsewhere)
|
||||
// Try custom Any-Hex (default is tested elsewhere)
|
||||
UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
|
||||
expect(hex3, "012", "012");
|
||||
}
|
||||
|
@ -748,13 +748,13 @@ public class TransliteratorTest extends TestFmwk {
|
|||
*/
|
||||
public void TestFilterIDs() {
|
||||
String[] DATA = {
|
||||
"Unicode[aeiou]-Hex",
|
||||
"Hex[aeiou]-Unicode",
|
||||
"Any[aeiou]-Hex",
|
||||
"Hex[aeiou]-Any",
|
||||
"quizzical",
|
||||
"q\\u0075\\u0069zz\\u0069c\\u0061l",
|
||||
|
||||
"Unicode[aeiou]-Hex;Hex[^5]-Unicode",
|
||||
"Unicode[^5]-Hex;Hex[aeiou]-Unicode",
|
||||
"Any[aeiou]-Hex;Hex[^5]-Any",
|
||||
"Any[^5]-Hex;Hex[aeiou]-Any",
|
||||
"quizzical",
|
||||
"q\\u0075izzical",
|
||||
|
||||
|
@ -961,6 +961,96 @@ public class TransliteratorTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test compound RBT rules.
|
||||
*/
|
||||
public void TestCompoundRBT() {
|
||||
// Careful with spacing and ';' here: Phrase this exactly
|
||||
// as toRules() is going to return it. If toRules() changes
|
||||
// with regard to spacing or ';', then adjust this string.
|
||||
String rule = "::Hex-Any;\n" +
|
||||
"::Any-Lower;\n" +
|
||||
"a > '.A.';\n" +
|
||||
"b > '.B.';\n" +
|
||||
"::Any[^t]-Upper;";
|
||||
Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
|
||||
if (t == null) {
|
||||
errln("FAIL: createFromRules failed");
|
||||
return;
|
||||
}
|
||||
expect(t, "\u0043at in the hat, bat on the mat",
|
||||
"C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
|
||||
String r = t.toRules(true);
|
||||
if (r.equals(rule)) {
|
||||
logln("OK: toRules() => " + r);
|
||||
} else {
|
||||
errln("FAIL: toRules() => " + r +
|
||||
", expected " + rule);
|
||||
}
|
||||
|
||||
// Now test toRules
|
||||
t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);
|
||||
if (t == null) {
|
||||
errln("FAIL: createInstance failed");
|
||||
return;
|
||||
}
|
||||
String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
|
||||
r = t.toRules(true);
|
||||
if (!r.equals(exp)) {
|
||||
errln("FAIL: toRules() => " + r +
|
||||
", expected " + exp);
|
||||
} else {
|
||||
logln("OK: toRules() => " + r);
|
||||
}
|
||||
|
||||
// Round trip the result of toRules
|
||||
t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);
|
||||
if (t == null) {
|
||||
errln("FAIL: createFromRules #2 failed");
|
||||
return;
|
||||
} else {
|
||||
logln("OK: createFromRules(" + r + ") succeeded");
|
||||
}
|
||||
|
||||
// Test toRules again
|
||||
r = t.toRules(true);
|
||||
if (!r.equals(exp)) {
|
||||
errln("FAIL: toRules() => " + r +
|
||||
", expected " + exp);
|
||||
} else {
|
||||
logln("OK: toRules() => " + r);
|
||||
}
|
||||
|
||||
// Test Foo(Bar) IDs. Careful with spacing in id; make it conform
|
||||
// to what the regenerated ID will look like.
|
||||
String id = "Upper(Lower);(NFKC)";
|
||||
t = Transliterator.getInstance(id, Transliterator.FORWARD);
|
||||
if (t == null) {
|
||||
errln("FAIL: createInstance #2 failed");
|
||||
return;
|
||||
}
|
||||
if (t.getID().equals(id)) {
|
||||
logln("OK: created " + id);
|
||||
} else {
|
||||
errln("FAIL: createInstance(" + id +
|
||||
").getID() => " + t.getID());
|
||||
}
|
||||
|
||||
Transliterator u = t.getInverse();
|
||||
if (u == null) {
|
||||
errln("FAIL: createInverse failed");
|
||||
return;
|
||||
}
|
||||
exp = "NFKC();Lower(Upper)";
|
||||
if (u.getID().equals(exp)) {
|
||||
logln("OK: createInverse(" + id + ") => " +
|
||||
u.getID());
|
||||
} else {
|
||||
errln("FAIL: createInverse(" + id + ") => " +
|
||||
u.getID());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test inverse of Greek-Latin; Title()
|
||||
*/
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java,v $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.13 $
|
||||
* $Date: 2001/09/21 21:24:04 $
|
||||
* $Revision: 1.14 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -35,7 +35,7 @@ import java.util.Vector;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.13 $ $Date: 2001/09/20 21:20:39 $
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.14 $ $Date: 2001/09/21 21:24:04 $
|
||||
*/
|
||||
public class CompoundTransliterator extends Transliterator {
|
||||
|
||||
|
@ -139,6 +139,19 @@ public class CompoundTransliterator extends Transliterator {
|
|||
this(ID, FORWARD, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Package private constructor for compound RBTs. Construct a
|
||||
* compound transliterator using the given idBlock, with the
|
||||
* splitTrans inserted at the idSplitPoint.
|
||||
*/
|
||||
CompoundTransliterator(String ID,
|
||||
String idBlock,
|
||||
int idSplitPoint,
|
||||
Transliterator splitTrans) {
|
||||
super(ID, null);
|
||||
init(idBlock, FORWARD, idSplitPoint, splitTrans, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Package private constructor for Transliterator from a vector of
|
||||
* transliterators. The vector order is FORWARD, so if dir is
|
||||
|
@ -154,6 +167,39 @@ public class CompoundTransliterator extends Transliterator {
|
|||
// assume caller will fixup ID
|
||||
}
|
||||
|
||||
/**
|
||||
* Finish constructing a transliterator: only to be called by
|
||||
* constructors. Before calling init(), set trans and filter to NULL.
|
||||
* @param id the id containing ';'-separated entries
|
||||
* @param direction either FORWARD or REVERSE
|
||||
* @param idSplitPoint the index into id at which the
|
||||
* splitTrans should be inserted, if there is one, or
|
||||
* -1 if there is none.
|
||||
* @param splitTrans a transliterator to be inserted
|
||||
* before the entry at offset idSplitPoint in the id string. May be
|
||||
* NULL to insert no entry.
|
||||
* @param fixReverseID if TRUE, then reconstruct the ID of reverse
|
||||
* entries by calling getID() of component entries. Some constructors
|
||||
* do not require this because they apply a facade ID anyway.
|
||||
*/
|
||||
private void init(String id,
|
||||
int direction,
|
||||
int idSplitPoint,
|
||||
Transliterator splitTrans,
|
||||
boolean fixReverseID) {
|
||||
// assert(trans == 0);
|
||||
|
||||
Vector list = new Vector();
|
||||
int[] splitTransIndex = new int[1];
|
||||
StringBuffer regenID = new StringBuffer();
|
||||
Transliterator.parseCompoundID(id, regenID, direction,
|
||||
idSplitPoint, splitTrans,
|
||||
list, splitTransIndex);
|
||||
compoundRBTIndex = splitTransIndex[0];
|
||||
|
||||
init(list, direction, fixReverseID);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finish constructing a transliterator: only to be called by
|
||||
* constructors. Before calling init(), set trans and filter to NULL.
|
||||
|
@ -165,7 +211,6 @@ public class CompoundTransliterator extends Transliterator {
|
|||
* @param fixReverseID if TRUE, then reconstruct the ID of reverse
|
||||
* entries by calling getID() of component entries. Some constructors
|
||||
* do not require this because they apply a facade ID anyway.
|
||||
* @param status the error code indicating success or failure
|
||||
*/
|
||||
private void init(Vector list,
|
||||
int direction,
|
||||
|
@ -302,6 +347,34 @@ public class CompoundTransliterator extends Transliterator {
|
|||
super.setFilter(f);
|
||||
}
|
||||
|
||||
public String toRules(boolean escapeUnprintable) {
|
||||
// We do NOT call toRules() on our component transliterators, in
|
||||
// general. If we have several rule-based transliterators, this
|
||||
// yields a concatenation of the rules -- not what we want. We do
|
||||
// handle compound RBT transliterators specially -- those for which
|
||||
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
||||
// we do call toRules() recursively.
|
||||
StringBuffer rulesSource = new StringBuffer();
|
||||
for (int i=0; i<trans.length; ++i) {
|
||||
String rule;
|
||||
if (i == compoundRBTIndex) {
|
||||
rule = trans[i].toRules(escapeUnprintable);
|
||||
} else {
|
||||
rule = trans[i].baseToRules(escapeUnprintable);
|
||||
}
|
||||
if (rulesSource.length() != 0 &&
|
||||
rulesSource.charAt(rulesSource.length() - 1) != '\n') {
|
||||
rulesSource.append('\n');
|
||||
}
|
||||
rulesSource.append(rule);
|
||||
if (rulesSource.length() != 0 &&
|
||||
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
|
||||
rulesSource.append(ID_DELIM);
|
||||
}
|
||||
}
|
||||
return rulesSource.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2001/09/19 17:43:37 $
|
||||
* $Revision: 1.43 $
|
||||
* $Date: 2001/09/21 21:24:04 $
|
||||
* $Revision: 1.44 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -279,12 +279,16 @@ import com.ibm.text.resources.ResourceReader;
|
|||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.43 $ $Date: 2001/09/19 17:43:37 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.44 $ $Date: 2001/09/21 21:24:04 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
private Data data;
|
||||
|
||||
// Indicator for ID blocks
|
||||
private static final String ID_TOKEN = "::";
|
||||
private static final int ID_TOKEN_LEN = 2;
|
||||
|
||||
private static final String COPYRIGHT =
|
||||
"\u00A9 IBM Corporation 1999. All rights reserved.";
|
||||
|
||||
|
@ -334,6 +338,31 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
return new Parser(rules, direction).getData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a given set of rules. Return up to three pieces of
|
||||
* parsed data. These are the header ::id block, the rule block,
|
||||
* and the footer ::id block. Any or all of these may be empty.
|
||||
* If the ::id blocks are empty, their corresponding parameters
|
||||
* are returned as the empty string. If there are no rules, the
|
||||
* TransliterationRuleData result is 0.
|
||||
* @param ruleDataResult caller owns the pointer stored here.
|
||||
* May be NULL.
|
||||
* @param headerRule string including semicolons for the header
|
||||
* ::id block. May be empty.
|
||||
* @param footerRule string including semicolons for the footer
|
||||
* ::id block. May be empty.
|
||||
*/
|
||||
static Data parse(String rules,
|
||||
int direction,
|
||||
StringBuffer idBlockResult,
|
||||
int[] idSplitPointResult) {
|
||||
Parser parser = new Parser(new String[] { rules }, direction);
|
||||
idBlockResult.setLength(0);
|
||||
idBlockResult.append(parser.idBlock);
|
||||
idSplitPointResult[0] = parser.idSplitPoint;
|
||||
return (parser.ruleCount == 0) ? null : parser.getData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
|
@ -474,15 +503,27 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
|
||||
private static class Parser {
|
||||
/**
|
||||
* Current rule being parsed.
|
||||
*/
|
||||
private String rules;
|
||||
|
||||
private int direction;
|
||||
|
||||
private Data data;
|
||||
|
||||
// In a compound RBT, the index at which the RBT rules are
|
||||
// inserted into the ID block. Index 0 means before any IDs
|
||||
// in the block. Index idBlock.length() means after all IDs
|
||||
// in the block. Index is a string index.
|
||||
int idSplitPoint;
|
||||
|
||||
// The block of ::IDs, both at the top and at the bottom.
|
||||
// Inserted into these may be additional rules at the
|
||||
// idSplitPoint.
|
||||
String idBlock;
|
||||
|
||||
// The number of rules parsed. This tells us if there were
|
||||
// any actual transliterator rules, or if there were just ::ID
|
||||
// block IDs.
|
||||
int ruleCount;
|
||||
|
||||
/**
|
||||
* This class implements the SymbolTable interface. It is used
|
||||
* during parsing to give UnicodeSet access to variables that
|
||||
|
@ -717,6 +758,8 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
* rules
|
||||
*/
|
||||
private void parseRules(RuleBody ruleArray) {
|
||||
ruleCount = 0;
|
||||
|
||||
determineVariableRange(ruleArray);
|
||||
setVariablesVector = new Vector();
|
||||
parseData = new ParseData();
|
||||
|
@ -725,6 +768,16 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
int errorCount = 0;
|
||||
|
||||
ruleArray.reset();
|
||||
|
||||
StringBuffer idBlockResult = new StringBuffer();
|
||||
idSplitPoint = -1;
|
||||
// The mode marks whether we are in the header ::id block, the
|
||||
// rule block, or the footer ::id block.
|
||||
// mode == 0: start: rule->1, ::id->0
|
||||
// mode == 1: in rules: rule->1, ::id->2
|
||||
// mode == 2: in footer ::id block: rule->ERROR, ::id->2
|
||||
int mode = 0;
|
||||
|
||||
main:
|
||||
for (;;) {
|
||||
String rule = ruleArray.nextLine();
|
||||
|
@ -754,10 +807,54 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
// at once. We keep parsing rules even after a failure, up
|
||||
// to a specified limit, and report all errors at once.
|
||||
try {
|
||||
// We've found the start of a rule. c is its first
|
||||
// character, and pos points past c. Lexically parse the
|
||||
// rule into component pieces.
|
||||
pos = parseRule(rule, --pos, limit);
|
||||
// We've found the start of a rule or ID. c is its first
|
||||
// character, and pos points past c.
|
||||
--pos;
|
||||
// Look for an ID token. Must have at least ID_TOKEN_LEN + 1
|
||||
// chars left.
|
||||
if ((pos + ID_TOKEN_LEN + 1) <= limit &&
|
||||
rule.regionMatches(pos, ID_TOKEN, 0, ID_TOKEN_LEN)) {
|
||||
pos += ID_TOKEN_LEN;
|
||||
c = rule.charAt(pos);
|
||||
while (UCharacter.isWhitespace(c) && pos < limit) {
|
||||
++pos;
|
||||
c = rule.charAt(pos);
|
||||
}
|
||||
int[] p = new int[] { pos };
|
||||
boolean[] sawDelim = new boolean[1];
|
||||
StringBuffer regenID = new StringBuffer();
|
||||
Transliterator.parseID(rule, regenID, p, sawDelim, direction, false);
|
||||
if (p[0] == pos || !sawDelim[0]) {
|
||||
// Invalid ::id
|
||||
int i1 = pos + 2;
|
||||
while (i1 < rule.length() && rule.charAt(i1) != ';') {
|
||||
++i1;
|
||||
}
|
||||
throw new IllegalArgumentException("Invalid ::ID " +
|
||||
rule.substring(pos, i1));
|
||||
} else {
|
||||
if (mode == 1) {
|
||||
mode = 2;
|
||||
idSplitPoint = idBlockResult.length();
|
||||
}
|
||||
String str = rule.substring(pos, p[0]);
|
||||
idBlockResult.append(str);
|
||||
if (!sawDelim[0]) {
|
||||
idBlockResult.append(';');
|
||||
}
|
||||
pos = p[0];
|
||||
}
|
||||
} else {
|
||||
// Parse a rule
|
||||
pos = parseRule(rule, pos, limit);
|
||||
++ruleCount;
|
||||
if (mode == 2) {
|
||||
// ::id in illegal position (because a rule
|
||||
// occurred after the ::id footer block)
|
||||
throw new IllegalArgumentException("::ID in illegal position");
|
||||
}
|
||||
mode = 1;
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (errorCount == 30) {
|
||||
errors.append("\nMore than 30 errors; further messages squelched");
|
||||
|
@ -774,6 +871,8 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
}
|
||||
}
|
||||
|
||||
idBlock = idBlockResult.toString();
|
||||
|
||||
// Convert the set vector to an array
|
||||
data.setVariables = new UnicodeSet[setVariablesVector.size()];
|
||||
setVariablesVector.copyInto(data.setVariables);
|
||||
|
@ -1480,6 +1579,9 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.44 2001/09/21 21:24:04 alan
|
||||
* jitterbug 64: allow ::ID blocks in rules
|
||||
*
|
||||
* Revision 1.43 2001/09/19 17:43:37 alan
|
||||
* jitterbug 60: initial implementation of toRules()
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.39 $
|
||||
* $Date: 2001/09/21 21:24:04 $
|
||||
* $Revision: 1.40 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.39 $ $Date: 2001/09/20 21:20:39 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.40 $ $Date: 2001/09/21 21:24:04 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
|
@ -832,12 +832,52 @@ public abstract class Transliterator {
|
|||
* NullTransliterator, if it contains ID blocks which parse as
|
||||
* empty for the given direction.
|
||||
*/
|
||||
public static final Transliterator createFromRules(String ID, String rules, int direction) {
|
||||
public static final Transliterator createFromRules(String ID, String rules, int dir) {
|
||||
// TODO Flesh this out
|
||||
return new RuleBasedTransliterator(ID, rules, direction, null);
|
||||
//// return new RuleBasedTransliterator(ID, rules, direction, null);
|
||||
|
||||
StringBuffer idBlock = new StringBuffer();
|
||||
int[] idSplitPoint = new int[] { -1 };
|
||||
RuleBasedTransliterator.Data data = null;
|
||||
|
||||
data = RuleBasedTransliterator.parse(rules, dir,
|
||||
idBlock, idSplitPoint);
|
||||
|
||||
// NOTE: The logic here matches that in TransliteratorRegistry.
|
||||
if (idBlock.length() == 0) {
|
||||
if (data == null) {
|
||||
// No idBlock, no data -- this is just an
|
||||
// alias for Null
|
||||
return new NullTransliterator();
|
||||
} else {
|
||||
// No idBlock, data != 0 -- this is an
|
||||
// ordinary RBT_DATA.
|
||||
return new RuleBasedTransliterator(ID, data, null);
|
||||
}
|
||||
} else {
|
||||
if (data == null) {
|
||||
// idBlock, no data -- this is an alias
|
||||
Transliterator t = getInstance(idBlock.toString(), dir);
|
||||
if (t != null) {
|
||||
t.setID(ID);
|
||||
}
|
||||
return t;
|
||||
} else {
|
||||
// idBlock and data -- this is a compound
|
||||
// RBT
|
||||
Transliterator t = new RuleBasedTransliterator("_", data, null);
|
||||
t = new CompoundTransliterator(ID, idBlock.toString(), idSplitPoint[0],
|
||||
t);
|
||||
return t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String toRules(boolean escapeUnprintable) {
|
||||
return baseToRules(escapeUnprintable);
|
||||
}
|
||||
|
||||
protected final String baseToRules(boolean escapeUnprintable) {
|
||||
// The base class implementation of toRules munges the ID into
|
||||
// the correct format. That is: foo => ::foo
|
||||
// KEEP in sync with rbt_pars
|
||||
|
@ -860,13 +900,13 @@ public abstract class Transliterator {
|
|||
* 'result' at which the adoptedSplitTrans is stored, or -1 if
|
||||
* adoptedSplitTrans == 0
|
||||
*/
|
||||
private static void parseCompoundID(String id,
|
||||
StringBuffer regenID,
|
||||
int dir,
|
||||
int idSplitPoint,
|
||||
Transliterator adoptedSplitTrans,
|
||||
Vector result,
|
||||
int[] splitTransIndex) {
|
||||
static void parseCompoundID(String id,
|
||||
StringBuffer regenID,
|
||||
int dir,
|
||||
int idSplitPoint,
|
||||
Transliterator splitTrans,
|
||||
Vector result,
|
||||
int[] splitTransIndex) {
|
||||
regenID.setLength(0);
|
||||
splitTransIndex[0] = -1;
|
||||
int pos = 0;
|
||||
|
@ -874,10 +914,10 @@ public abstract class Transliterator {
|
|||
while (pos < id.length()) {
|
||||
// We compare (pos >= split), not (pos == split), so we can
|
||||
// skip over whitespace (see below).
|
||||
if (pos >= idSplitPoint && adoptedSplitTrans != null) {
|
||||
if (pos >= idSplitPoint && splitTrans != null) {
|
||||
splitTransIndex[0] = result.size();
|
||||
result.addElement(adoptedSplitTrans);
|
||||
adoptedSplitTrans = null;
|
||||
result.addElement(splitTrans);
|
||||
splitTrans = null;
|
||||
}
|
||||
int[] p = new int[] { pos };
|
||||
boolean[] sawDelimiter = new boolean[1];
|
||||
|
@ -903,10 +943,10 @@ public abstract class Transliterator {
|
|||
}
|
||||
|
||||
// Handle case of idSplitPoint == id.length()
|
||||
if (pos >= idSplitPoint && adoptedSplitTrans != null) {
|
||||
if (pos >= idSplitPoint && splitTrans != null) {
|
||||
splitTransIndex[0] = result.size();
|
||||
result.addElement(adoptedSplitTrans);
|
||||
adoptedSplitTrans = null;
|
||||
result.addElement(splitTrans);
|
||||
splitTrans = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -941,12 +981,12 @@ public abstract class Transliterator {
|
|||
* determine if there was an error. Instead, check to see if pos
|
||||
* moved.
|
||||
*/
|
||||
private static Transliterator parseID(String ID,
|
||||
StringBuffer regenID,
|
||||
int[] pos,
|
||||
boolean[] sawDelimiter,
|
||||
int dir,
|
||||
boolean create) {
|
||||
static Transliterator parseID(String ID,
|
||||
StringBuffer regenID,
|
||||
int[] pos,
|
||||
boolean[] sawDelimiter,
|
||||
int dir,
|
||||
boolean create) {
|
||||
int limit, preDelimLimit,
|
||||
revStart, revLimit=0,
|
||||
idStart, idLimit,
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
|
||||
* $Date: 2001/09/20 21:21:10 $
|
||||
* $Revision: 1.44 $
|
||||
* $Date: 2001/09/21 21:23:34 $
|
||||
* $Revision: 1.45 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -357,8 +357,8 @@ public class TransliteratorTest extends TestFmwk {
|
|||
* Compose the hex transliterators forward and reverse.
|
||||
*/
|
||||
public void TestCompoundHex() {
|
||||
Transliterator a = Transliterator.getInstance("Unicode-Hex");
|
||||
Transliterator b = Transliterator.getInstance("Hex-Unicode");
|
||||
Transliterator a = Transliterator.getInstance("Any-Hex");
|
||||
Transliterator b = Transliterator.getInstance("Hex-Any");
|
||||
Transliterator[] trans = { a, b };
|
||||
Transliterator ab = new CompoundTransliterator(trans);
|
||||
|
||||
|
@ -379,7 +379,7 @@ public class TransliteratorTest extends TestFmwk {
|
|||
* Do some basic tests of filtering.
|
||||
*/
|
||||
public void TestFiltering() {
|
||||
Transliterator hex = Transliterator.getInstance("Unicode-Hex");
|
||||
Transliterator hex = Transliterator.getInstance("Any-Hex");
|
||||
hex.setFilter(new UnicodeFilter() {
|
||||
public boolean contains(char c) {
|
||||
return c != 'c';
|
||||
|
@ -510,18 +510,18 @@ public class TransliteratorTest extends TestFmwk {
|
|||
* Prefix, suffix support in hex transliterators
|
||||
*/
|
||||
public void TestJ243() {
|
||||
// Test default Hex-Unicode, which should handle
|
||||
// Test default Hex-Any, which should handle
|
||||
// \\u, \\U, u+, and U+
|
||||
HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
|
||||
expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
|
||||
|
||||
// Try a custom Hex-Unicode
|
||||
// Try a custom Hex-Any
|
||||
// \\uXXXX and &#xXXXX;
|
||||
HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
|
||||
expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123",
|
||||
"abcd5fx0123");
|
||||
|
||||
// Try custom Unicode-Hex (default is tested elsewhere)
|
||||
// Try custom Any-Hex (default is tested elsewhere)
|
||||
UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
|
||||
expect(hex3, "012", "012");
|
||||
}
|
||||
|
@ -748,13 +748,13 @@ public class TransliteratorTest extends TestFmwk {
|
|||
*/
|
||||
public void TestFilterIDs() {
|
||||
String[] DATA = {
|
||||
"Unicode[aeiou]-Hex",
|
||||
"Hex[aeiou]-Unicode",
|
||||
"Any[aeiou]-Hex",
|
||||
"Hex[aeiou]-Any",
|
||||
"quizzical",
|
||||
"q\\u0075\\u0069zz\\u0069c\\u0061l",
|
||||
|
||||
"Unicode[aeiou]-Hex;Hex[^5]-Unicode",
|
||||
"Unicode[^5]-Hex;Hex[aeiou]-Unicode",
|
||||
"Any[aeiou]-Hex;Hex[^5]-Any",
|
||||
"Any[^5]-Hex;Hex[aeiou]-Any",
|
||||
"quizzical",
|
||||
"q\\u0075izzical",
|
||||
|
||||
|
@ -961,6 +961,96 @@ public class TransliteratorTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test compound RBT rules.
|
||||
*/
|
||||
public void TestCompoundRBT() {
|
||||
// Careful with spacing and ';' here: Phrase this exactly
|
||||
// as toRules() is going to return it. If toRules() changes
|
||||
// with regard to spacing or ';', then adjust this string.
|
||||
String rule = "::Hex-Any;\n" +
|
||||
"::Any-Lower;\n" +
|
||||
"a > '.A.';\n" +
|
||||
"b > '.B.';\n" +
|
||||
"::Any[^t]-Upper;";
|
||||
Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
|
||||
if (t == null) {
|
||||
errln("FAIL: createFromRules failed");
|
||||
return;
|
||||
}
|
||||
expect(t, "\u0043at in the hat, bat on the mat",
|
||||
"C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
|
||||
String r = t.toRules(true);
|
||||
if (r.equals(rule)) {
|
||||
logln("OK: toRules() => " + r);
|
||||
} else {
|
||||
errln("FAIL: toRules() => " + r +
|
||||
", expected " + rule);
|
||||
}
|
||||
|
||||
// Now test toRules
|
||||
t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);
|
||||
if (t == null) {
|
||||
errln("FAIL: createInstance failed");
|
||||
return;
|
||||
}
|
||||
String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
|
||||
r = t.toRules(true);
|
||||
if (!r.equals(exp)) {
|
||||
errln("FAIL: toRules() => " + r +
|
||||
", expected " + exp);
|
||||
} else {
|
||||
logln("OK: toRules() => " + r);
|
||||
}
|
||||
|
||||
// Round trip the result of toRules
|
||||
t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);
|
||||
if (t == null) {
|
||||
errln("FAIL: createFromRules #2 failed");
|
||||
return;
|
||||
} else {
|
||||
logln("OK: createFromRules(" + r + ") succeeded");
|
||||
}
|
||||
|
||||
// Test toRules again
|
||||
r = t.toRules(true);
|
||||
if (!r.equals(exp)) {
|
||||
errln("FAIL: toRules() => " + r +
|
||||
", expected " + exp);
|
||||
} else {
|
||||
logln("OK: toRules() => " + r);
|
||||
}
|
||||
|
||||
// Test Foo(Bar) IDs. Careful with spacing in id; make it conform
|
||||
// to what the regenerated ID will look like.
|
||||
String id = "Upper(Lower);(NFKC)";
|
||||
t = Transliterator.getInstance(id, Transliterator.FORWARD);
|
||||
if (t == null) {
|
||||
errln("FAIL: createInstance #2 failed");
|
||||
return;
|
||||
}
|
||||
if (t.getID().equals(id)) {
|
||||
logln("OK: created " + id);
|
||||
} else {
|
||||
errln("FAIL: createInstance(" + id +
|
||||
").getID() => " + t.getID());
|
||||
}
|
||||
|
||||
Transliterator u = t.getInverse();
|
||||
if (u == null) {
|
||||
errln("FAIL: createInverse failed");
|
||||
return;
|
||||
}
|
||||
exp = "NFKC();Lower(Upper)";
|
||||
if (u.getID().equals(exp)) {
|
||||
logln("OK: createInverse(" + id + ") => " +
|
||||
u.getID());
|
||||
} else {
|
||||
errln("FAIL: createInverse(" + id + ") => " +
|
||||
u.getID());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test inverse of Greek-Latin; Title()
|
||||
*/
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/CompoundTransliterator.java,v $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.13 $
|
||||
* $Date: 2001/09/21 21:24:04 $
|
||||
* $Revision: 1.14 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -35,7 +35,7 @@ import java.util.Vector;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.13 $ $Date: 2001/09/20 21:20:39 $
|
||||
* @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.14 $ $Date: 2001/09/21 21:24:04 $
|
||||
*/
|
||||
public class CompoundTransliterator extends Transliterator {
|
||||
|
||||
|
@ -139,6 +139,19 @@ public class CompoundTransliterator extends Transliterator {
|
|||
this(ID, FORWARD, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Package private constructor for compound RBTs. Construct a
|
||||
* compound transliterator using the given idBlock, with the
|
||||
* splitTrans inserted at the idSplitPoint.
|
||||
*/
|
||||
CompoundTransliterator(String ID,
|
||||
String idBlock,
|
||||
int idSplitPoint,
|
||||
Transliterator splitTrans) {
|
||||
super(ID, null);
|
||||
init(idBlock, FORWARD, idSplitPoint, splitTrans, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Package private constructor for Transliterator from a vector of
|
||||
* transliterators. The vector order is FORWARD, so if dir is
|
||||
|
@ -154,6 +167,39 @@ public class CompoundTransliterator extends Transliterator {
|
|||
// assume caller will fixup ID
|
||||
}
|
||||
|
||||
/**
|
||||
* Finish constructing a transliterator: only to be called by
|
||||
* constructors. Before calling init(), set trans and filter to NULL.
|
||||
* @param id the id containing ';'-separated entries
|
||||
* @param direction either FORWARD or REVERSE
|
||||
* @param idSplitPoint the index into id at which the
|
||||
* splitTrans should be inserted, if there is one, or
|
||||
* -1 if there is none.
|
||||
* @param splitTrans a transliterator to be inserted
|
||||
* before the entry at offset idSplitPoint in the id string. May be
|
||||
* NULL to insert no entry.
|
||||
* @param fixReverseID if TRUE, then reconstruct the ID of reverse
|
||||
* entries by calling getID() of component entries. Some constructors
|
||||
* do not require this because they apply a facade ID anyway.
|
||||
*/
|
||||
private void init(String id,
|
||||
int direction,
|
||||
int idSplitPoint,
|
||||
Transliterator splitTrans,
|
||||
boolean fixReverseID) {
|
||||
// assert(trans == 0);
|
||||
|
||||
Vector list = new Vector();
|
||||
int[] splitTransIndex = new int[1];
|
||||
StringBuffer regenID = new StringBuffer();
|
||||
Transliterator.parseCompoundID(id, regenID, direction,
|
||||
idSplitPoint, splitTrans,
|
||||
list, splitTransIndex);
|
||||
compoundRBTIndex = splitTransIndex[0];
|
||||
|
||||
init(list, direction, fixReverseID);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finish constructing a transliterator: only to be called by
|
||||
* constructors. Before calling init(), set trans and filter to NULL.
|
||||
|
@ -165,7 +211,6 @@ public class CompoundTransliterator extends Transliterator {
|
|||
* @param fixReverseID if TRUE, then reconstruct the ID of reverse
|
||||
* entries by calling getID() of component entries. Some constructors
|
||||
* do not require this because they apply a facade ID anyway.
|
||||
* @param status the error code indicating success or failure
|
||||
*/
|
||||
private void init(Vector list,
|
||||
int direction,
|
||||
|
@ -302,6 +347,34 @@ public class CompoundTransliterator extends Transliterator {
|
|||
super.setFilter(f);
|
||||
}
|
||||
|
||||
public String toRules(boolean escapeUnprintable) {
|
||||
// We do NOT call toRules() on our component transliterators, in
|
||||
// general. If we have several rule-based transliterators, this
|
||||
// yields a concatenation of the rules -- not what we want. We do
|
||||
// handle compound RBT transliterators specially -- those for which
|
||||
// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
|
||||
// we do call toRules() recursively.
|
||||
StringBuffer rulesSource = new StringBuffer();
|
||||
for (int i=0; i<trans.length; ++i) {
|
||||
String rule;
|
||||
if (i == compoundRBTIndex) {
|
||||
rule = trans[i].toRules(escapeUnprintable);
|
||||
} else {
|
||||
rule = trans[i].baseToRules(escapeUnprintable);
|
||||
}
|
||||
if (rulesSource.length() != 0 &&
|
||||
rulesSource.charAt(rulesSource.length() - 1) != '\n') {
|
||||
rulesSource.append('\n');
|
||||
}
|
||||
rulesSource.append(rule);
|
||||
if (rulesSource.length() != 0 &&
|
||||
rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
|
||||
rulesSource.append(ID_DELIM);
|
||||
}
|
||||
}
|
||||
return rulesSource.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2001/09/19 17:43:37 $
|
||||
* $Revision: 1.43 $
|
||||
* $Date: 2001/09/21 21:24:04 $
|
||||
* $Revision: 1.44 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -279,12 +279,16 @@ import com.ibm.text.resources.ResourceReader;
|
|||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.43 $ $Date: 2001/09/19 17:43:37 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.44 $ $Date: 2001/09/21 21:24:04 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
private Data data;
|
||||
|
||||
// Indicator for ID blocks
|
||||
private static final String ID_TOKEN = "::";
|
||||
private static final int ID_TOKEN_LEN = 2;
|
||||
|
||||
private static final String COPYRIGHT =
|
||||
"\u00A9 IBM Corporation 1999. All rights reserved.";
|
||||
|
||||
|
@ -334,6 +338,31 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
return new Parser(rules, direction).getData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a given set of rules. Return up to three pieces of
|
||||
* parsed data. These are the header ::id block, the rule block,
|
||||
* and the footer ::id block. Any or all of these may be empty.
|
||||
* If the ::id blocks are empty, their corresponding parameters
|
||||
* are returned as the empty string. If there are no rules, the
|
||||
* TransliterationRuleData result is 0.
|
||||
* @param ruleDataResult caller owns the pointer stored here.
|
||||
* May be NULL.
|
||||
* @param headerRule string including semicolons for the header
|
||||
* ::id block. May be empty.
|
||||
* @param footerRule string including semicolons for the footer
|
||||
* ::id block. May be empty.
|
||||
*/
|
||||
static Data parse(String rules,
|
||||
int direction,
|
||||
StringBuffer idBlockResult,
|
||||
int[] idSplitPointResult) {
|
||||
Parser parser = new Parser(new String[] { rules }, direction);
|
||||
idBlockResult.setLength(0);
|
||||
idBlockResult.append(parser.idBlock);
|
||||
idSplitPointResult[0] = parser.idSplitPoint;
|
||||
return (parser.ruleCount == 0) ? null : parser.getData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
|
@ -474,15 +503,27 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
|
||||
private static class Parser {
|
||||
/**
|
||||
* Current rule being parsed.
|
||||
*/
|
||||
private String rules;
|
||||
|
||||
private int direction;
|
||||
|
||||
private Data data;
|
||||
|
||||
// In a compound RBT, the index at which the RBT rules are
|
||||
// inserted into the ID block. Index 0 means before any IDs
|
||||
// in the block. Index idBlock.length() means after all IDs
|
||||
// in the block. Index is a string index.
|
||||
int idSplitPoint;
|
||||
|
||||
// The block of ::IDs, both at the top and at the bottom.
|
||||
// Inserted into these may be additional rules at the
|
||||
// idSplitPoint.
|
||||
String idBlock;
|
||||
|
||||
// The number of rules parsed. This tells us if there were
|
||||
// any actual transliterator rules, or if there were just ::ID
|
||||
// block IDs.
|
||||
int ruleCount;
|
||||
|
||||
/**
|
||||
* This class implements the SymbolTable interface. It is used
|
||||
* during parsing to give UnicodeSet access to variables that
|
||||
|
@ -717,6 +758,8 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
* rules
|
||||
*/
|
||||
private void parseRules(RuleBody ruleArray) {
|
||||
ruleCount = 0;
|
||||
|
||||
determineVariableRange(ruleArray);
|
||||
setVariablesVector = new Vector();
|
||||
parseData = new ParseData();
|
||||
|
@ -725,6 +768,16 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
int errorCount = 0;
|
||||
|
||||
ruleArray.reset();
|
||||
|
||||
StringBuffer idBlockResult = new StringBuffer();
|
||||
idSplitPoint = -1;
|
||||
// The mode marks whether we are in the header ::id block, the
|
||||
// rule block, or the footer ::id block.
|
||||
// mode == 0: start: rule->1, ::id->0
|
||||
// mode == 1: in rules: rule->1, ::id->2
|
||||
// mode == 2: in footer ::id block: rule->ERROR, ::id->2
|
||||
int mode = 0;
|
||||
|
||||
main:
|
||||
for (;;) {
|
||||
String rule = ruleArray.nextLine();
|
||||
|
@ -754,10 +807,54 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
// at once. We keep parsing rules even after a failure, up
|
||||
// to a specified limit, and report all errors at once.
|
||||
try {
|
||||
// We've found the start of a rule. c is its first
|
||||
// character, and pos points past c. Lexically parse the
|
||||
// rule into component pieces.
|
||||
pos = parseRule(rule, --pos, limit);
|
||||
// We've found the start of a rule or ID. c is its first
|
||||
// character, and pos points past c.
|
||||
--pos;
|
||||
// Look for an ID token. Must have at least ID_TOKEN_LEN + 1
|
||||
// chars left.
|
||||
if ((pos + ID_TOKEN_LEN + 1) <= limit &&
|
||||
rule.regionMatches(pos, ID_TOKEN, 0, ID_TOKEN_LEN)) {
|
||||
pos += ID_TOKEN_LEN;
|
||||
c = rule.charAt(pos);
|
||||
while (UCharacter.isWhitespace(c) && pos < limit) {
|
||||
++pos;
|
||||
c = rule.charAt(pos);
|
||||
}
|
||||
int[] p = new int[] { pos };
|
||||
boolean[] sawDelim = new boolean[1];
|
||||
StringBuffer regenID = new StringBuffer();
|
||||
Transliterator.parseID(rule, regenID, p, sawDelim, direction, false);
|
||||
if (p[0] == pos || !sawDelim[0]) {
|
||||
// Invalid ::id
|
||||
int i1 = pos + 2;
|
||||
while (i1 < rule.length() && rule.charAt(i1) != ';') {
|
||||
++i1;
|
||||
}
|
||||
throw new IllegalArgumentException("Invalid ::ID " +
|
||||
rule.substring(pos, i1));
|
||||
} else {
|
||||
if (mode == 1) {
|
||||
mode = 2;
|
||||
idSplitPoint = idBlockResult.length();
|
||||
}
|
||||
String str = rule.substring(pos, p[0]);
|
||||
idBlockResult.append(str);
|
||||
if (!sawDelim[0]) {
|
||||
idBlockResult.append(';');
|
||||
}
|
||||
pos = p[0];
|
||||
}
|
||||
} else {
|
||||
// Parse a rule
|
||||
pos = parseRule(rule, pos, limit);
|
||||
++ruleCount;
|
||||
if (mode == 2) {
|
||||
// ::id in illegal position (because a rule
|
||||
// occurred after the ::id footer block)
|
||||
throw new IllegalArgumentException("::ID in illegal position");
|
||||
}
|
||||
mode = 1;
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (errorCount == 30) {
|
||||
errors.append("\nMore than 30 errors; further messages squelched");
|
||||
|
@ -774,6 +871,8 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
}
|
||||
}
|
||||
|
||||
idBlock = idBlockResult.toString();
|
||||
|
||||
// Convert the set vector to an array
|
||||
data.setVariables = new UnicodeSet[setVariablesVector.size()];
|
||||
setVariablesVector.copyInto(data.setVariables);
|
||||
|
@ -1480,6 +1579,9 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.44 2001/09/21 21:24:04 alan
|
||||
* jitterbug 64: allow ::ID blocks in rules
|
||||
*
|
||||
* Revision 1.43 2001/09/19 17:43:37 alan
|
||||
* jitterbug 60: initial implementation of toRules()
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
|
||||
* $Date: 2001/09/20 21:20:39 $
|
||||
* $Revision: 1.39 $
|
||||
* $Date: 2001/09/21 21:24:04 $
|
||||
* $Revision: 1.40 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.39 $ $Date: 2001/09/20 21:20:39 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.40 $ $Date: 2001/09/21 21:24:04 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
|
@ -832,12 +832,52 @@ public abstract class Transliterator {
|
|||
* NullTransliterator, if it contains ID blocks which parse as
|
||||
* empty for the given direction.
|
||||
*/
|
||||
public static final Transliterator createFromRules(String ID, String rules, int direction) {
|
||||
public static final Transliterator createFromRules(String ID, String rules, int dir) {
|
||||
// TODO Flesh this out
|
||||
return new RuleBasedTransliterator(ID, rules, direction, null);
|
||||
//// return new RuleBasedTransliterator(ID, rules, direction, null);
|
||||
|
||||
StringBuffer idBlock = new StringBuffer();
|
||||
int[] idSplitPoint = new int[] { -1 };
|
||||
RuleBasedTransliterator.Data data = null;
|
||||
|
||||
data = RuleBasedTransliterator.parse(rules, dir,
|
||||
idBlock, idSplitPoint);
|
||||
|
||||
// NOTE: The logic here matches that in TransliteratorRegistry.
|
||||
if (idBlock.length() == 0) {
|
||||
if (data == null) {
|
||||
// No idBlock, no data -- this is just an
|
||||
// alias for Null
|
||||
return new NullTransliterator();
|
||||
} else {
|
||||
// No idBlock, data != 0 -- this is an
|
||||
// ordinary RBT_DATA.
|
||||
return new RuleBasedTransliterator(ID, data, null);
|
||||
}
|
||||
} else {
|
||||
if (data == null) {
|
||||
// idBlock, no data -- this is an alias
|
||||
Transliterator t = getInstance(idBlock.toString(), dir);
|
||||
if (t != null) {
|
||||
t.setID(ID);
|
||||
}
|
||||
return t;
|
||||
} else {
|
||||
// idBlock and data -- this is a compound
|
||||
// RBT
|
||||
Transliterator t = new RuleBasedTransliterator("_", data, null);
|
||||
t = new CompoundTransliterator(ID, idBlock.toString(), idSplitPoint[0],
|
||||
t);
|
||||
return t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Base implementation of toRules(): delegates to
 * <code>baseToRules(escapeUnprintable)</code> and returns its result
 * unchanged.
 * @param escapeUnprintable passed through to
 * <code>baseToRules()</code>
 */
public String toRules(boolean escapeUnprintable) {
|
||||
return baseToRules(escapeUnprintable);
|
||||
}
|
||||
|
||||
protected final String baseToRules(boolean escapeUnprintable) {
|
||||
// The base class implementation of toRules munges the ID into
|
||||
// the correct format. That is: foo => ::foo
|
||||
// KEEP in sync with rbt_pars
|
||||
|
@ -860,13 +900,13 @@ public abstract class Transliterator {
|
|||
* 'result' at which the adoptedSplitTrans is stored, or -1 if
|
||||
* adoptedSplitTrans == 0
|
||||
*/
|
||||
private static void parseCompoundID(String id,
|
||||
StringBuffer regenID,
|
||||
int dir,
|
||||
int idSplitPoint,
|
||||
Transliterator adoptedSplitTrans,
|
||||
Vector result,
|
||||
int[] splitTransIndex) {
|
||||
static void parseCompoundID(String id,
|
||||
StringBuffer regenID,
|
||||
int dir,
|
||||
int idSplitPoint,
|
||||
Transliterator splitTrans,
|
||||
Vector result,
|
||||
int[] splitTransIndex) {
|
||||
regenID.setLength(0);
|
||||
splitTransIndex[0] = -1;
|
||||
int pos = 0;
|
||||
|
@ -874,10 +914,10 @@ public abstract class Transliterator {
|
|||
while (pos < id.length()) {
|
||||
// We compare (pos >= split), not (pos == split), so we can
|
||||
// skip over whitespace (see below).
|
||||
if (pos >= idSplitPoint && adoptedSplitTrans != null) {
|
||||
if (pos >= idSplitPoint && splitTrans != null) {
|
||||
splitTransIndex[0] = result.size();
|
||||
result.addElement(adoptedSplitTrans);
|
||||
adoptedSplitTrans = null;
|
||||
result.addElement(splitTrans);
|
||||
splitTrans = null;
|
||||
}
|
||||
int[] p = new int[] { pos };
|
||||
boolean[] sawDelimiter = new boolean[1];
|
||||
|
@ -903,10 +943,10 @@ public abstract class Transliterator {
|
|||
}
|
||||
|
||||
// Handle case of idSplitPoint == id.length()
|
||||
if (pos >= idSplitPoint && adoptedSplitTrans != null) {
|
||||
if (pos >= idSplitPoint && splitTrans != null) {
|
||||
splitTransIndex[0] = result.size();
|
||||
result.addElement(adoptedSplitTrans);
|
||||
adoptedSplitTrans = null;
|
||||
result.addElement(splitTrans);
|
||||
splitTrans = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -941,12 +981,12 @@ public abstract class Transliterator {
|
|||
* determine if there was an error. Instead, check to see if pos
|
||||
* moved.
|
||||
*/
|
||||
private static Transliterator parseID(String ID,
|
||||
StringBuffer regenID,
|
||||
int[] pos,
|
||||
boolean[] sawDelimiter,
|
||||
int dir,
|
||||
boolean create) {
|
||||
static Transliterator parseID(String ID,
|
||||
StringBuffer regenID,
|
||||
int[] pos,
|
||||
boolean[] sawDelimiter,
|
||||
int dir,
|
||||
boolean create) {
|
||||
int limit, preDelimLimit,
|
||||
revStart, revLimit=0,
|
||||
idStart, idLimit,
|
||||
|
|
Loading…
Add table
Reference in a new issue