mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-19 11:45:45 +00:00
ICU-74 finish port of Source-Target/Variant code incl. TransliteratorRegistry and tests
X-SVN-Rev: 6084
This commit is contained in:
parent
50408eca3a
commit
5746e4c2fc
8 changed files with 2030 additions and 2244 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
|
||||
* $Date: 2001/10/05 06:30:38 $
|
||||
* $Revision: 1.49 $
|
||||
* $Date: 2001/10/05 18:16:59 $
|
||||
* $Revision: 1.50 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -1204,7 +1204,77 @@ public class TransliteratorTest extends TestFmwk {
|
|||
"bb x xb");
|
||||
}
|
||||
|
||||
public void TestSTV_TODO() {
|
||||
static class TestFact implements Transliterator.Factory {
|
||||
static class NameableNullTrans extends NullTransliterator {
|
||||
public NameableNullTrans(String id) {
|
||||
setID(id);
|
||||
}
|
||||
};
|
||||
String id;
|
||||
public TestFact(String theID) {
|
||||
id = theID;
|
||||
}
|
||||
public Transliterator getInstance() {
|
||||
return new NameableNullTrans(id);
|
||||
}
|
||||
};
|
||||
|
||||
public void TestSTV() {
|
||||
Enumeration es = Transliterator.getAvailableSources();
|
||||
for (int i=0; es.hasMoreElements(); ++i) {
|
||||
String source = (String) es.nextElement();
|
||||
logln("" + i + ": " + source);
|
||||
if (source.length() == 0) {
|
||||
errln("FAIL: empty source");
|
||||
continue;
|
||||
}
|
||||
Enumeration et = Transliterator.getAvailableTargets(source);
|
||||
for (int j=0; et.hasMoreElements(); ++j) {
|
||||
String target = (String) et.nextElement();
|
||||
logln(" " + j + ": " + target);
|
||||
if (target.length() == 0) {
|
||||
errln("FAIL: empty target");
|
||||
continue;
|
||||
}
|
||||
Enumeration ev = Transliterator.getAvailableVariants(source, target);
|
||||
for (int k=0; ev.hasMoreElements(); ++k) {
|
||||
String variant = (String) ev.nextElement();
|
||||
if (variant.length() == 0) {
|
||||
logln(" " + k + ": <empty>");
|
||||
} else {
|
||||
logln(" " + k + ": " + variant);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test registration
|
||||
String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
|
||||
for (int i=0; i<3; ++i) {
|
||||
Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
|
||||
try {
|
||||
Transliterator t = Transliterator.getInstance(IDS[i]);
|
||||
if (t.getID().equals(IDS[i])) {
|
||||
logln("Ok: Registration/creation succeeded for ID " +
|
||||
IDS[i]);
|
||||
} else {
|
||||
errln("FAIL: Registration of ID " +
|
||||
IDS[i] + " creates ID " + t.getID());
|
||||
}
|
||||
Transliterator.unregister(IDS[i]);
|
||||
try {
|
||||
t = Transliterator.getInstance(IDS[i]);
|
||||
errln("FAIL: Unregistration failed for ID " +
|
||||
IDS[i] + "; still receiving ID " + t.getID());
|
||||
} catch (IllegalArgumentException e2) {
|
||||
// Good; this is what we expect
|
||||
logln("Ok; Unregistered " + IDS[i]);
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
errln("FAIL: Registration/creation failed for ID " +
|
||||
IDS[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1227,6 +1297,16 @@ public class TransliteratorTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test NFD chaining with RBT
|
||||
*/
|
||||
public void TestNFDChainRBT() {
|
||||
Transliterator t = Transliterator.createFromRules(
|
||||
"TEST", "::NFD; aa > Q; a > q;",
|
||||
Transliterator.FORWARD);
|
||||
expect(t, "aa", "Q");
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// icu4j only
|
||||
//======================================================================
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2001/10/03 00:14:22 $
|
||||
* $Revision: 1.47 $
|
||||
* $Date: 2001/10/05 18:15:54 $
|
||||
* $Revision: 1.48 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -279,7 +279,7 @@ import com.ibm.text.resources.ResourceReader;
|
|||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.47 $ $Date: 2001/10/03 00:14:22 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.48 $ $Date: 2001/10/05 18:15:54 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
|
@ -352,7 +352,35 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
int direction,
|
||||
StringBuffer idBlockResult,
|
||||
int[] idSplitPointResult) {
|
||||
TransliteratorParser parser = new TransliteratorParser(new String[] { rules }, direction);
|
||||
return _parse(new TransliteratorParser(new String[] { rules }, direction),
|
||||
idBlockResult, idSplitPointResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a given set of rules. Return up to three pieces of
|
||||
* parsed data. These are the header ::id block, the rule block,
|
||||
* and the footer ::id block. Any or all of these may be empty.
|
||||
* If the ::id blocks are empty, their corresponding parameters
|
||||
* are returned as the empty string. If there are no rules, the
|
||||
* TransliterationRuleData result is 0.
|
||||
* @param ruleDataResult caller owns the pointer stored here.
|
||||
* May be NULL.
|
||||
* @param headerRule string including semicolons for the header
|
||||
* ::id block. May be empty.
|
||||
* @param footerRule string including semicolons for the footer
|
||||
* ::id block. May be empty.
|
||||
*/
|
||||
static Data parse(ResourceReader rules,
|
||||
int direction,
|
||||
StringBuffer idBlockResult,
|
||||
int[] idSplitPointResult) {
|
||||
return _parse(new TransliteratorParser(rules, direction),
|
||||
idBlockResult, idSplitPointResult);
|
||||
}
|
||||
|
||||
static Data _parse(TransliteratorParser parser,
|
||||
StringBuffer idBlockResult,
|
||||
int[] idSplitPointResult) {
|
||||
idBlockResult.setLength(0);
|
||||
idBlockResult.append(parser.idBlock);
|
||||
idSplitPointResult[0] = parser.idSplitPoint;
|
||||
|
@ -498,6 +526,9 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.48 2001/10/05 18:15:54 alan
|
||||
* jitterbug 74: finish port of Source-Target/Variant code incl. TransliteratorRegistry and tests
|
||||
*
|
||||
* Revision 1.47 2001/10/03 00:14:22 alan
|
||||
* jitterbug 73: finish quantifier and supplemental char support
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
|
||||
* $Date: 2001/10/03 16:26:50 $
|
||||
* $Revision: 1.45 $
|
||||
* $Date: 2001/10/05 18:15:54 $
|
||||
* $Revision: 1.46 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.45 $ $Date: 2001/10/03 16:26:50 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.46 $ $Date: 2001/10/05 18:15:54 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
|
@ -348,44 +348,12 @@ public abstract class Transliterator {
|
|||
private int maximumContextLength = 0;
|
||||
|
||||
/**
|
||||
* Cache of system transliterators. Keys are <code>String</code>
|
||||
* names, values are one of the following:
|
||||
*
|
||||
* <ul><li><code>String</code> objects. These represent
|
||||
* RuleBasedTransliterators that have not been loaded yet, or
|
||||
* aliases. The first character determines the type: 'f'
|
||||
* indicates a FORWARD RBT, with the rest of the string giving the
|
||||
* resource name and encoding, separated by a colon. 'r' is
|
||||
* similar, but indicates a REVERSE RBT. 'a' indicates an alias,
|
||||
* with the rest of the string giving the ID to create.
|
||||
*
|
||||
* <li><code>Class</code> objects. Such objects must represent
|
||||
* subclasses of <code>Transliterator</code>, and must satisfy the
|
||||
* constraints described in <code>registerClass()</code>.
|
||||
*
|
||||
* <li><code>RuleBasedTransliterator.Data</code> objects. These
|
||||
* are built in-memory transliterator data cores that are wrapped
|
||||
* thinly to create RuleBasedTransliterator objects. When an RBT
|
||||
* is created, its Data core is cached and shared among future
|
||||
* instances of the same ID.
|
||||
* </ul>
|
||||
* System transliterator registry.
|
||||
*/
|
||||
private static Hashtable cache;
|
||||
|
||||
/**
|
||||
* Identical to 'cache' but contains internal transliterators.
|
||||
* These are not enumerated by getAvailableIDs().
|
||||
*/
|
||||
private static Hashtable internalCache;
|
||||
private static TransliteratorRegistry registry;
|
||||
|
||||
private static Hashtable displayNameCache;
|
||||
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
private static TransliteratorRegistry registry;
|
||||
|
||||
/**
|
||||
* Prefix for resource bundle key for the display name for a
|
||||
* transliterator. The ID is appended to this to form the key.
|
||||
|
@ -981,9 +949,8 @@ public abstract class Transliterator {
|
|||
// idBlock and data -- this is a compound
|
||||
// RBT
|
||||
Transliterator t = new RuleBasedTransliterator("_", data, null);
|
||||
t = new CompoundTransliterator(ID, idBlock.toString(), idSplitPoint[0],
|
||||
t);
|
||||
return t;
|
||||
return new CompoundTransliterator(ID, idBlock.toString(), idSplitPoint[0],
|
||||
t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1040,14 +1007,7 @@ public abstract class Transliterator {
|
|||
parseID(id, regenID, p, sawDelimiter, dir, true);
|
||||
|
||||
if (p[0] == pos || (p[0] < id.length() && !sawDelimiter[0])) {
|
||||
// TODO
|
||||
//throw new IllegalArgumentException("Invalid ID " + id);
|
||||
throw new IllegalArgumentException("Invalid ID " + id +
|
||||
" p[0]=" + p[0] +
|
||||
" pos=" + pos +
|
||||
" id.length()=" + id.length() +
|
||||
" sawDelimite[0]=" + sawDelimiter[0] +
|
||||
"");
|
||||
throw new IllegalArgumentException("Invalid ID " + id);
|
||||
}
|
||||
pos = p[0];
|
||||
// The return value may be NULL when, for instance, creating a
|
||||
|
@ -1394,26 +1354,6 @@ public abstract class Transliterator {
|
|||
return pos;
|
||||
}
|
||||
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
static Transliterator tempGet(String id, StringBuffer aliasReturn) {
|
||||
aliasReturn.setLength(0);
|
||||
if (id.equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
|
||||
id = NullTransliterator._ID;
|
||||
// Temporary hack to make this work
|
||||
}
|
||||
return internalGetInstance(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns this transliterator's inverse. See the class
|
||||
* documentation for details. This implementation simply inverts
|
||||
|
@ -1438,116 +1378,6 @@ public abstract class Transliterator {
|
|||
return getInstance(ID, REVERSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a transliterator object given its ID. Unlike getInstance(),
|
||||
* this method returns null if it cannot make use of the given ID.
|
||||
*/
|
||||
private static Transliterator internalGetInstance(String ID) {
|
||||
RuleBasedTransliterator.Data data = null;
|
||||
Hashtable sourceCache = cache;
|
||||
CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
|
||||
Object obj = cache.get(ciID);
|
||||
if (obj == null) {
|
||||
obj = internalCache.get(ciID);
|
||||
sourceCache = internalCache;
|
||||
}
|
||||
|
||||
if (obj != null) {
|
||||
if (obj instanceof RuleBasedTransliterator.Data) {
|
||||
data = (RuleBasedTransliterator.Data) obj;
|
||||
// Fall through to construct transliterator from cached Data object.
|
||||
} else if (obj instanceof Class) {
|
||||
try {
|
||||
return (Transliterator) ((Class) obj).newInstance();
|
||||
} catch (InstantiationException e) {
|
||||
} catch (IllegalAccessException e2) {}
|
||||
} else if (obj instanceof Factory) {
|
||||
return ((Factory) obj).getInstance();
|
||||
} else if (obj instanceof String) {
|
||||
String spec = (String) obj;
|
||||
if (spec.charAt(0) == 'a') {
|
||||
// alias
|
||||
Transliterator t = getInstance(spec.substring(1));
|
||||
t.ID = ID;
|
||||
return t;
|
||||
} else {
|
||||
synchronized (cache) {
|
||||
// file, either forward or reverse
|
||||
int dir = (spec.charAt(0) == 'f') ? FORWARD:REVERSE;
|
||||
int colon = spec.indexOf(':', 1);
|
||||
String resourceName = spec.substring(1, colon);
|
||||
String encoding = spec.substring(colon+1);
|
||||
ResourceReader r = null;
|
||||
try {
|
||||
r = new ResourceReader(resourceName, encoding);
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
// This should never happen; UTF8 is always supported
|
||||
} catch (IllegalArgumentException e2) {
|
||||
// Can't load UTF8 file
|
||||
}
|
||||
|
||||
if (r != null) {
|
||||
data = RuleBasedTransliterator.parse(r, dir);
|
||||
sourceCache.put(ciID, data);
|
||||
// Fall through to construct transliterator from Data object.
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new RuntimeException("Bogus cache object");
|
||||
}
|
||||
|
||||
if (data != null) {
|
||||
return new RuleBasedTransliterator(ID, data, null);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
// Currently unused, but may be of use in the future.
|
||||
// /**
|
||||
// * Find a path through the composed transliterator graph. This
|
||||
// * will not necessarily be the only path, or the shortest path.
|
||||
// * This is a simple recursive algorithm.
|
||||
// *
|
||||
// * <p><code>composedGraph</code> is the links table.
|
||||
// * composedGraph.get(x) should return a String[] array, each of
|
||||
// * which is a node that x is connected to.
|
||||
// * @param start the starting node
|
||||
// * @param end the ending node
|
||||
// * @param path the result vector; should be empty on entry. Upon
|
||||
// * success, it will contain successive nodes on the path from
|
||||
// * start to end, including start and end. If false is returned,
|
||||
// * then path is unchanged.
|
||||
// * @return true if a path from start to end is found
|
||||
// */
|
||||
// private static boolean findComposedPath(String start, String end,
|
||||
// Vector path) {
|
||||
// path.addElement(start);
|
||||
// // composedGraph lists all links emanating from a node
|
||||
// String[] links = (String[]) composedGraph.get(start);
|
||||
// if (links != null) {
|
||||
// for (int i=0; i<links.length; ++i) {
|
||||
// if (links[i].equals(end)) {
|
||||
// path.addElement(end);
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
// for (int i=0; i<links.length; ++i) {
|
||||
// // Avoid cycles: ignore links already on our path
|
||||
// if (path.indexOf(links[i]) >= 0) {
|
||||
// continue;
|
||||
// }
|
||||
// if (findComposedPath(links[i], end, path)) {
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// path.removeElementAt(path.size() - 1);
|
||||
// return false;
|
||||
// }
|
||||
|
||||
/**
|
||||
* Registers a subclass of <code>Transliterator</code> with the
|
||||
* system. This subclass must have a public constructor taking no
|
||||
|
@ -1561,7 +1391,7 @@ public abstract class Transliterator {
|
|||
* @see #unregister
|
||||
*/
|
||||
public static void registerClass(String ID, Class transClass, String displayName) {
|
||||
cache.put(new CaseInsensitiveString(ID), transClass);
|
||||
registry.put(ID, transClass, true);
|
||||
if (displayName != null) {
|
||||
displayNameCache.put(new CaseInsensitiveString(ID), displayName);
|
||||
}
|
||||
|
@ -1574,7 +1404,7 @@ public abstract class Transliterator {
|
|||
* @param factory the factory object
|
||||
*/
|
||||
public static void registerFactory(String ID, Factory factory) {
|
||||
cache.put(new CaseInsensitiveString(ID), factory);
|
||||
registry.put(ID, factory, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1582,36 +1412,13 @@ public abstract class Transliterator {
|
|||
* a system transliterator or a user transliterator or class.
|
||||
*
|
||||
* @param ID the ID of the transliterator or class
|
||||
* @return the <code>Object</code> that was registered with
|
||||
* <code>ID</code>, or <code>null</code> if none was
|
||||
* @see #registerClass
|
||||
*/
|
||||
public static Object unregister(String ID) {
|
||||
CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
|
||||
displayNameCache.remove(ciID);
|
||||
return cache.remove(ciID);
|
||||
public static void unregister(String ID) {
|
||||
displayNameCache.remove(new CaseInsensitiveString(ID));
|
||||
registry.remove(ID);
|
||||
}
|
||||
|
||||
/**
|
||||
* An internal class that adapts an enumeration over
|
||||
* CaseInsensitiveStrings to an enumeration over Strings.
|
||||
*/
|
||||
private static class IDEnumeration implements Enumeration {
|
||||
Enumeration enum;
|
||||
|
||||
public IDEnumeration(Enumeration e) {
|
||||
enum = e;
|
||||
}
|
||||
|
||||
public boolean hasMoreElements() {
|
||||
return enum.hasMoreElements();
|
||||
}
|
||||
|
||||
public Object nextElement() {
|
||||
return ((CaseInsensitiveString) enum.nextElement()).getString();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns an enumeration over the programmatic names of registered
|
||||
* <code>Transliterator</code> objects. This includes both system
|
||||
|
@ -1624,9 +1431,20 @@ public abstract class Transliterator {
|
|||
* @see #registerClass
|
||||
*/
|
||||
public static final Enumeration getAvailableIDs() {
|
||||
// Since the cache contains CaseInsensitiveString objects, but
|
||||
// the caller expects Strings, we have to use an intermediary.
|
||||
return new IDEnumeration(cache.keys());
|
||||
return registry.getAvailableIDs();
|
||||
}
|
||||
|
||||
public static final Enumeration getAvailableSources() {
|
||||
return registry.getAvailableSources();
|
||||
}
|
||||
|
||||
public static final Enumeration getAvailableTargets(String source) {
|
||||
return registry.getAvailableTargets(source);
|
||||
}
|
||||
|
||||
public static final Enumeration getAvailableVariants(String source,
|
||||
String target) {
|
||||
return registry.getAvailableVariants(source, target);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1651,21 +1469,18 @@ public abstract class Transliterator {
|
|||
}
|
||||
|
||||
static {
|
||||
// TODO FINISH
|
||||
registry = new TransliteratorRegistry();
|
||||
|
||||
// The display name cache starts out empty
|
||||
displayNameCache = new Hashtable();
|
||||
|
||||
// Read the index file and construct the cache/internalCache.
|
||||
// Read the index file and populate the registry.
|
||||
// Each line of the index file is either blank, a '#' comment,
|
||||
// or a colon-delimited line. In the latter case the first
|
||||
// field is the ID being defined. The second field is one of
|
||||
// three strings: "file", "internal", or "alias". Remaining
|
||||
// fields vary according the value fo the second field. See
|
||||
// the index file itself for further documentation.
|
||||
cache = new Hashtable();
|
||||
internalCache = new Hashtable();
|
||||
ResourceReader r = new ResourceReader("Transliterator_index.txt");
|
||||
for (;;) {
|
||||
String line = null;
|
||||
|
@ -1693,19 +1508,21 @@ public abstract class Transliterator {
|
|||
String type = line.substring(pos, colon);
|
||||
pos = colon+1;
|
||||
|
||||
CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
|
||||
if (type.equals("file") || type.equals("internal")) {
|
||||
// Rest of line is <resource>:<encoding>:<direction>
|
||||
// pos colon c2
|
||||
colon = line.indexOf(':', pos);
|
||||
colon = line.indexOf(':', colon+1); // skip over 1 colon
|
||||
String fileNameAndEncoding = line.substring(pos, colon);
|
||||
pos = colon+1;
|
||||
boolean isForward = line.substring(pos).equals("FORWARD");
|
||||
Hashtable h = type.equals("internal") ? internalCache:cache;
|
||||
h.put(ciID, (isForward ? "f" : "r") + fileNameAndEncoding);
|
||||
int c2 = line.indexOf(':', colon+1);
|
||||
int dir = line.substring(c2+1).equals("FORWARD") ?
|
||||
FORWARD : REVERSE;
|
||||
registry.put(ID,
|
||||
line.substring(pos, colon), // resource
|
||||
line.substring(colon+1, c2), // encoding
|
||||
dir,
|
||||
!type.equals("internal"));
|
||||
} else if (type.equals("alias")) {
|
||||
// Rest of line is the <getInstanceArg>
|
||||
cache.put(ciID, "a" + line.substring(pos));
|
||||
registry.put(ID, line.substring(pos), true);
|
||||
} else {
|
||||
// Unknown type
|
||||
throw new RuntimeException("Can't parse line: " + line);
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
|
||||
* $Date: 2001/10/05 06:30:38 $
|
||||
* $Revision: 1.49 $
|
||||
* $Date: 2001/10/05 18:16:59 $
|
||||
* $Revision: 1.50 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -1204,7 +1204,77 @@ public class TransliteratorTest extends TestFmwk {
|
|||
"bb x xb");
|
||||
}
|
||||
|
||||
public void TestSTV_TODO() {
|
||||
static class TestFact implements Transliterator.Factory {
|
||||
static class NameableNullTrans extends NullTransliterator {
|
||||
public NameableNullTrans(String id) {
|
||||
setID(id);
|
||||
}
|
||||
};
|
||||
String id;
|
||||
public TestFact(String theID) {
|
||||
id = theID;
|
||||
}
|
||||
public Transliterator getInstance() {
|
||||
return new NameableNullTrans(id);
|
||||
}
|
||||
};
|
||||
|
||||
public void TestSTV() {
|
||||
Enumeration es = Transliterator.getAvailableSources();
|
||||
for (int i=0; es.hasMoreElements(); ++i) {
|
||||
String source = (String) es.nextElement();
|
||||
logln("" + i + ": " + source);
|
||||
if (source.length() == 0) {
|
||||
errln("FAIL: empty source");
|
||||
continue;
|
||||
}
|
||||
Enumeration et = Transliterator.getAvailableTargets(source);
|
||||
for (int j=0; et.hasMoreElements(); ++j) {
|
||||
String target = (String) et.nextElement();
|
||||
logln(" " + j + ": " + target);
|
||||
if (target.length() == 0) {
|
||||
errln("FAIL: empty target");
|
||||
continue;
|
||||
}
|
||||
Enumeration ev = Transliterator.getAvailableVariants(source, target);
|
||||
for (int k=0; ev.hasMoreElements(); ++k) {
|
||||
String variant = (String) ev.nextElement();
|
||||
if (variant.length() == 0) {
|
||||
logln(" " + k + ": <empty>");
|
||||
} else {
|
||||
logln(" " + k + ": " + variant);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test registration
|
||||
String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
|
||||
for (int i=0; i<3; ++i) {
|
||||
Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
|
||||
try {
|
||||
Transliterator t = Transliterator.getInstance(IDS[i]);
|
||||
if (t.getID().equals(IDS[i])) {
|
||||
logln("Ok: Registration/creation succeeded for ID " +
|
||||
IDS[i]);
|
||||
} else {
|
||||
errln("FAIL: Registration of ID " +
|
||||
IDS[i] + " creates ID " + t.getID());
|
||||
}
|
||||
Transliterator.unregister(IDS[i]);
|
||||
try {
|
||||
t = Transliterator.getInstance(IDS[i]);
|
||||
errln("FAIL: Unregistration failed for ID " +
|
||||
IDS[i] + "; still receiving ID " + t.getID());
|
||||
} catch (IllegalArgumentException e2) {
|
||||
// Good; this is what we expect
|
||||
logln("Ok; Unregistered " + IDS[i]);
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
errln("FAIL: Registration/creation failed for ID " +
|
||||
IDS[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1227,6 +1297,16 @@ public class TransliteratorTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test NFD chaining with RBT
|
||||
*/
|
||||
public void TestNFDChainRBT() {
|
||||
Transliterator t = Transliterator.createFromRules(
|
||||
"TEST", "::NFD; aa > Q; a > q;",
|
||||
Transliterator.FORWARD);
|
||||
expect(t, "aa", "Q");
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// icu4j only
|
||||
//======================================================================
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2001/10/03 00:14:22 $
|
||||
* $Revision: 1.47 $
|
||||
* $Date: 2001/10/05 18:15:54 $
|
||||
* $Revision: 1.48 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -279,7 +279,7 @@ import com.ibm.text.resources.ResourceReader;
|
|||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.47 $ $Date: 2001/10/03 00:14:22 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.48 $ $Date: 2001/10/05 18:15:54 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
|
@ -352,7 +352,35 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
int direction,
|
||||
StringBuffer idBlockResult,
|
||||
int[] idSplitPointResult) {
|
||||
TransliteratorParser parser = new TransliteratorParser(new String[] { rules }, direction);
|
||||
return _parse(new TransliteratorParser(new String[] { rules }, direction),
|
||||
idBlockResult, idSplitPointResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a given set of rules. Return up to three pieces of
|
||||
* parsed data. These are the header ::id block, the rule block,
|
||||
* and the footer ::id block. Any or all of these may be empty.
|
||||
* If the ::id blocks are empty, their corresponding parameters
|
||||
* are returned as the empty string. If there are no rules, the
|
||||
* TransliterationRuleData result is 0.
|
||||
* @param ruleDataResult caller owns the pointer stored here.
|
||||
* May be NULL.
|
||||
* @param headerRule string including semicolons for the header
|
||||
* ::id block. May be empty.
|
||||
* @param footerRule string including semicolons for the footer
|
||||
* ::id block. May be empty.
|
||||
*/
|
||||
static Data parse(ResourceReader rules,
|
||||
int direction,
|
||||
StringBuffer idBlockResult,
|
||||
int[] idSplitPointResult) {
|
||||
return _parse(new TransliteratorParser(rules, direction),
|
||||
idBlockResult, idSplitPointResult);
|
||||
}
|
||||
|
||||
static Data _parse(TransliteratorParser parser,
|
||||
StringBuffer idBlockResult,
|
||||
int[] idSplitPointResult) {
|
||||
idBlockResult.setLength(0);
|
||||
idBlockResult.append(parser.idBlock);
|
||||
idSplitPointResult[0] = parser.idSplitPoint;
|
||||
|
@ -498,6 +526,9 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.48 2001/10/05 18:15:54 alan
|
||||
* jitterbug 74: finish port of Source-Target/Variant code incl. TransliteratorRegistry and tests
|
||||
*
|
||||
* Revision 1.47 2001/10/03 00:14:22 alan
|
||||
* jitterbug 73: finish quantifier and supplemental char support
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
|
||||
* $Date: 2001/10/03 16:26:50 $
|
||||
* $Revision: 1.45 $
|
||||
* $Date: 2001/10/05 18:15:54 $
|
||||
* $Revision: 1.46 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.45 $ $Date: 2001/10/03 16:26:50 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.46 $ $Date: 2001/10/05 18:15:54 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
|
@ -348,44 +348,12 @@ public abstract class Transliterator {
|
|||
private int maximumContextLength = 0;
|
||||
|
||||
/**
|
||||
* Cache of system transliterators. Keys are <code>String</code>
|
||||
* names, values are one of the following:
|
||||
*
|
||||
* <ul><li><code>String</code> objects. These represent
|
||||
* RuleBasedTransliterators that have not been loaded yet, or
|
||||
* aliases. The first character determines the type: 'f'
|
||||
* indicates a FORWARD RBT, with the rest of the string giving the
|
||||
* resource name and encoding, separated by a colon. 'r' is
|
||||
* similar, but indicates a REVERSE RBT. 'a' indicates an alias,
|
||||
* with the rest of the string giving the ID to create.
|
||||
*
|
||||
* <li><code>Class</code> objects. Such objects must represent
|
||||
* subclasses of <code>Transliterator</code>, and must satisfy the
|
||||
* constraints described in <code>registerClass()</code>.
|
||||
*
|
||||
* <li><code>RuleBasedTransliterator.Data</code> objects. These
|
||||
* are built in-memory transliterator data cores that are wrapped
|
||||
* thinly to create RuleBasedTransliterator objects. When an RBT
|
||||
* is created, its Data core is cached and shared among future
|
||||
* instances of the same ID.
|
||||
* </ul>
|
||||
* System transliterator registry.
|
||||
*/
|
||||
private static Hashtable cache;
|
||||
|
||||
/**
|
||||
* Identical to 'cache' but contains internal transliterators.
|
||||
* These are not enumerated by getAvailableIDs().
|
||||
*/
|
||||
private static Hashtable internalCache;
|
||||
private static TransliteratorRegistry registry;
|
||||
|
||||
private static Hashtable displayNameCache;
|
||||
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
// TODO Add documentation
|
||||
private static TransliteratorRegistry registry;
|
||||
|
||||
/**
|
||||
* Prefix for resource bundle key for the display name for a
|
||||
* transliterator. The ID is appended to this to form the key.
|
||||
|
@ -981,9 +949,8 @@ public abstract class Transliterator {
|
|||
// idBlock and data -- this is a compound
|
||||
// RBT
|
||||
Transliterator t = new RuleBasedTransliterator("_", data, null);
|
||||
t = new CompoundTransliterator(ID, idBlock.toString(), idSplitPoint[0],
|
||||
t);
|
||||
return t;
|
||||
return new CompoundTransliterator(ID, idBlock.toString(), idSplitPoint[0],
|
||||
t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1040,14 +1007,7 @@ public abstract class Transliterator {
|
|||
parseID(id, regenID, p, sawDelimiter, dir, true);
|
||||
|
||||
if (p[0] == pos || (p[0] < id.length() && !sawDelimiter[0])) {
|
||||
// TODO
|
||||
//throw new IllegalArgumentException("Invalid ID " + id);
|
||||
throw new IllegalArgumentException("Invalid ID " + id +
|
||||
" p[0]=" + p[0] +
|
||||
" pos=" + pos +
|
||||
" id.length()=" + id.length() +
|
||||
" sawDelimite[0]=" + sawDelimiter[0] +
|
||||
"");
|
||||
throw new IllegalArgumentException("Invalid ID " + id);
|
||||
}
|
||||
pos = p[0];
|
||||
// The return value may be NULL when, for instance, creating a
|
||||
|
@ -1394,26 +1354,6 @@ public abstract class Transliterator {
|
|||
return pos;
|
||||
}
|
||||
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
// TODO Remove remove remove
|
||||
static Transliterator tempGet(String id, StringBuffer aliasReturn) {
|
||||
aliasReturn.setLength(0);
|
||||
if (id.equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
|
||||
id = NullTransliterator._ID;
|
||||
// Temporary hack to make this work
|
||||
}
|
||||
return internalGetInstance(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns this transliterator's inverse. See the class
|
||||
* documentation for details. This implementation simply inverts
|
||||
|
@ -1438,116 +1378,6 @@ public abstract class Transliterator {
|
|||
return getInstance(ID, REVERSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a transliterator object given its ID. Unlike getInstance(),
|
||||
* this method returns null if it cannot make use of the given ID.
|
||||
*/
|
||||
private static Transliterator internalGetInstance(String ID) {
|
||||
RuleBasedTransliterator.Data data = null;
|
||||
Hashtable sourceCache = cache;
|
||||
CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
|
||||
Object obj = cache.get(ciID);
|
||||
if (obj == null) {
|
||||
obj = internalCache.get(ciID);
|
||||
sourceCache = internalCache;
|
||||
}
|
||||
|
||||
if (obj != null) {
|
||||
if (obj instanceof RuleBasedTransliterator.Data) {
|
||||
data = (RuleBasedTransliterator.Data) obj;
|
||||
// Fall through to construct transliterator from cached Data object.
|
||||
} else if (obj instanceof Class) {
|
||||
try {
|
||||
return (Transliterator) ((Class) obj).newInstance();
|
||||
} catch (InstantiationException e) {
|
||||
} catch (IllegalAccessException e2) {}
|
||||
} else if (obj instanceof Factory) {
|
||||
return ((Factory) obj).getInstance();
|
||||
} else if (obj instanceof String) {
|
||||
String spec = (String) obj;
|
||||
if (spec.charAt(0) == 'a') {
|
||||
// alias
|
||||
Transliterator t = getInstance(spec.substring(1));
|
||||
t.ID = ID;
|
||||
return t;
|
||||
} else {
|
||||
synchronized (cache) {
|
||||
// file, either forward or reverse
|
||||
int dir = (spec.charAt(0) == 'f') ? FORWARD:REVERSE;
|
||||
int colon = spec.indexOf(':', 1);
|
||||
String resourceName = spec.substring(1, colon);
|
||||
String encoding = spec.substring(colon+1);
|
||||
ResourceReader r = null;
|
||||
try {
|
||||
r = new ResourceReader(resourceName, encoding);
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
// This should never happen; UTF8 is always supported
|
||||
} catch (IllegalArgumentException e2) {
|
||||
// Can't load UTF8 file
|
||||
}
|
||||
|
||||
if (r != null) {
|
||||
data = RuleBasedTransliterator.parse(r, dir);
|
||||
sourceCache.put(ciID, data);
|
||||
// Fall through to construct transliterator from Data object.
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new RuntimeException("Bogus cache object");
|
||||
}
|
||||
|
||||
if (data != null) {
|
||||
return new RuleBasedTransliterator(ID, data, null);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
// Currently unused, but may be of use in the future.
|
||||
// /**
|
||||
// * Find a path through the composed transliterator graph. This
|
||||
// * will not necessarily be the only path, or the shortest path.
|
||||
// * This is a simple recursive algorithm.
|
||||
// *
|
||||
// * <p><code>composedGraph</code> is the links table.
|
||||
// * composedGraph.get(x) should return a String[] array, each of
|
||||
// * which is a node that x is connected to.
|
||||
// * @param start the starting node
|
||||
// * @param end the ending node
|
||||
// * @param path the result vector; should be empty on entry. Upon
|
||||
// * success, it will contain successive nodes on the path from
|
||||
// * start to end, including start and end. If false is returned,
|
||||
// * then path is unchanged.
|
||||
// * @return true if a path from start to end is found
|
||||
// */
|
||||
// private static boolean findComposedPath(String start, String end,
|
||||
// Vector path) {
|
||||
// path.addElement(start);
|
||||
// // composedGraph lists all links emanating from a node
|
||||
// String[] links = (String[]) composedGraph.get(start);
|
||||
// if (links != null) {
|
||||
// for (int i=0; i<links.length; ++i) {
|
||||
// if (links[i].equals(end)) {
|
||||
// path.addElement(end);
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
// for (int i=0; i<links.length; ++i) {
|
||||
// // Avoid cycles: ignore links already on our path
|
||||
// if (path.indexOf(links[i]) >= 0) {
|
||||
// continue;
|
||||
// }
|
||||
// if (findComposedPath(links[i], end, path)) {
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// path.removeElementAt(path.size() - 1);
|
||||
// return false;
|
||||
// }
|
||||
|
||||
/**
|
||||
* Registers a subclass of <code>Transliterator</code> with the
|
||||
* system. This subclass must have a public constructor taking no
|
||||
|
@ -1561,7 +1391,7 @@ public abstract class Transliterator {
|
|||
* @see #unregister
|
||||
*/
|
||||
public static void registerClass(String ID, Class transClass, String displayName) {
|
||||
cache.put(new CaseInsensitiveString(ID), transClass);
|
||||
registry.put(ID, transClass, true);
|
||||
if (displayName != null) {
|
||||
displayNameCache.put(new CaseInsensitiveString(ID), displayName);
|
||||
}
|
||||
|
@ -1574,7 +1404,7 @@ public abstract class Transliterator {
|
|||
* @param factory the factory object
|
||||
*/
|
||||
public static void registerFactory(String ID, Factory factory) {
|
||||
cache.put(new CaseInsensitiveString(ID), factory);
|
||||
registry.put(ID, factory, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1582,36 +1412,13 @@ public abstract class Transliterator {
|
|||
* a system transliterator or a user transliterator or class.
|
||||
*
|
||||
* @param ID the ID of the transliterator or class
|
||||
* @return the <code>Object</code> that was registered with
|
||||
* <code>ID</code>, or <code>null</code> if none was
|
||||
* @see #registerClass
|
||||
*/
|
||||
public static Object unregister(String ID) {
|
||||
CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
|
||||
displayNameCache.remove(ciID);
|
||||
return cache.remove(ciID);
|
||||
public static void unregister(String ID) {
|
||||
displayNameCache.remove(new CaseInsensitiveString(ID));
|
||||
registry.remove(ID);
|
||||
}
|
||||
|
||||
/**
|
||||
* An internal class that adapts an enumeration over
|
||||
* CaseInsensitiveStrings to an enumeration over Strings.
|
||||
*/
|
||||
private static class IDEnumeration implements Enumeration {
|
||||
Enumeration enum;
|
||||
|
||||
public IDEnumeration(Enumeration e) {
|
||||
enum = e;
|
||||
}
|
||||
|
||||
public boolean hasMoreElements() {
|
||||
return enum.hasMoreElements();
|
||||
}
|
||||
|
||||
public Object nextElement() {
|
||||
return ((CaseInsensitiveString) enum.nextElement()).getString();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns an enumeration over the programmatic names of registered
|
||||
* <code>Transliterator</code> objects. This includes both system
|
||||
|
@ -1624,9 +1431,20 @@ public abstract class Transliterator {
|
|||
* @see #registerClass
|
||||
*/
|
||||
public static final Enumeration getAvailableIDs() {
|
||||
// Since the cache contains CaseInsensitiveString objects, but
|
||||
// the caller expects Strings, we have to use an intermediary.
|
||||
return new IDEnumeration(cache.keys());
|
||||
return registry.getAvailableIDs();
|
||||
}
|
||||
|
||||
public static final Enumeration getAvailableSources() {
|
||||
return registry.getAvailableSources();
|
||||
}
|
||||
|
||||
public static final Enumeration getAvailableTargets(String source) {
|
||||
return registry.getAvailableTargets(source);
|
||||
}
|
||||
|
||||
public static final Enumeration getAvailableVariants(String source,
|
||||
String target) {
|
||||
return registry.getAvailableVariants(source, target);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1651,21 +1469,18 @@ public abstract class Transliterator {
|
|||
}
|
||||
|
||||
static {
|
||||
// TODO FINISH
|
||||
registry = new TransliteratorRegistry();
|
||||
|
||||
// The display name cache starts out empty
|
||||
displayNameCache = new Hashtable();
|
||||
|
||||
// Read the index file and construct the cache/internalCache.
|
||||
// Read the index file and populate the registry.
|
||||
// Each line of the index file is either blank, a '#' comment,
|
||||
// or a colon-delimited line. In the latter case the first
|
||||
// field is the ID being defined. The second field is one of
|
||||
// three strings: "file", "internal", or "alias". Remaining
|
||||
// fields vary according the value fo the second field. See
|
||||
// the index file itself for further documentation.
|
||||
cache = new Hashtable();
|
||||
internalCache = new Hashtable();
|
||||
ResourceReader r = new ResourceReader("Transliterator_index.txt");
|
||||
for (;;) {
|
||||
String line = null;
|
||||
|
@ -1693,19 +1508,21 @@ public abstract class Transliterator {
|
|||
String type = line.substring(pos, colon);
|
||||
pos = colon+1;
|
||||
|
||||
CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
|
||||
if (type.equals("file") || type.equals("internal")) {
|
||||
// Rest of line is <resource>:<encoding>:<direction>
|
||||
// pos colon c2
|
||||
colon = line.indexOf(':', pos);
|
||||
colon = line.indexOf(':', colon+1); // skip over 1 colon
|
||||
String fileNameAndEncoding = line.substring(pos, colon);
|
||||
pos = colon+1;
|
||||
boolean isForward = line.substring(pos).equals("FORWARD");
|
||||
Hashtable h = type.equals("internal") ? internalCache:cache;
|
||||
h.put(ciID, (isForward ? "f" : "r") + fileNameAndEncoding);
|
||||
int c2 = line.indexOf(':', colon+1);
|
||||
int dir = line.substring(c2+1).equals("FORWARD") ?
|
||||
FORWARD : REVERSE;
|
||||
registry.put(ID,
|
||||
line.substring(pos, colon), // resource
|
||||
line.substring(colon+1, c2), // encoding
|
||||
dir,
|
||||
!type.equals("internal"));
|
||||
} else if (type.equals("alias")) {
|
||||
// Rest of line is the <getInstanceArg>
|
||||
cache.put(ciID, "a" + line.substring(pos));
|
||||
registry.put(ID, line.substring(pos), true);
|
||||
} else {
|
||||
// Unknown type
|
||||
throw new RuntimeException("Can't parse line: " + line);
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue