mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 17:01:16 +00:00
fixed how the common characters between script runs were handled;
added registration for all scripts X-SVN-Rev: 8713
This commit is contained in:
parent
bdf50940f9
commit
5e952008c7
2 changed files with 108 additions and 46 deletions
icu4j/src/com/ibm/icu/dev/demo/translit
|
@ -2,10 +2,11 @@ package com.ibm.icu.dev.demo.translit;
|
|||
import com.ibm.icu.dev.demo.impl.*;
|
||||
import com.ibm.icu.lang.*;
|
||||
import com.ibm.icu.text.*;
|
||||
import java.util.*;
|
||||
|
||||
public class AnyTransliterator extends Transliterator {
|
||||
|
||||
static final boolean DEBUG = true;
|
||||
static final boolean DEBUG = false;
|
||||
private String targetName;
|
||||
private RunIterator it;
|
||||
private Position run;
|
||||
|
@ -105,13 +106,26 @@ public class AnyTransliterator extends Transliterator {
|
|||
public boolean atEnd();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a series of ranges corresponding to scripts. They will be of the form:
|
||||
* ccccSScSSccccTTcTcccc - where c is common, S is the first script and T is the second
|
||||
*|             |         - first run
|
||||
*         |            | - second run
|
||||
* That is, the runs will overlap. The reason for this is so that a transliterator can
|
||||
* consider common characters both before and after the scripts.
|
||||
* The only time that contextStart != start is for the first run
|
||||
* (the context is the start context of the entire expanse)
|
||||
* The only time that contextLimit != limit is for the last run
|
||||
* (the context is the end context of the entire expanse)
|
||||
*/
|
||||
public static class ScriptRunIterator implements RunIterator {
|
||||
Replaceable text;
|
||||
Position expanse = new Position();
|
||||
Position current = new Position();
|
||||
int script;
|
||||
boolean done = true;
|
||||
private Replaceable text;
|
||||
private Position expanse = new Position();
|
||||
private Position current = new Position();
|
||||
private int script;
|
||||
private boolean done = true;
|
||||
|
||||
|
||||
public void reset(Replaceable text, Position expanse) {
|
||||
set(this.expanse, expanse);
|
||||
this.text = text;
|
||||
|
@ -124,19 +138,7 @@ public class AnyTransliterator extends Transliterator {
|
|||
script = UScript.INVALID_CODE;
|
||||
// set up first range to be empty, at beginning
|
||||
current.contextStart = expanse.contextStart;
|
||||
current.start = current.limit = expanse.start;
|
||||
|
||||
// find the COMMON stuff at the start of the expanse
|
||||
int i, cp;
|
||||
int limit = expanse.limit;
|
||||
for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) {
|
||||
cp = text.char32At(i);
|
||||
int script = UScript.getScript(cp);
|
||||
if (script != UScript.COMMON && script != UScript.INHERITED) break;
|
||||
}
|
||||
if (i == limit) done = true;
|
||||
else current.contextLimit = i;
|
||||
|
||||
current.start = current.limit = current.contextLimit = expanse.start;
|
||||
}
|
||||
|
||||
public boolean next(Position run) {
|
||||
|
@ -148,15 +150,24 @@ public class AnyTransliterator extends Transliterator {
|
|||
+ ", cl: " + current.contextLimit);
|
||||
}
|
||||
// reset start context run to the last end
|
||||
current.contextStart = current.limit;
|
||||
current.start = current.contextLimit;
|
||||
current.start = current.limit;
|
||||
|
||||
// set up variables and loop
|
||||
int limit = expanse.limit;
|
||||
// Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script.
|
||||
int i, cp;
|
||||
int limit = expanse.start;
|
||||
for (i = current.start; i > limit; i -= UTF16.getCharCount(cp)) {
|
||||
cp = text.char32At(i);
|
||||
int script = UScript.getScript(cp);
|
||||
if (script != UScript.COMMON && script != UScript.INHERITED) break;
|
||||
}
|
||||
current.start = i;
|
||||
current.contextStart = (i == limit) ? expanse.contextStart : i; // extend at start
|
||||
|
||||
// PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit
|
||||
int lastScript = UScript.COMMON;
|
||||
int veryLastScript = UScript.COMMON;
|
||||
int i, cp;
|
||||
for (i = current.start; i < limit; i += UTF16.getCharCount(cp)) {
|
||||
limit = expanse.limit;
|
||||
for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) {
|
||||
cp = text.char32At(i);
|
||||
int script = UScript.getScript(cp);
|
||||
if (script == UScript.INHERITED) script = UScript.COMMON;
|
||||
|
@ -166,25 +177,13 @@ public class AnyTransliterator extends Transliterator {
|
|||
// otherwise set our script
|
||||
if (lastScript == UScript.COMMON) lastScript = script;
|
||||
else if (lastScript != script) break;
|
||||
} else if (veryLastScript != UScript.COMMON) {
|
||||
// if we found COMMON -- and -- the last character was not, reset
|
||||
current.limit = i;
|
||||
}
|
||||
veryLastScript = script;
|
||||
}
|
||||
// fix end
|
||||
if (veryLastScript != UScript.COMMON) {
|
||||
// if we found COMMON -- and -- the last character was not, reset
|
||||
current.limit = i;
|
||||
}
|
||||
// if we are at the very end of the expanse, then expand it.
|
||||
if (i == limit) {
|
||||
current.contextLimit = expanse.contextLimit;
|
||||
done = true;
|
||||
} else {
|
||||
current.contextLimit = i;
|
||||
}
|
||||
current.limit = i;
|
||||
current.contextLimit = (i == limit) ? expanse.contextLimit : i; // extend at end
|
||||
done = (i == limit);
|
||||
script = lastScript;
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println("-cs: " + current.contextStart
|
||||
+ ", s: " + current.start
|
||||
|
@ -231,5 +230,65 @@ public class AnyTransliterator extends Transliterator {
|
|||
expanse.contextLimit += delta;
|
||||
}
|
||||
|
||||
// register Any-Script for every script.
|
||||
|
||||
private static Set scriptList = new HashSet();
|
||||
|
||||
public static void registerAnyToScript() {
|
||||
synchronized (scriptList) {
|
||||
Enumeration sources = Transliterator.getAvailableSources();
|
||||
while(sources.hasMoreElements()) {
|
||||
String source = (String) sources.nextElement();
|
||||
if (source.equals("Any")) continue; // to keep from looping
|
||||
|
||||
Enumeration targets = Transliterator.getAvailableTargets(source);
|
||||
while(targets.hasMoreElements()) {
|
||||
String target = (String) targets.nextElement();
|
||||
if (UScript.getCode(target) == null) continue; // SKIP unless we have a script (or locale)
|
||||
if (scriptList.contains(target)) continue; // already encountered
|
||||
scriptList.add(target); // otherwise add for later testing
|
||||
|
||||
Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
|
||||
if (variantSet.size() < 2) {
|
||||
AnyTransliterator at = new AnyTransliterator(target, null);
|
||||
DummyFactory.add(at.getID(), at);
|
||||
} else {
|
||||
Iterator variants = variantSet.iterator();
|
||||
while(variants.hasNext()) {
|
||||
String variant = (String) variants.next();
|
||||
AnyTransliterator at = new AnyTransliterator(
|
||||
(variant.length() > 0) ? target + "/" + variant : target, null);
|
||||
DummyFactory.add(at.getID(), at);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class DummyFactory implements Transliterator.Factory {
|
||||
static DummyFactory singleton = new DummyFactory();
|
||||
static HashMap m = new HashMap();
|
||||
|
||||
// Since Transliterators are immutable, we don't have to clone on set & get
|
||||
static void add(String ID, Transliterator t) {
|
||||
m.put(ID, t);
|
||||
System.out.println("Registering: " + ID + ", " + t.toRules(true));
|
||||
Transliterator.registerFactory(ID, singleton);
|
||||
}
|
||||
public Transliterator getInstance(String ID) {
|
||||
return (Transliterator) m.get(ID);
|
||||
}
|
||||
}
|
||||
|
||||
// Nice little Utility for converting Enumeration to collection
|
||||
static Set add(Set s, Enumeration enum) {
|
||||
while(enum.hasMoreElements()) {
|
||||
s.add(enum.nextElement());
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/demo/translit/Demo.java,v $
|
||||
* $Date: 2002/05/25 15:20:10 $
|
||||
* $Revision: 1.15 $
|
||||
* $Date: 2002/05/29 00:43:43 $
|
||||
* $Revision: 1.16 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -28,12 +28,12 @@ import com.ibm.icu.text.*;
|
|||
* <p>Copyright (c) IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Demo.java,v $ $Revision: 1.15 $ $Date: 2002/05/25 15:20:10 $
|
||||
* @version $RCSfile: Demo.java,v $ $Revision: 1.16 $ $Date: 2002/05/29 00:43:43 $
|
||||
*/
|
||||
public class Demo extends Frame {
|
||||
|
||||
static final boolean DEBUG = false;
|
||||
static final String START_TEXT = "(cut,\u03BA\u03C5\u03C4,\u05D0,\u3042,\u4E80,\u091A\u0941\u0924\u094D)";
|
||||
static final String START_TEXT = "(cut,\u03BA\u03C5\u03C4,\u05D0,\u30AF\u30C8,\u4E80,\u091A\u0941\u0924\u094D)";
|
||||
|
||||
Transliterator translit = null;
|
||||
String fontName = "Arial Unicode MS";
|
||||
|
@ -406,6 +406,10 @@ public class Demo extends Frame {
|
|||
}
|
||||
|
||||
static {
|
||||
|
||||
AnyTransliterator.ScriptRunIterator.registerAnyToScript();
|
||||
|
||||
if (false) {
|
||||
AnyTransliterator at = new AnyTransliterator("Greek", null);
|
||||
at.transliterate("(cat,\u03b1,\u0915)");
|
||||
DummyFactory.add(at.getID(), at);
|
||||
|
@ -418,7 +422,6 @@ public class Demo extends Frame {
|
|||
at.transliterate("(cat,\u03b1,\u0915)");
|
||||
DummyFactory.add(at.getID(), at);
|
||||
|
||||
if (false) {
|
||||
DummyFactory.add("Any-gif", Transliterator.createFromRules("gif", "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';", Transliterator.FORWARD));
|
||||
DummyFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue