Added tests for collation parsing, some utilities for display

X-SVN-Rev: 13800
This commit is contained in:
Mark Davis 2003-11-21 01:03:39 +00:00
parent 08b4557e06
commit 501b2fdc41
12 changed files with 3566 additions and 0 deletions

View file

@ -0,0 +1,301 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
* $Source:
* $Date:
* $Revision:
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.collator;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.*;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.dev.test.util.*;
import com.ibm.icu.dev.test.*;
import com.ibm.icu.text.*;
import java.text.ParseException;
import java.io.*;
import java.util.Random;
public class RandomCollator extends TestFmwk {
/**
 * Command-line entry point: creates a RandomCollator and hands the
 * arguments to its run(args) method (not defined in this class, so
 * presumably inherited from TestFmwk).
 */
public static void main(String[] args) throws Exception {
new RandomCollator().run(args);
//new CollationAPITest().TestGetTailoredSet();
}
static final int CONSTRUCT_RANDOM_COUNT = 100;
static final int FORMAL_TEST_COUNT = 1000;
static final String POSITION = "{$$$}";
/**
 * BagFormatter.Shower implementation that forwards every line to this
 * test's logln().
 */
class Shower implements BagFormatter.Shower {
public void println(String arg) {
logln(arg);
}
}
public Shower LOG = new Shower();
/**
 * Generates CONSTRUCT_RANDOM_COUNT random rule strings, builds a
 * RuleBasedCollator from each, and runs the comparator consistency test
 * (FORMAL_TEST_COUNT rounds) against every collator that could be built.
 * Failures are written to RandomCollationTestLog.txt and reported via errln.
 *
 * @throws IOException if the log file cannot be opened
 */
public void TestRandom() throws IOException {
    PrintWriter pw = BagFormatter.openUTF8Writer("", "RandomCollationTestLog.txt", BagFormatter.CONSOLE);
    try {
        TestCollator tc = new TestCollator(chars);
        pw.println("Collation Test Run");
        pw.println("Note: For parse-exception, " + POSITION + " indicates the errorOffset");
        pw.println("Rules:");
        pw.println(currentRules);
        String rules = "<unknown>";
        int sCount = 0;
        int peCount = 0;
        int oeCount = 0;
        for (int i = 0; i < CONSTRUCT_RANDOM_COUNT; ++i) {
            try {
                rules = get();
                Collator c = new RuleBasedCollator(rules);
                tc.test(c, FORMAL_TEST_COUNT);
                logln("ok");
                sCount++;
            } catch (ParseException pe) {
                peCount++;
                pw.println("========PARSE EXCEPTION======== (" + i + ")");
                // Clamp the offset: an out-of-range value (e.g. -1) would otherwise
                // make substring() throw from inside this handler and abort the run.
                int errorOffset = pe.getErrorOffset();
                if (errorOffset < 0) {
                    errorOffset = 0;
                } else if (errorOffset > rules.length()) {
                    errorOffset = rules.length();
                }
                pw.print(rules.substring(0, errorOffset));
                pw.print(POSITION);
                pw.println(rules.substring(errorOffset));
                errln("ParseException");
            } catch (Exception e) {
                oeCount++;
                pw.println("========OTHER EXCEPTION======== (" + i + ")");
                e.printStackTrace(pw);
                pw.println("========RULES======== (" + i + ")");
                pw.println(rules);
                // This branch handles everything that is NOT a ParseException.
                errln("Other Exception");
            }
        }
        String summary = "Successful: " + sCount
            + ",\tParseException: " + peCount
            + ",\tOther Exception: " + oeCount;
        pw.println(summary);
        logln(summary);
    } finally {
        // Always release the log file, even if something unexpected escapes.
        pw.close();
    }
}
/**
 * Comparator tester whose test objects are random strings of one to eight
 * characters drawn from a given UnicodeSet.
 */
public static class TestCollator extends TestComparator {
    BNF rs;

    /**
     * Builds the string generator from a one-rule grammar over chars,
     * seeded with a fixed Random(0).
     */
    TestCollator(UnicodeSet chars) {
        String rootRule = "$root = " + chars + "{1,8};";
        BNF grammar = new BNF(new Random(0), new Quoter.RuleQuoter());
        grammar.addRules(rootRule);
        rs = grammar.complete();
    }

    /** Returns the next generated string; the argument is ignored. */
    public Object newObject(Object c) {
        return rs.next();
    }

    /** Renders the object's string form through the BagFormatter.hex transliterator. */
    public String format(Object c) {
        String text = c.toString();
        return BagFormatter.hex.transliterate(text);
    }
}
private BNF bnf;
String currentRules = null;
UnicodeSet chars;
/**
 * Returns the next string produced by the BNF generator built in the constructor.
 */
public String get() {
return bnf.next();
}
/**
 * Default configuration: rule counts 1..10 and a small character set.
 * NOTE(review): the set pattern starts with "AZ" (the two letters A and Z),
 * not the range "A-Z" -- possibly a typo; confirm the intended set.
 */
public RandomCollator() {
this(1,10, new UnicodeSet("[AZa-z<\\&\\[\\]]"));
}
public RandomCollator(int minRuleCount, int maxRuleCount, UnicodeSet chars) {
this.chars = chars;
bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
.addSet("$chars", chars)
.addRules(collationBNF)
.complete();
}
static String collationBNF =
"$s = ' '? 50%;\r\n" +
"$relationList = (" +
" '<'" +
" | ' <<'" +
" | ' ;'" +
" | ' <<<'" +
" | ' ,'" +
" | ' ='" +
");\r\n" +
"$alternateOptions = non'-'ignorable | shifted;\r\n" +
"$caseFirstOptions = off | upper | lower;\r\n" +
"$strengthOptions = '1' | '2' | '3' | '4' | 'I';\r\n" +
"$commandList = '['" +
" ( alternate ' ' $alternateOptions" +
" | backwards' 2'" +
" | normalization ' ' $onoff " +
" | caseLevel ' ' $onoff " +
" | hiraganaQ ' ' $onoff" +
" | caseFirst ' ' $caseFirstOptions" +
" | strength ' ' $strengthOptions" +
" ) ']';\r\n" +
"$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;\r\n" +
"$allTypes = variable | regular | implicit | trailing | $ignorableTypes;\r\n" +
"$onoff = on | off;\r\n" +
"$positionList = '[' (first | last) ' ' $allTypes ']';\r\n" +
"$beforeList = '[before ' ('1' | '2' | '3') ']';\r\n" +
"$string = $chars{1,5}~@;\r\n" +
"$crlf = '\r\n';\r\n" +
"$rel1 = '[variable top]' $s ;\r\n" +
"$p1 = ($string $s '|' $s)? 25%;\r\n" +
"$p2 = ('\\' $s $string $s)? 25%;\r\n" +
"$rel2 = $p1 $string $s $p2;\r\n" +
"$relation = $relationList $s ($rel1 | $rel2) $crlf;\r\n" +
"$command = $commandList $crlf;\r\n" +
"$reset = '&' $s ($beforeList $s)? 10% ($positionList | $string 10%) $crlf;\r\n" +
"$mostRules = $command 1% | $reset 5% | $relation 25%;\r\n" +
"$root = $command{0,5} $reset $mostRules{1,20};\r\n";
/*
gc ; C ; Other # Cc | Cf | Cn | Co | Cs
gc ; Cc ; Control
gc ; Cf ; Format
gc ; Cn ; Unassigned
gc ; Co ; Private_Use
gc ; Cs ; Surrogate
gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu
gc ; LC ; Cased_Letter # Ll | Lt | Lu
gc ; Ll ; Lowercase_Letter
gc ; Lm ; Modifier_Letter
gc ; Lo ; Other_Letter
gc ; Lt ; Titlecase_Letter
gc ; Lu ; Uppercase_Letter
gc ; M ; Mark # Mc | Me | Mn
gc ; Mc ; Spacing_Mark
gc ; Me ; Enclosing_Mark
gc ; Mn ; Nonspacing_Mark
gc ; N ; Number # Nd | Nl | No
gc ; Nd ; Decimal_Number
gc ; Nl ; Letter_Number
gc ; No ; Other_Number
gc ; P ; Punctuation # Pc | Pd | Pe | Pf | Pi | Po | Ps
gc ; Pc ; Connector_Punctuation
gc ; Pd ; Dash_Punctuation
gc ; Pe ; Close_Punctuation
gc ; Pf ; Final_Punctuation
gc ; Pi ; Initial_Punctuation
gc ; Po ; Other_Punctuation
gc ; Ps ; Open_Punctuation
gc ; S ; Symbol # Sc | Sk | Sm | So
gc ; Sc ; Currency_Symbol
gc ; Sk ; Modifier_Symbol
gc ; Sm ; Math_Symbol
gc ; So ; Other_Symbol
gc ; Z ; Separator # Zl | Zp | Zs
gc ; Zl ; Line_Separator
gc ; Zp ; Paragraph_Separator
gc ; Zs ; Space_Separator
*/
/*
// each rule can be:
// "[" command "]"
// "& [" position "]"
// "&" before chars
// relation "[variable top]"
// relation (chars "|")? chars ("/" chars)?
// plus, a reset must come before a relation
// the following reflects the above rules, plus allows whitespace.
Pick chars = Pick.string(1, 5, Pick.codePoint(uSet)); // insert something needing quotes
Pick s = Pick.maybe(0.8, Pick.unquoted(" ")).name("Space"); // optional space
Pick CRLF = Pick.unquoted("\r\n");
Pick rel1 = Pick.and(Pick.unquoted("[variable top]")).and2(s);
Pick p1 = Pick.maybe(0.25, Pick.and(chars).and2(s).and2("|").and2(s));
Pick p2 = Pick.maybe(0.25, Pick.and("/").and2(s).and2(chars).and2(s));
Pick rel2 = Pick.and(p1).and2(chars).and2(s).and2(p2);
Pick relation = Pick.and(Pick.or(relationList)).and2(s)
.and2(Pick.or(1, rel1).or2(10, rel2))
.and2(CRLF).name("Relation");
Pick command = Pick.and(Pick.or(commandList)).and2(CRLF).name("Command");
Pick reset = Pick.and("&").and2(s)
.and2(0.1, Pick.or(beforeList)).and2(s)
.and2(Pick.or(0.1, Pick.or(positionList)).or2(1.0, chars))
.and2(CRLF).name("Reset");
Pick rule = Pick.and(Pick.or(1, command).or2(5, reset).or2(25, relation)).name("Rule");
Pick rules2 = Pick.and(Pick.repeat(0,5,command))
.and2(reset)
.and2(Pick.repeat(1,20,rule)).name("Rules");
rules = Pick.Target.make(rules2);
static final String[] relationList = {" <", " <<", " <<<", " =", " ;", " ,"};
static final String[] commandList = {
"[alternate non-ignorable]",
"[alternate shifted]",
"[backwards 2]",
"[normalization off]",
"[normalization on]",
"[caseLevel off]",
"[caseLevel on]",
"[caseFirst off]",
"[caseFirst upper]",
"[caseFirst lower]",
"[strength 1]",
"[strength 2]",
"[strength 3]",
"[strength 4]",
"[strength I]",
"[hiraganaQ off]",
"[hiraganaQ on]"
};
static final String[] positionList = {
"[first tertiary ignorable]",
"[last tertiary ignorable]",
"[first secondary ignorable]",
"[last secondary ignorable]",
"[first primary ignorable]",
"[last primary ignorable]",
"[first variable]",
"[last variable]",
"[first regular]",
"[last regular]",
"[first implicit]",
"[last implicit]",
"[first trailing]",
"[last trailing]"
};
static final String[] beforeList = {
"[before 1]",
"[before 2]",
"[before 3]"
};
*/
}

View file

@ -0,0 +1,79 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
* $Source:
* $Date:
* $Revision:
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.collator;
import java.util.*;
/**
 * Checks that a Comparator behaves consistently (symmetry and transitivity)
 * over a stream of test objects. Subclasses override newObject to supply
 * objects and format to render them in failure messages.
 */
public class TestComparator {
    /**
     * Tests the symmetry and transitivity of comp over count rounds.
     * Each round draws one new object and checks it against the two
     * previous objects in all six orderings.
     *
     * @param comp  comparator under test
     * @param count number of rounds to run
     * @throws RuntimeException (via log) on the first inconsistency found
     */
    public void test(Comparator comp, int count) {
        Object c = null;
        Object b = newObject(c);
        Object a = newObject(b);
        // Only the sign of a comparison is meaningful, so normalize to -1/0/1.
        // This also makes the negations below safe from Integer.MIN_VALUE overflow.
        int compab = sign(comp.compare(a, b));
        while (--count >= 0) {
            // rotate old values
            c = b;
            b = a;
            int compbc = compab;
            // allocate new and get comparisons
            a = newObject(b);
            compab = sign(comp.compare(a, b));
            int compba = sign(comp.compare(b, a));
            int compac = sign(comp.compare(a, c));
            // check symmetry
            if (compab != -compba) {
                log("Symmetry Failure", new Object[] {a, b});
            }
            // check transitivity for every ordering (x, y, z):
            // arguments are compare(x,y), compare(y,z), compare(x,z)
            check(a, b, c, compab, compbc, compac);
            check(a, c, b, compac, -compbc, compab);
            check(b, a, c, -compab, compac, compbc);
            check(b, c, a, compbc, -compac, -compab);
            check(c, a, b, -compac, compab, -compbc);
            check(c, b, a, -compbc, -compab, -compac);
        }
    }

    /** Reduces a comparison result to -1, 0 or 1. */
    private static int sign(int value) {
        return value < 0 ? -1 : (value > 0 ? 1 : 0);
    }

    /** Reports a failure when a <= b and b <= c but not a <= c. */
    private void check(Object a, Object b, Object c,
            int compab, int compbc, int compac) {
        if (compab <= 0 && compbc <= 0 && !(compac <= 0)) {
            log("Transitivity Failure", new Object[] {a, b, c});
        }
    }

    /**
     * Supplies the next test object; the default returns the empty string.
     *
     * @param c the previously supplied object (null on the first call)
     */
    public Object newObject(Object c) {
        return "";
    }

    /** Returns the display form of an object for failure messages. */
    public String format(Object c) {
        return c.toString();
    }

    /**
     * Reports a failure by throwing a RuntimeException whose message is the
     * title followed by the formatted arguments.
     */
    public void log(String title, Object[] arguments) {
        StringBuffer result = new StringBuffer(title).append(": [");
        for (int i = 0; i < arguments.length; ++i) {
            if (i != 0) result.append(", ");
            result.append(format(arguments[i]));
        }
        result.append("]");
        throw new RuntimeException(result.toString());
    }
}

View file

@ -0,0 +1,331 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/BNF.java,v $
* $Date: 2003/11/21 01:03:38 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.UnicodeMatcher;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UTF16;
import java.util.Random;
public class BNF {
private Map map = new HashMap();
private Set variables = new HashSet();
private Pick pick = null;
private Pick.Target target = null;
private Tokenizer t;
private Quoter quoter;
private Random random;
/**
 * Returns the next string from the target built by complete().
 * The target field is null until complete() has been called.
 */
public String next() {
return target.next();
}
/**
 * Returns the root pick's internal description, obtained by calling
 * getInternal(0, new HashSet()) on it. The pick field is null until
 * complete() has been called.
 */
public String getInternal() {
return pick.getInternal(0, new HashSet());
}
/*
+ "weight = integer '%';"
+ "range = '{' integer (',' integer?)? '}' weight*;"
+ "quote = '@';"
+ "star = '*' weight*;"
+ "plus = '+' weight*;"
+ "maybe = '?' weight?;"
+ "quantifier = range | star | maybe | plus;"
+ "core = string | unicodeSet | '(' alternation ')';"
+ "sequence = (core quantifier*)+;"
+ "alternation = sequence (weight? ('|' sequence weight?)+)?;"
+ "rule = string '=' alternation;";
* Match 0 or more times
+ Match 1 or more times
? Match 1 or 0 times
{n} Match exactly n times
{n,} Match at least n times
{n,m} Match at least n but not more than m times
*/
/**
 * Creates an empty grammar with a fresh Tokenizer.
 * @param random stored and later passed to Pick.Target.make in complete()
 * @param quoter stored and later passed to Pick.Target.make in complete()
 */
public BNF(Random random, Quoter quoter) {
this.random = random;
this.quoter = quoter;
t = new Tokenizer();
}
/**
 * Parses every rule in the given source text and adds it to the grammar.
 * @param rules rule definitions, each of the form "$name = expression;"
 * @return this, for chaining
 */
public BNF addRules(String rules) {
    t.setSource(rules);
    boolean more;
    do {
        more = addRule();
    } while (more);
    return this; // for chaining
}
/**
 * Finishes the grammar: verifies that the set of defined rules equals the
 * set of referenced variables (plus $root), substitutes every rule's
 * definition into the other rules, and builds the generation target from
 * $root.
 *
 * @return this, for chaining
 * @throws IllegalArgumentException (via error) if a variable is referenced
 *         but not defined, or defined but not referenced
 */
public BNF complete() {
    // check that the rules match the variables, except for $root in rules
    Set ruleSet = map.keySet();
    // $root is never referenced by another rule, so count it as used
    variables.add("$root");
    variables.addAll(t.getLookedUpItems());
    if (!ruleSet.equals(variables)) {
        String msg = showDiff(variables, ruleSet);
        if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg;
        String temp = showDiff(ruleSet, variables);
        if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp;
        if (msg.length() == 0) msg = temp;
        else if (temp.length() != 0) {
            msg = msg + "; " + temp;
        }
        // error() always throws, so nothing below runs on a mismatch
        error(msg);
    }
    // replace variables by definitions
    Iterator it = ruleSet.iterator();
    while (it.hasNext()) {
        String key = (String) it.next();
        Pick expression = (Pick) map.get(key);
        Iterator it2 = ruleSet.iterator();
        while (it2.hasNext()) {
            Object key2 = it2.next();
            if (key.equals(key2)) continue;
            Pick expression2 = (Pick) map.get(key2);
            expression2.replace(key, expression);
        }
    }
    pick = (Pick) map.get("$root");
    target = Pick.Target.make(pick, random, quoter);
    // TODO remove temp collections
    return this;
}
/**
 * Lists the elements that are in a but not in b, joined by ", ".
 * @return the joined list, or the empty string when there are none
 */
String showDiff(Set a, Set b) {
    Set onlyInA = new HashSet();
    onlyInA.addAll(a);
    onlyInA.removeAll(b);
    StringBuffer joined = new StringBuffer();
    for (Iterator it = onlyInA.iterator(); it.hasNext();) {
        if (joined.length() != 0) {
            joined.append(", ");
        }
        joined.append(it.next().toString());
    }
    return joined.toString();
}
/**
 * Always throws an IllegalArgumentException whose message is msg followed
 * by a line break and the tokenizer's toString().
 */
void error(String msg) {
throw new IllegalArgumentException(msg
+ "\r\n" + t.toString());
}
/**
 * Parses one rule of the form "$name = alternation ;" from the tokenizer
 * and registers it.
 *
 * @return false when the tokenizer is exhausted, true after a rule was added
 * @throws IllegalArgumentException (via error) on malformed input
 */
private boolean addRule() {
    int type = t.next();
    if (type == t.DONE) return false;
    // A rule must start with its name; the old message wrongly said "missing weight".
    if (type != t.STRING) error("missing rule name");
    String s = t.getString();
    if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable");
    if (t.next() != '=') error("missing =");
    int startBody = t.index;
    Pick rule = getAlternation();
    if (rule == null) error("missing expression");
    // record the source text of the rule body under the rule name
    t.addSymbol(s, t.getSource(), startBody, t.index);
    if (t.next() != ';') error("missing ;");
    return addPick(s, rule);
}
/**
 * Registers a rule under the given variable name. The rule is named after
 * the variable if it has no name yet.
 *
 * @return always true
 * @throws IllegalArgumentException (via error) if the name is already defined
 */
protected boolean addPick(String s, Pick rule) {
    if (map.get(s) != null) {
        error("duplicate variable");
    }
    if (rule.name == null) {
        rule.name(s);
    }
    map.put(s, rule);
    return true;
}
/**
 * Defines a variable as "one code point picked from the given set".
 * A null set is ignored.
 *
 * @return this, for chaining
 */
public BNF addSet(String variable, UnicodeSet set) {
    if (set == null) {
        return this;
    }
    String pattern = set.toString();
    t.addSymbol(variable, pattern, 0, pattern.length());
    addPick(variable, Pick.codePoint(set));
    return this;
}
int maxRepeat = 99;
/**
 * Applies one postfix qualifier to item if the next token is one:
 * '@' (quote), '~' (morph), '?', '*', '+', or a '{n}', '{n,}', '{n,m}' range,
 * each optionally followed by weights. If the next token is not a qualifier
 * it is pushed back and item is returned unchanged, which is how callers
 * detect that no further qualifiers follow.
 */
Pick qualify(Pick item) {
    int[] weights;
    int type = t.next();
    switch(type) {
    case '@':
        return new Pick.Quote(item);
    case '~':
        return new Pick.Morph(item);
    case '?':
        int weight = getWeight();
        if (weight == NO_WEIGHT) weight = 50;
        // weights[0] applies to zero occurrences, weights[1] to one
        weights = new int[] {100-weight, weight};
        return Pick.repeat(0, 1, weights, item);
    case '*':
        weights = getWeights();
        // '*' means zero or more; it previously started at 1, making it identical to '+'
        return Pick.repeat(0, maxRepeat, weights, item);
    case '+':
        weights = getWeights();
        return Pick.repeat(1, maxRepeat, weights, item);
    case '{':
        if (t.next() != t.NUMBER) error("missing number");
        int start = (int) t.getNumber();
        int end = start; // {n}: exactly n
        type = t.next();
        if (type == ',') {
            end = maxRepeat; // {n,}: open-ended, capped at maxRepeat
            type = t.next();
            if (type == t.NUMBER) {
                end = (int)t.getNumber(); // {n,m}
                type = t.next();
            }
        }
        if (type != '}') error("missing }");
        weights = getWeights();
        return Pick.repeat(start, end, weights, item);
    }
    t.backup();
    return item;
}
/**
 * Parses one core item: a string (recording it as a referenced variable when
 * it starts with '$'), a UnicodeSet, or a parenthesized alternation.
 *
 * @return the parsed item, or null (with the token pushed back) if the next
 *         token cannot start a core item
 */
Pick getCore() {
    int token = t.next();
    if (token == t.STRING) {
        String s = t.getString();
        // guard against an empty string token, as addRule does
        if (s.length() != 0 && s.charAt(0) == '$') variables.add(s);
        return Pick.string(s);
    }
    if (token == t.UNICODESET) {
        return Pick.codePoint(t.getUnicodeSet());
    }
    if (token != '(') {
        t.backup();
        return null;
    }
    Pick temp = getAlternation();
    token = t.next();
    if (token != ')') error("missing )");
    return temp;
}
/**
 * Parses a run of one or more qualified core items.
 *
 * @return the single item if only one was found, otherwise a Pick.Sequence
 *         holding all of them in order
 * @throws IllegalArgumentException (via error) if no item is present
 */
Pick getSequence() {
    Pick.Sequence sequence = null;
    Pick first = null;
    for (;;) {
        Pick item = getCore();
        if (item == null) {
            if (sequence != null) return sequence;
            if (first != null) return first;
            error("missing item");
        }
        // keep applying qualifiers until qualify() hands the item back unchanged
        Pick qualified = qualify(item);
        while (qualified != item) {
            item = qualified;
            qualified = qualify(item);
        }
        // add it in
        if (first == null) {
            first = item;
            continue;
        }
        if (sequence == null) {
            sequence = Pick.makeSequence().and2(first);
        }
        sequence = sequence.and2(item);
    }
}
/**
 * Parses "sequence weight? ('|' sequence weight?)*" by recursive descent.
 * A branch without an explicit weight gets weight 1.
 *
 * @return the single sequence if there is no '|', otherwise a
 *         Pick.Alternation over all branches
 */
Pick getAlternation() {
    Pick.Alternation alternation = null;
    Pick first = null;
    int firstWeight = NO_WEIGHT;
    for (;;) {
        Pick branch = getSequence();
        if (branch == null) error("empty alternation");
        int weight = getWeight();
        if (weight == NO_WEIGHT) weight = 1;
        if (first == null) {
            first = branch;
            firstWeight = weight;
        } else {
            if (alternation == null) {
                alternation = Pick.makeAlternation().or2(firstWeight, first);
            }
            alternation = alternation.or2(weight, branch);
        }
        if (t.next() == '|') {
            continue;
        }
        // not part of this alternation: push the token back and finish
        t.backup();
        if (alternation != null) return alternation;
        if (first != null) return first;
    }
}
private static final int NO_WEIGHT = Integer.MIN_VALUE;
/**
 * Parses an optional weight of the form "number %".
 *
 * @return the weight, or NO_WEIGHT (with the token pushed back) if the next
 *         token is not a number
 * @throws IllegalArgumentException (via error) if the number is not followed by '%'
 */
int getWeight() {
    if (t.next() != t.NUMBER) {
        t.backup();
        return NO_WEIGHT;
    }
    int weight = (int) t.getNumber();
    if (t.next() != '%') {
        error("missing %");
    }
    return weight;
}
/**
 * Parses zero or more consecutive weights.
 *
 * @return the weights in source order, or null if there are none
 */
int[] getWeights() {
    ArrayList collected = new ArrayList();
    for (int w = getWeight(); w != NO_WEIGHT; w = getWeight()) {
        collected.add(new Integer(w));
    }
    int n = collected.size();
    if (n == 0) {
        return null;
    }
    int[] weights = new int[n];
    for (int i = 0; i < n; ++i) {
        weights[i] = ((Integer) collected.get(i)).intValue();
    }
    return weights;
}
}

View file

@ -0,0 +1,621 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java,v $
* $Date: 2003/11/21 01:03:38 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import com.ibm.icu.text.*;
import com.ibm.icu.lang.*;
import com.ibm.icu.util.*;
import com.ibm.icu.impl.*;
import java.io.*;
import java.util.*;
import java.util.regex.*;
import java.text.MessageFormat;
public class BagFormatter {
boolean abbreviated = false;
/**
 * Compares two UnicodeSets and returns the differences as text.
 * @param name1 name of first set to be compared
 * @param set1 first set
 * @param name2 name of second set to be compared
 * @param set2 second set
 * @return formatted string
 */
public String showSetDifferences(
        String name1,
        UnicodeSet set1,
        String name2,
        UnicodeSet set2) {
    StringWriter buffer = new StringWriter();
    PrintWriter out = new PrintWriter(buffer);
    showSetDifferences(out, name1, set1, name2, set2);
    buffer.flush();
    return buffer.toString();
}
/**
 * Compares two collections and returns the differences as text.
 * @param name1 name of first collection to be compared
 * @param set1 first collection
 * @param name2 name of second collection to be compared
 * @param set2 second collection
 * @return formatted string
 */
public String showSetDifferences(
        String name1,
        Collection set1,
        String name2,
        Collection set2) {
    StringWriter buffer = new StringWriter();
    PrintWriter out = new PrintWriter(buffer);
    showSetDifferences(out, name1, set1, name2, set2);
    buffer.flush();
    return buffer.toString();
}
/**
 * Compares two UnicodeSets and writes three sections to pw: the items only
 * in the first set, the items only in the second, and the items in both.
 * Each section is preceded by a blank line.
 * @param pw destination for the report
 * @param name1 name of first set to be compared
 * @param set1 first set
 * @param name2 name of second set to be compared
 * @param set2 second set
 */
public void showSetDifferences(
        PrintWriter pw,
        String name1,
        UnicodeSet set1,
        String name2,
        UnicodeSet set2) {
    String[] names = { name1, name2 };

    UnicodeSet onlyInFirst = new UnicodeSet(set1).removeAll(set2);
    pw.println();
    showSetNames(pw, inOut.format(names), onlyInFirst);

    UnicodeSet onlyInSecond = new UnicodeSet(set2).removeAll(set1);
    pw.println();
    showSetNames(pw, outIn.format(names), onlyInSecond);

    UnicodeSet inBoth = new UnicodeSet(set2).retainAll(set1);
    pw.println();
    showSetNames(pw, inIn.format(names), inBoth);
}
/**
 * Compares two collections and writes three sections to pw: the items only
 * in the first, the items only in the second, and the items in both.
 * The comparison is done in a HashSet, so order and duplicates of the
 * inputs are not preserved.
 * @param pw destination for the report
 * @param name1 name of first collection to be compared
 * @param set1 first collection
 * @param name2 name of second collection to be compared
 * @param set2 second collection
 */
public void showSetDifferences(
        PrintWriter pw,
        String name1,
        Collection set1,
        String name2,
        Collection set2) {
    String[] names = { name1, name2 };
    // Collection has no clone(), so a single HashSet is used as scratch space
    Collection scratch = new HashSet(set1);

    // in first, not in second
    scratch.removeAll(set2);
    pw.println();
    showSetNames(pw, inOut.format(names), scratch);

    // in second, not in first
    scratch.clear();
    scratch.addAll(set2);
    scratch.removeAll(set1);
    pw.println();
    showSetNames(pw, outIn.format(names), scratch);

    // in both
    scratch.clear();
    scratch.addAll(set1);
    scratch.retainAll(set2);
    pw.println();
    showSetNames(pw, inIn.format(names), scratch);
}
/**
 * Formats the title and the contents of set1 and returns the text.
 * @param title heading printed first
 * @param set1 object to be listed
 * @return formatted string
 */
public String showSetNames(String title, Object set1) {
    StringWriter buffer = new StringWriter();
    PrintWriter out = new PrintWriter(buffer);
    showSetNames(out, title, set1);
    out.flush();
    String text = buffer.toString();
    out.close();
    return text;
}
/**
 * Prints the title on its own line, then lets the main visitor walk c and
 * write its output to the same writer.
 * @param output destination writer
 * @param title heading printed first
 * @param c object to be listed
 * @internal
 */
public void showSetNames(PrintWriter output, String title, Object c) {
    output.println(title);
    // the visitor writes to whatever writer is installed here
    mainVisitor.output = output;
    mainVisitor.doAt(c);
}
/**
 * Writes the title and the contents of c to a UTF-8 encoded file.
 * @param filename path of the file to create or overwrite
 * @param title heading printed first
 * @param c object to be listed
 * @throws IOException if the file cannot be opened
 * @internal
 */
public void showSetNames(String filename, String title, Object c) throws IOException {
    PrintWriter pw = new PrintWriter(
        new OutputStreamWriter(
            new FileOutputStream(filename), "utf-8"));
    try {
        // previously the writer was opened and then abandoned: nothing was
        // written and the file handle was never released
        showSetNames(pw, title, c);
    } finally {
        pw.close();
    }
}
/**
 * Abbreviates source relative to pattern: both strings are split at the
 * point returned by NameIterator.findMatchingEnd, and in each part every
 * word of source that equals the word at the same position in pattern is
 * replaced by substitute.
 * @param source name to abbreviate
 * @param pattern name to compare against
 * @param substitute replacement for matching words
 * @return the abbreviated name
 */
public String getAbbreviatedName(
        String source,
        String pattern,
        String substitute) {
    int matchEnd = NameIterator.findMatchingEnd(source, pattern);
    int sourceSplit = source.length() - matchEnd;
    int patternSplit = pattern.length() - matchEnd;

    String sourceHead = source.substring(0, sourceSplit);
    String patternHead = pattern.substring(0, patternSplit);
    String sourceTail = source.substring(sourceSplit);
    String patternTail = pattern.substring(patternSplit);

    StringBuffer abbreviatedName = new StringBuffer();
    addMatching(sourceHead, patternHead, substitute, abbreviatedName);
    addMatching(sourceTail, patternTail, substitute, abbreviatedName);
    return abbreviatedName.toString();
}
/**
 * Supplies a string describing the relation between two strings.
 */
abstract public static class Relation {
abstract public String getRelation(String a, String b);
}
/**
 * Relation that always yields the empty string; used as the default.
 */
static class NullRelation extends Relation {
public String getRelation(String a, String b) { return ""; }
}
private Relation r = new NullRelation();
/**
 * Replaces the current Relation.
 * @return this, for chaining
 */
public BagFormatter setRelation(Relation r) {
this.r = r;
return this; // for chaining
}
/**
 * Returns the current Relation (a NullRelation unless setRelation was called).
 */
public Relation getRelation() {
return r;
}
/*
r.getRelation(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)
*/
/*
static final UnicodeSet NO_NAME =
new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");
static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();
static final UnicodeSet NAME_CHARACTERS =
new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");
public UnicodeSet getSetForName(String namePattern) {
UnicodeSet result = new UnicodeSet();
Matcher m = Pattern.compile(namePattern).matcher("");
// check for no-name items, and add in bulk
m.reset("<no name>");
if (m.matches()) {
result.addAll(NO_NAME);
}
// check all others
UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);
while (usi.next()) {
String name = getName(usi.codepoint);
if (name == null)
continue;
m.reset(name);
if (m.matches()) {
result.add(usi.codepoint);
}
}
// Note: if Regex had some API so that if we could tell that
// an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"
// then we could optimize by skipping whole swathes of characters
return result;
}
*/
/**
 * Sets the mergeRanges flag. When true (the default), the visitor prints one
 * line per run of code points sharing a label; when false, one line per code point.
 */
public void setMergeRanges(boolean in) {
mergeRanges = in;
}
/**
 * Sets the showSetAlso flag. When true, the visitor prints a "# " line with
 * the UnicodeSet itself before listing its contents.
 */
public void setShowSetAlso(boolean b) {
showSetAlso = b;
}
/**
 * Returns a display name for a code point, prefixed by "U+hhhh ".
 * The Name property is used when present. For controls without a Name the
 * Unicode_1_Name is used, wrapped in angle brackets. Anything else is
 * reported as "&lt;reserved&gt;".
 * @param codePoint code point to describe
 * @return the prefixed name
 */
public String getName(int codePoint) {
    String hcp = "U+" + Utility.hex(codePoint, 4) + " ";
    String result = nameProp.getPropertyValue(codePoint);
    if (result != null)
        return hcp + result;
    String prop = catProp.getPropertyValue(codePoint);
    // constant-first equals: a missing category falls through to <reserved>
    if ("Control".equals(prop)) {
        // nameProp is already known to be null here, so re-querying it (as the
        // code used to) could never succeed; the Unicode 1 name is the fallback
        result = name1Prop.getPropertyValue(codePoint);
        if (result != null)
            return hcp + "<" + result + ">";
    }
    return hcp + "<reserved>";
}
UnicodePropertySource source;
UnicodePropertySource labelSource;
UnicodePropertySource nameProp;
UnicodePropertySource name1Prop;
UnicodePropertySource catProp;
UnicodePropertySource shortCatProp;
/**
 * Installs a new property source and derives, from separate clones of it,
 * the sources for Name, Unicode_1_Name, General_Category, and the
 * short-named General_Category.
 * @param source property source to clone from
 */
public void setUnicodePropertySource(UnicodePropertySource source) {
    this.source = source;

    UnicodePropertySource nameClone = (UnicodePropertySource) source.clone();
    nameProp = nameClone.setPropertyAlias("Name");

    UnicodePropertySource name1Clone = (UnicodePropertySource) source.clone();
    name1Prop = name1Clone.setPropertyAlias("Unicode_1_Name");

    UnicodePropertySource categoryClone = (UnicodePropertySource) source.clone();
    catProp = categoryClone.setPropertyAlias("General_Category");

    shortCatProp = ((UnicodePropertySource) source.clone())
        .setPropertyAlias("General_Category")
        .setNameChoice(UProperty.NameChoice.SHORT);
}
// Instance initializer: installs the default ICU property source and a label
// source based on the short General_Category name, filtered through a map
// that rewrites some categories to "L&".
// NOTE(review): the map rewrites "Lo", "Lu" and "Lt". If "L&" is meant as the
// cased-letter group (Lu | Ll | Lt), "Lo" may be a typo for "Ll" -- confirm.
{
setUnicodePropertySource(new UnicodePropertySource.ICU());
Map labelMap = new HashMap();
labelMap.put("Lo","L&");
labelMap.put("Lu","L&");
labelMap.put("Lt","L&");
setLabelSource(new UnicodePropertySource.ICU()
.setPropertyAlias("General_Category")
.setNameChoice(UProperty.NameChoice.SHORT)
.setFilter(
new UnicodePropertySource.MapFilter().setMap(labelMap)));
}
// ===== PRIVATES =====
private Visitor.Join labelVisitor = new Visitor.Join();
private boolean mergeRanges = true;
private boolean literalCharacter = false;
private boolean showSetAlso = false;
private RangeFinder rf = new RangeFinder();
private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:");
private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:");
private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:");
private MyVisitor mainVisitor = new MyVisitor();
/*
private String getLabels(int start, int end) {
Set names = new TreeSet();
for (int cp = start; cp <= end; ++cp) {
names.add(getLabel(cp));
}
return labelVisitor.join(names);
}
*/
/**
 * Appends the words of source to result, separated by single spaces,
 * replacing each word that equals the word at the same position in pattern
 * by substitute.
 */
private void addMatching(
        String source,
        String pattern,
        String substitute,
        StringBuffer result) {
    NameIterator sourceWords = new NameIterator(source);
    NameIterator patternWords = new NameIterator(pattern);
    boolean needSeparator = false;
    for (String word = sourceWords.next(); word != null; word = sourceWords.next()) {
        // may be null once pattern runs out; equals(null) is simply false
        String patternWord = patternWords.next();
        if (needSeparator) {
            result.append(" ");
        }
        needSeparator = true;
        result.append(word.equals(patternWord) ? substitute : word);
    }
}
private Tabber singleTabber =
new Tabber.MonoTabber(
new int[] {
0,
Tabber.LEFT,
6,
Tabber.LEFT,
10,
Tabber.LEFT,
14,
Tabber.LEFT });
private Tabber rangeTabber =
new Tabber.MonoTabber(
new int[] {
0,
Tabber.LEFT,
14,
Tabber.LEFT,
18,
Tabber.LEFT,
27,
Tabber.LEFT,
34,
Tabber.LEFT });
private static NumberFormat nf =
NumberFormat.getIntegerInstance(Locale.ENGLISH);
private class MyVisitor extends Visitor {
PrintWriter output;
/**
 * Formats o with this visitor and returns the text.
 * The visitor's output writer is redirected to a string buffer for the
 * duration of the call and restored afterwards. Previously doAt wrote to the
 * existing output and this method always returned the empty string.
 */
public String format(Object o) {
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);
    PrintWriter saved = output;
    output = pw;
    try {
        doAt(o);
    } finally {
        output = saved;
    }
    pw.flush();
    String result = sw.getBuffer().toString();
    pw.close();
    return result;
}
// When showSetAlso is on and the container is a UnicodeSet, prints the set
// itself as a "# " comment line. Does nothing otherwise.
protected void doBefore(Object container, Object o) {
if (showSetAlso && container instanceof UnicodeSet) {
output.println("# " + container);
}
}
// Intentionally empty: nothing is printed between items.
protected void doBetween(Object container, Object lastItem, Object nextItem) {
}
// Prints a "# Total: n" line, where n is count(container) formatted with nf.
// count() is not defined in this class (presumably inherited from Visitor).
protected void doAfter(Object container, Object o) {
output.println("# Total: " + nf.format(count(container)));
}
// Handles a leaf item: a Map.Entry is printed as key->value (each side
// visited recursively), a code point range goes to the range printer, and
// anything else is printed via toString().
protected void doSimpleAt(Object o) {
    if (o instanceof Map.Entry) {
        Map.Entry entry = (Map.Entry) o;
        Object key = entry.getKey();
        Object value = entry.getValue();
        doBefore(o, key);
        doAt(key);
        output.print("->");
        doAt(value);
        doAfter(o, value);
        return;
    }
    if (o instanceof Visitor.CodePointRange) {
        doAt((Visitor.CodePointRange) o);
        return;
    }
    output.print(o.toString());
}
// Prints a code point range. With mergeRanges on, one line is printed per
// run of code points sharing the same label: hex range, label, count,
// optional literal characters, and name(s). With mergeRanges off, one line
// is printed per code point.
protected void doAt(Visitor.CodePointRange usi) {
    if (mergeRanges) {
        rf.reset(usi.codepoint, usi.codepointEnd + 1);
        for (String label = rf.next(); label != null; label = rf.next()) {
            int start = rf.start;
            int end = rf.limit - 1;
            boolean isRange = start != end;
            if (label.length() != 0) {
                label += " ";
            }
            StringBuffer line = new StringBuffer();
            // hex code point or range
            line.append(Utility.hex(start, 4));
            if (isRange) {
                line.append("..").append(Utility.hex(end, 4));
            }
            // label and number of code points
            line.append(" \t# ").append(label);
            line.append(" \t[").append(nf.format(end - start + 1)).append("]");
            // literal characters, skipped for code points below U+0020
            if (literalCharacter && (start >= 0x20)) {
                line.append(" \t(").append(UTF16.valueOf(start));
                if (isRange) {
                    line.append("..").append(UTF16.valueOf(end));
                }
                line.append(") ");
            }
            // name of the first code point and, for a range, of the last
            line.append(" \t").append(getName(start));
            if (isRange) {
                line.append("..");
                line.append(abbreviated
                    ? getAbbreviatedName(getName(end), getName(start), "~")
                    : getName(end));
            }
            output.println(rangeTabber.process(line.toString()));
        }
        return;
    }
    for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
        String label = labelSource.getPropertyValue(cp);
        if (label.length() != 0) {
            label += " ";
        }
        StringBuffer line = new StringBuffer();
        line.append(Utility.hex(cp, 4));
        line.append(" \t# ").append(label);
        if (literalCharacter && (cp >= 0x20)) {
            line.append(" \t(").append(UTF16.valueOf(cp)).append(") ");
        }
        line.append(" \t").append(getName(cp));
        output.println(singleTabber.process(line.toString()));
    }
}
}
/**
 * Iterates through a string, breaking at single space characters.
 * @author Davis
 */
private static class NameIterator {
    String source;
    int position;
    int start;
    int limit;

    NameIterator(String source) {
        this.source = source;
        this.start = 0;
        this.limit = source.length();
    }

    /**
     * Finds the next word. The delimiting space is skipped and is not part
     * of the returned word.
     * @return the next word, or null when the string is exhausted
     */
    String next() {
        if (position >= limit)
            return null;
        int pos = source.indexOf(' ', position);
        if (pos < 0 || pos >= limit)
            pos = limit;
        String result = source.substring(position, pos);
        position = pos + 1;
        return result;
    }

    /**
     * Measures the trailing part of s1 that also ends s2, cut back so that it
     * begins at a space in s1.
     * @return the number of characters from the first space inside the common
     *         suffix to the end of s1, or 0 if the common suffix has no space
     */
    static int findMatchingEnd(String s1, String s2) {
        int i = s1.length();
        int j = s2.length();
        // walk backwards while the characters agree; explicit bounds checks
        // replace the previous catch-the-exception loop exit
        while (i > 0 && j > 0 && s1.charAt(i - 1) == s2.charAt(j - 1)) {
            --i;
            --j;
        }
        // i is now the start of the common suffix; move forward to a space
        i = s1.indexOf(' ', i);
        if (i < 0)
            return 0;
        return s1.length() - i;
    }
}
/**
 * Splits a code point interval into maximal runs that share the same label
 * from labelSource. After each successful next(), the run is [start, limit).
 */
private class RangeFinder {
    int start, limit;
    private int veryLimit;

    /** Restarts iteration over [start, end). */
    void reset(int start, int end) {
        this.limit = start;
        this.veryLimit = end;
    }

    /**
     * Advances to the next run.
     * @return the label of the run, or null when the interval is exhausted
     */
    String next() {
        if (limit >= veryLimit) {
            return null;
        }
        start = limit;
        String label = labelSource.getPropertyValue(limit++);
        // extend the run while the following code points carry the same label
        while (limit < veryLimit
                && labelSource.getPropertyValue(limit).equals(label)) {
            ++limit;
        }
        return label;
    }
}
/**
 * Returns whether the last name of a merged range is printed in abbreviated form.
 */
public boolean isAbbreviated() {
return abbreviated;
}
/**
 * Sets whether the last name of a merged range is printed in abbreviated form.
 */
public void setAbbreviated(boolean b) {
abbreviated = b;
}
/**
 * Returns the property source last passed to setUnicodePropertySource.
 */
public UnicodePropertySource getSource() {
return source;
}
/**
 * Returns the property source used to label code points in the output.
 */
public UnicodePropertySource getLabelSource() {
return labelSource;
}
/**
 * Sets the property source used to label code points in the output.
 */
public void setLabelSource(UnicodePropertySource source) {
labelSource = source;
}
/**
 * Adds the contents of source to target by calling source.addAllTo(target).
 * @deprecated
 */
public static void addAll(UnicodeSet source, Collection target) {
source.addAllTo(target);
}
// UTILITIES
public static final Transliterator hex = Transliterator.getInstance(
"[^\\u0021-\\u007E\\u00A0-\\u00FF] hex");
/**
 * Minimal sink for progress messages, one line at a time.
 */
public interface Shower {
public void println(String arg);
}
/**
 * Shower that prints each line to System.out.
 */
public static Shower CONSOLE = new Shower() {
public void println(String arg) {
System.out.println(arg);
}
};
/**
 * Opens a file for reading as UTF-8 through a 4 KB buffer.
 * @param dir directory prefix, concatenated directly with filename
 * @param filename file name
 * @param shower if not null, receives a message with the canonical path
 * @return a buffered reader over the file
 * @throws IOException if the file cannot be opened
 */
public static BufferedReader openUTF8Reader(String dir, String filename, Shower shower) throws IOException {
    File file = new File(dir + filename);
    if (shower != null) {
        // this method reads an existing file; the message used to say "Creating File"
        shower.println("Opening File: "
            + file.getCanonicalPath());
    }
    return new BufferedReader(
        new InputStreamReader(
            new FileInputStream(file),
            "UTF-8"),
        4*1024);
}
/**
 * Opens a file for writing as UTF-8 through a 4 KB buffer.
 * @param dir directory prefix, concatenated directly with filename
 * @param filename file name
 * @param shower if not null, receives a message with the canonical path
 * @return a print writer over the file
 * @throws IOException if the file cannot be opened
 */
public static PrintWriter openUTF8Writer(String dir, String filename, Shower shower) throws IOException {
    File target = new File(dir + filename);
    if (shower != null) {
        String path = target.getCanonicalPath();
        shower.println("Creating File: " + path);
    }
    //File parent = new File(file.getParent());
    //parent.mkdirs();
    OutputStreamWriter utf8 = new OutputStreamWriter(new FileOutputStream(target), "UTF-8");
    BufferedWriter buffered = new BufferedWriter(utf8, 4*1024);
    return new PrintWriter(buffered);
}
}

View file

@ -0,0 +1,804 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Pick.java,v $
* $Date: 2003/11/21 01:03:38 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.impl.Utility;
import java.util.ArrayList;
import java.util.Random;
import java.util.Arrays;
import java.util.Set;
import java.util.HashSet;
import java.text.ParsePosition;
import java.text.ParseException;
import java.util.Map;
import java.util.HashMap;
abstract public class Pick {
// When true, code in this class prints diagnostic text to System.out.
private static boolean DEBUG = false;
/**
 * Generation context: couples a root Pick with the random source that drives
 * its choices and the Quoter that accumulates the generated text.
 */
static class Target {
    private Pick pick;
    private Random random;
    private Quoter quoter;

    /** Creates a target for the given pick, random source and quoter. */
    public static Target make(Pick pick, Random random, Quoter quoter) {
        Target made = new Target();
        made.pick = pick;
        made.random = random;
        made.quoter = quoter;
        return made;
    }

    /** Clears the quoter, lets the pick generate into it, and returns the text. */
    public String next() {
        clear();
        pick.addTo(this);
        return get();
    }

    /** Returns the quoter's current text (its toString()). */
    public String get() {
        return quoter.toString();
    }

    // Shares the other target's random source; nothing else is copied.
    private void copyState(Target other) {
        this.random = other.random;
    }

    private void clear() {
        quoter.clear();
    }

    private int length() {
        return quoter.length();
    }

    private Target append(int codepoint) {
        quoter.append(codepoint);
        return this;
    }

    private Target append(String s) {
        quoter.append(s);
        return this;
    }

    // must return value between 0 (inc) and 1 (exc)
    private double nextDouble() {
        return random.nextDouble();
    }
}
// for Building
/**
 * Walks this pick with a Replacer visitor that substitutes replacement for
 * every pick whose name equals toReplace.
 * @return the pick that takes the place of this one (whatever visit returns)
 */
public Pick replace(String toReplace, Pick replacement) {
    return visit(new Replacer(toReplace, replacement));
}
/**
 * Sets the name of this pick.
 * @return this, for chaining
 */
public Pick name(String name) {
this.name = name;
return this;
}
/** Creates an empty Sequence; items are added with and2. */
static public Pick.Sequence makeSequence() {
return new Sequence();
}
/** Creates an empty Alternation; weighted options are added with or2. */
static public Pick.Alternation makeAlternation() {
return new Alternation();
}
/*
static public Pick.Sequence and(Object item) {
return new Sequence().and2(item);
}
static public Pick.Sequence and(Object[] items) {
return new Sequence().and2(items);
}
static public Pick.Alternation or(int itemWeight, Object item) {
return new Alternation().or2(itemWeight, item);
}
static public Pick.Alternation or(Object[] items) {
return new Alternation().or2(1, items);
}
static public Pick.Alternation or(int itemWeight, Object[] items) {
return new Alternation().or2(itemWeight, items);
}
static public Pick.Alternation or(int[] itemWeights, Object[] items) {
return new Alternation().or2(itemWeights, items);
}
static public Pick maybe(int percent, Object item) {
return new Repeat(0, 1, new int[]{100-percent, percent}, item);
//return Pick.or(1.0-percent, NOTHING).or2(percent, item);
}
static public Pick repeat(int minCount, int maxCount, int itemWeights, Object item) {
return new Repeat(minCount, maxCount, itemWeights, item);
}
static public Pick codePoint(String source) {
return new CodePoint(new UnicodeSet(source));
}
*/
/**
 * Creates a Repeat of item; the arguments are passed unchanged to the
 * Repeat constructor.
 */
static public Pick repeat(int minCount, int maxCount, int[] itemWeights, Pick item) {
return new Repeat(minCount, maxCount, itemWeights, item);
}
/** Creates a CodePoint pick backed by the given set (the set is not copied here). */
static public Pick codePoint(UnicodeSet source) {
return new CodePoint(source);
}
/** Creates a Literal pick for the given string. */
static public Pick string(String source) {
return new Literal(source);
}
/*
static public Pick unquoted(String source) {
return new Literal(source);
}
static public Pick string(int minLength, int maxLength, Pick item) {
return new Morph(item, minLength, maxLength);
}
*/
/**
 * Returns a textual description of this pick.
 * @param depth nesting level; the subclasses in this file pass it to indent()
 * @param alreadySeen names already described; the subclasses in this file
 *   pass it to checkName
 */
public abstract String getInternal(int depth, Set alreadySeen);
// Internals
// Name of this pick; null until name(String) is called (Literal stores its text here).
protected String name;
// Generates output for this pick into target.
protected abstract void addTo(Target target);
// Tries to match input at p.index; the implementations in this file advance
// p.index when they return true.
protected abstract boolean match(String input, Position p);
/**
 * A pick that generates (and matches) all of its items, in order.
 */
public static class Sequence extends ListPick {
    // keep private
    private Sequence() {}

    /** Appends one item. @return this, for chaining */
    public Sequence and2 (Pick item) {
        addInternal(new Pick[] {item}); // we don't care about perf
        return this;
    }

    /** Appends all items. @return this, for chaining */
    public Sequence and2 (Pick[] items) {
        addInternal(items);
        return this;
    }

    // Each item contributes its output in turn.
    protected void addTo(Target target) {
        int i = 0;
        while (i < items.length) {
            items[i].addTo(target);
            ++i;
        }
    }

    public String getInternal(int depth, Set alreadySeen) {
        String label = checkName(name, alreadySeen);
        if (label.startsWith("$")) {
            return label;
        }
        StringBuffer text = new StringBuffer();
        text.append(indent(depth)).append(label).append("SEQ(");
        for (int i = 0; i < items.length; ++i) {
            if (i > 0) {
                text.append(", ");
            }
            text.append(items[i].getInternal(depth+1, alreadySeen));
        }
        text.append(")");
        return text.toString();
    }

    // All items must match consecutively; on the first miss the position is
    // rewound to where this sequence started.
    protected boolean match(String input, Position p) {
        final int savedIndex = p.index;
        for (int i = 0; i < items.length; ++i) {
            if (items[i].match(input, p)) {
                continue;
            }
            p.index = savedIndex;
            return false;
        }
        return true;
    }
}
/**
 * Produces the name prefix used by getInternal.
 * @return "" for a null name; the bare name if it is already in alreadySeen;
 *   otherwise "{name=}", after recording the name in alreadySeen
 */
String checkName(String name, Set alreadySeen) {
    if (name == null) {
        return "";
    }
    if (!alreadySeen.contains(name)) {
        alreadySeen.add(name);
        return "{" + name + "=}";
    }
    return name;
}
/**
 * A pick that generates exactly one of its items, chosen at random according
 * to per-item weights, and matches the first item that matches.
 */
public static class Alternation extends ListPick {
    private WeightedIndex weightedIndex = new WeightedIndex(0);

    // keep private
    private Alternation() {}

    /** Adds options that all have weight 1. */
    public Alternation or2 (Pick[] newItems) {
        return or2(1, newItems);
    }

    /** Adds a single option with the given weight. */
    public Alternation or2 (int itemWeight, Pick item) {
        return or2(itemWeight, new Pick[] {item}); // we don't care about perf
    }

    /** Adds options that all share the given weight. */
    public Alternation or2 (int itemWeight, Pick[] newItems) {
        int[] sameWeights = new int[newItems.length];
        for (int i = 0; i < sameWeights.length; ++i) {
            sameWeights[i] = itemWeight;
        }
        return or2(sameWeights, newItems); // we don't care about perf
    }

    /**
     * Adds options with individual weights.
     * @throws ArrayIndexOutOfBoundsException if the two arrays differ in length
     */
    public Alternation or2 (int[] itemWeights, Pick[] newItems) {
        if (newItems.length != itemWeights.length) {
            throw new ArrayIndexOutOfBoundsException(
                "or lengths must be equal: " + newItems.length + " != " + itemWeights.length);
        }
        addInternal(newItems);
        weightedIndex.add(itemWeights);
        return this; // for chaining
    }

    // One random draw selects the option that generates.
    protected void addTo(Target target) {
        int chosen = weightedIndex.toIndex(target.nextDouble());
        items[chosen].addTo(target);
    }

    public String getInternal(int depth, Set alreadySeen) {
        String label = checkName(name, alreadySeen);
        if (label.startsWith("$")) {
            return label;
        }
        StringBuffer text = new StringBuffer();
        text.append(indent(depth)).append(label).append("OR(");
        for (int i = 0; i < items.length; ++i) {
            if (i > 0) {
                text.append(", ");
            }
            text.append(items[i].getInternal(depth+1, alreadySeen));
            text.append("/").append(weightedIndex.weights[i]);
        }
        return text.append(")").toString();
    }

    // take first matching option
    protected boolean match(String input, Position p) {
        final int optionCount = weightedIndex.weights.length;
        for (int i = 0; i < optionCount; ++i) {
            if (!p.isFailure(this, i)) {
                if (items[i].match(input, p)) {
                    return true;
                }
                p.setFailure(this, i);
            }
        }
        return false;
    }
}
/**
 * Builds the line prefix used by getInternal: a CR LF followed by one space
 * per nesting level.
 */
private static String indent(int depth) {
    StringBuffer prefix = new StringBuffer("\r\n");
    int remaining = depth;
    while (remaining > 0) {
        prefix.append(" ");
        --remaining;
    }
    return prefix.toString();
}
/**
 * A pick that generates its item a random number of times. The count is
 * drawn from a WeightedIndex whose results start at minCount, with one weight
 * per possible count.
 */
private static class Repeat extends ItemPick {
    WeightedIndex weightedIndex;
    int minCount = 0;

    private Repeat(int minCount, int maxCount, int[] itemWeights, Pick item) {
        super(item);
        // Remember the minimum so that match() can enforce it (it used to be
        // dropped, leaving the field at 0).
        this.minCount = minCount;
        weightedIndex = new WeightedIndex(minCount).add(maxCount-minCount+1, itemWeights);
    }

    private Repeat(int minCount, int maxCount, int itemWeight, Pick item) {
        super(item);
        this.minCount = minCount;
        weightedIndex = new WeightedIndex(minCount).add(maxCount-minCount+1, itemWeight);
    }

    // Draws a repetition count, then generates the item that many times.
    protected void addTo(Target target) {
        for (int i = weightedIndex.toIndex(target.nextDouble()); i > 0; --i) {
            item.addTo(target);
        }
    }

    public String getInternal(int depth, Set alreadySeen) {
        String result = checkName(name, alreadySeen);
        if (result.startsWith("$")) return result;
        result = indent(depth) + result + "REPEAT(" + weightedIndex
            + "; "+ item.getInternal(depth+1, alreadySeen)
            + ")";
        return result;
    }

    // match longest, e.g. up to just before a failure
    protected boolean match(String input, Position p) {
        final int originalIndex = p.index;
        // Largest count addTo() can produce: there is one weight for each
        // count from minCount upward.
        final int maxCount = Math.max(minCount, minCount + weightedIndex.weights.length - 1);
        int count = 0;
        for (int i = 0; i < maxCount; ++i) {
            if (p.isFailure(this,i)) break;
            if (!item.match(input, p)) {
                p.setFailure(this,i);
                break;
            }
            count++;
        }
        if (count >= minCount) {
            return true;
        }
        // Too few repetitions: undo any partial progress, as Sequence does.
        // TODO fix failure
        p.index = originalIndex;
        return false;
    }
}
/**
 * A pick that generates one randomly chosen code point from a UnicodeSet and
 * matches any single code point contained in that set.
 */
private static class CodePoint extends FinalPick {
    private UnicodeSet source;

    private CodePoint(UnicodeSet source) {
        this.source = source;
    }

    // Picks an index into the set using the target's random source and
    // appends the code point found there.
    protected void addTo(Target target) {
        target.append(source.charAt(pick(target.random,0,source.size()-1)));
    }

    protected boolean match(String s, Position p) {
        // At or past the end of the input there is no code point to test;
        // report a mismatch instead of letting UTF16.charAt throw.
        if (p.index < s.length()) {
            int cp = UTF16.charAt(s, p.index);
            if (source.contains(cp)) {
                p.index += UTF16.getCharCount(cp);
                return true;
            }
        }
        p.setMax("codePoint");
        return false;
    }

    // Returns the bare name if it starts with "$" and was already described;
    // otherwise the set's own pattern string.
    public String getInternal(int depth, Set alreadySeen) {
        String result = checkName(name, alreadySeen);
        if (result.startsWith("$")) return result;
        return source.toString();
    }
}
/**
 * A pick whose output changes gradually: each new value generated by the
 * wrapped item is merged, code point by code point, with the value emitted
 * the previous time. The first call emits the item's value unchanged.
 */
static class Morph extends ItemPick {
Morph(Pick item) {
super(item);
}
// Value emitted by the previous addTo call; null before the first call.
private String lastValue = null;
// Scratch target the item generates into; its random source is null here and
// is copied from the caller's target at the start of every addTo call.
private Target addBuffer = Target.make(this, null, new Quoter.RuleQuoter());
private StringBuffer mergeBuffer = new StringBuffer();
// NOTE(review): choice below has four weights and minCount 0, so toIndex
// yields 0..3. With COPY_LAST = 3 and SKIP = 4, index 2 (weight 100) matches
// no branch in addTo and SKIP is never produced. Possibly COPY_LAST = 2,
// SKIP = 3, LEAST_SKIP = 3 was intended - confirm before changing, since
// that alters the generated data.
private static final int COPY_NEW = 0, COPY_BOTH = 1, COPY_LAST = 3, SKIP = 4,
LEAST_SKIP = 4;
// give weights to the above. make sure we delete about the same as we insert
private static final WeightedIndex choice = new WeightedIndex(0)
.add(new int[] {10, 10, 100, 10});
protected void addTo(Target target) {
// get contents into separate buffer
addBuffer.copyState(target);
addBuffer.clear();
item.addTo(addBuffer);
String newValue = addBuffer.get();
if (DEBUG) System.out.println("Old: " + lastValue + ", New:" + newValue);
// if not first one, merge with old
if (lastValue != null) {
mergeBuffer.setLength(0);
int lastIndex = 0;
int newIndex = 0;
// the new length is a random value between old and new.
// (both arguments are ints, so this resolves to the int overload of pick)
int newLenLimit = (int) pick(target.random, lastValue.length(), newValue.length());
// Stop as soon as the merged text is long enough or either input is used up.
while (mergeBuffer.length() < newLenLimit
&& newIndex < newValue.length()
&& lastIndex < lastValue.length()) {
int c = choice.toIndex(target.nextDouble());
// Step through the new value; the code point is copied only when c < LEAST_SKIP.
if (c == COPY_NEW || c == COPY_BOTH || c == SKIP) {
newIndex = getChar(newValue, newIndex, mergeBuffer, c < LEAST_SKIP);
if (mergeBuffer.length() >= newLenLimit) break;
}
// Step through the previous value under the same copy rule.
if (c == COPY_LAST || c == COPY_BOTH || c == SKIP) {
lastIndex = getChar(lastValue, lastIndex, mergeBuffer, c < LEAST_SKIP);
}
}
newValue = mergeBuffer.toString();
}
lastValue = newValue;
target.append(newValue);
if (DEBUG) System.out.println("Result: " + newValue);
}
public String getInternal(int depth, Set alreadySeen) {
String result = checkName(name, alreadySeen);
if (result.startsWith("$")) return result;
return indent(depth) + result + "MORPH("
+ item.getInternal(depth+1, alreadySeen)
+ ")";
}
/* (non-Javadoc)
* @see Pick#match(java.lang.String, Pick.Position)
*/
// Matching is not implemented for Morph: this always reports a mismatch.
protected boolean match(String input, Position p) {
// TODO Auto-generated method stub
return false;
}
}
/**
 * Steps over the code point at newIndex in newValue, appending it to
 * mergeBuffer when copy is true.
 * @return the index just past that code point, or newIndex unchanged when it
 *   is already at or beyond the end of newValue
 */
static int getChar(String newValue, int newIndex, StringBuffer mergeBuffer, boolean copy) {
    if (newIndex < newValue.length()) {
        final int codePoint = UTF16.charAt(newValue, newIndex);
        if (copy) {
            UTF16.append(mergeBuffer, codePoint);
        }
        return newIndex + UTF16.getCharCount(codePoint);
    }
    return newIndex;
}
/*
// quoted add
appendQuoted(target, addBuffer.toString(), quoteBuffer);
// fix buffers
StringBuffer swapTemp = addBuffer;
addBuffer = source;
source = swapTemp;
}
}
*/
/**
 * A pick that generates its item with the target's quoter switched into
 * quoting mode, and switches quoting off again afterwards.
 */
static class Quote extends ItemPick {
    Quote(Pick item) {
        super(item);
    }

    protected void addTo(Target target) {
        target.quoter.setQuoting(true);
        item.addTo(target);
        target.quoter.setQuoting(false);
    }

    // Matching is not supported for quoted content; always a mismatch.
    protected boolean match(String s, Position p) {
        return false;
    }

    public String getInternal(int depth, Set alreadySeen) {
        String label = checkName(name, alreadySeen);
        if (label.startsWith("$")) {
            return label;
        }
        StringBuffer text = new StringBuffer();
        text.append(indent(depth)).append(label).append("QUOTE(");
        text.append(item.getInternal(depth+1, alreadySeen));
        text.append(")");
        return text.toString();
    }
}
/**
 * A pick for a fixed string. The literal text is stored in the inherited
 * name field.
 */
private static class Literal extends FinalPick {
    private Literal(String source) {
        this.name = source;
    }

    public String toString() {
        return name;
    }

    protected void addTo(Target target) {
        target.append(name);
    }

    // Succeeds when the input contains the literal at the current position.
    protected boolean match(String input, Position p) {
        final int length = name.length();
        if (!input.regionMatches(p.index, name, 0, length)) {
            p.setMax("literal");
            return false;
        }
        p.index += length;
        return true;
    }

    public String getInternal(int depth, Set alreadySeen) {
        return "'" + name + "'";
    }
}
/**
 * Mutable parse state for Pick.match: the current index into the input, a
 * record of the furthest failure, and a table of (index, item, pick)
 * combinations already known to fail.
 */
public static class Position {
    // failures.get(index) is an ArrayList indexed by item number whose
    // elements are Sets of the picks that failed there; missing entries are
    // null or simply beyond the end of the list.
    public ArrayList failures = new ArrayList();
    public int index;
    public int maxInt;
    public String maxType;

    /**
     * Records a failure of the given type at the current index, provided it
     * lies at least as far into the input as any failure recorded so far.
     */
    public void setMax(String type) {
        if (index >= maxInt) {
            maxInt = index; // remember how far we got, not only the type
            maxType = type;
        }
    }

    public String toString() {
        return "index; " + index
            + ", maxInt:" + maxInt
            + ", maxType: " + maxType;
    }

    /**
     * @return true if setFailure(pick, item) was called earlier at the
     *   current index
     */
    public boolean isFailure(Pick pick, int item) {
        ArrayList perItem = (ArrayList) getOrNull(failures, index);
        if (perItem == null) return false;
        Set failed = (Set) getOrNull(perItem, item);
        // A pick is a known failure when it IS in the recorded set.
        return failed != null && failed.contains(pick);
    }

    /** Records that pick's option number item failed at the current index. */
    public void setFailure(Pick pick, int item) {
        ArrayList perItem = (ArrayList) getOrNull(failures, index);
        if (perItem == null) {
            perItem = new ArrayList();
            put(failures, index, perItem);
        }
        Set failed = (Set) getOrNull(perItem, item);
        if (failed == null) {
            failed = new HashSet();
            put(perItem, item, failed);
        }
        failed.add(pick);
    }

    // Reads list[i], treating positions outside the list as empty.
    private static Object getOrNull(ArrayList list, int i) {
        return (i >= 0 && i < list.size()) ? list.get(i) : null;
    }

    // Stores value at list[i], padding the list with nulls up to i first.
    private static void put(ArrayList list, int i, Object value) {
        while (list.size() <= i) {
            list.add(null);
        }
        list.set(i, value);
    }
}
/*
public static final Pick NOTHING = new Nothing();
private static class Nothing extends FinalPick {
protected void addTo(Target target) {}
protected boolean match(String input, Position p) {
return true;
}
public String getInternal(int depth, Set alreadySeen) {
return indent(depth) + "ø";
}
}
*/
/**
 * Base class for operations applied to every pick in a graph. It keeps a set
 * of the picks already entered so that shared or recursive structures are
 * descended into only once.
 */
abstract static class Visitor {
    Set already = new HashSet();

    // Note: each visitor should return the Pick that will replace a (or a itself)
    abstract Pick handle(Pick a);

    /**
     * Marks item as entered.
     * @return true if it had been entered before this call
     */
    boolean alreadyEntered(Pick item) {
        final boolean seenBefore = already.contains(item);
        already.add(item);
        return seenBefore;
    }

    /** Forgets all entered picks. */
    void reset() {
        already.clear();
    }
}
// Applies visitor to this pick and returns the pick that should take its
// place; the implementations in this file also visit contained picks.
protected abstract Pick visit(Visitor visitor);
/**
 * Visitor that swaps in a replacement for every pick with a given name.
 */
static class Replacer extends Visitor {
    String toReplace;
    Pick replacement;

    Replacer(String toReplace, Pick replacement) {
        this.toReplace = toReplace;
        this.replacement = replacement;
    }

    // A pick named toReplace yields the replacement; any other pick is kept.
    public Pick handle(Pick a) {
        return toReplace.equals(a.name) ? replacement : a;
    }
}
// Base for picks that contain no other picks: visiting one only asks the
// visitor what should replace it.
abstract private static class FinalPick extends Pick {
public Pick visit(Visitor visitor) {
return visitor.handle(this);
}
}
/**
 * Base for picks that wrap exactly one other pick.
 */
private abstract static class ItemPick extends Pick {
    protected Pick item;

    ItemPick (Pick item) {
        this.item = item;
    }

    // The visitor handles this pick first; the wrapped item is visited (and
    // possibly replaced) only the first time this pick is entered.
    public Pick visit(Visitor visitor) {
        final Pick replacementForThis = visitor.handle(this);
        final boolean firstEntry = !visitor.alreadyEntered(this);
        if (firstEntry && item != null) {
            item = item.visit(visitor);
        }
        return replacementForThis;
    }
}
/**
 * Base for picks that hold an ordered array of other picks.
 */
private abstract static class ListPick extends Pick {
    protected Pick[] items = new Pick[0];

    /** @return this if it has more than one item, its only item, or null if empty */
    Pick simplify() {
        switch (items.length) {
            case 0: return null;
            case 1: return items[0];
            default: return this;
        }
    }

    int size() {
        return items.length;
    }

    Pick getLast() {
        return items[items.length-1];
    }

    void setLast(Pick newOne) {
        items[items.length-1] = newOne;
    }

    // Grows the array and copies the new picks in after the existing ones.
    protected void addInternal(Pick[] objs) {
        final int oldLength = items.length;
        items = realloc(items, oldLength + objs.length);
        System.arraycopy(objs, 0, items, oldLength, objs.length);
    }

    // The visitor handles this pick first; the items are visited (and
    // possibly replaced) only the first time this pick is entered.
    public Pick visit(Visitor visitor) {
        final Pick replacementForThis = visitor.handle(this);
        if (!visitor.alreadyEntered(this)) {
            for (int i = 0; i < items.length; ++i) {
                items[i] = items[i].visit(visitor);
            }
        }
        return replacementForThis;
    }
}
/**
* Simple class to distribute a number between 0 (inclusive) and 1 (exclusive) among
* a number of indices, where each index is weighted.
* Item weights may be zero, but cannot be negative.
* @author Davis
*/
// As in other case, we use an array for runtime speed; don't care about buildspeed.
public static class WeightedIndex {
private int[] weights = new int[0];
private int minCount = 0;
private double total;
public WeightedIndex(int minCount) {
this.minCount = minCount;
}
public WeightedIndex add(int count, int itemWeights) {
if (count > 0) {
int[] newWeights = new int[count];
if (itemWeights < 1) itemWeights = 1;
Arrays.fill(newWeights, 0, count, itemWeights);
add(1, newWeights);
}
return this; // for chaining
}
public WeightedIndex add(int[] newWeights) {
return add(newWeights.length, newWeights);
}
public WeightedIndex add(int maxCount, int[] newWeights) {
if (newWeights == null) newWeights = new int[]{1};
int oldLen = weights.length;
if (maxCount < newWeights.length) maxCount = newWeights.length;
weights = (int[]) realloc(weights, weights.length + maxCount);
System.arraycopy(newWeights, 0, weights, oldLen, newWeights.length);
int lastWeight = weights[oldLen + newWeights.length-1];
for (int i = oldLen + newWeights.length; i < maxCount; ++i) {
weights[i] = lastWeight;
}
total = 0;
for (int i = 0; i < weights.length; ++i) {
if (weights[i] < 0) {
throw new RuntimeException("only positive weights: " + i);
}
total += weights[i];
}
return this; // for chaining
}
// TODO, make this more efficient
public int toIndex(double zeroToOne) {
double weight = zeroToOne*total;
int i;
for (i = 0; i < weights.length; ++i) {
weight -= weights[i];
if (weight <= 0) break;
}
return i + minCount;
}
public String toString() {
String result = "";
for (int i = 0; i < minCount; ++i) {
if (result.length() != 0) result += ",";
result += "0";
}
for (int i = 0; i < weights.length; ++i) {
if (result.length() != 0) result += ",";
result += weights[i];
}
return result;
}
}
/*
private static Pick convert(Object obj) {
if (obj instanceof Pick) return (Pick)obj;
return new Literal(obj.toString(), false);
}
*/
// Useful statics
/**
 * Picks a random int from start to end, both inclusive, using one call to
 * random.nextDouble().
 */
static public int pick(Random random, int start, int end) {
    final double fraction = random.nextDouble();
    final int span = end + 1 - start;
    return start + (int)(fraction * span);
}
// Returns start plus a random fraction of (end + 1 - start).
// NOTE(review): the "+ 1" mirrors the int overload; for doubles it makes the
// result range [start, end + 1) - confirm whether that is intended.
static public double pick(Random random, double start, double end) {
return start + (random.nextDouble() * (end + 1 - start));
}
// Returns true when random.nextDouble() is at most percent. Despite its name,
// percent is compared directly with the draw, so it acts as a fraction
// (0.25 rather than 25).
static public boolean pick(Random random, double percent) {
return random.nextDouble() <= percent;
}
// Returns the element of s at a randomly picked index from 0 to s.size()-1.
static public int pick(Random random, UnicodeSet s) {
return s.charAt(pick(random, 0,s.size()-1));
}
// Returns the element of source at a randomly picked index from 0 to
// source.length-1.
static public String pick(Random random, String[] source) {
return source[pick(random, 0, source.length-1)];
}
// these utilities really ought to be in Java
/**
 * Returns a new array of length newSize holding as many leading elements of
 * source as fit; any extra slots are 0.0.
 */
public static double[] realloc(double[] source, int newSize) {
    double[] resized = new double[newSize];
    final int keep = Math.min(newSize, source.length);
    if (keep != 0) {
        System.arraycopy(source, 0, resized, 0, keep);
    }
    return resized;
}
/**
 * Returns a new array of length newSize holding as many leading elements of
 * source as fit; any extra slots are 0.
 */
public static int[] realloc(int[] source, int newSize) {
    int[] resized = new int[newSize];
    final int keep = Math.min(newSize, source.length);
    if (keep != 0) {
        System.arraycopy(source, 0, resized, 0, keep);
    }
    return resized;
}
/**
 * Returns a new array of length newSize holding as many leading elements of
 * source as fit; any extra slots are null.
 */
public static Pick[] realloc(Pick[] source, int newSize) {
    Pick[] resized = new Pick[newSize];
    final int keep = Math.min(newSize, source.length);
    if (keep != 0) {
        System.arraycopy(source, 0, resized, 0, keep);
    }
    return resized;
}
// test utilities
// Appends toAdd to target unquoted. Utility.appendToRule is first called with
// -1 (see the trailing comment on that line) so that a quote left open in
// quoteBuffer is closed before the raw text goes in.
private static void append(StringBuffer target, String toAdd, StringBuffer quoteBuffer) {
Utility.appendToRule(target, (int)-1, true, false, quoteBuffer); // close previous quote
if (DEBUG) System.out.println("\"" + toAdd + "\"");
target.append(toAdd);
}
// Appends toAdd to target through Utility.appendToRule, with quoteBuffer as
// the pending-quote buffer; presumably this applies rule quoting where
// needed - confirm against Utility.appendToRule.
private static void appendQuoted(StringBuffer target, String toAdd, StringBuffer quoteBuffer) {
if (DEBUG) System.out.println("\"" + toAdd + "\"");
Utility.appendToRule(target, toAdd, false, false, quoteBuffer);
}
/*
public static abstract class MatchHandler {
public abstract void handleString(String source, int start, int limit);
public abstract void handleSequence(String source, int start, int limit);
public abstract void handleAlternation(String source, int start, int limit);
}
*/
/*
// redistributes random value
// values are still between 0 and 1, but with a different distribution
public interface Spread {
public double spread(double value);
}
// give the weight for the high end.
// values are linearly scaled according to the weight.
static public class SimpleSpread implements Spread {
static final Spread FLAT = new SimpleSpread(1.0);
boolean flat = false;
double aa, bb, cc;
public SimpleSpread(double maxWeight) {
if (maxWeight > 0.999 && maxWeight < 1.001) {
flat = true;
} else {
double q = (maxWeight - 1.0);
aa = -1/q;
bb = 1/(q*q);
cc = (2.0+q)/q;
}
}
public double spread(double value) {
if (flat) return value;
value = aa + Math.sqrt(bb + cc*value);
if (value < 0.0) return 0.0; // catch math gorp
if (value >= 1.0) return 1.0;
return value;
}
}
static public int pick(Spread spread, Random random, int start, int end) {
return start + (int)(spread.spread(random.nextDouble()) * (end + 1 - start));
}
*/
}

View file

@ -0,0 +1,71 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Quoter.java,v $
* $Date: 2003/11/21 01:03:39 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.impl.Utility;
/**
 * Accumulates generated text in a buffer, with a switchable quoting mode.
 * This base class appends text verbatim regardless of the mode; subclasses
 * decide what quoting means.
 */
public abstract class Quoter {
    private static boolean DEBUG = false;

    protected boolean quoting = false;
    protected StringBuffer output = new StringBuffer();

    /** Switches quoting mode on or off. */
    public void setQuoting(boolean value) {
        quoting = value;
    }

    public boolean isQuoting() {
        return quoting;
    }

    /** Discards all accumulated text and leaves quoting mode. */
    public void clear() {
        quoting = false;
        output.setLength(0);
    }

    /** @return the length of the accumulated output */
    public int length() {
        return output.length();
    }

    /** Appends a string. @return this, for chaining */
    public Quoter append(String string) {
        output.append(string);
        return this;
    }

    /** Appends a single code point by way of append(String). */
    public Quoter append(int codepoint) {
        return append(UTF16.valueOf(codepoint));
    }

    // warning, allows access to internals
    /** Leaves quoting mode (which finishes any open quote) and returns the text. */
    public String toString() {
        setQuoting(false); // finish quoting
        return output.toString();
    }

    /**
     * Implements standard ICU rule quoting
     */
    public static class RuleQuoter extends Quoter {
        // Text handed to Utility.appendToRule that is still waiting to be
        // written to output.
        private StringBuffer quoteBuffer = new StringBuffer();

        public void setQuoting(boolean value) {
            if (quoting == value) return;
            if (quoting) { // stop quoting
                Utility.appendToRule(output, (int)-1, true, false, quoteBuffer); // close previous quote
            }
            quoting = value;
        }

        /**
         * Also discards any pending quoted text. Without this, text left
         * over from a quote that was never closed would show up in the next
         * string built with this quoter.
         */
        public void clear() {
            super.clear();
            quoteBuffer.setLength(0);
        }

        public Quoter append(String s) {
            if (DEBUG) System.out.println("\"" + s + "\"");
            if (quoting) {
                Utility.appendToRule(output, s, false, false, quoteBuffer);
            } else {
                output.append(s);
            }
            return this;
        }
    }
}

View file

@ -0,0 +1,96 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Tabber.java,v $
* $Date: 2003/11/21 01:03:39 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
/**
 * Turns a line of tab-separated fields into laid-out text. Subclasses decide
 * how each field is placed by implementing process_field.
 */
public abstract class Tabber {
    static final byte LEFT = 0, CENTER = 1, RIGHT = 2;

    /**
     * Repeats a string n times
     * @param source the string to repeat
     * @param times number of copies; zero or a negative value gives ""
     * @return source concatenated with itself times times
     */
    // TODO - optimize repeats using doubling?
    public static String repeat(String source, int times) {
        if (times <= 0) return "";
        if (times == 1) return source;
        StringBuffer result = new StringBuffer();
        for (; times > 0; --times) {
            result.append(source);
        }
        return result.toString();
    }

    /**
     * Splits source at tab characters and passes each field, with its
     * zero-based number, to process_field.
     * @return the text the field handler accumulated
     */
    public String process(String source) {
        StringBuffer result = new StringBuffer();
        int lastPos = 0;
        int count = 0;
        // The loop only ends once lastPos has reached the end of source, so
        // no remainder is left to copy afterwards.
        while (lastPos < source.length()) {
            int pos = source.indexOf('\t', lastPos);
            if (pos < 0) pos = source.length();
            process_field(count, source, lastPos, pos, result);
            lastPos = pos+1;
            ++count;
        }
        return result.toString();
    }

    /**
     * Appends field number count, the text source[start, limit), to output.
     */
    public abstract void process_field(int count, String source, int start, int limit, StringBuffer output);

    /**
     * Tabber for monospaced output. The tabs array holds pairs: the column of
     * a field at each even index and, judging by the LEFT/CENTER/RIGHT
     * constants and the "skip type" step, a type value at each odd index that
     * is not used yet.
     */
    public static class MonoTabber extends Tabber {
        private int[] tabs;

        public MonoTabber(int[] tabs) {
            this.tabs = (int[]) tabs.clone();
        }

        /**
         * Pads with spaces up to each field's column and then appends the
         * field. Once the tabs array is used up, the rest of source is
         * appended unchanged.
         */
        public String process(String source) {
            StringBuffer result = new StringBuffer();
            int lastPos = 0;
            int count = 0;
            while (lastPos < source.length() && count < tabs.length) {
                int pos = source.indexOf('\t', lastPos);
                if (pos < 0) pos = source.length();
                String piece = source.substring(lastPos, pos);
                if (result.length() < tabs[count]) {
                    result.append(repeat(" ", tabs[count] - result.length()));
                    // TODO fix type
                }
                result.append(piece);
                lastPos = pos+1;
                count += 2; // skip type
            }
            if (lastPos < source.length()) {
                result.append(source.substring(lastPos));
            }
            return result.toString();
        }

        /**
         * Appends one field, padded to its column when the output is still
         * short of it and otherwise separated by a single space. Fields with
         * no column in the tabs array are also separated by a single space,
         * rather than indexing past the end of the array.
         */
        public void process_field(int count, String source, int start, int limit, StringBuffer output) {
            String piece = source.substring(start, limit);
            int column = count*2;
            if (column < tabs.length && output.length() < tabs[column]) {
                output.append(repeat(" ", tabs[column] - output.length()));
                // TODO fix type
            } else {
                output.append(" ");
            }
            output.append(piece);
        }
    }
}

View file

@ -0,0 +1,247 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/TestBNF.java,v $
* $Date: 2003/11/21 01:03:39 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import java.text.ParseException;
import java.util.Random;
//TODO integrate this into the test framework
import com.ibm.icu.text.UnicodeSet;
public class TestBNF {
// Small rule sets that main() passes one at a time to testBNF. Each element
// is a complete rule set whose start rule is $root.
static final String[] testRules = {
"$root = [ab]{3};",
"$root = [ab]{3,};",
"$root = [ab]{3,5};",
"$root = [ab]*;",
"$root = [ab]?;",
"$root = [ab]+;",
"$us = [a-z];" +
"$root = [0-9$us];",
"$root = a $foo b? 25% end 30% | $foo 50%;\r\n" +
"$foo = c{1,5} 20%;",
"$root = [a-z]{1,5}~;",
"$root = [a-z]{5}~;",
"$root = '\\' (u | U0010 | U000 $hex) $hex{4} ;\r\n" +
"$hex = [0-9A-Fa-f];",
};
// Grammar producing random UnicodeSet patterns, nested up to four levels.
// The upper bound of $char is \U0010FFFF (eight hex digits). The previous
// nine-digit form was read as U+10FFF followed by a literal 'F', so most
// supplementary code points were never generated.
static String unicodeSetBNF = "" +
    "$root = $leaf | '[' $s $root2 $s ']' ;\r\n" +
    "$root2 = $leaf | '[' $s $root3 $s ']' | ($root3 $s ($op $root3 $s){0,3}) ;\r\n" +
    "$root3 = $leaf | '[' $s $root4 $s ']' | ($root4 $s ($op $root4 $s){0,3}) ;\r\n" +
    "$root4 = $leaf | ($leaf $s ($op $leaf $s){0,3}) ;\r\n" +
    "$op = (('&' | '-') $s)? 70%;\r\n" +
    "$leaf = '[' $s $list $s ']' | $prop;\r\n" +
    "$list = ($char $s ('-' $s $char $s)? 30%){1,5} ;\r\n" +
    "$prop = '\\' (p | P) '{' $s $propName $s '}' | '[:' '^'? $s $propName $s ':]';\r\n" +
    "$needsQuote = [\\-\\][:whitespace:][:control:]] ;\r\n" +
    "$char = [[\\u0000-\\U0010FFFF]-$needsQuote] | $quoted ;\r\n" +
    "$quoted = '\\' ('u' | 'U0010' | 'U000' $hex) $hex{4} ;\r\n" +
    "$hex = [0-9A-Fa-f];\r\n" +
    "$s = ' '? 20%;\r\n" +
    "$propName = (whitespace | ws) | (uppercase | uc) | (lowercase | lc) | $category;\r\n" +
    "$category = ((general | gc) $s '=' $s)? $catvalue;\r\n" +
    "$catvalue = (C | Other | Cc | Control | Cf | Format | Cn | Unassigned | L | Letter);\r\n";
/**
 * Runs the tokenizer samples, then generates 20 strings for each entry of
 * testRules and for the UnicodeSet grammar.
 */
public static void main (String[] args) {
    testTokenizer();
    int ruleIndex = 0;
    while (ruleIndex < testRules.length) {
        testBNF(testRules[ruleIndex], null, 20);
        ++ruleIndex;
    }
    testBNF(unicodeSetBNF, null, 20);
    //testParser();
}
/**
 * Builds a BNF from rules with a fixed random seed of 0, prints the rules
 * and the BNF's internal form, then prints count generated strings.
 * @param chars set registered under the name "$chars"; main() passes null
 */
static void testBNF(String rules, UnicodeSet chars, int count) {
    BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
        .addSet("$chars", chars)
        .addRules(rules)
        .complete();
    System.out.println("====================================");
    System.out.println("BNF");
    System.out.println(rules);
    System.out.println(bnf.getInternal());
    int produced = 0;
    while (produced < count) {
        System.out.println(produced + ": " + bnf.next());
        ++produced;
    }
}
/*
public static testManual() {
Pick p = Pick.maybe(75,Pick.unquoted("a"));
testOr(p, 1);
p = Pick.or(new String[]{"", "a", "bb", "ccc"});
testOr(p, 3);
p = Pick.repeat(3, 5, new int[]{20, 30, 20}, "a");
testOr(p, 5);
p = Pick.codePoint("[a-ce]");
testCodePoints(p);
p = Pick.codePoint("[a-ce]");
testCodePoints(p);
p = Pick.string(2, 8, p);
testOr(p,10);
p = Pick.or(new String[]{"", "a", "bb", "ccc"});
p = Pick.and(p).and2(p).and2("&");
testMatch(p, "abb&");
testMatch(p, "bba");
// testEnglish();
}
*/
// Matches source against p starting from a fresh Position and prints the
// outcome together with the final position.
static void testMatch(Pick p, String source) {
Pick.Position pp = new Pick.Position();
boolean value = p.match(source, pp);
System.out.println("Match: " + value + ", " + pp);
}
/*
static void testParser() {
try {
Pick.Target target = new Pick.Target();
for (int i = 0; i < rules.length; ++i) {
target.addRule(rules[i]);
}
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
*/
/**
 * Simple histogram: one counter for each index from 0 to max.
 */
static class Counts {
    int[] counts;

    /** @param max largest index that can be counted */
    Counts(int max) {
        counts = new int[max+1];
    }

    /** Adds one to the counter at index. */
    void inc(int index) {
        counts[index] += 1;
    }

    /** Prints every non-zero counter as "index: count", then a blank line. */
    void show() {
        System.out.println("Printing Counts");
        for (int i = 0; i < counts.length; ++i) {
            if (counts[i] != 0) {
                System.out.println(i + ": " + counts[i]);
            }
        }
        System.out.println();
    }
}
// Toy English grammar, one rule per element; only the commented-out
// testParser refers to it. The adverb list used to be defined under the name
// $article, while $adverb and $articles were referenced but never defined.
// The article list below ("the", "a") is taken from the commented-out
// testEnglish in this class.
static final String[] rules = {
    "$s = ' ';",
    "$noun = dog | house | government | wall | street | zebra;",
    "$adjective = red | glorious | simple | nasty | heavy | clean;",
    "$articles = the | a;",
    "$adverb = quickly | oddly | silently | boldly;",
    "$adjectivePhrase = ($adverb $s)? 50% $adjective* 0% 30% 20% 10%;",
    "$nounPhrase = $articles $s ($adjectivePhrase $s)? 30% $noun;",
    "$verb = goes | fishes | walks | sleeps;",
    "$tverb = carries | lifts | overturns | hits | jumps on;",
    "$copula = is 30% | seems 10%;",
    "$sentence1 = $nounPhrase $s $verb $s ($s $adverb)? 30%;",
    "$sentence2 = $nounPhrase $s $tverb $s $nounPhrase ($s $adverb)? 30%;",
    "$sentence3 = $nounPhrase $s $copula $s $adjectivePhrase;",
    "$conj = but | and | or;",
    "$sentence4 = $sentence1 | $sentence2 | $sentence3 20% | $sentence4 $conj $sentence4 20%;",
    "$sentence = $sentence4 '.';"};
/*
private static void testEnglish() {
Pick s = Pick.unquoted(" ");
Pick verbs = Pick.or(new String[]{"goes", "fishes", "walks", "sleeps"});
Pick transitive = Pick.or(new String[]{"carries", "lifts", "overturns", "hits", "jumps on"});
Pick nouns = Pick.or(new String[]{"dog", "house", "government", "wall", "street", "zebra"});
Pick adjectives = Pick.or(new String[]{"red", "glorious", "simple", "nasty", "heavy", "clean"});
Pick articles = Pick.or(new String[]{"the", "a"});
Pick adverbs = Pick.or(new String[]{"quickly", "oddly", "silently", "boldly"});
Pick adjectivePhrase = Pick.and(0.5, Pick.and(adverbs).and2(s)).and2(adjectives);
Pick nounPhrase = Pick.and(articles).and2(s)
.and2(0.3, Pick.and(adjectivePhrase).and2(s))
.and2(nouns);
Pick copula = Pick.or(new String[]{"is", "seems"});
Pick sentence1 = Pick.and(nounPhrase).and2(s).and2(verbs)
.and2(0.3, Pick.and(s).and2(adverbs)).name("s1");
Pick sentence2 = Pick.and(nounPhrase).and2(s).and2(transitive).and2(s).and2(nounPhrase)
.and2(0.3, Pick.and(s).and2(adverbs)).name("s2");
Pick sentence3 = Pick.and(nounPhrase).and2(s).and2(copula).and2(s).and2(adjectivePhrase).name("s3");
Pick conj = Pick.or(new String[]{", but", ", and", ", or"});
Pick forward = Pick.unquoted("forward");
Pick pair = Pick.and(forward).and2(conj).and2(s).and2(forward).name("part");
Pick sentenceBase = Pick.or(sentence1).or2(sentence2).or2(sentence3).or2(0.6666, pair).name("sentence");
sentenceBase.replace(forward, sentenceBase);
Pick sentence = Pick.and(sentenceBase).and2(Pick.unquoted("."));
Pick.Target target = Pick.Target.make(sentence);
for (int i = 0; i < 50; ++i) {
System.out.println(i + ": " + target.next());
}
}
private static void testOr(Pick p, int count) {
Pick.Target target = Pick.Target.make(p);
Counts counts = new Counts(count + 10);
for (int i = 0; i < 1000; ++i) {
String s = target.next();
counts.inc(s.length());
}
counts.show();
}
private static void testCodePoints(Pick p) {
Pick.Target target = Pick.Target.make(p);
Counts counts = new Counts(128);
for (int i = 0; i < 10000; ++i) {
String s = target.next();
counts.inc(s.charAt(0));
}
counts.show();
}
*/
/**
 * Prints "Start" followed by 100 numbered strings produced by a BNF
 * generator built from the single rule "[a-z]{2,5}". The generator is
 * seeded with Random(0).
 */
public static void printRandoms() {
    Random seeded = new Random(0);
    BNF generator = new BNF(seeded, new Quoter.RuleQuoter())
        .addRules("[a-z]{2,5}")
        .complete();
    System.out.println("Start");
    int count = 0;
    while (count < 100) {
        String generated = generator.next();
        System.out.println(count + ")\t" + generated);
        ++count;
    }
}
/**
 * Runs a Tokenizer over a fixed list of sample inputs. For each sample it
 * prints the input and then every token type returned by next(), up to and
 * including the DONE token.
 */
public static void testTokenizer() {
    final String[] samples = {
        "a'b'c d #abc\r e", "'a '123 321",
        "\\\\", "a'b", "a'", "abc def%?ghi", "%", "a", "\\ a", "a''''b"};
    Tokenizer tokenizer = new Tokenizer();
    for (int s = 0; s < samples.length; ++s) {
        tokenizer.setSource(samples[s]);
        System.out.println();
        System.out.println("Input: " + tokenizer.getSource());
        int type;
        // at least one token (possibly DONE itself) is always read and printed
        do {
            type = tokenizer.next();
            System.out.println(tokenizer.toString(type, false));
        } while (type != Tokenizer.DONE);
    }
}
}

View file

@ -0,0 +1,213 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java,v $
* $Date: 2003/11/21 01:03:39 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
// TODO integrate this into the test framework
import java.util.TreeSet;
import java.util.Iterator;
import java.text.Collator;
import java.util.Locale;
import java.io.*;
import java.util.Random;
import java.text.ParseException;
import java.util.Set;
import java.util.Iterator;
import java.util.TreeSet;
import java.util.Locale;
import java.util.regex.*;
import com.ibm.icu.text.*;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;
public class TestBagFormatter {
/**
 * Prints every available property alias (requested in SHORT form) together
 * with its LONG form to System.out, sorted with an English collator.
 * @param showValues if true, each property is followed by its value aliases
 *        and their LONG forms
 */
static final void generatePropertyAliases(boolean showValues) {
    UnicodePropertySource source = new UnicodePropertySource.ICU()
        .setNameChoice(UProperty.NameChoice.SHORT);
    Collator english = Collator.getInstance(Locale.ENGLISH);
    TreeSet propertyNames = new TreeSet(english);
    TreeSet valueNames = new TreeSet(english);
    source.getAvailablePropertyAliases(propertyNames);
    for (Iterator props = propertyNames.iterator(); props.hasNext();) {
        String propAlias = (String) props.next();
        source.setPropertyAlias(propAlias);
        System.out.println();
        System.out.println(propAlias + ";\t" + source.getPropertyAlias(UProperty.NameChoice.LONG));
        if (showValues) {
            // the value set is reused between properties
            valueNames.clear();
            source.getAvailablePropertyValueAliases(valueNames);
            for (Iterator vals = valueNames.iterator(); vals.hasNext();) {
                String valueAlias = (String) vals.next();
                System.out.println("\t" + valueAlias
                    + ";\t" + source.getPropertyValueAlias(valueAlias, UProperty.NameChoice.LONG));
            }
        }
    }
}
/**
 * Scratch driver. Prints "Start", lists all property aliases with their
 * values, shows the names of two numeric UnicodeSets, then returns; "End" is
 * always printed on the way out. The statements after the early return are
 * kept as toggles for manual experiments.
 */
public static void main(String[] args) throws Exception {
    System.out.println("Start");
    try {
        //readCharacters();
        generatePropertyAliases(true);
        BagFormatter bf = new BagFormatter();
        String[] patterns = {"[:numeric_value=2:]", "[:numeric_type=numeric:]"};
        for (int i = 0; i < patterns.length; ++i) {
            UnicodeSet us = new UnicodeSet(patterns[i]);
            System.out.println(bf.showSetNames(patterns[i], us));
        }
        // "if (true)" keeps the compiler from rejecting the code below as unreachable
        if (true) return;
        showNames("Name", ".*MARK.*");
        //showNames("NFD", "a.+");
        //showNames("NFD", false);
        //showNames("Lowercase_Mapping", false);
        //TestUnicodePropertySource.test(true);
        //showNames(".*\\ \\-.*");
        //checkHTML();
        //testIsRTL();
        //TestTokenizer.test();
        //RandomCollator.generate("collationTest.txt", null);
        //TestPick.test();
        //printRandoms();
        //if (true) return;
        //testLocales();
        //if (true) return;
        /*
        TestCollator tc = new TestCollator();
        tc.test(RuleBasedCollator.getInstance(),1000);
        */
        /*
        StringBuffer sb = new StringBuffer();
        for (int i = 0; i < 100; ++i) {
            sb.setLength(0);
            rc.nextRule(sb);
            System.out.println(sb);
        }
        */
    } finally {
        System.out.println("End");
    }
}
/**
 * Writes one tab-separated line per collation locale that has a country code
 * (display country, country code, display language, language code) to
 * "countries.txt", sorted with the default collator.
 * @throws IOException declared for the writer opened through BagFormatter
 */
static void testLocales() throws IOException {
    Locale[] locales = Collator.getAvailableLocales();
    Set rows = new TreeSet(Collator.getInstance());
    for (int i = 0; i < locales.length; ++i) {
        Locale locale = locales[i];
        String country = locale.getCountry();
        if (country.length() == 0) {
            continue; // language-only locale
        }
        StringBuffer row = new StringBuffer();
        row.append("\t").append(locale.getDisplayCountry());
        row.append("\t").append(country);
        row.append("\t").append(locale.getDisplayLanguage());
        row.append("\t").append(locale.getLanguage());
        rows.add(row.toString());
    }
    //CollectionFormatter cf = new CollectionFormatter();
    PrintWriter pw = BagFormatter.openUTF8Writer("", "countries.txt", BagFormatter.CONSOLE);
    for (Iterator it = rows.iterator(); it.hasNext();) {
        pw.println(it.next());
    }
    pw.close();
}
/*
* Intended to use the number of significant digits to derive a rounding value.
* NOTE(review): the method below is an unfinished stub -- it computes the
* base-10 logarithm of value and discards it; digits is never read.
*/
// natural logarithm of 10, used to convert Math.log results to base 10
static final double LOG10 = Math.log(10);
public static void useSignificantDigits(double value, int digits) {
double log10 = Math.log(value)/LOG10; // log[e]
}
// Set used by isRTL: letters ([:L:]) whose bidi class is R or AL.
static final UnicodeSet RTL = new UnicodeSet("[[:L:]&[[:bidi class=R:][:bidi class=AL:]]]");
/**
 * Reports whether the first script associated with a locale contains any
 * character from the RTL set. When UScript.getCode returns null the script
 * defaults to LATIN.
 * @param loc locale to test
 * @return true if the script's character set intersects RTL
 */
static boolean isRTL(Locale loc) {
    // in 2.8 we can use the exemplar characters, but for 2.6 we have to work around it
    int[] scripts = UScript.getCode(loc);
    int script;
    if (scripts == null) {
        script = UScript.LATIN;
    } else {
        script = scripts[0];
    }
    UnicodeSet rtlInScript = new UnicodeSet()
        .applyIntPropertyValue(UProperty.SCRIPT, script)
        .retainAll(RTL);
    return rtlInScript.size() != 0;
}
/**
 * Prints the display name of every available Locale, prefixed with "R " or
 * "L " according to isRTL, in natural String order.
 */
static void testIsRTL() {
    Set lines = new TreeSet();
    Locale[] locales = Locale.getAvailableLocales();
    for (int i = 0; i < locales.length; ++i) {
        Locale locale = locales[i];
        String prefix = isRTL(locale) ? "R " : "L ";
        lines.add(prefix + locale.getDisplayName());
    }
    for (Iterator it = lines.iterator(); it.hasNext();) {
        System.out.println(it.next());
    }
}
// Forward transliterator "any-html": rewrites <, &, > and the double quote
// as the entities &lt; &amp; &gt; and &quot; respectively.
static final Transliterator toHTML = Transliterator.createFromRules(
"any-html",
"'<' > '&lt;' ;" +
"'&' > '&amp;' ;" +
"'>' > '&gt;' ;" +
"'\"' > '&quot;' ; ",
Transliterator.FORWARD);
// Reverse-direction transliterator "html-any": the rules map the four
// entities (each letter matched in either case) back to <, &, > and the
// double quote.
static final Transliterator fromHTML = Transliterator.createFromRules(
"html-any",
"'<' < '&'[lL][Tt]';' ;" +
"'&' < '&'[aA][mM][pP]';' ;" +
"'>' < '&'[gG][tT]';' ;" +
"'\"' < '&'[qQ][uU][oO][tT]';' ; ",
Transliterator.REVERSE);
/**
 * Prints a sample string, its toHTML form, and the result of running
 * fromHTML on that form and on its upper-cased variant.
 */
static void checkHTML() {
    final String original = "& n < b < \"ab\"";
    final String escaped = toHTML.transliterate(original);
    System.out.println("in: " + original);
    System.out.println("out: " + escaped);
    String restored = fromHTML.transliterate(escaped);
    System.out.println("in*: " + restored);
    String restoredFromUpper = fromHTML.transliterate(escaped.toUpperCase());
    System.out.println("IN*: " + restoredFromUpper);
}
/**
 * Prints the names of the characters selected by
 * UnicodePropertySource.getPropertySet(boolean, UnicodeSet) for a property.
 * @param propAlias alias of the property to examine
 * @param matches passed through as the charEqualsValue argument
 */
static void showNames(String propAlias, boolean matches) {
    UnicodePropertySource source = new UnicodePropertySource.ICU()
        .setPropertyAlias(propAlias);
    UnicodeSet selected = source.getPropertySet(matches, null);
    BagFormatter formatter = new BagFormatter();
    String title = propAlias + " with " + matches;
    System.out.println(formatter.showSetNames(title, selected));
}
/**
 * Prints the names of the characters whose value for a property matches a
 * regular expression in full.
 * @param propAlias alias of the property to examine
 * @param pattern java.util.regex pattern applied to each property value
 */
static void showNames(String propAlias, String pattern) {
    BagFormatter bf = new BagFormatter();
    UnicodeSet stuff;
    stuff = new UnicodePropertySource.ICU()
        .setPropertyAlias(propAlias)
        .getPropertySet(Pattern.compile(pattern).matcher(""), null);
    // fixed: the title lacked the space before "with", unlike the boolean overload
    System.out.println(bf.showSetNames(propAlias + " with " + pattern, stuff));
}
}

View file

@ -0,0 +1,325 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Tokenizer.java,v $
* $Date: 2003/11/21 01:03:39 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import java.text.ParsePosition;
import com.ibm.icu.text.*;
import com.ibm.icu.lang.*;
import com.ibm.icu.util.*;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.Map;
public class Tokenizer {
// text being tokenized; set by setSource
protected String source;
// accumulates the characters of the current STRING token
protected StringBuffer buffer = new StringBuffer();
// value of the most recent NUMBER token
protected long number;
// value of the most recent UNICODESET token
protected UnicodeSet unicodeSet = null;
// current read position in source, in UTF-16 code units
protected int index;
// true between a backup() call and the next() call that replays the token
boolean backedup = false;
// position recorded by next() once leading whitespace/comments are skipped
protected int lastIndex = -1;
// position to restore when a backed-up token is replayed
protected int nextIndex;
// type returned by the most recent next() call
int lastValue = BACKEDUP_TOO_FAR;
// variables available to UnicodeSet patterns parsed by next()
TokenSymbolTable symbolTable = new TokenSymbolTable();
private static final char
QUOTE = '\'',
BSLASH = '\\';
private static final UnicodeSet QUOTERS = new UnicodeSet().add(QUOTE).add(BSLASH);
// default whitespace set
private static final UnicodeSet WHITESPACE = new UnicodeSet("[" +
"\\u0009-\\u000D\\u0020\\u0085\\u200E\\u200F\\u2028\\u2029" +
"]");
// default syntax set; the quote, backslash and '$' are explicitly excluded
private static final UnicodeSet SYNTAX = new UnicodeSet("[" +
"\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E" +
"\\u00A1-\\u00A7\\u00A9\\u00AB-\\u00AC\\u00AE" +
"\\u00B0-\\u00B1\\u00B6\\u00B7\\u00BB\\u00BF\\u00D7\\u00F7" +
"\\u2010-\\u2027\\u2030-\\u205E\\u2190-\\u2BFF" +
"\\u3001\\u3003\\u3008-\\u3020\\u3030" +
"\\uFD3E\\uFD3F\\uFE45\\uFE46" +
"]").removeAll(QUOTERS).remove('$');
// characters that end a '#' comment in next()
private static final UnicodeSet NEWLINE = new UnicodeSet("[\\u000A\\u000D\\u0085\\u2028\\u2029]");
// NOTE(review): not referenced by the code visible in this class; next() tests
// UCharacter.getType instead
private static final UnicodeSet DECIMAL = new UnicodeSet("[:Nd:]");
// union of the default whitespace and syntax sets
private static final UnicodeSet NON_STRING = new UnicodeSet()
.addAll(WHITESPACE)
.addAll(SYNTAX);
// per-instance sets, replaceable through setWhiteSpace/setSyntax
protected UnicodeSet whiteSpace = WHITESPACE;
protected UnicodeSet syntax = SYNTAX;
// characters that terminate an unquoted string; rebuilt by fixSets
private UnicodeSet non_string = NON_STRING;
/**
 * Re-normalizes the per-instance sets after one of them was replaced: the
 * quote characters are removed from both, whitespace is removed from syntax,
 * and non_string is rebuilt as their union. A set is cloned before it is
 * modified, so the caller's set is left untouched.
 */
private void fixSets() {
    boolean syntaxOverlaps = syntax.containsSome(QUOTERS) || syntax.containsSome(whiteSpace);
    if (syntaxOverlaps) {
        UnicodeSet syntaxCopy = (UnicodeSet) syntax.clone();
        syntax = syntaxCopy.removeAll(QUOTERS).removeAll(whiteSpace);
    }
    if (whiteSpace.containsSome(QUOTERS)) {
        UnicodeSet spaceCopy = (UnicodeSet) whiteSpace.clone();
        whiteSpace = spaceCopy.removeAll(QUOTERS);
    }
    non_string = new UnicodeSet(syntax).addAll(whiteSpace);
}
/**
 * Sets the text to tokenize and moves the read position to its start.
 * No other state (for example a pending backup) is reset here.
 * @return this, for chaining
 */
public Tokenizer setSource(String source) {
this.source = source;
this.index = 0;
return this; // for chaining
}
/**
 * Moves the read position; the value is stored without validation.
 * @return this, for chaining
 */
public Tokenizer setIndex(int index) {
this.index = index;
return this; // for chaining
}
// Token types returned by next(). All are negative; a non-negative return
// value is the code point of a syntax character.
public static final int
DONE = -1,
NUMBER = -2,
STRING = -3,
UNICODESET = -4,
UNTERMINATED_QUOTE = -5,
BACKEDUP_TOO_FAR = -6;
// States of the string scanner in next(). next() reports UNTERMINATED_QUOTE
// when the final state is greater than IN_STRING, so the numeric order matters.
private static final int
FIRST = 0,
IN_NUMBER = 1,
IN_SPACE = 2,
AFTER_QUOTE = 3, // warning: order is important for switch statement
IN_STRING = 4,
AFTER_BSLASH = 5,
IN_QUOTE = 6;
/**
 * Formats a token type returned by next() for debugging output.
 * The text is wrapped in "@" when the token was replayed after a backup and
 * in "*" otherwise.
 * @param type value returned by next()
 * @param backedupBefore whether the tokenizer was in the backed-up state
 *        before the next() call that produced type
 * @return a short description of the token and its current value
 */
public String toString(int type, boolean backedupBefore) {
    // fixed: the marker used to read the backedup field, which is always false
    // right after next(), leaving the parameter unused
    String s = backedupBefore ? "@" : "*";
    switch(type) {
        case DONE:
            return s+"Done"+s;
        case BACKEDUP_TOO_FAR:
            return s+"Illegal Backup"+s;
        case UNTERMINATED_QUOTE:
            return s+"Unterminated Quote=" + getString() + s;
        case STRING:
            return s+"s=" + getString() + s;
        case NUMBER:
            return s+"n=" + getNumber() + s;
        case UNICODESET:
            // fixed: was labelled "n=", indistinguishable from NUMBER
            return s+"u=" + getUnicodeSet() + s;
        default:
            // any other value is the code point of a syntax character
            return s+"c=" + usf.getName(type) + s;
    }
}
// formatter used to name syntax characters in toString(int, boolean)
private static final BagFormatter usf = new BagFormatter();
/**
 * Pushes the most recent token back so that the following next() call
 * returns it again. Only one level of backup is supported.
 * @throws IllegalArgumentException if called twice without an intervening next()
 */
public void backup() {
    if (backedup) {
        throw new IllegalArgumentException("backup too far");
    }
    // remember where to resume, then step back to the recorded position
    nextIndex = index;
    index = lastIndex;
    backedup = true;
}
/*
public int next2() {
boolean backedupBefore = backedup;
int result = next();
System.out.println(toString(result, backedupBefore));
return result;
}
*/
/**
 * Reads the next token from the source.
 * <p>Leading whitespace and '#' comments (which run to a NEWLINE character)
 * are skipped. The result is then one of:
 * <ul>
 * <li>DONE at the end of the source;
 * <li>UNICODESET when the token starts with '[' (value via getUnicodeSet());
 * <li>the code point itself for a syntax character;
 * <li>NUMBER for a run of decimal digits (value via getNumber());
 * <li>STRING for anything else, with quotes and backslash escapes resolved
 *     (value via getString());
 * <li>UNTERMINATED_QUOTE when the source ends inside a quote or right after
 *     a backslash.
 * </ul>
 * After backup(), the previous result is returned again without re-reading.
 * @return the token type, also stored in lastValue
 */
public int next() {
    if (backedup) {
        // replay the token that backup() pushed back
        backedup = false;
        index = nextIndex;
        return lastValue;
    }
    int cp = 0;
    boolean inComment = false;
    // clean off any leading whitespace or comments
    while (true) {
        if (index >= source.length()) return lastValue = DONE;
        cp = nextChar();
        if (inComment) {
            if (NEWLINE.contains(cp)) inComment = false;
        } else {
            if (cp == '#') inComment = true;
            else if (!whiteSpace.contains(cp)) break;
        }
    }
    // record the last index in case we have to backup
    lastIndex = index;
    if (cp == '[') {
        // hand the whole bracketed pattern to UnicodeSet, starting at the '['
        ParsePosition pos = new ParsePosition(index-1);
        unicodeSet = new UnicodeSet(source,pos,symbolTable);
        index = pos.getIndex();
        return lastValue = UNICODESET;
    }
    // get syntax character
    if (syntax.contains(cp)) return lastValue = cp;
    // get number, if there is one
    if (UCharacter.getType(cp) == Character.DECIMAL_DIGIT_NUMBER) {
        number = UCharacter.getNumericValue(cp);
        while (index < source.length()) {
            cp = nextChar();
            if (UCharacter.getType(cp) != Character.DECIMAL_DIGIT_NUMBER) {
                index -= UTF16.getCharCount(cp); // BACKUP!
                break;
            }
            number *= 10;
            number += UCharacter.getNumericValue(cp);
        }
        return lastValue = NUMBER;
    }
    buffer.setLength(0);
    int status = IN_STRING;
    main:
    while (true) {
        switch (status) {
            case AFTER_QUOTE: // check for double ''?
                if (cp == QUOTE) {
                    UTF16.append(buffer, QUOTE);
                    status = IN_QUOTE;
                    break;
                }
                // fixed: the quoted run is over, so leave the AFTER_QUOTE state.
                // Previously the state was kept, and a later opening quote was
                // misread as a doubled quote ('a'b'c' produced ab'c).
                status = IN_STRING;
                // OTHERWISE FALL THROUGH!!!
            case IN_STRING:
                if (cp == QUOTE) status = IN_QUOTE;
                else if (cp == BSLASH) status = AFTER_BSLASH;
                else if (non_string.contains(cp)) {
                    index -= UTF16.getCharCount(cp); // BACKUP!
                    break main;
                } else UTF16.append(buffer,cp);
                break;
            case IN_QUOTE:
                if (cp == QUOTE) status = AFTER_QUOTE;
                else UTF16.append(buffer,cp);
                break;
            case AFTER_BSLASH:
                // \n, \r and \t are translated; any other character is taken literally
                switch(cp) {
                    case 'n': cp = '\n'; break;
                    case 'r': cp = '\r'; break;
                    case 't': cp = '\t'; break;
                }
                UTF16.append(buffer,cp);
                status = IN_STRING;
                break;
            default: throw new IllegalArgumentException("Internal Error");
        }
        if (index >= source.length()) break;
        cp = nextChar();
    }
    // AFTER_BSLASH and IN_QUOTE (both > IN_STRING) mean the input ended mid-escape or mid-quote
    if (status > IN_STRING) return lastValue = UNTERMINATED_QUOTE;
    return lastValue = STRING;
}
/** Returns the current contents of the string buffer filled by next(). */
public String getString() {
return buffer.toString();
}
/** Returns the source with "$$$" inserted at the current read position. */
public String toString() {
return source.substring(0,index) + "$$$" + source.substring(index);
}
/** Returns the value accumulated by next() for the latest NUMBER token. */
public long getNumber() {
return number;
}
/** Returns the set parsed by next() for the latest UNICODESET token; null until one is parsed. */
public UnicodeSet getUnicodeSet() {
return unicodeSet;
}
/**
 * Reads the code point at the current position and advances the position
 * past it (one or two UTF-16 code units).
 * @return the code point that was read
 */
private int nextChar() {
    final int codePoint = UTF16.charAt(source, index);
    index = index + UTF16.getCharCount(codePoint);
    return codePoint;
}
/** Returns the current read position in the source. */
public int getIndex() {
return index;
}
/** Returns the text being tokenized. */
public String getSource() {
return source;
}
/** Returns the set of syntax characters currently in use (the internal instance, not a copy). */
public UnicodeSet getSyntax() {
return syntax;
}
/** Returns the set of whitespace characters currently in use (the internal instance, not a copy). */
public UnicodeSet getWhiteSpace() {
return whiteSpace;
}
/** Replaces the syntax character set, then re-normalizes the sets via fixSets(). */
public void setSyntax(UnicodeSet set) {
syntax = set;
fixSets();
}
/** Replaces the whitespace character set, then re-normalizes the sets via fixSets(). */
public void setWhiteSpace(UnicodeSet set) {
whiteSpace = set;
fixSets();
}
/** Returns the symbol table's live set of looked-up variable names, each prefixed with '$'. */
public Set getLookedUpItems() {
return symbolTable.itemsLookedUp;
}
/**
 * Registers a variable in the symbol table. The body is the slice of value
 * from start up to, but excluding, the character just before limit.
 * @param var variable name; its first character is dropped by the table
 * @param value text containing the variable's definition
 * @param start index of the first character of the body
 * @param limit index just past the terminating ';'
 */
public void addSymbol(String var, String value, int start, int limit) {
    // the limit is after the ';', so remove it
    final int end = limit - 1;
    char[] body = new char[end - start];
    value.getChars(start, end, body, 0);
    symbolTable.add(var, body);
}
/**
 * Symbol table handed to UnicodeSet when next() parses a set pattern.
 * Variable bodies are stored under the name without its leading character,
 * and every name that is looked up is recorded with a '$' prefix.
 */
public class TokenSymbolTable implements SymbolTable {
    Map contents = new HashMap();
    Set itemsLookedUp = new HashSet();

    /**
     * Stores a variable body.
     * @param var variable name including its one-character prefix
     * @param body replacement text
     */
    public void add(String var, char[] body) {
        // start from 1 to avoid the $
        String key = var.substring(1);
        contents.put(key, body);
    }

    /* (non-Javadoc)
     * @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
     */
    public char[] lookup(String s) {
        // record the request whether or not the variable is defined
        itemsLookedUp.add('$' + s);
        Object body = contents.get(s);
        return (char[]) body;
    }

    /* (non-Javadoc)
     * @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
     */
    public UnicodeMatcher lookupMatcher(int ch) {
        // TODO Auto-generated method stub
        return null;
    }

    /* (non-Javadoc)
     * @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String, java.text.ParsePosition, int)
     */
    public String parseReference(String text, ParsePosition pos, int limit) {
        final int start = pos.getIndex();
        int end = start;
        // consume the longest run of Unicode identifier-part characters
        while (end < limit) {
            int cp = UTF16.charAt(text, end);
            if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
                break;
            }
            end += UTF16.getCharCount(cp);
        }
        pos.setIndex(end);
        return text.substring(start, end);
    }
}
}

View file

@ -0,0 +1,323 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/UnicodePropertySource.java,v $
* $Date: 2003/11/21 01:03:39 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import java.util.regex.*;
import java.util.Set;
import java.util.Locale;
import java.util.Map;
import java.util.List;
import java.util.Arrays;
import com.ibm.icu.lang.*;
import com.ibm.icu.util.*;
import com.ibm.icu.impl.*;
import com.ibm.icu.text.*;
/**
* Provides a general interface for Unicode Properties, and
* extracting sets based on those values.
* @author Davis
*/
public abstract class UnicodePropertySource implements Cloneable {
// alias of the currently selected property; set by setPropertyAlias
protected String propertyAlias;
// UProperty.NameChoice value; defaults to LONG
protected int nameChoice = UProperty.NameChoice.LONG;
// applied to every property value inside the getPropertySet methods
protected StringFilter filter = new StringFilter();
// iterator over the characters examined by getPropertySet; by default all of 0..0x10FFFF
protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(new UnicodeSet(0,0x10FFFF));
/** Returns the current property's value for a code point. */
abstract public String getPropertyValue(int codepoint);
/** Adds the available property aliases to result and returns it. */
abstract public Set getAvailablePropertyAliases(Set result);
/** Adds the value aliases available for the current property to result and returns it. */
abstract public Set getAvailablePropertyValueAliases(Set result);
/** Returns the alias of the current property for the given name choice. */
abstract public String getPropertyAlias(int nameChoice);
/** Returns the form of a value alias of the current property for the given name choice. */
abstract public String getPropertyValueAlias(String valueAlias, int nameChoice);
/**
 * Returns a copy of this source with its own clone of the filter. All other
 * fields, including matchIterator, are copied by reference.
 * Subclasses should override.
 */
public Object clone() {
    UnicodePropertySource copy;
    try {
        copy = (UnicodePropertySource) super.clone();
    } catch (CloneNotSupportedException e) {
        throw new InternalError("Should never happen.");
    }
    copy.filter = (StringFilter) filter.clone();
    return copy;
}
/**
 * Selects the property by alias.
 * @return this, for chaining
 */
public UnicodePropertySource setPropertyAlias(String propertyAlias) {
this.propertyAlias = propertyAlias;
return this;
}
/** Returns the alias passed to the most recent setPropertyAlias call. */
public String getPropertyAlias() {
return propertyAlias;
}
/**
 * Tests whether a string consists of exactly the given code point.
 * @param codepoint code point to compare
 * @param other string to compare; must not be null
 * @return true if other is the UTF-16 form of codepoint
 */
public static final boolean equals(int codepoint, String other) {
    // single code unit: compare directly without building a string
    return other.length() == 1
        ? codepoint == other.charAt(0)
        : other.equals(UTF16.valueOf(codepoint));
}
/**
 * Collects the characters for which "the (filtered) property value is the
 * character itself" equals charEqualsValue.
 * Characters with no value (null) are skipped, as in the Matcher overload.
 * @param charEqualsValue true to collect characters that map to themselves,
 *        false to collect those that map to something else
 * @param result set to add to, or null to create a new one
 * @return result, or the newly created set
 */
public UnicodeSet getPropertySet(boolean charEqualsValue, UnicodeSet result){
    if (result == null) result = new UnicodeSet();
    matchIterator.reset();
    while (matchIterator.next()) {
        String value = filter.remap(getPropertyValue(matchIterator.codepoint));
        // fixed: a null value used to reach equals() and throw NullPointerException
        if (value == null) {
            continue;
        }
        if (equals(matchIterator.codepoint, value) == charEqualsValue) {
            result.add(matchIterator.codepoint);
        }
    }
    return result;
}
/**
 * Collects the characters whose (filtered) property value equals the given
 * string.
 * @param propertyValue value to look for; must not be null
 * @param result set to add to, or null to create a new one
 * @return result, or the newly created set
 */
public UnicodeSet getPropertySet(String propertyValue, UnicodeSet result){
    UnicodeSet matches = (result != null) ? result : new UnicodeSet();
    for (matchIterator.reset(); matchIterator.next();) {
        int cp = matchIterator.codepoint;
        String value = filter.remap(getPropertyValue(cp));
        if (propertyValue.equals(value)) {
            matches.add(cp);
        }
    }
    return matches;
}
/**
 * Collects the characters whose (filtered) property value is matched in
 * full by the matcher. Characters with a null value are skipped.
 * @param matcher reused for every value via reset(CharSequence)
 * @param result set to add to, or null to create a new one
 * @return result, or the newly created set
 */
public UnicodeSet getPropertySet(Matcher matcher, UnicodeSet result) {
    UnicodeSet matches = (result != null) ? result : new UnicodeSet();
    for (matchIterator.reset(); matchIterator.next();) {
        int cp = matchIterator.codepoint;
        String value = filter.remap(getPropertyValue(cp));
        if (value != null) {
            matcher.reset(value);
            if (matcher.matches()) {
                matches.add(cp);
            }
        }
    }
    return matches;
}
/** Returns the name choice (a UProperty.NameChoice value) currently in effect. */
public int getNameChoice() {
return nameChoice;
}
/**
 * Sets the name choice; the value is stored without validation.
 * @return this, for chaining
 */
public UnicodePropertySource setNameChoice(int choice) {
nameChoice = choice;
return this;
}
/**
 * Hook for rewriting property values before they are compared. This base
 * implementation returns every value unchanged.
 */
public static class StringFilter implements Cloneable {
    /** Returns the value to use in place of original; here, original itself. */
    public String remap(String original) {
        return original;
    }

    /** Returns a shallow copy of this filter. */
    public Object clone() {
        Object copy;
        try {
            copy = super.clone();
        } catch (CloneNotSupportedException e) {
            throw new InternalError("Should never happen.");
        }
        return copy;
    }
}
/**
 * Filter that replaces values found as keys in a map and passes all other
 * values through. A map must be supplied with setMap before remap is used.
 */
public static class MapFilter extends StringFilter {
    Map valueMap;

    /** Returns the mapped value for original, or original when the lookup yields null. */
    public String remap(String original) {
        Object changed = valueMap.get(original);
        if (changed != null) {
            return (String) changed;
        }
        return original;
    }

    /** Returns the map in use (the instance itself, not a copy). */
    public Map getMap() {
        return valueMap;
    }

    /**
     * Sets the replacement map.
     * @return this, for chaining
     */
    public MapFilter setMap(Map map) {
        this.valueMap = map;
        return this;
    }
}
static public class ICU extends UnicodePropertySource {
// Numeric id of the selected property: a UProperty value, or EXTRA_START plus
// an index into Extras. Integer.MIN_VALUE until setPropertyAlias is called.
protected int propEnum = Integer.MIN_VALUE;
// Instance initializer: replaces the inherited full-range matchIterator with
// one over the set described by the pattern below.
{
matchIterator = new UnicodeSetIterator(
new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
}
/**
 * Selects the property by alias and resolves its numeric id. Names listed
 * in Extras map to EXTRA_START plus their list position; everything else is
 * resolved through UCharacter.getPropertyEnum.
 * @return this, for chaining
 */
public UnicodePropertySource setPropertyAlias(String propertyAlias) {
    super.setPropertyAlias(propertyAlias);
    final int extraPosition = Extras.indexOf(propertyAlias);
    propEnum = (extraPosition < 0)
        ? UCharacter.getPropertyEnum(propertyAlias)
        : EXTRA_START + extraPosition;
    return this;
}
/**
 * Returns the selected property's value for a code point, as a string.
 * Ids below UProperty.INT_LIMIT yield the value's name in the current name
 * choice; ids below UProperty.DOUBLE_LIMIT yield the numeric value formatted
 * with Double.toString; the remaining ids are dispatched case by case.
 * @return the value, or null when the id matches none of the cases below
 */
public String getPropertyValue(int codePoint) {
// the range tests must run in this order: each assumes the previous one failed
if (propEnum < UProperty.INT_LIMIT) {
int enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
return UCharacter.getPropertyValueName(propEnum,enumValue, (int)nameChoice);
} else if (propEnum < UProperty.DOUBLE_LIMIT) {
return Double.toString(UCharacter.getUnicodeNumericValue(codePoint));
// TODO: Fix HACK -- API deficient
} else switch(propEnum) {
case UProperty.AGE: return UCharacter.getAge(codePoint).toString();
case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(UCharacter.getMirror(codePoint));
case UProperty.CASE_FOLDING: return UCharacter.foldCase(UTF16.valueOf(codePoint),true);
case UProperty.ISO_COMMENT: return UCharacter.getISOComment(codePoint);
case UProperty.LOWERCASE_MAPPING: return UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint));
case UProperty.NAME: return UCharacter.getName(codePoint);
case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(UCharacter.foldCase(codePoint,true));
case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
case UProperty.TITLECASE_MAPPING: return UCharacter.toTitleCase(Locale.ENGLISH,UTF16.valueOf(codePoint),null);
case UProperty.UNICODE_1_NAME: return UCharacter.getName1_0(codePoint);
case UProperty.UPPERCASE_MAPPING: return UCharacter.toUpperCase(Locale.ENGLISH,UTF16.valueOf(codePoint));
// the extra (non-UProperty) ids defined in this class
case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);
case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);
case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);
case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);
}
return null;
}
/**
 * Names of the extra properties handled by this class. The order must match
 * the numeric constants NFC, NFD, NFKC, NFKD, because a property id is
 * computed as EXTRA_START plus the index in this list.
 * Fixed: the last entry was misspelled "NKFD", so the alias "NFKD" was never
 * recognized.
 */
static final List Extras = Arrays.asList(new String[] {
    "NFC", "NFD", "NFKC", "NFKD"
});
// Ids for the extra properties, in the same order as the Extras list.
// EXTRA_START..EXTRA_LIMIT (exclusive) is the range they occupy.
static final int
NFC = 0x8000,
NFD = 0x8001,
NFKC = 0x8002,
NFKD = 0x8003,
EXTRA_START = NFC,
EXTRA_LIMIT = NFKD+1;
// {start, limit} pairs of the UProperty id ranges enumerated by
// getAvailablePropertyAliases; each limit is exclusive
static final int[][] ranges = {
{UProperty.BINARY_START, UProperty.BINARY_LIMIT},
{UProperty.INT_START, UProperty.INT_LIMIT},
{UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT},
{UProperty.STRING_START, UProperty.STRING_LIMIT},
};
/**
 * Adds the name of every property id in ranges (in the current name choice),
 * followed by the names in Extras, to result.
 * @param result set to add to; must not be null
 * @return result
 */
public Set getAvailablePropertyAliases(Set result) {
    for (int r = 0; r < ranges.length; ++r) {
        final int[] range = ranges[r];
        int prop = range[0];
        while (prop < range[1]) {
            result.add(UCharacter.getPropertyName(prop, nameChoice));
            ++prop;
        }
    }
    result.addAll(Extras);
    return result;
}
/**
 * Adds the value aliases of the selected property to result. For ids below
 * UProperty.INT_LIMIT one alias is added per value between the property's
 * minimum and maximum; for any other id a single alias is added, as returned
 * by getFixedValueAlias(null, -1, nameChoice).
 * @param result set to add to; must not be null
 * @return result
 */
public Set getAvailablePropertyValueAliases(Set result) {
    if (propEnum >= UProperty.INT_LIMIT) {
        result.add(getFixedValueAlias(null, -1, nameChoice));
        return result;
    }
    final int first = UCharacter.getIntPropertyMinValue(propEnum);
    final int last = UCharacter.getIntPropertyMaxValue(propEnum);
    for (int value = first; value <= last; ++value) {
        result.add(getFixedValueAlias(null, value, nameChoice));
    }
    return result;
}
/**
 * Returns a printable alias for a value of the selected property.
 * Ids at or above UProperty.STRING_START yield "&lt;string&gt;" and ids at or
 * above UProperty.DOUBLE_START yield "&lt;double&gt;". Otherwise the value's
 * name is looked up in the requested name choice, then in the other one,
 * and "&lt;integer&gt;" is returned if neither lookup gives a name.
 * @param valueAlias alias identifying the value; null if unused. When it is
 *        non-null and not "&lt;integer&gt;", it replaces valueEnum.
 * @param valueEnum numeric value; -1 if unused
 * @param nameChoice UProperty.NameChoice to try first
 * @return the alias or one of the placeholders; never null
 */
private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {
    // the string test must come first: string ids also satisfy the double test
    if (propEnum >= UProperty.STRING_START) {
        return "<string>";
    }
    if (propEnum >= UProperty.DOUBLE_START) {
        return "<double>";
    }
    boolean aliasGiven = valueAlias != null && !valueAlias.equals("<integer>");
    if (aliasGiven) {
        valueEnum = UCharacter.getPropertyValueEnum(propEnum, valueAlias);
    }
    String name = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);
    if (name == null) {
        // try other namechoice
        int otherChoice = (nameChoice == UProperty.NameChoice.LONG)
            ? UProperty.NameChoice.SHORT
            : UProperty.NameChoice.LONG;
        name = fixedGetPropertyValueName(propEnum, valueEnum, otherChoice);
    }
    return (name != null) ? name : "<integer>";
}
/**
 * Wrapper around UCharacter.getPropertyValueName that returns null instead
 * of propagating any Exception the lookup throws.
 */
private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
    String name;
    try {
        name = UCharacter.getPropertyValueName(propEnum, valueEnum, nameChoice);
    } catch (Exception e) {
        // deliberate: a failed lookup is reported to the caller as "no name"
        name = null;
    }
    return name;
}
/**
 * Returns the selected property's name. Extra properties (ids at or above
 * EXTRA_START) have a single name taken from Extras; the name choice only
 * affects the others.
 */
public String getPropertyAlias(int nameChoice) {
    if (propEnum >= EXTRA_START) {
        int extraIndex = propEnum - EXTRA_START;
        return (String) Extras.get(extraIndex);
    }
    return UCharacter.getPropertyName(propEnum, nameChoice);
}
/**
 * Returns the form of a value alias for the given name choice; delegates to
 * getFixedValueAlias with the numeric value marked unused (-1).
 */
public String getPropertyValueAlias(String valueAlias, int nameChoice) {
return getFixedValueAlias(valueAlias, -1, nameChoice);
}
}
// TODO file bug on getPropertyValueName for Canonical_Combining_Class
/** Returns the filter applied to property values (the instance itself, not a copy). */
public StringFilter getFilter() {
return filter;
}
/**
 * Replaces the filter applied to property values.
 * @return this, for chaining
 */
public UnicodePropertySource setFilter(StringFilter filter) {
this.filter = filter;
return this;
}
/**
 * Adds everything the iterator still has to deliver to result, range by
 * range. The iterator is consumed from its current position and is not
 * reset.
 * @param source iterator to drain
 * @param result set receiving the ranges and strings
 */
static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
    while (source.nextRange()) {
        boolean isString = source.codepoint == UnicodeSetIterator.IS_STRING;
        if (!isString) {
            result.add(source.codepoint, source.codepointEnd);
        } else {
            result.add(source.string);
        }
    }
}
/**
 * Adds the characters examined by the getPropertySet methods to result.
 * @param result set to add to, or null to create a new one
 * @return result, or the newly created set
 */
public UnicodeSet getMatchSet(UnicodeSet result) {
    if (result == null) result = new UnicodeSet();
    // fixed: matchIterator is shared with getPropertySet, which leaves it
    // exhausted; without a reset this method returned nothing after any
    // earlier iteration (including its own previous call)
    matchIterator.reset();
    addAll(matchIterator, result);
    return result;
}
/**
 * Replaces the set of characters examined by the getPropertySet methods.
 * A new iterator is created over the given set.
 * @param set characters to examine
 */
public void setMatchSet(UnicodeSet set) {
matchIterator = new UnicodeSetIterator(set);
}
}

View file

@ -0,0 +1,155 @@
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Visitor.java,v $
* $Date: 2003/11/21 01:03:39 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import java.util.*;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
public abstract class Visitor {
/**
 * Visits an item. Collections, Maps, Object arrays and UnicodeSets are
 * traversed by the matching container overload; anything else is passed to
 * doSimpleAt.
 */
public void doAt(Object item) {
    if (item instanceof Collection) {
        doAt((Collection) item);
        return;
    }
    if (item instanceof Map) {
        doAt((Map) item);
        return;
    }
    if (item instanceof Object[]) {
        doAt((Object[]) item);
        return;
    }
    if (item instanceof UnicodeSet) {
        doAt((UnicodeSet) item);
        return;
    }
    doSimpleAt(item);
}
/**
 * Returns the number of elements in a container (Collection, Map, Object
 * array or UnicodeSet), or 1 for any other item.
 */
public int count(Object item) {
    if (item instanceof Collection) return ((Collection) item).size();
    if (item instanceof Map) return ((Map) item).size();
    if (item instanceof Object[]) return ((Object[]) item).length;
    if (item instanceof UnicodeSet) return ((UnicodeSet) item).size();
    return 1;
}
// The default implementations box the primitive and hand it to doSimpleAt.
/** Visits an int as an Integer. */
public void doAt(int o) {
doSimpleAt(new Integer(o));
}
/** Visits a double as a Double. */
public void doAt(double o) {
doSimpleAt(new Double(o));
}
/** Visits a char as a Character. */
public void doAt(char o) {
doSimpleAt(new Character(o));
}
// for subclassing
/**
 * Traverses a collection: doBefore with the first element (or null when
 * empty), doBetween for each adjacent pair, doAt on every element, and
 * finally doAfter with the last element (or null when empty).
 */
protected void doAt (Collection c) {
    if (c.size() == 0) doBefore(c, null);
    Object previous = null;
    boolean started = false;
    for (Iterator it = c.iterator(); it.hasNext();) {
        Object current = it.next();
        if (started) {
            doBetween(c, previous, current);
        } else {
            doBefore(c, current);
            started = true;
        }
        previous = current;
        doAt(current);
    }
    doAfter(c, previous);
}
/** Traverses a map as the collection returned by its entrySet(). */
protected void doAt (Map c) {
doAt(c.entrySet());
}
/**
 * Traverses a UnicodeSet range by range, following the same protocol as the
 * Collection overload: doBefore with the first item (or null when empty),
 * doBetween for each adjacent pair, doAt on every item, and doAfter with the
 * last item (or null when empty). Each item is either a String or a
 * CodePointRange.
 * <p>The CodePointRange objects are reused: only the current item and the one
 * before it are valid at any time, so visitors must not retain them.
 */
protected void doAt (UnicodeSet c) {
    if (c.size() == 0) doBefore(c, null);
    UnicodeSetIterator it = new UnicodeSetIterator(c);
    boolean first = true;
    Object last = null;
    Object item;
    CodePointRange cpr0 = new CodePointRange();
    CodePointRange cpr1 = new CodePointRange();
    CodePointRange cpr;
    while (it.nextRange()) {
        if (it.codepoint == UnicodeSetIterator.IS_STRING) {
            item = it.string;
        } else {
            cpr = last == cpr0 ? cpr1 : cpr0; // make sure we don't override last
            cpr.codepoint = it.codepoint;
            cpr.codepointEnd = it.codepointEnd;
            item = cpr;
        }
        // fixed: the test was inverted (if (!first) ... first = true), so doBefore
        // was never called for a non-empty set and doBetween ran before the first
        // item with a null predecessor
        if (first) {
            doBefore(c, item);
            first = false;
        } else {
            doBetween(c, last, item);
        }
        doAt(last = item);
    }
    doAfter(c, last);
}
/**
 * Traverses an array: doBefore with the first element (or null when empty),
 * doBetween for each adjacent pair, doAt on every element, and doAfter with
 * the last element (or null when empty).
 */
protected void doAt (Object[] c) {
    Object head = (c.length == 0) ? null : c[0];
    doBefore(c, head);
    Object previous = null;
    int i = 0;
    while (i < c.length) {
        Object current = c[i];
        if (i > 0) {
            doBetween(c, previous, current);
        }
        previous = current;
        doAt(current);
        ++i;
    }
    doAfter(c, previous);
}
/**
 * A range of code points, as passed to the visitor callbacks when a
 * UnicodeSet is traversed. The fields are copied from UnicodeSetIterator's
 * codepoint and codepointEnd.
 */
public static class CodePointRange{
public int codepoint, codepointEnd;
}
// ===== MUST BE OVERRIDDEN =====
/** Called once before a container's elements; item is the first element, or null when the container is empty. */
abstract protected void doBefore(Object container, Object item);
/** Called between two consecutive elements of a container. */
abstract protected void doBetween(Object container, Object lastItem, Object nextItem);
/** Called once after a container's elements; item is the last element, or null when the container is empty. */
abstract protected void doAfter(Object container, Object item);
/** Called for every item that is not one of the container types handled by doAt(Object). */
abstract protected void doSimpleAt(Object o);
// ===== CONVENIENCES =====
/**
 * Visitor that concatenates the toString() forms of all simple items it
 * meets, writing a comma between consecutive elements of each container.
 */
static class Join extends Visitor {
    StringBuffer output = new StringBuffer();

    /** Clears the buffer, visits o and returns the text that was built. */
    String join (Object o) {
        output.setLength(0);
        this.doAt(o);
        String joined = output.toString();
        return joined;
    }

    /** Nothing is written before a container. */
    protected void doBefore(Object container, Object item) {
    }

    /** Nothing is written after a container. */
    protected void doAfter(Object container, Object item) {
    }

    /** Writes the separator. */
    protected void doBetween(Object container, Object lastItem, Object nextItem) {
        output.append(",");
    }

    /** Appends the item's string form; o must not be null. */
    protected void doSimpleAt(Object o) {
        String text = o.toString();
        output.append(text);
    }
}
}