mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
Added tests for collation parsing, some utilities for display
X-SVN-Rev: 13800
This commit is contained in:
parent
08b4557e06
commit
501b2fdc41
12 changed files with 3566 additions and 0 deletions
301
icu4j/src/com/ibm/icu/dev/test/collator/RandomCollator.java
Normal file
301
icu4j/src/com/ibm/icu/dev/test/collator/RandomCollator.java
Normal file
|
@ -0,0 +1,301 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source:
|
||||
* $Date:
|
||||
* $Revision:
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
package com.ibm.icu.dev.test.collator;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.text.*;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.dev.test.util.*;
|
||||
|
||||
import com.ibm.icu.dev.test.*;
|
||||
import com.ibm.icu.text.*;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.io.*;
|
||||
import java.util.Random;
|
||||
|
||||
public class RandomCollator extends TestFmwk {
|
||||
public static void main(String[] args) throws Exception {
|
||||
new RandomCollator().run(args);
|
||||
//new CollationAPITest().TestGetTailoredSet();
|
||||
}
|
||||
|
||||
static final int CONSTRUCT_RANDOM_COUNT = 100;
|
||||
static final int FORMAL_TEST_COUNT = 1000;
|
||||
|
||||
static final String POSITION = "{$$$}";
|
||||
|
||||
class Shower implements BagFormatter.Shower {
|
||||
public void println(String arg) {
|
||||
logln(arg);
|
||||
}
|
||||
}
|
||||
|
||||
public Shower LOG = new Shower();
|
||||
|
||||
public void TestRandom() throws IOException {
|
||||
String fileName;
|
||||
PrintWriter pw = BagFormatter.openUTF8Writer("", "RandomCollationTestLog.txt", BagFormatter.CONSOLE);
|
||||
TestCollator tc = new TestCollator(chars);
|
||||
pw.println("Collation Test Run");
|
||||
pw.println("Note: For parse-exception, " + POSITION + " indicates the errorOffset");
|
||||
pw.println("Rules:");
|
||||
pw.println(currentRules);
|
||||
String rules = "<unknown>";
|
||||
int sCount = 0;
|
||||
int peCount = 0;
|
||||
int oeCount = 0;
|
||||
for (int i = 0; i < CONSTRUCT_RANDOM_COUNT; ++i) {
|
||||
try {
|
||||
rules = get();
|
||||
if (true) {
|
||||
Collator c = new RuleBasedCollator(rules.toString());
|
||||
tc.test(c, FORMAL_TEST_COUNT);
|
||||
} else {
|
||||
pw.println(rules);
|
||||
}
|
||||
logln("ok");
|
||||
sCount++;
|
||||
} catch (ParseException pe) {
|
||||
peCount++;
|
||||
pw.println("========PARSE EXCEPTION======== (" + i + ")");
|
||||
int errorOffset = pe.getErrorOffset();
|
||||
pw.print(rules.substring(0,errorOffset));
|
||||
pw.print(POSITION);
|
||||
pw.println(rules.substring(errorOffset));
|
||||
//pw.println("========ERROR======== (" + i + ")");
|
||||
//pe.printStackTrace(pw);
|
||||
//pw.println("========END======== (" + i + ")");
|
||||
errln("ParseException");
|
||||
} catch (Exception e) {
|
||||
oeCount++;
|
||||
pw.println("========OTHER EXCEPTION======== (" + i + ")");
|
||||
e.printStackTrace(pw);
|
||||
pw.println("========RULES======== (" + i + ")");
|
||||
pw.println(rules);
|
||||
//pw.println("========END======== (" + i + ")");
|
||||
errln("ParseException");
|
||||
}
|
||||
}
|
||||
pw.println("Successful: " + sCount
|
||||
+ ",\tParseException: " + peCount
|
||||
+ ",\tOther Exception: " + oeCount);
|
||||
logln("Successful: " + sCount
|
||||
+ ",\tParseException: " + peCount
|
||||
+ ",\tOther Exception: " + oeCount);
|
||||
pw.close();
|
||||
|
||||
}
|
||||
|
||||
public static class TestCollator extends TestComparator {
|
||||
BNF rs;
|
||||
|
||||
TestCollator(UnicodeSet chars) {
|
||||
rs = new BNF(new Random(0), new Quoter.RuleQuoter())
|
||||
.addRules("$root = " + chars + "{1,8};").complete();
|
||||
}
|
||||
|
||||
public Object newObject(Object c) {
|
||||
return rs.next();
|
||||
}
|
||||
|
||||
public String format(Object c) {
|
||||
return BagFormatter.hex.transliterate(c.toString());
|
||||
}
|
||||
}
|
||||
|
||||
private BNF bnf;
|
||||
String currentRules = null;
|
||||
UnicodeSet chars;
|
||||
|
||||
public String get() {
|
||||
return bnf.next();
|
||||
}
|
||||
|
||||
public RandomCollator() {
|
||||
this(1,10, new UnicodeSet("[AZa-z<\\&\\[\\]]"));
|
||||
}
|
||||
|
||||
public RandomCollator(int minRuleCount, int maxRuleCount, UnicodeSet chars) {
|
||||
this.chars = chars;
|
||||
bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
|
||||
.addSet("$chars", chars)
|
||||
.addRules(collationBNF)
|
||||
.complete();
|
||||
}
|
||||
|
||||
static String collationBNF =
|
||||
"$s = ' '? 50%;\r\n" +
|
||||
"$relationList = (" +
|
||||
" '<'" +
|
||||
" | ' <<'" +
|
||||
" | ' ;'" +
|
||||
" | ' <<<'" +
|
||||
" | ' ,'" +
|
||||
" | ' ='" +
|
||||
");\r\n" +
|
||||
"$alternateOptions = non'-'ignorable | shifted;\r\n" +
|
||||
"$caseFirstOptions = off | upper | lower;\r\n" +
|
||||
"$strengthOptions = '1' | '2' | '3' | '4' | 'I';\r\n" +
|
||||
"$commandList = '['" +
|
||||
" ( alternate ' ' $alternateOptions" +
|
||||
" | backwards' 2'" +
|
||||
" | normalization ' ' $onoff " +
|
||||
" | caseLevel ' ' $onoff " +
|
||||
" | hiraganaQ ' ' $onoff" +
|
||||
" | caseFirst ' ' $caseFirstOptions" +
|
||||
" | strength ' ' $strengthOptions" +
|
||||
" ) ']';\r\n" +
|
||||
"$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;\r\n" +
|
||||
"$allTypes = variable | regular | implicit | trailing | $ignorableTypes;\r\n" +
|
||||
"$onoff = on | off;\r\n" +
|
||||
"$positionList = '[' (first | last) ' ' $allTypes ']';\r\n" +
|
||||
"$beforeList = '[before ' ('1' | '2' | '3') ']';\r\n" +
|
||||
"$string = $chars{1,5}~@;\r\n" +
|
||||
"$crlf = '\r\n';\r\n" +
|
||||
"$rel1 = '[variable top]' $s ;\r\n" +
|
||||
"$p1 = ($string $s '|' $s)? 25%;\r\n" +
|
||||
"$p2 = ('\\' $s $string $s)? 25%;\r\n" +
|
||||
"$rel2 = $p1 $string $s $p2;\r\n" +
|
||||
"$relation = $relationList $s ($rel1 | $rel2) $crlf;\r\n" +
|
||||
"$command = $commandList $crlf;\r\n" +
|
||||
"$reset = '&' $s ($beforeList $s)? 10% ($positionList | $string 10%) $crlf;\r\n" +
|
||||
"$mostRules = $command 1% | $reset 5% | $relation 25%;\r\n" +
|
||||
"$root = $command{0,5} $reset $mostRules{1,20};\r\n";
|
||||
|
||||
|
||||
/*
|
||||
|
||||
|
||||
gc ; C ; Other # Cc | Cf | Cn | Co | Cs
|
||||
gc ; Cc ; Control
|
||||
gc ; Cf ; Format
|
||||
gc ; Cn ; Unassigned
|
||||
gc ; Co ; Private_Use
|
||||
gc ; Cs ; Surrogate
|
||||
gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu
|
||||
gc ; LC ; Cased_Letter # Ll | Lt | Lu
|
||||
gc ; Ll ; Lowercase_Letter
|
||||
gc ; Lm ; Modifier_Letter
|
||||
gc ; Lo ; Other_Letter
|
||||
gc ; Lt ; Titlecase_Letter
|
||||
gc ; Lu ; Uppercase_Letter
|
||||
gc ; M ; Mark # Mc | Me | Mn
|
||||
gc ; Mc ; Spacing_Mark
|
||||
gc ; Me ; Enclosing_Mark
|
||||
gc ; Mn ; Nonspacing_Mark
|
||||
gc ; N ; Number # Nd | Nl | No
|
||||
gc ; Nd ; Decimal_Number
|
||||
gc ; Nl ; Letter_Number
|
||||
gc ; No ; Other_Number
|
||||
gc ; P ; Punctuation # Pc | Pd | Pe | Pf | Pi | Po | Ps
|
||||
gc ; Pc ; Connector_Punctuation
|
||||
gc ; Pd ; Dash_Punctuation
|
||||
gc ; Pe ; Close_Punctuation
|
||||
gc ; Pf ; Final_Punctuation
|
||||
gc ; Pi ; Initial_Punctuation
|
||||
gc ; Po ; Other_Punctuation
|
||||
gc ; Ps ; Open_Punctuation
|
||||
gc ; S ; Symbol # Sc | Sk | Sm | So
|
||||
gc ; Sc ; Currency_Symbol
|
||||
gc ; Sk ; Modifier_Symbol
|
||||
gc ; Sm ; Math_Symbol
|
||||
gc ; So ; Other_Symbol
|
||||
gc ; Z ; Separator # Zl | Zp | Zs
|
||||
gc ; Zl ; Line_Separator
|
||||
gc ; Zp ; Paragraph_Separator
|
||||
gc ; Zs ; Space_Separator
|
||||
*/
|
||||
|
||||
/*
|
||||
// each rule can be:
|
||||
// "[" command "]"
|
||||
// "& [" position "]"
|
||||
// "&" before chars
|
||||
// relation "[variable top]"
|
||||
// relation (chars "|")? chars ("/" chars)?
|
||||
// plus, a reset must come before a relation
|
||||
|
||||
// the following reflects the above rules, plus allows whitespace.
|
||||
Pick chars = Pick.string(1, 5, Pick.codePoint(uSet)); // insert something needing quotes
|
||||
Pick s = Pick.maybe(0.8, Pick.unquoted(" ")).name("Space"); // optional space
|
||||
Pick CRLF = Pick.unquoted("\r\n");
|
||||
|
||||
Pick rel1 = Pick.and(Pick.unquoted("[variable top]")).and2(s);
|
||||
Pick p1 = Pick.maybe(0.25, Pick.and(chars).and2(s).and2("|").and2(s));
|
||||
Pick p2 = Pick.maybe(0.25, Pick.and("/").and2(s).and2(chars).and2(s));
|
||||
Pick rel2 = Pick.and(p1).and2(chars).and2(s).and2(p2);
|
||||
Pick relation = Pick.and(Pick.or(relationList)).and2(s)
|
||||
.and2(Pick.or(1, rel1).or2(10, rel2))
|
||||
.and2(CRLF).name("Relation");
|
||||
|
||||
Pick command = Pick.and(Pick.or(commandList)).and2(CRLF).name("Command");
|
||||
|
||||
Pick reset = Pick.and("&").and2(s)
|
||||
.and2(0.1, Pick.or(beforeList)).and2(s)
|
||||
.and2(Pick.or(0.1, Pick.or(positionList)).or2(1.0, chars))
|
||||
.and2(CRLF).name("Reset");
|
||||
Pick rule = Pick.and(Pick.or(1, command).or2(5, reset).or2(25, relation)).name("Rule");
|
||||
Pick rules2 = Pick.and(Pick.repeat(0,5,command))
|
||||
.and2(reset)
|
||||
.and2(Pick.repeat(1,20,rule)).name("Rules");
|
||||
rules = Pick.Target.make(rules2);
|
||||
|
||||
static final String[] relationList = {" <", " <<", " <<<", " =", " ;", " ,"};
|
||||
|
||||
static final String[] commandList = {
|
||||
"[alternate non-ignorable]",
|
||||
"[alternate shifted]",
|
||||
"[backwards 2]",
|
||||
"[normalization off]",
|
||||
"[normalization on]",
|
||||
"[caseLevel off]",
|
||||
"[caseLevel on]",
|
||||
"[caseFirst off]",
|
||||
"[caseFirst upper]",
|
||||
"[caseFirst lower]",
|
||||
"[strength 1]",
|
||||
"[strength 2]",
|
||||
"[strength 3]",
|
||||
"[strength 4]",
|
||||
"[strength I]",
|
||||
"[hiraganaQ off]",
|
||||
"[hiraganaQ on]"
|
||||
};
|
||||
|
||||
static final String[] positionList = {
|
||||
"[first tertiary ignorable]",
|
||||
"[last tertiary ignorable]",
|
||||
"[first secondary ignorable]",
|
||||
"[last secondary ignorable]",
|
||||
"[first primary ignorable]",
|
||||
"[last primary ignorable]",
|
||||
"[first variable]",
|
||||
"[last variable]",
|
||||
"[first regular]",
|
||||
"[last regular]",
|
||||
"[first implicit]",
|
||||
"[last implicit]",
|
||||
"[first trailing]",
|
||||
"[last trailing]"
|
||||
};
|
||||
|
||||
static final String[] beforeList = {
|
||||
"[before 1]",
|
||||
"[before 2]",
|
||||
"[before 3]"
|
||||
};
|
||||
*/
|
||||
}
|
79
icu4j/src/com/ibm/icu/dev/test/collator/TestComparator.java
Normal file
79
icu4j/src/com/ibm/icu/dev/test/collator/TestComparator.java
Normal file
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source:
|
||||
* $Date:
|
||||
* $Revision:
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
package com.ibm.icu.dev.test.collator;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Checks a Comparator for symmetry and transitivity over a stream of
 * generated objects. Subclasses override newObject to supply the values and
 * format to control how values are rendered in failure messages.
 */
public class TestComparator {

    /**
     * Generates objects with newObject and, for each new object, checks
     * symmetry against the previous object and transitivity over all six
     * orderings of the three most recent objects.
     *
     * @param comp comparator under test
     * @param count number of objects to generate after the first two
     * @throws RuntimeException (via log) on the first failure found
     */
    public void test(Comparator comp, int count) {
        Object c = null;
        Object b = newObject(c);
        Object a = newObject(b);
        int compab = comp.compare(a, b);
        while (--count >= 0) {
            // rotate old values
            c = b;
            b = a;
            int compbc = compab;

            // allocate new and get comparisons
            a = newObject(b);
            compab = comp.compare(a, b);
            int compba = comp.compare(b, a);
            int compac = comp.compare(a, c);

            // check symmetry
            if (compab != -compba) {
                log("Symmetry Failure", new Object[] {a, b});
            }

            // check transitivity: each call passes (x, y, z, x?y, y?z, x?z)
            check(a, b, c, compab, compbc, compac);
            // first comparison must be a?c; passing a?b here made the check a no-op
            check(a, c, b, compac, -compbc, compab);
            check(b, a, c, -compab, compac, compbc);
            check(b, c, a, compbc, -compac, -compab);
            check(c, a, b, -compac, compab, -compbc);
            check(c, b, a, -compbc, -compab, -compac);
        }
    }

    /**
     * Reports a transitivity failure when a <= b and b <= c but not a <= c,
     * judged from the three supplied comparison results.
     */
    private void check(Object a, Object b, Object c,
            int compab, int compbc, int compac) {
        if (compab <= 0 && compbc <= 0 && !(compac <= 0)) {
            log("Transitivity Failure", new Object[] {a, b, c});
        }
    }

    /**
     * Supplies the next object to compare. The default returns the empty string.
     *
     * @param c the most recently generated object, or null for the first call
     */
    public Object newObject(Object c) {
        return "";
    }

    /** Returns the text used for an object in failure messages; default is toString(). */
    public String format(Object c) {
        return c.toString();
    }

    /**
     * Reports a failure by throwing a RuntimeException whose message is the
     * title followed by the formatted arguments in brackets.
     */
    public void log(String title, Object[] arguments) {
        StringBuffer result = new StringBuffer(title).append(": [");
        for (int i = 0; i < arguments.length; ++i) {
            if (i != 0) result.append(", ");
            result.append(format(arguments[i]));
        }
        result.append("]");
        throw new RuntimeException(result.toString());
    }
}
|
331
icu4j/src/com/ibm/icu/dev/test/util/BNF.java
Normal file
331
icu4j/src/com/ibm/icu/dev/test/util/BNF.java
Normal file
|
@ -0,0 +1,331 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/BNF.java,v $
|
||||
* $Date: 2003/11/21 01:03:38 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.text.ParsePosition;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.ibm.icu.text.SymbolTable;
|
||||
import com.ibm.icu.text.UnicodeMatcher;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import java.util.Random;
|
||||
|
||||
public class BNF {
|
||||
private Map map = new HashMap();
|
||||
private Set variables = new HashSet();
|
||||
private Pick pick = null;
|
||||
private Pick.Target target = null;
|
||||
private Tokenizer t;
|
||||
private Quoter quoter;
|
||||
private Random random;
|
||||
|
||||
public String next() {
|
||||
return target.next();
|
||||
}
|
||||
|
||||
public String getInternal() {
|
||||
return pick.getInternal(0, new HashSet());
|
||||
}
|
||||
|
||||
/*
|
||||
+ "weight = integer '%';"
|
||||
+ "range = '{' integer (',' integer?)? '}' weight*;"
|
||||
+ "quote = '@';"
|
||||
+ "star = '*' weight*;"
|
||||
+ "plus = '+' weight*;"
|
||||
+ "maybe = '?' weight?;"
|
||||
+ "quantifier = range | star | maybe | plus;"
|
||||
+ "core = string | unicodeSet | '(' alternation ')';"
|
||||
+ "sequence = (core quantifier*)+;"
|
||||
+ "alternation = sequence (weight? ('|' sequence weight?)+)?;"
|
||||
+ "rule = string '=' alternation;";
|
||||
|
||||
|
||||
* Match 0 or more times
|
||||
+ Match 1 or more times
|
||||
? Match 1 or 0 times
|
||||
{n} Match exactly n times
|
||||
{n,} Match at least n times
|
||||
{n,m} Match at least n but not more than m times
|
||||
|
||||
|
||||
|
||||
*/
|
||||
|
||||
public BNF(Random random, Quoter quoter) {
|
||||
this.random = random;
|
||||
this.quoter = quoter;
|
||||
t = new Tokenizer();
|
||||
}
|
||||
|
||||
public BNF addRules(String rules) {
|
||||
t.setSource(rules);
|
||||
while (addRule());
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
public BNF complete() {
|
||||
// check that the rules match the variables, except for $root in rules
|
||||
Set ruleSet = map.keySet();
|
||||
// add also
|
||||
variables.add("$root");
|
||||
variables.addAll(t.getLookedUpItems());
|
||||
if (!ruleSet.equals(variables)) {
|
||||
String msg = showDiff(variables, ruleSet);
|
||||
if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg;
|
||||
String temp = showDiff(ruleSet, variables);
|
||||
if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp;
|
||||
if (msg.length() == 0) msg = temp;
|
||||
else if (temp.length() != 0) {
|
||||
msg = msg + "; " + temp;
|
||||
}
|
||||
error(msg);
|
||||
}
|
||||
|
||||
if (!ruleSet.equals(variables)) {
|
||||
String msg = showDiff(variables, ruleSet);
|
||||
if (msg.length() != 0) msg = "Missing definitions for: " + msg;
|
||||
String temp = showDiff(ruleSet, variables);
|
||||
if (temp.length() != 0) temp = "Defined but not used: " + temp;
|
||||
if (msg.length() == 0) msg = temp;
|
||||
else if (temp.length() != 0) {
|
||||
msg = msg + "; " + temp;
|
||||
}
|
||||
error(msg);
|
||||
}
|
||||
|
||||
// replace variables by definitions
|
||||
Iterator it = ruleSet.iterator();
|
||||
while (it.hasNext()) {
|
||||
String key = (String) it.next();
|
||||
Pick expression = (Pick) map.get(key);
|
||||
Iterator it2 = ruleSet.iterator();
|
||||
if (false && key.equals("$crlf")) {
|
||||
System.out.println("debug") ;
|
||||
}
|
||||
while (it2.hasNext()) {
|
||||
Object key2 = it2.next();
|
||||
if (key.equals(key2)) continue;
|
||||
Pick expression2 = (Pick) map.get(key2);
|
||||
expression2.replace(key, expression);
|
||||
}
|
||||
}
|
||||
pick = (Pick) map.get("$root");
|
||||
target = Pick.Target.make(pick, random, quoter);
|
||||
// TODO remove temp collections
|
||||
return this;
|
||||
}
|
||||
|
||||
String showDiff(Set a, Set b) {
|
||||
Set temp = new HashSet();
|
||||
temp.addAll(a);
|
||||
temp.removeAll(b);
|
||||
if (temp.size() == 0) return "";
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
Iterator it = temp.iterator();
|
||||
while (it.hasNext()) {
|
||||
if (buffer.length() != 0) buffer.append(", ");
|
||||
buffer.append(it.next().toString());
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
void error(String msg) {
|
||||
throw new IllegalArgumentException(msg
|
||||
+ "\r\n" + t.toString());
|
||||
}
|
||||
|
||||
|
||||
|
||||
private boolean addRule() {
|
||||
int type = t.next();
|
||||
if (type == t.DONE) return false;
|
||||
if (type != t.STRING) error("missing weight");
|
||||
String s = t.getString();
|
||||
if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable");
|
||||
if (t.next() != '=') error("missing =");
|
||||
int startBody = t.index;
|
||||
Pick rule = getAlternation();
|
||||
if (rule == null) error("missing expression");
|
||||
t.addSymbol(s, t.getSource(), startBody, t.index);
|
||||
if (t.next() != ';') error("missing ;");
|
||||
return addPick(s, rule);
|
||||
}
|
||||
|
||||
protected boolean addPick(String s, Pick rule) {
|
||||
Object temp = map.get(s);
|
||||
if (temp != null) error("duplicate variable");
|
||||
if (rule.name == null) rule.name(s);
|
||||
map.put(s, rule);
|
||||
return true;
|
||||
}
|
||||
|
||||
public BNF addSet(String variable, UnicodeSet set) {
|
||||
if (set != null) {
|
||||
String body = set.toString();
|
||||
t.addSymbol(variable, body, 0, body.length());
|
||||
addPick(variable, Pick.codePoint(set));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
int maxRepeat = 99;
|
||||
|
||||
Pick qualify(Pick item) {
|
||||
int[] weights;
|
||||
int type = t.next();
|
||||
switch(type) {
|
||||
case '@':
|
||||
return new Pick.Quote(item);
|
||||
case '~':
|
||||
return new Pick.Morph(item);
|
||||
case '?':
|
||||
int weight = getWeight();
|
||||
if (weight == NO_WEIGHT) weight = 50;
|
||||
weights = new int[] {100-weight, weight};
|
||||
return Pick.repeat(0, 1, weights, item);
|
||||
case '*':
|
||||
weights = getWeights();
|
||||
return Pick.repeat(1, maxRepeat, weights, item);
|
||||
case '+':
|
||||
weights = getWeights();
|
||||
return Pick.repeat(1, maxRepeat, weights, item);
|
||||
case '{':
|
||||
if (t.next() != t.NUMBER) error("missing number");
|
||||
int start = (int) t.getNumber();
|
||||
int end = start;
|
||||
type = t.next();
|
||||
if (type == ',') {
|
||||
end = maxRepeat;
|
||||
type = t.next();
|
||||
if (type == t.NUMBER) {
|
||||
end = (int)t.getNumber();
|
||||
type = t.next();
|
||||
}
|
||||
}
|
||||
if (type != '}') error("missing }");
|
||||
weights = getWeights();
|
||||
return Pick.repeat(start, end, weights, item);
|
||||
}
|
||||
t.backup();
|
||||
return item;
|
||||
}
|
||||
|
||||
Pick getCore() {
|
||||
int token = t.next();
|
||||
if (token == t.STRING) {
|
||||
String s = t.getString();
|
||||
if (s.charAt(0) == '$') variables.add(s);
|
||||
return Pick.string(s);
|
||||
}
|
||||
if (token == t.UNICODESET) {
|
||||
return Pick.codePoint(t.getUnicodeSet());
|
||||
}
|
||||
if (token != '(') {
|
||||
t.backup();
|
||||
return null;
|
||||
}
|
||||
Pick temp = getAlternation();
|
||||
token = t.next();
|
||||
if (token != ')') error("missing )");
|
||||
return temp;
|
||||
}
|
||||
|
||||
Pick getSequence() {
|
||||
Pick.Sequence result = null;
|
||||
Pick last = null;
|
||||
while (true) {
|
||||
Pick item = getCore();
|
||||
if (item == null) {
|
||||
if (result != null) return result;
|
||||
if (last != null) return last;
|
||||
error("missing item");
|
||||
}
|
||||
// qualify it as many times as possible
|
||||
Pick oldItem;
|
||||
do {
|
||||
oldItem = item;
|
||||
item = qualify(item);
|
||||
} while (item != oldItem);
|
||||
// add it in
|
||||
if (last == null) {
|
||||
last = item;
|
||||
} else {
|
||||
if (result == null) result = Pick.makeSequence().and2(last);
|
||||
result = result.and2(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for simplicity, we just use recursive descent
|
||||
Pick getAlternation() {
|
||||
Pick.Alternation result = null;
|
||||
Pick last = null;
|
||||
int lastWeight = NO_WEIGHT;
|
||||
while (true) {
|
||||
Pick temp = getSequence();
|
||||
if (temp == null) error("empty alternation");
|
||||
int weight = getWeight();
|
||||
if (weight == NO_WEIGHT) weight = 1;
|
||||
if (last == null) {
|
||||
last = temp;
|
||||
lastWeight = weight;
|
||||
} else {
|
||||
if (result == null) result = Pick.makeAlternation().or2(lastWeight, last);
|
||||
result = result.or2(weight, temp);
|
||||
}
|
||||
int token = t.next();
|
||||
if (token != '|') {
|
||||
t.backup();
|
||||
if (result != null) return result;
|
||||
if (last != null) return last;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final int NO_WEIGHT = Integer.MIN_VALUE;
|
||||
|
||||
int getWeight() {
|
||||
int weight;
|
||||
int token = t.next();
|
||||
if (token != t.NUMBER) {
|
||||
t.backup();
|
||||
return NO_WEIGHT;
|
||||
}
|
||||
weight = (int)t.getNumber();
|
||||
token = t.next();
|
||||
if (token != '%') error("missing %");
|
||||
return weight;
|
||||
}
|
||||
|
||||
int[] getWeights() {
|
||||
ArrayList list = new ArrayList();
|
||||
while (true) {
|
||||
int weight = getWeight();
|
||||
if (weight == NO_WEIGHT) break;
|
||||
list.add(new Integer(weight));
|
||||
}
|
||||
if (list.size() == 0) return null;
|
||||
int[] result = new int[list.size()];
|
||||
for (int i = 0; i < list.size(); ++i) {
|
||||
result[i] = ((Integer)list.get(i)).intValue();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
621
icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java
Normal file
621
icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java
Normal file
|
@ -0,0 +1,621 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java,v $
|
||||
* $Date: 2003/11/21 01:03:38 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import com.ibm.icu.text.*;
|
||||
import com.ibm.icu.lang.*;
|
||||
import com.ibm.icu.util.*;
|
||||
import com.ibm.icu.impl.*;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.util.regex.*;
|
||||
import java.text.MessageFormat;
|
||||
|
||||
public class BagFormatter {
|
||||
|
||||
boolean abbreviated = false;
|
||||
|
||||
/**
|
||||
* Compare two UnicodeSets, and show the differences
|
||||
* @param name1 name of first set to be compared
|
||||
* @param set1 first set
|
||||
* @param name2 name of second set to be compared
|
||||
* @param set2 second set
|
||||
* @return formatted string
|
||||
*/
|
||||
public String showSetDifferences(
|
||||
String name1,
|
||||
UnicodeSet set1,
|
||||
String name2,
|
||||
UnicodeSet set2) {
|
||||
|
||||
StringWriter sw = new StringWriter();
|
||||
showSetDifferences(new PrintWriter(sw), name1, set1, name2, set2);
|
||||
sw.flush();
|
||||
return sw.getBuffer().toString();
|
||||
}
|
||||
|
||||
public String showSetDifferences(
|
||||
String name1,
|
||||
Collection set1,
|
||||
String name2,
|
||||
Collection set2) {
|
||||
|
||||
StringWriter sw = new StringWriter();
|
||||
showSetDifferences(new PrintWriter(sw), name1, set1, name2, set2);
|
||||
sw.flush();
|
||||
return sw.getBuffer().toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare two UnicodeSets, and show the differences
|
||||
* @param name1 name of first set to be compared
|
||||
* @param set1 first set
|
||||
* @param name2 name of second set to be compared
|
||||
* @param set2 second set
|
||||
* @return formatted string
|
||||
*/
|
||||
public void showSetDifferences(
|
||||
PrintWriter pw,
|
||||
String name1,
|
||||
UnicodeSet set1,
|
||||
String name2,
|
||||
UnicodeSet set2) {
|
||||
|
||||
String[] names = { name1, name2 };
|
||||
|
||||
UnicodeSet temp = new UnicodeSet(set1).removeAll(set2);
|
||||
pw.println();
|
||||
showSetNames(pw, inOut.format(names), temp);
|
||||
|
||||
temp = new UnicodeSet(set2).removeAll(set1);
|
||||
pw.println();
|
||||
showSetNames(pw, outIn.format(names), temp);
|
||||
|
||||
temp = new UnicodeSet(set2).retainAll(set1);
|
||||
pw.println();
|
||||
showSetNames(pw, inIn.format(names), temp);
|
||||
}
|
||||
|
||||
public void showSetDifferences(
|
||||
PrintWriter pw,
|
||||
String name1,
|
||||
Collection set1,
|
||||
String name2,
|
||||
Collection set2) {
|
||||
|
||||
String[] names = { name1, name2 };
|
||||
// damn'd collection doesn't have a clone, so
|
||||
// we go with Set, even though that
|
||||
// may not preserve order and duplicates
|
||||
Collection temp = new HashSet(set1);
|
||||
temp.removeAll(set2);
|
||||
pw.println();
|
||||
showSetNames(pw, inOut.format(names), temp);
|
||||
|
||||
temp.clear();
|
||||
temp.addAll(set2);
|
||||
temp.removeAll(set1);
|
||||
pw.println();
|
||||
showSetNames(pw, outIn.format(names), temp);
|
||||
|
||||
temp.clear();
|
||||
temp.addAll(set1);
|
||||
temp.retainAll(set2);
|
||||
pw.println();
|
||||
showSetNames(pw, inIn.format(names), temp);
|
||||
}
|
||||
|
||||
public String showSetNames(String title, Object set1) {
|
||||
StringWriter sw = new StringWriter();
|
||||
PrintWriter pw = new PrintWriter(sw);
|
||||
showSetNames(pw, title, set1);
|
||||
pw.flush();
|
||||
String result = sw.getBuffer().toString();
|
||||
pw.close();
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of items in the collection, with each separated by the separator.
|
||||
* Each item must not be null; its toString() is called for a printable representation
|
||||
* @param c source collection
|
||||
* @param separator to be placed between any strings
|
||||
* @return
|
||||
* @internal
|
||||
*/
|
||||
public void showSetNames(PrintWriter output, String title, Object c) {
|
||||
output.println(title);
|
||||
mainVisitor.output = output;
|
||||
mainVisitor.doAt(c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of items in the collection, with each separated by the separator.
|
||||
* Each item must not be null; its toString() is called for a printable representation
|
||||
* @param c source collection
|
||||
* @param separator to be placed between any strings
|
||||
* @return
|
||||
* @internal
|
||||
*/
|
||||
public void showSetNames(String filename, String title, Object c) throws IOException {
|
||||
PrintWriter pw = new PrintWriter(
|
||||
new OutputStreamWriter(
|
||||
new FileOutputStream(filename),"utf-8"));
|
||||
}
|
||||
|
||||
public String getAbbreviatedName(
|
||||
String source,
|
||||
String pattern,
|
||||
String substitute) {
|
||||
|
||||
int matchEnd = NameIterator.findMatchingEnd(source, pattern);
|
||||
int sdiv = source.length() - matchEnd;
|
||||
int pdiv = pattern.length() - matchEnd;
|
||||
StringBuffer result = new StringBuffer();
|
||||
addMatching(
|
||||
source.substring(0, sdiv),
|
||||
pattern.substring(0, pdiv),
|
||||
substitute,
|
||||
result);
|
||||
addMatching(
|
||||
source.substring(sdiv),
|
||||
pattern.substring(pdiv),
|
||||
substitute,
|
||||
result);
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
abstract public static class Relation {
|
||||
abstract public String getRelation(String a, String b);
|
||||
}
|
||||
|
||||
static class NullRelation extends Relation {
|
||||
public String getRelation(String a, String b) { return ""; }
|
||||
}
|
||||
|
||||
private Relation r = new NullRelation();
|
||||
|
||||
public BagFormatter setRelation(Relation r) {
|
||||
this.r = r;
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
public Relation getRelation() {
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
r.getRelation(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)
|
||||
*/
|
||||
/*
|
||||
static final UnicodeSet NO_NAME =
|
||||
new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");
|
||||
static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();
|
||||
static final UnicodeSet NAME_CHARACTERS =
|
||||
new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");
|
||||
|
||||
public UnicodeSet getSetForName(String namePattern) {
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
Matcher m = Pattern.compile(namePattern).matcher("");
|
||||
// check for no-name items, and add in bulk
|
||||
m.reset("<no name>");
|
||||
if (m.matches()) {
|
||||
result.addAll(NO_NAME);
|
||||
}
|
||||
// check all others
|
||||
UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);
|
||||
while (usi.next()) {
|
||||
String name = getName(usi.codepoint);
|
||||
if (name == null)
|
||||
continue;
|
||||
m.reset(name);
|
||||
if (m.matches()) {
|
||||
result.add(usi.codepoint);
|
||||
}
|
||||
}
|
||||
// Note: if Regex had some API so that if we could tell that
|
||||
// an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"
|
||||
// then we could optimize by skipping whole swathes of characters
|
||||
return result;
|
||||
}
|
||||
*/
|
||||
|
||||
public void setMergeRanges(boolean in) {
|
||||
mergeRanges = in;
|
||||
}
|
||||
public void setShowSetAlso(boolean b) {
|
||||
showSetAlso = b;
|
||||
}
|
||||
public String getName(int codePoint) {
|
||||
String hcp = "U+" + Utility.hex(codePoint, 4) + " ";
|
||||
String result = nameProp.getPropertyValue(codePoint);
|
||||
if (result != null)
|
||||
return hcp + result;
|
||||
String prop = catProp.getPropertyValue(codePoint);
|
||||
if (prop.equals("Control")) {
|
||||
result = nameProp.getPropertyValue(codePoint);
|
||||
if (result != null)
|
||||
return hcp + "<" + result + ">";
|
||||
}
|
||||
return hcp + "<reserved>";
|
||||
}
|
||||
|
||||
UnicodePropertySource source;
|
||||
UnicodePropertySource labelSource;
|
||||
|
||||
UnicodePropertySource nameProp;
|
||||
UnicodePropertySource name1Prop;
|
||||
UnicodePropertySource catProp;
|
||||
UnicodePropertySource shortCatProp;
|
||||
|
||||
public void setUnicodePropertySource(UnicodePropertySource source) {
|
||||
this.source = source;
|
||||
nameProp = ((UnicodePropertySource)source.clone())
|
||||
.setPropertyAlias("Name");
|
||||
|
||||
name1Prop = ((UnicodePropertySource)source.clone())
|
||||
.setPropertyAlias("Unicode_1_Name");
|
||||
|
||||
catProp = ((UnicodePropertySource)source.clone())
|
||||
.setPropertyAlias("General_Category");
|
||||
|
||||
shortCatProp = ((UnicodePropertySource)source.clone())
|
||||
.setPropertyAlias("General_Category")
|
||||
.setNameChoice(UProperty.NameChoice.SHORT);
|
||||
}
|
||||
|
||||
{
|
||||
setUnicodePropertySource(new UnicodePropertySource.ICU());
|
||||
Map labelMap = new HashMap();
|
||||
labelMap.put("Lo","L&");
|
||||
labelMap.put("Lu","L&");
|
||||
labelMap.put("Lt","L&");
|
||||
setLabelSource(new UnicodePropertySource.ICU()
|
||||
.setPropertyAlias("General_Category")
|
||||
.setNameChoice(UProperty.NameChoice.SHORT)
|
||||
.setFilter(
|
||||
new UnicodePropertySource.MapFilter().setMap(labelMap)));
|
||||
}
|
||||
|
||||
// ===== PRIVATES =====
|
||||
|
||||
// In this part of the file, referenced only by the commented-out getLabels() below.
private Visitor.Join labelVisitor = new Visitor.Join();
|
||||
|
||||
// true: MyVisitor prints one line per run of equal labels; false: one line per code point.
private boolean mergeRanges = true;
|
||||
// true: MyVisitor also prints the literal character(s) for code points >= 0x20.
private boolean literalCharacter = false;
|
||||
// true: MyVisitor.doBefore prints a "# " line with the UnicodeSet itself.
private boolean showSetAlso = false;
|
||||
|
||||
// Splits a code point range into runs with the same label; used by MyVisitor.doAt.
private RangeFinder rf = new RangeFinder();
|
||||
|
||||
// Heading templates taking two arguments ({0} and {1}); presumably used by set-comparison output defined earlier in the class.
private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:");
|
||||
private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:");
|
||||
private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:");
|
||||
|
||||
// Shared MyVisitor instance; its callers are not visible in this part of the file.
private MyVisitor mainVisitor = new MyVisitor();
|
||||
|
||||
/*
|
||||
private String getLabels(int start, int end) {
|
||||
Set names = new TreeSet();
|
||||
for (int cp = start; cp <= end; ++cp) {
|
||||
names.add(getLabel(cp));
|
||||
}
|
||||
return labelVisitor.join(names);
|
||||
}
|
||||
*/
|
||||
|
||||
private void addMatching(
|
||||
String source,
|
||||
String pattern,
|
||||
String substitute,
|
||||
StringBuffer result) {
|
||||
NameIterator n1 = new NameIterator(source);
|
||||
NameIterator n2 = new NameIterator(pattern);
|
||||
boolean first = true;
|
||||
while (true) {
|
||||
String s1 = n1.next();
|
||||
if (s1 == null)
|
||||
break;
|
||||
String s2 = n2.next();
|
||||
if (!first)
|
||||
result.append(" ");
|
||||
first = false;
|
||||
if (s1.equals(s2))
|
||||
result.append(substitute);
|
||||
else
|
||||
result.append(s1);
|
||||
}
|
||||
}
|
||||
|
||||
private Tabber singleTabber =
|
||||
new Tabber.MonoTabber(
|
||||
new int[] {
|
||||
0,
|
||||
Tabber.LEFT,
|
||||
6,
|
||||
Tabber.LEFT,
|
||||
10,
|
||||
Tabber.LEFT,
|
||||
14,
|
||||
Tabber.LEFT });
|
||||
private Tabber rangeTabber =
|
||||
new Tabber.MonoTabber(
|
||||
new int[] {
|
||||
0,
|
||||
Tabber.LEFT,
|
||||
14,
|
||||
Tabber.LEFT,
|
||||
18,
|
||||
Tabber.LEFT,
|
||||
27,
|
||||
Tabber.LEFT,
|
||||
34,
|
||||
Tabber.LEFT });
|
||||
|
||||
// Integer formatter (English locale) used by MyVisitor for the "# Total:" line and the "[count]" column.
private static NumberFormat nf =
|
||||
NumberFormat.getIntegerInstance(Locale.ENGLISH);
|
||||
|
||||
private class MyVisitor extends Visitor {
|
||||
PrintWriter output;
|
||||
|
||||
public String format(Object o) {
|
||||
StringWriter sw = new StringWriter();
|
||||
PrintWriter pw = new PrintWriter(sw);
|
||||
doAt(o);
|
||||
pw.flush();
|
||||
String result = sw.getBuffer().toString();
|
||||
pw.close();
|
||||
return result;
|
||||
}
|
||||
|
||||
protected void doBefore(Object container, Object o) {
|
||||
if (showSetAlso && container instanceof UnicodeSet) {
|
||||
output.println("# " + container);
|
||||
}
|
||||
}
|
||||
|
||||
protected void doBetween(Object container, Object lastItem, Object nextItem) {
|
||||
}
|
||||
|
||||
protected void doAfter(Object container, Object o) {
|
||||
output.println("# Total: " + nf.format(count(container)));
|
||||
}
|
||||
|
||||
protected void doSimpleAt(Object o) {
|
||||
if (o instanceof Map.Entry) {
|
||||
Map.Entry oo = (Map.Entry)o;
|
||||
Object key = oo.getKey();
|
||||
Object value = oo.getValue();
|
||||
doBefore(o, key);
|
||||
doAt(key);
|
||||
output.print("->");
|
||||
doAt(value);
|
||||
doAfter(o, value);
|
||||
} else if (o instanceof Visitor.CodePointRange) {
|
||||
doAt((Visitor.CodePointRange) o);
|
||||
} else {
|
||||
output.print(o.toString());
|
||||
}
|
||||
}
|
||||
|
||||
protected void doAt(Visitor.CodePointRange usi) {
|
||||
if (!mergeRanges) {
|
||||
for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
|
||||
String label = labelSource.getPropertyValue(cp);
|
||||
if (label.length() != 0)
|
||||
label += " ";
|
||||
output.println(
|
||||
singleTabber.process(
|
||||
Utility.hex(cp, 4)
|
||||
+ " \t# "
|
||||
+ label
|
||||
+ (literalCharacter
|
||||
&& (cp >= 0x20)
|
||||
? " \t(" + UTF16.valueOf(cp) + ") "
|
||||
: "")
|
||||
+ " \t"
|
||||
+ getName(cp)));
|
||||
}
|
||||
} else {
|
||||
rf.reset(usi.codepoint, usi.codepointEnd + 1);
|
||||
String label;
|
||||
while ((label = rf.next()) != null) {
|
||||
/*
|
||||
String label = (usi.codepoint != usi.codepointEnd)
|
||||
? label = getLabels(usi.codepoint, usi.codepointEnd)
|
||||
: getLabel(usi.codepoint);
|
||||
*/
|
||||
int start = rf.start;
|
||||
int end = rf.limit - 1;
|
||||
if (label.length() != 0)
|
||||
label += " ";
|
||||
output.println(
|
||||
rangeTabber.process(
|
||||
Utility.hex(start, 4)
|
||||
+ ((start != end)
|
||||
? (".." + Utility.hex(end, 4))
|
||||
: "")
|
||||
+ " \t# "
|
||||
+ label
|
||||
+ " \t["
|
||||
+ nf.format(end - start + 1)
|
||||
+ "]"
|
||||
+ (literalCharacter
|
||||
&& (start >= 0x20)
|
||||
? " \t("
|
||||
+ UTF16.valueOf(start)
|
||||
+ ((start != end)
|
||||
? (".." + UTF16.valueOf(end))
|
||||
: "")
|
||||
+ ") "
|
||||
: "")
|
||||
+ " \t"
|
||||
+ getName(start)
|
||||
+ ((start != end)
|
||||
? (".."
|
||||
+ (abbreviated
|
||||
? getAbbreviatedName(
|
||||
getName(end),
|
||||
getName(start),
|
||||
"~")
|
||||
: getName(end)))
|
||||
: "")));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate through a string, breaking at words.
|
||||
* @author Davis
|
||||
*/
|
||||
private static class NameIterator {
|
||||
String source;
|
||||
int position;
|
||||
int start;
|
||||
int limit;
|
||||
|
||||
NameIterator(String source) {
|
||||
this.source = source;
|
||||
this.start = 0;
|
||||
this.limit = source.length();
|
||||
}
|
||||
/**
|
||||
* Find next word, including trailing spaces
|
||||
* @return
|
||||
*/
|
||||
String next() {
|
||||
if (position >= limit)
|
||||
return null;
|
||||
int pos = source.indexOf(' ', position);
|
||||
if (pos < 0 || pos >= limit)
|
||||
pos = limit;
|
||||
String result = source.substring(position, pos);
|
||||
position = pos + 1;
|
||||
return result;
|
||||
}
|
||||
|
||||
static int findMatchingEnd(String s1, String s2) {
|
||||
int i = s1.length();
|
||||
int j = s2.length();
|
||||
try {
|
||||
while (true) {
|
||||
--i; // decrement both before calling function!
|
||||
--j;
|
||||
if (s1.charAt(i) != s2.charAt(j))
|
||||
break;
|
||||
}
|
||||
} catch (Exception e) {} // run off start
|
||||
|
||||
++i; // counteract increment
|
||||
i = s1.indexOf(' ', i); // move forward to space
|
||||
if (i < 0)
|
||||
return 0;
|
||||
return s1.length() - i;
|
||||
}
|
||||
}
|
||||
|
||||
private class RangeFinder {
|
||||
int start, limit;
|
||||
private int veryLimit;
|
||||
void reset(int start, int end) {
|
||||
this.limit = start;
|
||||
this.veryLimit = end;
|
||||
}
|
||||
String next() {
|
||||
if (limit >= veryLimit)
|
||||
return null;
|
||||
start = limit;
|
||||
String label = labelSource.getPropertyValue(limit++);
|
||||
for (; limit < veryLimit; ++limit) {
|
||||
String s = labelSource.getPropertyValue(limit);
|
||||
if (!s.equals(label))
|
||||
break;
|
||||
}
|
||||
return label;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isAbbreviated() {
|
||||
return abbreviated;
|
||||
}
|
||||
|
||||
public void setAbbreviated(boolean b) {
|
||||
abbreviated = b;
|
||||
}
|
||||
|
||||
public UnicodePropertySource getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public UnicodePropertySource getLabelSource() {
|
||||
return labelSource;
|
||||
}
|
||||
|
||||
public void setLabelSource(UnicodePropertySource source) {
|
||||
labelSource = source;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated
|
||||
*/
|
||||
public static void addAll(UnicodeSet source, Collection target) {
|
||||
source.addAllTo(target);
|
||||
}
|
||||
|
||||
// UTILITIES
|
||||
|
||||
// Transliterator created from a filtered "hex" rule; the filter is the complement of
// U+0021..U+007E and U+00A0..U+00FF, so presumably only characters outside those ranges are converted.
public static final Transliterator hex = Transliterator.getInstance(
|
||||
"[^\\u0021-\\u007E\\u00A0-\\u00FF] hex");
|
||||
|
||||
/**
 * Minimal line-oriented message sink, used by the file-opening
 * utilities to report which file they touch.
 */
public interface Shower {
    /**
     * Receives one line of text.
     *
     * @param arg the text to show
     */
    void println(String arg);
}
|
||||
|
||||
public static Shower CONSOLE = new Shower() {
|
||||
public void println(String arg) {
|
||||
System.out.println(arg);
|
||||
}
|
||||
};
|
||||
|
||||
public static BufferedReader openUTF8Reader(String dir, String filename, Shower shower) throws IOException {
|
||||
File file = new File(dir + filename);
|
||||
if (shower != null) {
|
||||
shower.println("Creating File: "
|
||||
+ file.getCanonicalPath());
|
||||
}
|
||||
return new BufferedReader(
|
||||
new InputStreamReader(
|
||||
new FileInputStream(file),
|
||||
"UTF-8"),
|
||||
4*1024);
|
||||
}
|
||||
|
||||
public static PrintWriter openUTF8Writer(String dir, String filename, Shower shower) throws IOException {
|
||||
File file = new File(dir + filename);
|
||||
if (shower != null) {
|
||||
shower.println("Creating File: "
|
||||
+ file.getCanonicalPath());
|
||||
}
|
||||
//File parent = new File(file.getParent());
|
||||
//parent.mkdirs();
|
||||
return new PrintWriter(
|
||||
new BufferedWriter(
|
||||
new OutputStreamWriter(
|
||||
new FileOutputStream(file),
|
||||
"UTF-8"),
|
||||
4*1024));
|
||||
}
|
||||
|
||||
}
|
804
icu4j/src/com/ibm/icu/dev/test/util/Pick.java
Normal file
804
icu4j/src/com/ibm/icu/dev/test/util/Pick.java
Normal file
|
@ -0,0 +1,804 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Pick.java,v $
|
||||
* $Date: 2003/11/21 01:03:38 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Random;
|
||||
import java.util.Arrays;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
import java.text.ParsePosition;
|
||||
import java.text.ParseException;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
abstract public class Pick {
|
||||
// When true, Morph.addTo prints its old, new and merged values to System.out.
private static boolean DEBUG = false;
|
||||
|
||||
// for using to get strings
|
||||
|
||||
static class Target {
|
||||
private Pick pick;
|
||||
private Random random;
|
||||
private Quoter quoter;
|
||||
|
||||
public static Target make(Pick pick, Random random, Quoter quoter) {
|
||||
Target result = new Target();
|
||||
result.pick = pick;
|
||||
result.random = random;
|
||||
result.quoter = quoter;
|
||||
return result;
|
||||
}
|
||||
public String next() {
|
||||
quoter.clear();
|
||||
pick.addTo(this);
|
||||
return get();
|
||||
}
|
||||
public String get() {
|
||||
return quoter.toString();
|
||||
}
|
||||
private void copyState(Target other) {
|
||||
random = other.random;
|
||||
}
|
||||
private void clear() {
|
||||
quoter.clear();
|
||||
}
|
||||
private int length() {
|
||||
return quoter.length();
|
||||
}
|
||||
private Target append(int codepoint) {
|
||||
quoter.append(codepoint);
|
||||
return this;
|
||||
}
|
||||
private Target append(String s) {
|
||||
quoter.append(s);
|
||||
return this;
|
||||
}
|
||||
// must return value between 0 (inc) and 1 (exc)
|
||||
private double nextDouble() {
|
||||
return random.nextDouble();
|
||||
}
|
||||
}
|
||||
|
||||
// for Building
|
||||
|
||||
public Pick replace(String toReplace, Pick replacement) {
|
||||
Replacer visitor = new Replacer(toReplace, replacement);
|
||||
return visit(visitor);
|
||||
}
|
||||
|
||||
public Pick name(String name) {
|
||||
this.name = name;
|
||||
return this;
|
||||
}
|
||||
|
||||
static public Pick.Sequence makeSequence() {
|
||||
return new Sequence();
|
||||
}
|
||||
static public Pick.Alternation makeAlternation() {
|
||||
return new Alternation();
|
||||
}
|
||||
/*
|
||||
static public Pick.Sequence and(Object item) {
|
||||
return new Sequence().and2(item);
|
||||
}
|
||||
static public Pick.Sequence and(Object[] items) {
|
||||
return new Sequence().and2(items);
|
||||
}
|
||||
static public Pick.Alternation or(int itemWeight, Object item) {
|
||||
return new Alternation().or2(itemWeight, item);
|
||||
}
|
||||
static public Pick.Alternation or(Object[] items) {
|
||||
return new Alternation().or2(1, items);
|
||||
}
|
||||
static public Pick.Alternation or(int itemWeight, Object[] items) {
|
||||
return new Alternation().or2(itemWeight, items);
|
||||
}
|
||||
static public Pick.Alternation or(int[] itemWeights, Object[] items) {
|
||||
return new Alternation().or2(itemWeights, items);
|
||||
}
|
||||
|
||||
static public Pick maybe(int percent, Object item) {
|
||||
return new Repeat(0, 1, new int[]{100-percent, percent}, item);
|
||||
//return Pick.or(1.0-percent, NOTHING).or2(percent, item);
|
||||
}
|
||||
static public Pick repeat(int minCount, int maxCount, int itemWeights, Object item) {
|
||||
return new Repeat(minCount, maxCount, itemWeights, item);
|
||||
}
|
||||
|
||||
static public Pick codePoint(String source) {
|
||||
return new CodePoint(new UnicodeSet(source));
|
||||
}
|
||||
*/
|
||||
|
||||
static public Pick repeat(int minCount, int maxCount, int[] itemWeights, Pick item) {
|
||||
return new Repeat(minCount, maxCount, itemWeights, item);
|
||||
}
|
||||
|
||||
static public Pick codePoint(UnicodeSet source) {
|
||||
return new CodePoint(source);
|
||||
}
|
||||
static public Pick string(String source) {
|
||||
return new Literal(source);
|
||||
}
|
||||
/*
|
||||
static public Pick unquoted(String source) {
|
||||
return new Literal(source);
|
||||
}
|
||||
static public Pick string(int minLength, int maxLength, Pick item) {
|
||||
return new Morph(item, minLength, maxLength);
|
||||
}
|
||||
*/
|
||||
|
||||
// Returns a textual description of this pick's structure. The composite implementations in this file
// start their output with indent(depth) and pass alreadySeen to checkName to track names already printed.
public abstract String getInternal(int depth, Set alreadySeen);
|
||||
// Internals
|
||||
|
||||
// Optional name, set via name(String); Literal also stores its text here.
protected String name;
|
||||
|
||||
// Appends one generated value of this pick to the target.
protected abstract void addTo(Target target);
|
||||
// Tries to match input starting at p.index; the CodePoint, Literal and Sequence implementations
// advance p.index and return true on success.
protected abstract boolean match(String input, Position p);
|
||||
|
||||
public static class Sequence extends ListPick {
|
||||
public Sequence and2 (Pick item) {
|
||||
addInternal(new Pick[] {item}); // we don't care about perf
|
||||
return this; // for chaining
|
||||
}
|
||||
public Sequence and2 (Pick[] items) {
|
||||
addInternal(items);
|
||||
return this; // for chaining
|
||||
}
|
||||
protected void addTo(Target target) {
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
items[i].addTo(target);
|
||||
}
|
||||
}
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
result = indent(depth) + result + "SEQ(";
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
if (i != 0) result += ", ";
|
||||
result += items[i].getInternal(depth+1, alreadySeen);
|
||||
}
|
||||
result += ")";
|
||||
return result;
|
||||
}
|
||||
// keep private
|
||||
private Sequence() {}
|
||||
protected boolean match(String input, Position p) {
|
||||
int originalIndex = p.index;
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
if (!items[i].match(input, p)) {
|
||||
p.index = originalIndex;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
String checkName(String name, Set alreadySeen) {
|
||||
if (name == null) return "";
|
||||
if (alreadySeen.contains(name)) return name;
|
||||
alreadySeen.add(name);
|
||||
return "{" + name + "=}";
|
||||
}
|
||||
|
||||
public static class Alternation extends ListPick {
|
||||
private WeightedIndex weightedIndex = new WeightedIndex(0);
|
||||
|
||||
public Alternation or2 (Pick[] newItems) {
|
||||
return or2(1, newItems);
|
||||
}
|
||||
public Alternation or2 (int itemWeight, Pick item) {
|
||||
return or2(itemWeight, new Pick[] {item}); // we don't care about perf
|
||||
}
|
||||
public Alternation or2 (int itemWeight, Pick[] newItems) {
|
||||
int[] itemWeights = new int[newItems.length];
|
||||
Arrays.fill(itemWeights,itemWeight);
|
||||
return or2(itemWeights, newItems); // we don't care about perf
|
||||
}
|
||||
public Alternation or2 (int[] itemWeights, Pick[] newItems) {
|
||||
if (newItems.length != itemWeights.length) {
|
||||
throw new ArrayIndexOutOfBoundsException(
|
||||
"or lengths must be equal: " + newItems.length + " != " + itemWeights.length);
|
||||
}
|
||||
int lastLen = this.items.length;
|
||||
addInternal(newItems);
|
||||
weightedIndex.add(itemWeights);
|
||||
return this; // for chaining
|
||||
}
|
||||
protected void addTo(Target target) {
|
||||
items[weightedIndex.toIndex(target.nextDouble())].addTo(target);
|
||||
}
|
||||
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
result = indent(depth) + result + "OR(";
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
if (i != 0) result += ", ";
|
||||
result += items[i].getInternal(depth+1, alreadySeen) + "/" + weightedIndex.weights[i];
|
||||
}
|
||||
return result + ")";
|
||||
}
|
||||
// keep private
|
||||
private Alternation() {}
|
||||
// take first matching option
|
||||
protected boolean match(String input, Position p) {
|
||||
for (int i = 0; i < weightedIndex.weights.length; ++i) {
|
||||
if (p.isFailure(this,i)) continue;
|
||||
if (items[i].match(input, p)) return true;
|
||||
p.setFailure(this, i);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static String indent(int depth) {
|
||||
String result = "\r\n";
|
||||
for (int i = 0; i < depth; ++i) {
|
||||
result += " ";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static class Repeat extends ItemPick {
|
||||
WeightedIndex weightedIndex;
|
||||
int minCount = 0;
|
||||
|
||||
private Repeat(int minCount, int maxCount, int[] itemWeights, Pick item) {
|
||||
super(item);
|
||||
weightedIndex = new WeightedIndex(minCount).add(maxCount-minCount+1, itemWeights);
|
||||
}
|
||||
private Repeat(int minCount, int maxCount, int itemWeight, Pick item) {
|
||||
super(item);
|
||||
weightedIndex = new WeightedIndex(minCount).add(maxCount-minCount+1, itemWeight);
|
||||
}
|
||||
/*
|
||||
private Repeat(int minCount, int maxCount, Object item) {
|
||||
this.item = convert(item);
|
||||
weightedIndex = new WeightedIndex(minCount).add(maxCount-minCount+1, 1);
|
||||
}
|
||||
*/
|
||||
protected void addTo(Target target) {
|
||||
int count ;
|
||||
for (int i = weightedIndex.toIndex(target.nextDouble()); i > 0; --i) {
|
||||
item.addTo(target);
|
||||
}
|
||||
}
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
result = indent(depth) + result + "REPEAT(" + weightedIndex
|
||||
+ "; "+ item.getInternal(depth+1, alreadySeen)
|
||||
+ ")";
|
||||
return result;
|
||||
}
|
||||
|
||||
// match longest, e.g. up to just before a failure
|
||||
protected boolean match(String input, Position p) {
|
||||
int bestMatch = p.index;
|
||||
int count = 0;
|
||||
for (int i = 0; i < weightedIndex.weights.length; ++i) {
|
||||
if (p.isFailure(this,i)) break;
|
||||
if (!item.match(input, p)) {
|
||||
p.setFailure(this,i);
|
||||
break;
|
||||
}
|
||||
bestMatch = p.index;
|
||||
count++;
|
||||
}
|
||||
if (count >= minCount) {
|
||||
return true;
|
||||
}
|
||||
// TODO fix failure
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static class CodePoint extends FinalPick {
|
||||
private UnicodeSet source;
|
||||
|
||||
private CodePoint(UnicodeSet source) {
|
||||
this.source = source;
|
||||
}
|
||||
protected void addTo(Target target) {
|
||||
target.append(source.charAt(pick(target.random,0,source.size()-1)));
|
||||
}
|
||||
protected boolean match(String s, Position p) {
|
||||
int cp = UTF16.charAt(s, p.index);
|
||||
if (source.contains(cp)) {
|
||||
p.index += UTF16.getCharCount(cp);
|
||||
return true;
|
||||
}
|
||||
p.setMax("codePoint");
|
||||
return false;
|
||||
}
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
return source.toString();
|
||||
}
|
||||
}
|
||||
|
||||
static class Morph extends ItemPick {
|
||||
Morph(Pick item) {
|
||||
super(item);
|
||||
}
|
||||
|
||||
private String lastValue = null;
|
||||
private Target addBuffer = Target.make(this, null, new Quoter.RuleQuoter());
|
||||
private StringBuffer mergeBuffer = new StringBuffer();
|
||||
|
||||
private static final int COPY_NEW = 0, COPY_BOTH = 1, COPY_LAST = 3, SKIP = 4,
|
||||
LEAST_SKIP = 4;
|
||||
// give weights to the above. make sure we delete about the same as we insert
|
||||
private static final WeightedIndex choice = new WeightedIndex(0)
|
||||
.add(new int[] {10, 10, 100, 10});
|
||||
|
||||
protected void addTo(Target target) {
|
||||
// get contents into separate buffer
|
||||
addBuffer.copyState(target);
|
||||
addBuffer.clear();
|
||||
item.addTo(addBuffer);
|
||||
String newValue = addBuffer.get();
|
||||
if (DEBUG) System.out.println("Old: " + lastValue + ", New:" + newValue);
|
||||
|
||||
// if not first one, merge with old
|
||||
if (lastValue != null) {
|
||||
mergeBuffer.setLength(0);
|
||||
int lastIndex = 0;
|
||||
int newIndex = 0;
|
||||
// the new length is a random value between old and new.
|
||||
int newLenLimit = (int) pick(target.random, lastValue.length(), newValue.length());
|
||||
|
||||
while (mergeBuffer.length() < newLenLimit
|
||||
&& newIndex < newValue.length()
|
||||
&& lastIndex < lastValue.length()) {
|
||||
int c = choice.toIndex(target.nextDouble());
|
||||
if (c == COPY_NEW || c == COPY_BOTH || c == SKIP) {
|
||||
newIndex = getChar(newValue, newIndex, mergeBuffer, c < LEAST_SKIP);
|
||||
if (mergeBuffer.length() >= newLenLimit) break;
|
||||
}
|
||||
if (c == COPY_LAST || c == COPY_BOTH || c == SKIP) {
|
||||
lastIndex = getChar(lastValue, lastIndex, mergeBuffer, c < LEAST_SKIP);
|
||||
}
|
||||
}
|
||||
newValue = mergeBuffer.toString();
|
||||
}
|
||||
lastValue = newValue;
|
||||
target.append(newValue);
|
||||
if (DEBUG) System.out.println("Result: " + newValue);
|
||||
}
|
||||
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
return indent(depth) + result + "MORPH("
|
||||
+ item.getInternal(depth+1, alreadySeen)
|
||||
+ ")";
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see Pick#match(java.lang.String, Pick.Position)
|
||||
*/
|
||||
protected boolean match(String input, Position p) {
|
||||
// TODO Auto-generated method stub
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Add character if we can
|
||||
*/
|
||||
static int getChar(String newValue, int newIndex, StringBuffer mergeBuffer, boolean copy) {
|
||||
if (newIndex >= newValue.length()) return newIndex;
|
||||
int cp = UTF16.charAt(newValue,newIndex);
|
||||
if (copy) UTF16.append(mergeBuffer, cp);
|
||||
return newIndex + UTF16.getCharCount(cp);
|
||||
}
|
||||
|
||||
/*
|
||||
// quoted add
|
||||
appendQuoted(target, addBuffer.toString(), quoteBuffer);
|
||||
// fix buffers
|
||||
StringBuffer swapTemp = addBuffer;
|
||||
addBuffer = source;
|
||||
source = swapTemp;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
static class Quote extends ItemPick {
|
||||
Quote(Pick item) {
|
||||
super(item);
|
||||
}
|
||||
protected void addTo(Target target) {
|
||||
target.quoter.setQuoting(true);
|
||||
item.addTo(target);
|
||||
target.quoter.setQuoting(false);
|
||||
}
|
||||
|
||||
protected boolean match(String s, Position p) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
return indent(depth) + result + "QUOTE(" + item.getInternal(depth+1, alreadySeen)
|
||||
+ ")";
|
||||
}
|
||||
}
|
||||
|
||||
private static class Literal extends FinalPick {
|
||||
public String toString() {
|
||||
return name;
|
||||
}
|
||||
private Literal(String source) {
|
||||
this.name = source;
|
||||
}
|
||||
protected void addTo(Target target) {
|
||||
target.append(name);
|
||||
}
|
||||
protected boolean match(String input, Position p) {
|
||||
int len = name.length();
|
||||
if (input.regionMatches(p.index, name, 0, len)) {
|
||||
p.index += len;
|
||||
return true;
|
||||
}
|
||||
p.setMax("literal");
|
||||
return false;
|
||||
}
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
return "'" + name + "'";
|
||||
}
|
||||
}
|
||||
|
||||
public static class Position {
|
||||
public ArrayList failures = new ArrayList();
|
||||
public int index;
|
||||
public int maxInt;
|
||||
public String maxType;
|
||||
public void setMax(String type) {
|
||||
if (index >= maxInt) {
|
||||
maxType = type;
|
||||
}
|
||||
}
|
||||
public String toString() {
|
||||
return "index; " + index
|
||||
+ ", maxInt:" + maxInt
|
||||
+ ", maxType: " + maxType;
|
||||
}
|
||||
private static final Object BAD = new Object();
|
||||
private static final Object GOOD = new Object();
|
||||
|
||||
public boolean isFailure(Pick pick, int item) {
|
||||
ArrayList val = (ArrayList)failures.get(index);
|
||||
if (val == null) return false;
|
||||
Set set = (Set)val.get(item);
|
||||
if (set == null) return false;
|
||||
return !set.contains(pick);
|
||||
}
|
||||
public void setFailure(Pick pick, int item) {
|
||||
ArrayList val = (ArrayList)failures.get(index);
|
||||
if (val == null) {
|
||||
val = new ArrayList();
|
||||
failures.set(index, val);
|
||||
}
|
||||
Set set = (Set)val.get(item);
|
||||
if (set == null) {
|
||||
set = new HashSet();
|
||||
val.set(item, set);
|
||||
}
|
||||
set.add(pick);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
public static final Pick NOTHING = new Nothing();
|
||||
|
||||
|
||||
private static class Nothing extends FinalPick {
|
||||
protected void addTo(Target target) {}
|
||||
protected boolean match(String input, Position p) {
|
||||
return true;
|
||||
}
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
return indent(depth) + "ø";
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// intermediates
|
||||
|
||||
abstract static class Visitor {
|
||||
Set already = new HashSet();
|
||||
// Note: each visitor should return the Pick that will replace a (or a itself)
|
||||
abstract Pick handle(Pick a);
|
||||
boolean alreadyEntered(Pick item) {
|
||||
boolean result = already.contains(item);
|
||||
already.add(item);
|
||||
return result;
|
||||
}
|
||||
void reset() {
|
||||
already.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Applies the visitor to this pick (and, in the composite implementations, to its children);
// returns what visitor.handle() returned for this pick.
protected abstract Pick visit(Visitor visitor);
|
||||
|
||||
static class Replacer extends Visitor {
|
||||
String toReplace;
|
||||
Pick replacement;
|
||||
Replacer(String toReplace, Pick replacement) {
|
||||
this.toReplace = toReplace;
|
||||
this.replacement = replacement;
|
||||
}
|
||||
public Pick handle(Pick a) {
|
||||
if (toReplace.equals(a.name)) {
|
||||
a = replacement;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
abstract private static class FinalPick extends Pick {
|
||||
public Pick visit(Visitor visitor) {
|
||||
return visitor.handle(this);
|
||||
}
|
||||
}
|
||||
|
||||
private abstract static class ItemPick extends Pick {
|
||||
protected Pick item;
|
||||
|
||||
ItemPick (Pick item) {
|
||||
this.item = item;
|
||||
}
|
||||
|
||||
public Pick visit(Visitor visitor) {
|
||||
Pick result = visitor.handle(this);
|
||||
if (visitor.alreadyEntered(this)) return result;
|
||||
if (item != null) item = item.visit(visitor);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private abstract static class ListPick extends Pick {
|
||||
protected Pick[] items = new Pick[0];
|
||||
|
||||
Pick simplify() {
|
||||
if (items.length > 1) return this;
|
||||
if (items.length == 1) return items[0];
|
||||
return null;
|
||||
}
|
||||
|
||||
int size() {
|
||||
return items.length;
|
||||
}
|
||||
|
||||
Pick getLast() {
|
||||
return items[items.length-1];
|
||||
}
|
||||
|
||||
void setLast(Pick newOne) {
|
||||
items[items.length-1] = newOne;
|
||||
}
|
||||
|
||||
protected void addInternal(Pick[] objs) {
|
||||
int lastLen = items.length;
|
||||
items = realloc(items, items.length + objs.length);
|
||||
for (int i = 0; i < objs.length; ++i) {
|
||||
items[lastLen + i] = objs[i];
|
||||
}
|
||||
}
|
||||
|
||||
public Pick visit(Visitor visitor) {
|
||||
Pick result = visitor.handle(this);
|
||||
if (visitor.alreadyEntered(this)) return result;
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
items[i] = items[i].visit(visitor);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple class to distribute a number between 0 (inclusive) and 1 (exclusive) among
|
||||
* a number of indices, where each index is weighted.
|
||||
* Item weights may be zero, but cannot be negative.
|
||||
* @author Davis
|
||||
*/
|
||||
// As in other case, we use an array for runtime speed; don't care about buildspeed.
|
||||
public static class WeightedIndex {
|
||||
private int[] weights = new int[0];
|
||||
private int minCount = 0;
|
||||
private double total;
|
||||
|
||||
public WeightedIndex(int minCount) {
|
||||
this.minCount = minCount;
|
||||
}
|
||||
|
||||
public WeightedIndex add(int count, int itemWeights) {
|
||||
if (count > 0) {
|
||||
int[] newWeights = new int[count];
|
||||
if (itemWeights < 1) itemWeights = 1;
|
||||
Arrays.fill(newWeights, 0, count, itemWeights);
|
||||
add(1, newWeights);
|
||||
}
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
public WeightedIndex add(int[] newWeights) {
|
||||
return add(newWeights.length, newWeights);
|
||||
}
|
||||
|
||||
public WeightedIndex add(int maxCount, int[] newWeights) {
|
||||
if (newWeights == null) newWeights = new int[]{1};
|
||||
int oldLen = weights.length;
|
||||
if (maxCount < newWeights.length) maxCount = newWeights.length;
|
||||
weights = (int[]) realloc(weights, weights.length + maxCount);
|
||||
System.arraycopy(newWeights, 0, weights, oldLen, newWeights.length);
|
||||
int lastWeight = weights[oldLen + newWeights.length-1];
|
||||
for (int i = oldLen + newWeights.length; i < maxCount; ++i) {
|
||||
weights[i] = lastWeight;
|
||||
}
|
||||
total = 0;
|
||||
for (int i = 0; i < weights.length; ++i) {
|
||||
if (weights[i] < 0) {
|
||||
throw new RuntimeException("only positive weights: " + i);
|
||||
}
|
||||
total += weights[i];
|
||||
}
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
// TODO, make this more efficient
|
||||
public int toIndex(double zeroToOne) {
|
||||
double weight = zeroToOne*total;
|
||||
int i;
|
||||
for (i = 0; i < weights.length; ++i) {
|
||||
weight -= weights[i];
|
||||
if (weight <= 0) break;
|
||||
}
|
||||
return i + minCount;
|
||||
}
|
||||
public String toString() {
|
||||
String result = "";
|
||||
for (int i = 0; i < minCount; ++i) {
|
||||
if (result.length() != 0) result += ",";
|
||||
result += "0";
|
||||
}
|
||||
for (int i = 0; i < weights.length; ++i) {
|
||||
if (result.length() != 0) result += ",";
|
||||
result += weights[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
/*
|
||||
private static Pick convert(Object obj) {
|
||||
if (obj instanceof Pick) return (Pick)obj;
|
||||
return new Literal(obj.toString(), false);
|
||||
}
|
||||
*/
|
||||
// Useful statics
|
||||
|
||||
static public int pick(Random random, int start, int end) {
|
||||
return start + (int)(random.nextDouble() * (end + 1 - start));
|
||||
}
|
||||
|
||||
static public double pick(Random random, double start, double end) {
|
||||
return start + (random.nextDouble() * (end + 1 - start));
|
||||
}
|
||||
|
||||
static public boolean pick(Random random, double percent) {
|
||||
return random.nextDouble() <= percent;
|
||||
}
|
||||
|
||||
static public int pick(Random random, UnicodeSet s) {
|
||||
return s.charAt(pick(random, 0,s.size()-1));
|
||||
}
|
||||
|
||||
static public String pick(Random random, String[] source) {
|
||||
return source[pick(random, 0, source.length-1)];
|
||||
}
|
||||
|
||||
// these utilities really ought to be in Java
|
||||
|
||||
public static double[] realloc(double[] source, int newSize) {
|
||||
double[] temp = new double[newSize];
|
||||
if (newSize > source.length) newSize = source.length;
|
||||
if (newSize != 0) System.arraycopy(source,0,temp,0,newSize);
|
||||
return temp;
|
||||
}
|
||||
|
||||
public static int[] realloc(int[] source, int newSize) {
|
||||
int[] temp = new int[newSize];
|
||||
if (newSize > source.length) newSize = source.length;
|
||||
if (newSize != 0) System.arraycopy(source,0,temp,0,newSize);
|
||||
return temp;
|
||||
}
|
||||
|
||||
public static Pick[] realloc(Pick[] source, int newSize) {
|
||||
Pick[] temp = new Pick[newSize];
|
||||
if (newSize > source.length) newSize = source.length;
|
||||
if (newSize != 0) System.arraycopy(source,0,temp,0,newSize);
|
||||
return temp;
|
||||
}
|
||||
|
||||
// test utilities
|
||||
private static void append(StringBuffer target, String toAdd, StringBuffer quoteBuffer) {
|
||||
Utility.appendToRule(target, (int)-1, true, false, quoteBuffer); // close previous quote
|
||||
if (DEBUG) System.out.println("\"" + toAdd + "\"");
|
||||
target.append(toAdd);
|
||||
}
|
||||
|
||||
private static void appendQuoted(StringBuffer target, String toAdd, StringBuffer quoteBuffer) {
|
||||
if (DEBUG) System.out.println("\"" + toAdd + "\"");
|
||||
Utility.appendToRule(target, toAdd, false, false, quoteBuffer);
|
||||
}
|
||||
|
||||
/*
|
||||
public static abstract class MatchHandler {
|
||||
public abstract void handleString(String source, int start, int limit);
|
||||
public abstract void handleSequence(String source, int start, int limit);
|
||||
public abstract void handleAlternation(String source, int start, int limit);
|
||||
|
||||
}
|
||||
*/
|
||||
/*
|
||||
// redistributes random value
|
||||
// values are still between 0 and 1, but with a different distribution
|
||||
public interface Spread {
|
||||
public double spread(double value);
|
||||
}
|
||||
|
||||
// give the weight for the high end.
|
||||
// values are linearly scaled according to the weight.
|
||||
static public class SimpleSpread implements Spread {
|
||||
static final Spread FLAT = new SimpleSpread(1.0);
|
||||
boolean flat = false;
|
||||
double aa, bb, cc;
|
||||
public SimpleSpread(double maxWeight) {
|
||||
if (maxWeight > 0.999 && maxWeight < 1.001) {
|
||||
flat = true;
|
||||
} else {
|
||||
double q = (maxWeight - 1.0);
|
||||
aa = -1/q;
|
||||
bb = 1/(q*q);
|
||||
cc = (2.0+q)/q;
|
||||
}
|
||||
}
|
||||
public double spread(double value) {
|
||||
if (flat) return value;
|
||||
value = aa + Math.sqrt(bb + cc*value);
|
||||
if (value < 0.0) return 0.0; // catch math gorp
|
||||
if (value >= 1.0) return 1.0;
|
||||
return value;
|
||||
}
|
||||
}
|
||||
static public int pick(Spread spread, Random random, int start, int end) {
|
||||
return start + (int)(spread.spread(random.nextDouble()) * (end + 1 - start));
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
|
||||
}
|
71
icu4j/src/com/ibm/icu/dev/test/util/Quoter.java
Normal file
71
icu4j/src/com/ibm/icu/dev/test/util/Quoter.java
Normal file
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Quoter.java,v $
|
||||
* $Date: 2003/11/21 01:03:39 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
|
||||
public abstract class Quoter {
|
||||
private static boolean DEBUG = false;
|
||||
|
||||
protected boolean quoting = false;
|
||||
protected StringBuffer output = new StringBuffer();
|
||||
|
||||
public void setQuoting(boolean value) {
|
||||
quoting = value;
|
||||
}
|
||||
public boolean isQuoting() {
|
||||
return quoting;
|
||||
}
|
||||
public void clear() {
|
||||
quoting = false;
|
||||
output.setLength(0);
|
||||
}
|
||||
public int length() {
|
||||
return output.length();
|
||||
}
|
||||
public Quoter append(String string) {
|
||||
output.append(string);
|
||||
return this;
|
||||
}
|
||||
public Quoter append(int codepoint) {
|
||||
return append(UTF16.valueOf(codepoint));
|
||||
}
|
||||
// warning, allows access to internals
|
||||
public String toString() {
|
||||
setQuoting(false); // finish quoting
|
||||
return output.toString();
|
||||
}
|
||||
/**
|
||||
* Implements standard ICU rule quoting
|
||||
*/
|
||||
public static class RuleQuoter extends Quoter {
|
||||
private StringBuffer quoteBuffer = new StringBuffer();
|
||||
public void setQuoting(boolean value) {
|
||||
if (quoting == value) return;
|
||||
if (quoting) { // stop quoting
|
||||
Utility.appendToRule(output, (int)-1, true, false, quoteBuffer); // close previous quote
|
||||
}
|
||||
quoting = value;
|
||||
}
|
||||
public Quoter append(String s) {
|
||||
if (DEBUG) System.out.println("\"" + s + "\"");
|
||||
if (quoting) {
|
||||
Utility.appendToRule(output, s, false, false, quoteBuffer);
|
||||
} else {
|
||||
output.append(s);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
96
icu4j/src/com/ibm/icu/dev/test/util/Tabber.java
Normal file
96
icu4j/src/com/ibm/icu/dev/test/util/Tabber.java
Normal file
|
@ -0,0 +1,96 @@
|
|||
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Tabber.java,v $
|
||||
* $Date: 2003/11/21 01:03:39 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
public abstract class Tabber {
    static final byte LEFT = 0, CENTER = 1, RIGHT = 2;

    /**
     * Repeats a string n times
     * @param source text to repeat
     * @param times number of copies
     * @return "" when times is not positive, source itself when times is 1,
     * otherwise source concatenated times times
     */
    // TODO - optimize repeats using doubling?
    public static String repeat(String source, int times) {
        if (times <= 0) return "";
        if (times == 1) return source;
        StringBuffer repeated = new StringBuffer();
        for (int i = 0; i < times; ++i) {
            repeated.append(source);
        }
        return repeated.toString();
    }

    /**
     * Splits source at tab characters and passes each field, with its
     * zero-based field number, to {@link #process_field}.
     * @return whatever the process_field calls wrote to the shared buffer
     */
    public String process(String source) {
        StringBuffer result = new StringBuffer();
        int fieldStart = 0;
        // The loop only ends once fieldStart has reached the end of source,
        // so there is never an unprocessed remainder afterwards.
        for (int field = 0; fieldStart < source.length(); ++field) {
            int fieldLimit = source.indexOf('\t', fieldStart);
            if (fieldLimit < 0) fieldLimit = source.length();
            process_field(field, source, fieldStart, fieldLimit, result);
            fieldStart = fieldLimit + 1;
        }
        return result.toString();
    }

    /**
     * Writes the field source[start, limit) to output.
     * @param count zero-based field number
     */
    public abstract void process_field(int count, String source, int start, int limit, StringBuffer output);

    /** Pads fields with spaces out to fixed column positions. */
    public static class MonoTabber extends Tabber {

        /** Column positions at the even indices; the odd entries are skipped ("type"). */
        private int[] tabs;

        /** @param tabs column table; it is copied */
        public MonoTabber(int[] tabs) {
            this.tabs = (int[]) tabs.clone();
        }

        /**
         * Lays out tab-separated fields: before each field the result is
         * space-padded up to the field's column. Once the column table is
         * exhausted, the rest of source is appended unchanged.
         */
        public String process(String source) {
            StringBuffer result = new StringBuffer();
            int fieldStart = 0;
            // slot advances by two to skip the type entry after each column
            for (int slot = 0; fieldStart < source.length() && slot < tabs.length; slot += 2) {
                int fieldLimit = source.indexOf('\t', fieldStart);
                if (fieldLimit < 0) fieldLimit = source.length();
                String piece = source.substring(fieldStart, fieldLimit);
                if (result.length() < tabs[slot]) {
                    result.append(repeat(" ", tabs[slot] - result.length()));
                    // TODO fix type
                }
                result.append(piece);
                fieldStart = fieldLimit + 1;
            }
            if (fieldStart < source.length()) {
                result.append(source.substring(fieldStart));
            }
            return result.toString();
        }

        /**
         * Appends one field: pads output with spaces up to column
         * tabs[count*2], or appends a single space when output is already
         * at or past that column, then appends source[start, limit).
         */
        public void process_field(int count, String source, int start, int limit, StringBuffer output) {
            String piece = source.substring(start, limit);
            int column = tabs[count*2];
            if (output.length() >= column) {
                output.append(" ");
            } else {
                output.append(repeat(" ", column - output.length()));
                // TODO fix type
            }
            output.append(piece);
        }
    }
}
|
247
icu4j/src/com/ibm/icu/dev/test/util/TestBNF.java
Normal file
247
icu4j/src/com/ibm/icu/dev/test/util/TestBNF.java
Normal file
|
@ -0,0 +1,247 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/TestBNF.java,v $
|
||||
* $Date: 2003/11/21 01:03:39 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.Random;
|
||||
|
||||
//TODO integrate this into the test framework
|
||||
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class TestBNF {
|
||||
|
||||
static final String[] testRules = {
|
||||
"$root = [ab]{3};",
|
||||
|
||||
"$root = [ab]{3,};",
|
||||
|
||||
"$root = [ab]{3,5};",
|
||||
|
||||
"$root = [ab]*;",
|
||||
|
||||
"$root = [ab]?;",
|
||||
|
||||
"$root = [ab]+;",
|
||||
|
||||
"$us = [a-z];" +
|
||||
"$root = [0-9$us];",
|
||||
|
||||
"$root = a $foo b? 25% end 30% | $foo 50%;\r\n" +
|
||||
"$foo = c{1,5} 20%;",
|
||||
|
||||
"$root = [a-z]{1,5}~;",
|
||||
|
||||
"$root = [a-z]{5}~;",
|
||||
|
||||
"$root = '\\' (u | U0010 | U000 $hex) $hex{4} ;\r\n" +
|
||||
"$hex = [0-9A-Fa-f];",
|
||||
};
|
||||
|
||||
static String unicodeSetBNF = "" +
|
||||
"$root = $leaf | '[' $s $root2 $s ']' ;\r\n" +
|
||||
"$root2 = $leaf | '[' $s $root3 $s ']' | ($root3 $s ($op $root3 $s){0,3}) ;\r\n" +
|
||||
"$root3 = $leaf | '[' $s $root4 $s ']' | ($root4 $s ($op $root4 $s){0,3}) ;\r\n" +
|
||||
"$root4 = $leaf | ($leaf $s ($op $leaf $s){0,3}) ;\r\n" +
|
||||
"$op = (('&' | '-') $s)? 70%;" +
|
||||
"$leaf = '[' $s $list $s ']' | $prop;\r\n" +
|
||||
"$list = ($char $s ('-' $s $char $s)? 30%){1,5} ;\r\n" +
|
||||
"$prop = '\\' (p | P) '{' $s $propName $s '}' | '[:' '^'? $s $propName $s ':]';\r\n" +
|
||||
"$needsQuote = [\\-\\][:whitespace:][:control:]] ;\r\n" +
|
||||
"$char = [[\\u0000-\\U00010FFFF]-$needsQuote] | $quoted ;\r\n" +
|
||||
"$quoted = '\\' ('u' | 'U0010' | 'U000' $hex) $hex{4} ;\r\n" +
|
||||
"$hex = [0-9A-Fa-f];\r\n" +
|
||||
"$s = ' '? 20%;\r\n" +
|
||||
"$propName = (whitespace | ws) | (uppercase | uc) | (lowercase | lc) | $category;\r\n" +
|
||||
"$category = ((general | gc) $s '=' $s)? $catvalue;\r\n" +
|
||||
"$catvalue = (C | Other | Cc | Control | Cf | Format | Cn | Unassigned | L | Letter);\r\n";
|
||||
|
||||
public static void main (String[] args) {
|
||||
testTokenizer();
|
||||
for (int i = 0; i < testRules.length; ++i) {
|
||||
testBNF(testRules[i], null, 20);
|
||||
}
|
||||
|
||||
testBNF(unicodeSetBNF, null, 20);
|
||||
//testParser();
|
||||
}
|
||||
|
||||
static void testBNF(String rules, UnicodeSet chars, int count) {
|
||||
BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
|
||||
.addSet("$chars", chars)
|
||||
.addRules(rules)
|
||||
.complete();
|
||||
|
||||
System.out.println("====================================");
|
||||
System.out.println("BNF");
|
||||
System.out.println(rules);
|
||||
System.out.println(bnf.getInternal());
|
||||
for (int i = 0; i < count; ++i) {
|
||||
System.out.println(i + ": " + bnf.next());
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
public static testManual() {
|
||||
Pick p = Pick.maybe(75,Pick.unquoted("a"));
|
||||
testOr(p, 1);
|
||||
p = Pick.or(new String[]{"", "a", "bb", "ccc"});
|
||||
testOr(p, 3);
|
||||
p = Pick.repeat(3, 5, new int[]{20, 30, 20}, "a");
|
||||
testOr(p, 5);
|
||||
p = Pick.codePoint("[a-ce]");
|
||||
testCodePoints(p);
|
||||
p = Pick.codePoint("[a-ce]");
|
||||
testCodePoints(p);
|
||||
p = Pick.string(2, 8, p);
|
||||
testOr(p,10);
|
||||
|
||||
p = Pick.or(new String[]{"", "a", "bb", "ccc"});
|
||||
p = Pick.and(p).and2(p).and2("&");
|
||||
testMatch(p, "abb&");
|
||||
testMatch(p, "bba");
|
||||
|
||||
// testEnglish();
|
||||
}
|
||||
*/
|
||||
|
||||
static void testMatch(Pick p, String source) {
|
||||
Pick.Position pp = new Pick.Position();
|
||||
boolean value = p.match(source, pp);
|
||||
System.out.println("Match: " + value + ", " + pp);
|
||||
}
|
||||
/*
|
||||
static void testParser() {
|
||||
try {
|
||||
Pick.Target target = new Pick.Target();
|
||||
for (int i = 0; i < rules.length; ++i) {
|
||||
target.addRule(rules[i]);
|
||||
}
|
||||
} catch (ParseException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
static class Counts {
|
||||
int[] counts;
|
||||
Counts(int max) {
|
||||
counts = new int[max+1];
|
||||
}
|
||||
void inc(int index) {
|
||||
counts[index]++;
|
||||
}
|
||||
void show() {
|
||||
System.out.println("Printing Counts");
|
||||
for (int i = 0; i < counts.length; ++i) {
|
||||
if (counts[i] == 0) continue;
|
||||
System.out.println(i + ": " + counts[i]);
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
||||
static final String[] rules = {
|
||||
"$s = ' ';",
|
||||
"$noun = dog | house | government | wall | street | zebra;",
|
||||
"$adjective = red | glorious | simple | nasty | heavy | clean;",
|
||||
"$article = quickly | oddly | silently | boldly;",
|
||||
"$adjectivePhrase = ($adverb $s)? 50% $adjective* 0% 30% 20% 10%;",
|
||||
"$nounPhrase = $articles $s ($adjectivePhrase $s)? 30% $noun;",
|
||||
"$verb = goes | fishes | walks | sleeps;",
|
||||
"$tverb = carries | lifts | overturns | hits | jumps on;",
|
||||
"$copula = is 30% | seems 10%;",
|
||||
"$sentence1 = $nounPhrase $s $verb $s ($s $adverb)? 30%;",
|
||||
"$sentence2 = $nounPhrase $s $tverb $s $nounPhrase ($s $adverb)? 30%;",
|
||||
"$sentence3 = $nounPhrase $s $copula $s $adjectivePhrase;",
|
||||
"$conj = but | and | or;",
|
||||
"$sentence4 = $sentence1 | $sentence2 | $sentence3 20% | $sentence4 $conj $sentence4 20%;",
|
||||
"$sentence = $sentence4 '.';"};
|
||||
/*
|
||||
private static void testEnglish() {
|
||||
Pick s = Pick.unquoted(" ");
|
||||
Pick verbs = Pick.or(new String[]{"goes", "fishes", "walks", "sleeps"});
|
||||
Pick transitive = Pick.or(new String[]{"carries", "lifts", "overturns", "hits", "jumps on"});
|
||||
Pick nouns = Pick.or(new String[]{"dog", "house", "government", "wall", "street", "zebra"});
|
||||
Pick adjectives = Pick.or(new String[]{"red", "glorious", "simple", "nasty", "heavy", "clean"});
|
||||
Pick articles = Pick.or(new String[]{"the", "a"});
|
||||
Pick adverbs = Pick.or(new String[]{"quickly", "oddly", "silently", "boldly"});
|
||||
Pick adjectivePhrase = Pick.and(0.5, Pick.and(adverbs).and2(s)).and2(adjectives);
|
||||
Pick nounPhrase = Pick.and(articles).and2(s)
|
||||
.and2(0.3, Pick.and(adjectivePhrase).and2(s))
|
||||
.and2(nouns);
|
||||
Pick copula = Pick.or(new String[]{"is", "seems"});
|
||||
Pick sentence1 = Pick.and(nounPhrase).and2(s).and2(verbs)
|
||||
.and2(0.3, Pick.and(s).and2(adverbs)).name("s1");
|
||||
Pick sentence2 = Pick.and(nounPhrase).and2(s).and2(transitive).and2(s).and2(nounPhrase)
|
||||
.and2(0.3, Pick.and(s).and2(adverbs)).name("s2");
|
||||
Pick sentence3 = Pick.and(nounPhrase).and2(s).and2(copula).and2(s).and2(adjectivePhrase).name("s3");
|
||||
Pick conj = Pick.or(new String[]{", but", ", and", ", or"});
|
||||
Pick forward = Pick.unquoted("forward");
|
||||
Pick pair = Pick.and(forward).and2(conj).and2(s).and2(forward).name("part");
|
||||
Pick sentenceBase = Pick.or(sentence1).or2(sentence2).or2(sentence3).or2(0.6666, pair).name("sentence");
|
||||
sentenceBase.replace(forward, sentenceBase);
|
||||
Pick sentence = Pick.and(sentenceBase).and2(Pick.unquoted("."));
|
||||
Pick.Target target = Pick.Target.make(sentence);
|
||||
for (int i = 0; i < 50; ++i) {
|
||||
System.out.println(i + ": " + target.next());
|
||||
}
|
||||
}
|
||||
private static void testOr(Pick p, int count) {
|
||||
Pick.Target target = Pick.Target.make(p);
|
||||
Counts counts = new Counts(count + 10);
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
String s = target.next();
|
||||
counts.inc(s.length());
|
||||
}
|
||||
counts.show();
|
||||
}
|
||||
private static void testCodePoints(Pick p) {
|
||||
Pick.Target target = Pick.Target.make(p);
|
||||
Counts counts = new Counts(128);
|
||||
for (int i = 0; i < 10000; ++i) {
|
||||
String s = target.next();
|
||||
counts.inc(s.charAt(0));
|
||||
}
|
||||
counts.show();
|
||||
}
|
||||
*/
|
||||
public static void printRandoms() {
|
||||
BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
|
||||
.addRules("[a-z]{2,5}").complete();
|
||||
System.out.println("Start");
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
String temp = bnf.next();
|
||||
System.out.println(i + ")\t" + temp);
|
||||
}
|
||||
}
|
||||
|
||||
public static void testTokenizer() {
|
||||
Tokenizer t = new Tokenizer();
|
||||
|
||||
String[] samples = {"a'b'c d #abc\r e", "'a '123 321",
|
||||
"\\\\", "a'b", "a'", "abc def%?ghi", "%", "a", "\\ a", "a''''b"};
|
||||
for (int i = 0; i < samples.length; ++i) {
|
||||
t.setSource(samples[i]);
|
||||
System.out.println();
|
||||
System.out.println("Input: " + t.getSource());
|
||||
int type = 0;
|
||||
while (type != t.DONE) {
|
||||
type = t.next();
|
||||
System.out.println(t.toString(type, false));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
213
icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java
Normal file
213
icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java
Normal file
|
@ -0,0 +1,213 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java,v $
|
||||
* $Date: 2003/11/21 01:03:39 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
// TODO integrate this into the test framework
|
||||
|
||||
import java.util.TreeSet;
|
||||
import java.util.Iterator;
|
||||
import java.text.Collator;
|
||||
import java.util.Locale;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Random;
|
||||
import java.text.ParseException;
|
||||
import java.util.Set;
|
||||
import java.util.Iterator;
|
||||
import java.util.TreeSet;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.*;
|
||||
|
||||
import com.ibm.icu.text.*;
|
||||
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class TestBagFormatter {
|
||||
static final void generatePropertyAliases(boolean showValues) {
|
||||
UnicodePropertySource ups = new UnicodePropertySource.ICU().setNameChoice(UProperty.NameChoice.SHORT);
|
||||
Collator order = Collator.getInstance(Locale.ENGLISH);
|
||||
TreeSet props = new TreeSet(order);
|
||||
TreeSet values = new TreeSet(order);
|
||||
ups.getAvailablePropertyAliases(props);
|
||||
Iterator it = props.iterator();
|
||||
while (it.hasNext()) {
|
||||
String propAlias = (String)it.next();
|
||||
ups.setPropertyAlias(propAlias);
|
||||
System.out.println();
|
||||
System.out.println(propAlias + ";\t" + ups.getPropertyAlias(UProperty.NameChoice.LONG));
|
||||
if (!showValues) continue;
|
||||
values.clear();
|
||||
ups.getAvailablePropertyValueAliases(values);
|
||||
Iterator it2 = values.iterator();
|
||||
while (it2.hasNext()) {
|
||||
String valueAlias = (String)it2.next();
|
||||
System.out.println("\t" + valueAlias
|
||||
+ ";\t" + ups.getPropertyValueAlias(valueAlias, UProperty.NameChoice.LONG));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
System.out.println("Start");
|
||||
try {
|
||||
//readCharacters();
|
||||
|
||||
generatePropertyAliases(true);
|
||||
|
||||
BagFormatter bf = new BagFormatter();
|
||||
|
||||
UnicodeSet us = new UnicodeSet("[:numeric_value=2:]");
|
||||
System.out.println(bf.showSetNames("[:numeric_value=2:]", us));
|
||||
us = new UnicodeSet("[:numeric_type=numeric:]");
|
||||
System.out.println(bf.showSetNames("[:numeric_type=numeric:]", us));
|
||||
|
||||
if (true) return;
|
||||
showNames("Name", ".*MARK.*");
|
||||
//showNames("NFD", "a.+");
|
||||
//showNames("NFD", false);
|
||||
//showNames("Lowercase_Mapping", false);
|
||||
//TestUnicodePropertySource.test(true);
|
||||
//showNames(".*\\ \\-.*");
|
||||
|
||||
|
||||
//checkHTML();
|
||||
//testIsRTL();
|
||||
|
||||
//TestTokenizer.test();
|
||||
//RandomCollator.generate("collationTest.txt", null);
|
||||
|
||||
//TestPick.test();
|
||||
//printRandoms();
|
||||
//if (true) return;
|
||||
//testLocales();
|
||||
//if (true) return;
|
||||
/*
|
||||
TestCollator tc = new TestCollator();
|
||||
tc.test(RuleBasedCollator.getInstance(),1000);
|
||||
*/
|
||||
/*
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
sb.setLength(0);
|
||||
rc.nextRule(sb);
|
||||
System.out.println(sb);
|
||||
}
|
||||
*/
|
||||
} finally {
|
||||
System.out.println("End");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void testLocales() throws IOException {
|
||||
Locale[] locales = Collator.getAvailableLocales();
|
||||
Set s = new TreeSet(Collator.getInstance());
|
||||
for (int i = 0; i < locales.length; ++i) {
|
||||
String lang = locales[i].getLanguage();
|
||||
String dlang = locales[i].getDisplayLanguage();
|
||||
String country = locales[i].getCountry();
|
||||
String dcountry = locales[i].getDisplayCountry();
|
||||
if (country.equals("")) continue;
|
||||
s.add(""
|
||||
+ "\t" + dcountry
|
||||
+ "\t" + country
|
||||
+ "\t" + dlang
|
||||
+ "\t" + lang
|
||||
);
|
||||
}
|
||||
//CollectionFormatter cf = new CollectionFormatter();
|
||||
PrintWriter pw = BagFormatter.openUTF8Writer("", "countries.txt", BagFormatter.CONSOLE);
|
||||
Iterator it = s.iterator();
|
||||
while (it.hasNext()) {
|
||||
pw.println(it.next());
|
||||
}
|
||||
pw.close();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Use the number of significant digits to get a rounding value.
|
||||
*/
|
||||
static final double LOG10 = Math.log(10);
|
||||
public static void useSignificantDigits(double value, int digits) {
|
||||
double log10 = Math.log(value)/LOG10; // log[e]
|
||||
|
||||
}
|
||||
|
||||
static final UnicodeSet RTL = new UnicodeSet("[[:L:]&[[:bidi class=R:][:bidi class=AL:]]]");
|
||||
|
||||
static boolean isRTL(Locale loc) {
|
||||
// in 2.8 we can use the exemplar characters, but for 2.6 we have to work around it
|
||||
int[] scripts = UScript.getCode(loc);
|
||||
return new UnicodeSet()
|
||||
.applyIntPropertyValue(UProperty.SCRIPT, scripts == null ? UScript.LATIN : scripts[0])
|
||||
.retainAll(RTL).size() != 0;
|
||||
}
|
||||
|
||||
static void testIsRTL() {
|
||||
Locale[] locales = Locale.getAvailableLocales();
|
||||
Set s = new TreeSet();
|
||||
for (int i = 0; i < locales.length; ++i) {
|
||||
s.add((isRTL(locales[i]) ? "R " : "L ") + locales[i].getDisplayName());
|
||||
}
|
||||
Iterator it = s.iterator();
|
||||
while (it.hasNext()) {
|
||||
System.out.println(it.next());
|
||||
}
|
||||
}
|
||||
|
||||
static final Transliterator toHTML = Transliterator.createFromRules(
|
||||
"any-html",
|
||||
"'<' > '<' ;" +
|
||||
"'&' > '&' ;" +
|
||||
"'>' > '>' ;" +
|
||||
"'\"' > '"' ; ",
|
||||
Transliterator.FORWARD);
|
||||
static final Transliterator fromHTML = Transliterator.createFromRules(
|
||||
"html-any",
|
||||
"'<' < '&'[lL][Tt]';' ;" +
|
||||
"'&' < '&'[aA][mM][pP]';' ;" +
|
||||
"'>' < '&'[gG][tT]';' ;" +
|
||||
"'\"' < '&'[qQ][uU][oO][tT]';' ; ",
|
||||
Transliterator.REVERSE);
|
||||
|
||||
static void checkHTML() {
|
||||
String foo = "& n < b < \"ab\"";
|
||||
String fii = toHTML.transliterate(foo);
|
||||
System.out.println("in: " + foo);
|
||||
System.out.println("out: " + fii);
|
||||
System.out.println("in*: " + fromHTML.transliterate(fii));
|
||||
System.out.println("IN*: " + fromHTML.transliterate(fii.toUpperCase()));
|
||||
}
|
||||
|
||||
static void showNames(String propAlias, boolean matches) {
|
||||
BagFormatter bf = new BagFormatter();
|
||||
UnicodeSet stuff;
|
||||
stuff = new UnicodePropertySource.ICU()
|
||||
.setPropertyAlias(propAlias)
|
||||
.getPropertySet(matches, null);
|
||||
System.out.println(bf.showSetNames(propAlias + " with " + matches, stuff));
|
||||
}
|
||||
|
||||
static void showNames(String propAlias, String pattern) {
|
||||
BagFormatter bf = new BagFormatter();
|
||||
UnicodeSet stuff;
|
||||
stuff = new UnicodePropertySource.ICU()
|
||||
.setPropertyAlias(propAlias)
|
||||
.getPropertySet(Pattern.compile(pattern).matcher(""), null);
|
||||
System.out.println(bf.showSetNames(propAlias + "with " + pattern, stuff));
|
||||
}
|
||||
}
|
325
icu4j/src/com/ibm/icu/dev/test/util/Tokenizer.java
Normal file
325
icu4j/src/com/ibm/icu/dev/test/util/Tokenizer.java
Normal file
|
@ -0,0 +1,325 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Tokenizer.java,v $
|
||||
* $Date: 2003/11/21 01:03:39 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.text.ParsePosition;
|
||||
|
||||
import com.ibm.icu.text.*;
|
||||
import com.ibm.icu.lang.*;
|
||||
import com.ibm.icu.util.*;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
|
||||
public class Tokenizer {
|
||||
protected String source;
|
||||
|
||||
protected StringBuffer buffer = new StringBuffer();
|
||||
protected long number;
|
||||
protected UnicodeSet unicodeSet = null;
|
||||
protected int index;
|
||||
boolean backedup = false;
|
||||
protected int lastIndex = -1;
|
||||
protected int nextIndex;
|
||||
int lastValue = BACKEDUP_TOO_FAR;
|
||||
TokenSymbolTable symbolTable = new TokenSymbolTable();
|
||||
|
||||
private static final char
|
||||
QUOTE = '\'',
|
||||
BSLASH = '\\';
|
||||
private static final UnicodeSet QUOTERS = new UnicodeSet().add(QUOTE).add(BSLASH);
|
||||
private static final UnicodeSet WHITESPACE = new UnicodeSet("[" +
|
||||
"\\u0009-\\u000D\\u0020\\u0085\\u200E\\u200F\\u2028\\u2029" +
|
||||
"]");
|
||||
private static final UnicodeSet SYNTAX = new UnicodeSet("[" +
|
||||
"\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E" +
|
||||
"\\u00A1-\\u00A7\\u00A9\\u00AB-\\u00AC\\u00AE" +
|
||||
"\\u00B0-\\u00B1\\u00B6\\u00B7\\u00BB\\u00BF\\u00D7\\u00F7" +
|
||||
"\\u2010-\\u2027\\u2030-\\u205E\\u2190-\\u2BFF" +
|
||||
"\\u3001\\u3003\\u3008-\\u3020\\u3030" +
|
||||
"\\uFD3E\\uFD3F\\uFE45\\uFE46" +
|
||||
"]").removeAll(QUOTERS).remove('$');
|
||||
private static final UnicodeSet NEWLINE = new UnicodeSet("[\\u000A\\u000D\\u0085\\u2028\\u2029]");
|
||||
private static final UnicodeSet DECIMAL = new UnicodeSet("[:Nd:]");
|
||||
private static final UnicodeSet NON_STRING = new UnicodeSet()
|
||||
.addAll(WHITESPACE)
|
||||
.addAll(SYNTAX);
|
||||
|
||||
protected UnicodeSet whiteSpace = WHITESPACE;
|
||||
protected UnicodeSet syntax = SYNTAX;
|
||||
private UnicodeSet non_string = NON_STRING;
|
||||
|
||||
private void fixSets() {
|
||||
if (syntax.containsSome(QUOTERS) || syntax.containsSome(whiteSpace)) {
|
||||
syntax = ((UnicodeSet)syntax.clone()).removeAll(QUOTERS).removeAll(whiteSpace);
|
||||
}
|
||||
if (whiteSpace.containsSome(QUOTERS)) {
|
||||
whiteSpace = ((UnicodeSet)whiteSpace.clone()).removeAll(QUOTERS);
|
||||
}
|
||||
non_string = new UnicodeSet(syntax)
|
||||
.addAll(whiteSpace);
|
||||
}
|
||||
|
||||
public Tokenizer setSource(String source) {
|
||||
this.source = source;
|
||||
this.index = 0;
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
public Tokenizer setIndex(int index) {
|
||||
this.index = index;
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
public static final int
|
||||
DONE = -1,
|
||||
NUMBER = -2,
|
||||
STRING = -3,
|
||||
UNICODESET = -4,
|
||||
UNTERMINATED_QUOTE = -5,
|
||||
BACKEDUP_TOO_FAR = -6;
|
||||
|
||||
private static final int
|
||||
FIRST = 0,
|
||||
IN_NUMBER = 1,
|
||||
IN_SPACE = 2,
|
||||
AFTER_QUOTE = 3, // warning: order is important for switch statement
|
||||
IN_STRING = 4,
|
||||
AFTER_BSLASH = 5,
|
||||
IN_QUOTE = 6;
|
||||
|
||||
public String toString(int type, boolean backedupBefore) {
|
||||
String s = backedup ? "@" : "*";
|
||||
switch(type) {
|
||||
case DONE:
|
||||
return s+"Done"+s;
|
||||
case BACKEDUP_TOO_FAR:
|
||||
return s+"Illegal Backup"+s;
|
||||
case UNTERMINATED_QUOTE:
|
||||
return s+"Unterminated Quote=" + getString() + s;
|
||||
case STRING:
|
||||
return s+"s=" + getString() + s;
|
||||
case NUMBER:
|
||||
return s+"n=" + getNumber() + s;
|
||||
case UNICODESET:
|
||||
return s+"n=" + getUnicodeSet() + s;
|
||||
default:
|
||||
return s+"c=" + usf.getName(type) + s;
|
||||
}
|
||||
}
|
||||
|
||||
private static final BagFormatter usf = new BagFormatter();
|
||||
|
||||
public void backup() {
|
||||
if (backedup) throw new IllegalArgumentException("backup too far");
|
||||
backedup = true;
|
||||
nextIndex = index;
|
||||
index = lastIndex;
|
||||
}
|
||||
|
||||
/*
|
||||
public int next2() {
|
||||
boolean backedupBefore = backedup;
|
||||
int result = next();
|
||||
System.out.println(toString(result, backedupBefore));
|
||||
return result;
|
||||
}
|
||||
*/
|
||||
|
||||
public int next() {
|
||||
if (backedup) {
|
||||
backedup = false;
|
||||
index = nextIndex;
|
||||
return lastValue;
|
||||
}
|
||||
int cp = 0;
|
||||
boolean inComment = false;
|
||||
// clean off any leading whitespace or comments
|
||||
while (true) {
|
||||
if (index >= source.length()) return lastValue = DONE;
|
||||
cp = nextChar();
|
||||
if (inComment) {
|
||||
if (NEWLINE.contains(cp)) inComment = false;
|
||||
} else {
|
||||
if (cp == '#') inComment = true;
|
||||
else if (!whiteSpace.contains(cp)) break;
|
||||
}
|
||||
}
|
||||
// record the last index in case we have to backup
|
||||
lastIndex = index;
|
||||
|
||||
if (cp == '[') {
|
||||
ParsePosition pos = new ParsePosition(index-1);
|
||||
unicodeSet = new UnicodeSet(source,pos,symbolTable);
|
||||
index = pos.getIndex();
|
||||
return lastValue = UNICODESET;
|
||||
}
|
||||
// get syntax character
|
||||
if (syntax.contains(cp)) return lastValue = cp;
|
||||
|
||||
// get number, if there is one
|
||||
if (UCharacter.getType(cp) == Character.DECIMAL_DIGIT_NUMBER) {
|
||||
number = UCharacter.getNumericValue(cp);
|
||||
while (index < source.length()) {
|
||||
cp = nextChar();
|
||||
if (UCharacter.getType(cp) != Character.DECIMAL_DIGIT_NUMBER) {
|
||||
index -= UTF16.getCharCount(cp); // BACKUP!
|
||||
break;
|
||||
}
|
||||
number *= 10;
|
||||
number += UCharacter.getNumericValue(cp);
|
||||
}
|
||||
return lastValue = NUMBER;
|
||||
}
|
||||
buffer.setLength(0);
|
||||
int status = IN_STRING;
|
||||
main:
|
||||
while (true) {
|
||||
switch (status) {
|
||||
case AFTER_QUOTE: // check for double ''?
|
||||
if (cp == QUOTE) {
|
||||
UTF16.append(buffer, QUOTE);
|
||||
status = IN_QUOTE;
|
||||
break;
|
||||
}
|
||||
// OTHERWISE FALL THROUGH!!!
|
||||
case IN_STRING:
|
||||
if (cp == QUOTE) status = IN_QUOTE;
|
||||
else if (cp == BSLASH) status = AFTER_BSLASH;
|
||||
else if (non_string.contains(cp)) {
|
||||
index -= UTF16.getCharCount(cp); // BACKUP!
|
||||
break main;
|
||||
} else UTF16.append(buffer,cp);
|
||||
break;
|
||||
case IN_QUOTE:
|
||||
if (cp == QUOTE) status = AFTER_QUOTE;
|
||||
else UTF16.append(buffer,cp);
|
||||
break;
|
||||
case AFTER_BSLASH:
|
||||
switch(cp) {
|
||||
case 'n': cp = '\n'; break;
|
||||
case 'r': cp = '\r'; break;
|
||||
case 't': cp = '\t'; break;
|
||||
}
|
||||
UTF16.append(buffer,cp);
|
||||
status = IN_STRING;
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Internal Error");
|
||||
}
|
||||
if (index >= source.length()) break;
|
||||
cp = nextChar();
|
||||
}
|
||||
if (status > IN_STRING) return lastValue = UNTERMINATED_QUOTE;
|
||||
return lastValue = STRING;
|
||||
}
|
||||
|
||||
public String getString() {
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return source.substring(0,index) + "$$$" + source.substring(index);
|
||||
}
|
||||
|
||||
public long getNumber() {
|
||||
return number;
|
||||
}
|
||||
|
||||
public UnicodeSet getUnicodeSet() {
|
||||
return unicodeSet;
|
||||
}
|
||||
|
||||
private int nextChar() {
|
||||
int cp = UTF16.charAt(source,index);
|
||||
index += UTF16.getCharCount(cp);
|
||||
return cp;
|
||||
}
|
||||
public int getIndex() {
|
||||
return index;
|
||||
}
|
||||
public String getSource() {
|
||||
return source;
|
||||
}
|
||||
public UnicodeSet getSyntax() {
|
||||
return syntax;
|
||||
}
|
||||
public UnicodeSet getWhiteSpace() {
|
||||
return whiteSpace;
|
||||
}
|
||||
public void setSyntax(UnicodeSet set) {
|
||||
syntax = set;
|
||||
fixSets();
|
||||
}
|
||||
public void setWhiteSpace(UnicodeSet set) {
|
||||
whiteSpace = set;
|
||||
fixSets();
|
||||
}
|
||||
|
||||
public Set getLookedUpItems() {
|
||||
return symbolTable.itemsLookedUp;
|
||||
}
|
||||
|
||||
public void addSymbol(String var, String value, int start, int limit) {
|
||||
// the limit is after the ';', so remove it
|
||||
--limit;
|
||||
char[] body = new char[limit - start];
|
||||
value.getChars(start, limit, body, 0);
|
||||
symbolTable.add(var, body);
|
||||
}
|
||||
|
||||
public class TokenSymbolTable implements SymbolTable {
|
||||
Map contents = new HashMap();
|
||||
Set itemsLookedUp = new HashSet();
|
||||
|
||||
public void add(String var, char[] body) {
|
||||
// start from 1 to avoid the $
|
||||
contents.put(var.substring(1), body);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
|
||||
*/
|
||||
public char[] lookup(String s) {
|
||||
itemsLookedUp.add('$' + s);
|
||||
return (char[])contents.get(s);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
|
||||
*/
|
||||
public UnicodeMatcher lookupMatcher(int ch) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String, java.text.ParsePosition, int)
|
||||
*/
|
||||
public String parseReference(String text, ParsePosition pos, int limit) {
|
||||
int cp;
|
||||
int start = pos.getIndex();
|
||||
int i;
|
||||
for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(text, i);
|
||||
if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
pos.setIndex(i);
|
||||
return text.substring(start,i);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
323
icu4j/src/com/ibm/icu/dev/test/util/UnicodePropertySource.java
Normal file
323
icu4j/src/com/ibm/icu/dev/test/util/UnicodePropertySource.java
Normal file
|
@ -0,0 +1,323 @@
|
|||
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/UnicodePropertySource.java,v $
|
||||
* $Date: 2003/11/21 01:03:39 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.util.regex.*;
|
||||
import java.util.Set;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
import com.ibm.icu.lang.*;
|
||||
import com.ibm.icu.util.*;
|
||||
import com.ibm.icu.impl.*;
|
||||
import com.ibm.icu.text.*;
|
||||
|
||||
/**
|
||||
* Provides a general interface for Unicode Properties, and
|
||||
* extracting sets based on those values.
|
||||
* @author Davis
|
||||
*/
|
||||
public abstract class UnicodePropertySource implements Cloneable {
|
||||
|
||||
protected String propertyAlias;
|
||||
protected int nameChoice = UProperty.NameChoice.LONG;
|
||||
protected StringFilter filter = new StringFilter();
|
||||
protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(new UnicodeSet(0,0x10FFFF));
|
||||
|
||||
abstract public String getPropertyValue(int codepoint);
|
||||
abstract public Set getAvailablePropertyAliases(Set result);
|
||||
abstract public Set getAvailablePropertyValueAliases(Set result);
|
||||
|
||||
abstract public String getPropertyAlias(int nameChoice);
|
||||
abstract public String getPropertyValueAlias(String valueAlias, int nameChoice);
|
||||
|
||||
/**
|
||||
* Subclasses should override
|
||||
*/
|
||||
public Object clone() {
|
||||
try {
|
||||
UnicodePropertySource result = (UnicodePropertySource)super.clone();
|
||||
result.filter = (StringFilter)filter.clone();
|
||||
return result;
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new InternalError("Should never happen.");
|
||||
}
|
||||
}
|
||||
|
||||
public UnicodePropertySource setPropertyAlias(String propertyAlias) {
|
||||
this.propertyAlias = propertyAlias;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getPropertyAlias() {
|
||||
return propertyAlias;
|
||||
}
|
||||
|
||||
public static final boolean equals(int codepoint, String other) {
|
||||
if (other.length() == 1) {
|
||||
return codepoint == other.charAt(0);
|
||||
}
|
||||
return other.equals(UTF16.valueOf(codepoint));
|
||||
}
|
||||
|
||||
public UnicodeSet getPropertySet(boolean charEqualsValue, UnicodeSet result){
|
||||
if (result == null) result = new UnicodeSet();
|
||||
matchIterator.reset();
|
||||
while (matchIterator.next()) {
|
||||
String value = filter.remap(getPropertyValue(matchIterator.codepoint));
|
||||
if (equals(matchIterator.codepoint, value) == charEqualsValue) {
|
||||
result.add(matchIterator.codepoint);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public UnicodeSet getPropertySet(String propertyValue, UnicodeSet result){
|
||||
if (result == null) result = new UnicodeSet();
|
||||
matchIterator.reset();
|
||||
while (matchIterator.next()) {
|
||||
String value = filter.remap(getPropertyValue(matchIterator.codepoint));
|
||||
if (propertyValue.equals(value)) {
|
||||
result.add(matchIterator.codepoint);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public UnicodeSet getPropertySet(Matcher matcher, UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
matchIterator.reset();
|
||||
while (matchIterator.next()) {
|
||||
String value = filter.remap(getPropertyValue(matchIterator.codepoint));
|
||||
if (value == null)
|
||||
continue;
|
||||
matcher.reset(value);
|
||||
if (matcher.matches()) {
|
||||
result.add(matchIterator.codepoint);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public int getNameChoice() {
|
||||
return nameChoice;
|
||||
}
|
||||
|
||||
public UnicodePropertySource setNameChoice(int choice) {
|
||||
nameChoice = choice;
|
||||
return this;
|
||||
}
|
||||
|
||||
public static class StringFilter implements Cloneable {
|
||||
public String remap(String original) {
|
||||
return original;
|
||||
}
|
||||
public Object clone() {
|
||||
try {
|
||||
return super.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new InternalError("Should never happen.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class MapFilter extends StringFilter {
|
||||
Map valueMap;
|
||||
public String remap(String original) {
|
||||
Object changed = valueMap.get(original);
|
||||
return changed == null ? original : (String) changed;
|
||||
}
|
||||
public Map getMap() {
|
||||
return valueMap;
|
||||
}
|
||||
|
||||
public MapFilter setMap(Map map) {
|
||||
valueMap = map;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
static public class ICU extends UnicodePropertySource {
|
||||
protected int propEnum = Integer.MIN_VALUE;
|
||||
{
|
||||
matchIterator = new UnicodeSetIterator(
|
||||
new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
|
||||
}
|
||||
|
||||
public UnicodePropertySource setPropertyAlias(String propertyAlias) {
|
||||
super.setPropertyAlias(propertyAlias);
|
||||
int extraPosition = Extras.indexOf(propertyAlias);
|
||||
if (extraPosition >= 0) {
|
||||
propEnum = EXTRA_START + extraPosition;
|
||||
} else {
|
||||
propEnum = UCharacter.getPropertyEnum(propertyAlias);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getPropertyValue(int codePoint) {
|
||||
if (propEnum < UProperty.INT_LIMIT) {
|
||||
int enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
|
||||
return UCharacter.getPropertyValueName(propEnum,enumValue, (int)nameChoice);
|
||||
} else if (propEnum < UProperty.DOUBLE_LIMIT) {
|
||||
return Double.toString(UCharacter.getUnicodeNumericValue(codePoint));
|
||||
// TODO: Fix HACK -- API deficient
|
||||
} else switch(propEnum) {
|
||||
case UProperty.AGE: return UCharacter.getAge(codePoint).toString();
|
||||
case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(UCharacter.getMirror(codePoint));
|
||||
case UProperty.CASE_FOLDING: return UCharacter.foldCase(UTF16.valueOf(codePoint),true);
|
||||
case UProperty.ISO_COMMENT: return UCharacter.getISOComment(codePoint);
|
||||
case UProperty.LOWERCASE_MAPPING: return UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint));
|
||||
case UProperty.NAME: return UCharacter.getName(codePoint);
|
||||
case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(UCharacter.foldCase(codePoint,true));
|
||||
case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
|
||||
case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
|
||||
case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
|
||||
case UProperty.TITLECASE_MAPPING: return UCharacter.toTitleCase(Locale.ENGLISH,UTF16.valueOf(codePoint),null);
|
||||
case UProperty.UNICODE_1_NAME: return UCharacter.getName1_0(codePoint);
|
||||
case UProperty.UPPERCASE_MAPPING: return UCharacter.toUpperCase(Locale.ENGLISH,UTF16.valueOf(codePoint));
|
||||
case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);
|
||||
case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);
|
||||
case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);
|
||||
case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static final List Extras = Arrays.asList(new String[] {
|
||||
"NFC", "NFD", "NFKC", "NKFD"
|
||||
});
|
||||
|
||||
static final int
|
||||
NFC = 0x8000,
|
||||
NFD = 0x8001,
|
||||
NFKC = 0x8002,
|
||||
NFKD = 0x8003,
|
||||
EXTRA_START = NFC,
|
||||
EXTRA_LIMIT = NFKD+1;
|
||||
|
||||
static final int[][] ranges = {
|
||||
{UProperty.BINARY_START, UProperty.BINARY_LIMIT},
|
||||
{UProperty.INT_START, UProperty.INT_LIMIT},
|
||||
{UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT},
|
||||
{UProperty.STRING_START, UProperty.STRING_LIMIT},
|
||||
};
|
||||
|
||||
public Set getAvailablePropertyAliases(Set result) {
|
||||
for (int i = 0; i < ranges.length; ++i) {
|
||||
for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
|
||||
result.add(UCharacter.getPropertyName(j, nameChoice));
|
||||
}
|
||||
}
|
||||
result.addAll(Extras);
|
||||
return result;
|
||||
}
|
||||
|
||||
public Set getAvailablePropertyValueAliases(Set result) {
|
||||
if (propEnum < UProperty.INT_LIMIT) {
|
||||
int start = UCharacter.getIntPropertyMinValue(propEnum);
|
||||
int end = UCharacter.getIntPropertyMaxValue(propEnum);
|
||||
for (int i = start; i <= end; ++i) {
|
||||
result.add(getFixedValueAlias(null, i,nameChoice));
|
||||
}
|
||||
} else {
|
||||
result.add(getFixedValueAlias(null, -1,nameChoice));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param valueAlias null if unused.
|
||||
* @param valueEnum -1 if unused
|
||||
* @param nameChoice
|
||||
* @return
|
||||
*/
|
||||
private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {
|
||||
if (propEnum >= UProperty.STRING_START) {
|
||||
return "<string>";
|
||||
} else if (propEnum >= UProperty.DOUBLE_START) {
|
||||
return "<double>";
|
||||
}
|
||||
if (valueAlias != null && !valueAlias.equals("<integer>")) {
|
||||
valueEnum = UCharacter.getPropertyValueEnum(propEnum,valueAlias);
|
||||
}
|
||||
String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);
|
||||
if (result != null) return result;
|
||||
// try other namechoice
|
||||
result = fixedGetPropertyValueName(propEnum,valueEnum,
|
||||
nameChoice == UProperty.NameChoice.LONG ? UProperty.NameChoice.SHORT : UProperty.NameChoice.LONG);
|
||||
if (result != null) return result;
|
||||
return "<integer>";
|
||||
}
|
||||
|
||||
private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
|
||||
try {
|
||||
return UCharacter.getPropertyValueName(propEnum,valueEnum,nameChoice);
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public String getPropertyAlias(int nameChoice) {
|
||||
if (propEnum < EXTRA_START) {
|
||||
return UCharacter.getPropertyName(propEnum, nameChoice);
|
||||
}
|
||||
return (String)Extras.get(propEnum-EXTRA_START);
|
||||
}
|
||||
|
||||
public String getPropertyValueAlias(String valueAlias, int nameChoice) {
|
||||
return getFixedValueAlias(valueAlias, -1, nameChoice);
|
||||
}
|
||||
}
|
||||
// TODO file bug on getPropertyValueName for Canonical_Combining_Class
|
||||
|
||||
public StringFilter getFilter() {
|
||||
return filter;
|
||||
}
|
||||
|
||||
|
||||
public UnicodePropertySource setFilter(StringFilter filter) {
|
||||
this.filter = filter;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
|
||||
while (source.nextRange()) {
|
||||
if (source.codepoint == source.IS_STRING) {
|
||||
result.add(source.string);
|
||||
} else {
|
||||
result.add(source.codepoint, source.codepointEnd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public UnicodeSet getMatchSet(UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
addAll(matchIterator, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param set
|
||||
*/
|
||||
public void setMatchSet(UnicodeSet set) {
|
||||
matchIterator = new UnicodeSetIterator(set);
|
||||
}
|
||||
|
||||
}
|
155
icu4j/src/com/ibm/icu/dev/test/util/Visitor.java
Normal file
155
icu4j/src/com/ibm/icu/dev/test/util/Visitor.java
Normal file
|
@ -0,0 +1,155 @@
|
|||
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Visitor.java,v $
|
||||
* $Date: 2003/11/21 01:03:39 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.util.*;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
|
||||
public abstract class Visitor {
|
||||
|
||||
public void doAt(Object item) {
|
||||
if (item instanceof Collection) {
|
||||
doAt((Collection) item);
|
||||
} else if (item instanceof Map) {
|
||||
doAt((Map) item);
|
||||
} else if (item instanceof Object[]) {
|
||||
doAt((Object[]) item);
|
||||
} else if (item instanceof UnicodeSet) {
|
||||
doAt((UnicodeSet) item);
|
||||
} else {
|
||||
doSimpleAt(item);
|
||||
}
|
||||
}
|
||||
|
||||
public int count(Object item) {
|
||||
if (item instanceof Collection) {
|
||||
return ((Collection) item).size();
|
||||
} else if (item instanceof Map) {
|
||||
return ((Map) item).size();
|
||||
} else if (item instanceof Object[]) {
|
||||
return ((Object[]) item).length;
|
||||
} else if (item instanceof UnicodeSet) {
|
||||
return ((UnicodeSet) item).size();
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// the default implementation boxing
|
||||
|
||||
public void doAt(int o) {
|
||||
doSimpleAt(new Integer(o));
|
||||
}
|
||||
public void doAt(double o) {
|
||||
doSimpleAt(new Double(o));
|
||||
}
|
||||
public void doAt(char o) {
|
||||
doSimpleAt(new Character(o));
|
||||
}
|
||||
|
||||
// for subclassing
|
||||
|
||||
protected void doAt (Collection c) {
|
||||
if (c.size() == 0) doBefore(c, null);
|
||||
Iterator it = c.iterator();
|
||||
boolean first = true;
|
||||
Object last = null;
|
||||
while (it.hasNext()) {
|
||||
Object item = it.next();
|
||||
if (first) {
|
||||
doBefore(c, item);
|
||||
first = false;
|
||||
} else {
|
||||
doBetween(c, last, item);
|
||||
}
|
||||
doAt(last=item);
|
||||
}
|
||||
doAfter(c, last);
|
||||
}
|
||||
|
||||
protected void doAt (Map c) {
|
||||
doAt(c.entrySet());
|
||||
}
|
||||
|
||||
protected void doAt (UnicodeSet c) {
|
||||
if (c.size() == 0) doBefore(c, null);
|
||||
UnicodeSetIterator it = new UnicodeSetIterator(c);
|
||||
boolean first = true;
|
||||
Object last = null;
|
||||
Object item;
|
||||
CodePointRange cpr0 = new CodePointRange();
|
||||
CodePointRange cpr1 = new CodePointRange();
|
||||
CodePointRange cpr;
|
||||
|
||||
while(it.nextRange()) {
|
||||
if (it.codepoint == it.IS_STRING) {
|
||||
item = it.string;
|
||||
} else {
|
||||
cpr = last == cpr0 ? cpr1 : cpr0; // make sure we don't override last
|
||||
cpr.codepoint = it.codepoint;
|
||||
cpr.codepointEnd = it.codepointEnd;
|
||||
item = cpr;
|
||||
}
|
||||
if (!first) {
|
||||
doBefore(c, item);
|
||||
first = true;
|
||||
} else {
|
||||
doBetween(c, last, item);
|
||||
}
|
||||
doAt(last = item);
|
||||
}
|
||||
doAfter(c, last);
|
||||
}
|
||||
|
||||
protected void doAt (Object[] c) {
|
||||
doBefore(c, c.length == 0 ? null : c[0]);
|
||||
Object last = null;
|
||||
for (int i = 0; i < c.length; ++i) {
|
||||
if (i != 0) doBetween(c, last, c[i]);
|
||||
doAt(last = c[i]);
|
||||
}
|
||||
doAfter(c, last);
|
||||
}
|
||||
|
||||
public static class CodePointRange{
|
||||
public int codepoint, codepointEnd;
|
||||
}
|
||||
|
||||
// ===== MUST BE OVERRIDEN =====
|
||||
|
||||
abstract protected void doBefore(Object container, Object item);
|
||||
abstract protected void doBetween(Object container, Object lastItem, Object nextItem);
|
||||
abstract protected void doAfter(Object container, Object item);
|
||||
abstract protected void doSimpleAt(Object o);
|
||||
|
||||
// ===== CONVENIENCES =====
|
||||
static class Join extends Visitor {
|
||||
StringBuffer output = new StringBuffer();
|
||||
String join (Object o) {
|
||||
output.setLength(0);
|
||||
doAt(o);
|
||||
return output.toString();
|
||||
}
|
||||
protected void doBefore(Object container, Object item) {}
|
||||
protected void doAfter(Object container, Object item) {}
|
||||
protected void doBetween(Object container, Object lastItem, Object nextItem) {
|
||||
output.append(",");
|
||||
}
|
||||
protected void doSimpleAt(Object o) {
|
||||
output.append(o.toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Add table
Reference in a new issue