mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-12450 move com.ibm.icu.dev.util.BNF, Pick, Quoter, Tokenizer to org.unicode.cldr.util
X-SVN-Rev: 38615
This commit is contained in:
parent
ebb7620ad0
commit
c291532c83
5 changed files with 0 additions and 1750 deletions
|
@ -1,792 +0,0 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
abstract public class Pick {
|
||||
private static boolean DEBUG = false;
|
||||
|
||||
// Used to generate strings
|
||||
|
||||
static class Target {
|
||||
private Pick pick;
|
||||
private Random random;
|
||||
private Quoter quoter;
|
||||
|
||||
public static Target make(Pick pick, Random random, Quoter quoter) {
|
||||
Target result = new Target();
|
||||
result.pick = pick;
|
||||
result.random = random;
|
||||
result.quoter = quoter;
|
||||
return result;
|
||||
}
|
||||
public String next() {
|
||||
quoter.clear();
|
||||
pick.addTo(this);
|
||||
return get();
|
||||
}
|
||||
public String get() {
|
||||
return quoter.toString();
|
||||
}
|
||||
private void copyState(Target other) {
|
||||
random = other.random;
|
||||
}
|
||||
private void clear() {
|
||||
quoter.clear();
|
||||
}
|
||||
/*private int length() {
|
||||
return quoter.length();
|
||||
}*/
|
||||
private Target append(int codepoint) {
|
||||
quoter.append(codepoint);
|
||||
return this;
|
||||
}
|
||||
private Target append(String s) {
|
||||
quoter.append(s);
|
||||
return this;
|
||||
}
|
||||
// must return value between 0 (inc) and 1 (exc)
|
||||
private double nextDouble() {
|
||||
return random.nextDouble();
|
||||
}
|
||||
}
|
||||
|
||||
// for Building
|
||||
|
||||
public Pick replace(String toReplace, Pick replacement) {
|
||||
Replacer visitor = new Replacer(toReplace, replacement);
|
||||
return visit(visitor);
|
||||
}
|
||||
|
||||
public Pick name(String nameStr) {
|
||||
name = nameStr;
|
||||
return this;
|
||||
}
|
||||
|
||||
static public Pick.Sequence makeSequence() {
|
||||
return new Sequence();
|
||||
}
|
||||
static public Pick.Alternation makeAlternation() {
|
||||
return new Alternation();
|
||||
}
|
||||
/*
|
||||
static public Pick.Sequence and(Object item) {
|
||||
return new Sequence().and2(item);
|
||||
}
|
||||
static public Pick.Sequence and(Object[] items) {
|
||||
return new Sequence().and2(items);
|
||||
}
|
||||
static public Pick.Alternation or(int itemWeight, Object item) {
|
||||
return new Alternation().or2(itemWeight, item);
|
||||
}
|
||||
static public Pick.Alternation or(Object[] items) {
|
||||
return new Alternation().or2(1, items);
|
||||
}
|
||||
static public Pick.Alternation or(int itemWeight, Object[] items) {
|
||||
return new Alternation().or2(itemWeight, items);
|
||||
}
|
||||
static public Pick.Alternation or(int[] itemWeights, Object[] items) {
|
||||
return new Alternation().or2(itemWeights, items);
|
||||
}
|
||||
|
||||
static public Pick maybe(int percent, Object item) {
|
||||
return new Repeat(0, 1, new int[]{100-percent, percent}, item);
|
||||
//return Pick.or(1.0-percent, NOTHING).or2(percent, item);
|
||||
}
|
||||
static public Pick repeat(int minCount, int maxCount, int itemWeights, Object item) {
|
||||
return new Repeat(minCount, maxCount, itemWeights, item);
|
||||
}
|
||||
|
||||
static public Pick codePoint(String source) {
|
||||
return new CodePoint(new UnicodeSet(source));
|
||||
}
|
||||
*/
|
||||
|
||||
static public Pick repeat(int minCount, int maxCount, int[] itemWeights, Pick item) {
|
||||
return new Repeat(minCount, maxCount, itemWeights, item);
|
||||
}
|
||||
|
||||
static public Pick codePoint(UnicodeSet source) {
|
||||
return new CodePoint(source);
|
||||
}
|
||||
static public Pick string(String source) {
|
||||
return new Literal(source);
|
||||
}
|
||||
/*
|
||||
static public Pick unquoted(String source) {
|
||||
return new Literal(source);
|
||||
}
|
||||
static public Pick string(int minLength, int maxLength, Pick item) {
|
||||
return new Morph(item, minLength, maxLength);
|
||||
}
|
||||
*/
|
||||
|
||||
public abstract String getInternal(int depth, Set alreadySeen);
|
||||
// Internals
|
||||
|
||||
protected String name;
|
||||
|
||||
protected abstract void addTo(Target target);
|
||||
public abstract boolean match(String input, Position p);
|
||||
|
||||
public static class Sequence extends ListPick {
|
||||
public Sequence and2 (Pick item) {
|
||||
addInternal(new Pick[] {item}); // we don't care about perf
|
||||
return this; // for chaining
|
||||
}
|
||||
public Sequence and2 (Pick[] itemArray) {
|
||||
addInternal(itemArray);
|
||||
return this; // for chaining
|
||||
}
|
||||
protected void addTo(Target target) {
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
items[i].addTo(target);
|
||||
}
|
||||
}
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
result = indent(depth) + result + "SEQ(";
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
if (i != 0) result += ", ";
|
||||
result += items[i].getInternal(depth+1, alreadySeen);
|
||||
}
|
||||
result += ")";
|
||||
return result;
|
||||
}
|
||||
// keep private
|
||||
private Sequence() {}
|
||||
public boolean match(String input, Position p) {
|
||||
int originalIndex = p.index;
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
if (!items[i].match(input, p)) {
|
||||
p.index = originalIndex;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
String checkName(String nameStr, Set alreadySeen) {
|
||||
if (nameStr == null) return "";
|
||||
if (alreadySeen.contains(nameStr)) return nameStr;
|
||||
alreadySeen.add(nameStr);
|
||||
return "{" + nameStr + "=}";
|
||||
}
|
||||
|
||||
public static class Alternation extends ListPick {
|
||||
private WeightedIndex weightedIndex = new WeightedIndex(0);
|
||||
|
||||
public Alternation or2 (Pick[] newItems) {
|
||||
return or2(1, newItems);
|
||||
}
|
||||
public Alternation or2 (int itemWeight, Pick item) {
|
||||
return or2(itemWeight, new Pick[] {item}); // we don't care about perf
|
||||
}
|
||||
public Alternation or2 (int itemWeight, Pick[] newItems) {
|
||||
int[] itemWeights = new int[newItems.length];
|
||||
Arrays.fill(itemWeights,itemWeight);
|
||||
return or2(itemWeights, newItems); // we don't care about perf
|
||||
}
|
||||
public Alternation or2 (int[] itemWeights, Pick[] newItems) {
|
||||
if (newItems.length != itemWeights.length) {
|
||||
throw new ArrayIndexOutOfBoundsException(
|
||||
"or lengths must be equal: " + newItems.length + " != " + itemWeights.length);
|
||||
}
|
||||
// int lastLen = this.items.length;
|
||||
addInternal(newItems);
|
||||
weightedIndex.add(itemWeights);
|
||||
return this; // for chaining
|
||||
}
|
||||
protected void addTo(Target target) {
|
||||
items[weightedIndex.toIndex(target.nextDouble())].addTo(target);
|
||||
}
|
||||
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
result = indent(depth) + result + "OR(";
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
if (i != 0) result += ", ";
|
||||
result += items[i].getInternal(depth+1, alreadySeen) + "/" + weightedIndex.weights[i];
|
||||
}
|
||||
return result + ")";
|
||||
}
|
||||
// keep private
|
||||
private Alternation() {}
|
||||
// take first matching option
|
||||
public boolean match(String input, Position p) {
|
||||
for (int i = 0; i < weightedIndex.weights.length; ++i) {
|
||||
if (p.isFailure(this,i)) continue;
|
||||
if (items[i].match(input, p)) return true;
|
||||
p.setFailure(this, i);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static String indent(int depth) {
|
||||
String result = "\r\n";
|
||||
for (int i = 0; i < depth; ++i) {
|
||||
result += " ";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static class Repeat extends ItemPick {
|
||||
WeightedIndex weightedIndex;
|
||||
int minCount = 0;
|
||||
|
||||
private Repeat(int minCount, int maxCount, int[] itemWeights, Pick item) {
|
||||
super(item);
|
||||
weightedIndex = new WeightedIndex(minCount).add(maxCount-minCount+1, itemWeights);
|
||||
}
|
||||
/*private Repeat(int minCount, int maxCount, int itemWeight, Pick item) {
|
||||
super(item);
|
||||
weightedIndex = new WeightedIndex(minCount).add(maxCount-minCount+1, itemWeight);
|
||||
}*/
|
||||
/*
|
||||
private Repeat(int minCount, int maxCount, Object item) {
|
||||
this.item = convert(item);
|
||||
weightedIndex = new WeightedIndex(minCount).add(maxCount-minCount+1, 1);
|
||||
}
|
||||
*/
|
||||
protected void addTo(Target target) {
|
||||
//int count ;
|
||||
for (int i = weightedIndex.toIndex(target.nextDouble()); i > 0; --i) {
|
||||
item.addTo(target);
|
||||
}
|
||||
}
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
result = indent(depth) + result + "REPEAT(" + weightedIndex
|
||||
+ "; "+ item.getInternal(depth+1, alreadySeen)
|
||||
+ ")";
|
||||
return result;
|
||||
}
|
||||
|
||||
// match longest, e.g. up to just before a failure
|
||||
public boolean match(String input, Position p) {
|
||||
//int bestMatch = p.index;
|
||||
int count = 0;
|
||||
for (int i = 0; i < weightedIndex.weights.length; ++i) {
|
||||
if (p.isFailure(this,i)) break;
|
||||
if (!item.match(input, p)) {
|
||||
p.setFailure(this,i);
|
||||
break;
|
||||
}
|
||||
//bestMatch = p.index;
|
||||
count++;
|
||||
}
|
||||
if (count >= minCount) {
|
||||
return true;
|
||||
}
|
||||
// TODO fix failure
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static class CodePoint extends FinalPick {
|
||||
private UnicodeSet source;
|
||||
|
||||
private CodePoint(UnicodeSet source) {
|
||||
this.source = source;
|
||||
}
|
||||
protected void addTo(Target target) {
|
||||
target.append(source.charAt(pick(target.random,0,source.size()-1)));
|
||||
}
|
||||
public boolean match(String s, Position p) {
|
||||
int cp = UTF16.charAt(s, p.index);
|
||||
if (source.contains(cp)) {
|
||||
p.index += UTF16.getCharCount(cp);
|
||||
return true;
|
||||
}
|
||||
p.setMax("codePoint");
|
||||
return false;
|
||||
}
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
return source.toString();
|
||||
}
|
||||
}
|
||||
|
||||
static class Morph extends ItemPick {
|
||||
Morph(Pick item) {
|
||||
super(item);
|
||||
}
|
||||
|
||||
private String lastValue = null;
|
||||
private Target addBuffer = Target.make(this, null, new Quoter.RuleQuoter());
|
||||
private StringBuffer mergeBuffer = new StringBuffer();
|
||||
|
||||
private static final int COPY_NEW = 0, COPY_BOTH = 1, COPY_LAST = 3, SKIP = 4,
|
||||
LEAST_SKIP = 4;
|
||||
// give weights to the above. make sure we delete about the same as we insert
|
||||
private static final WeightedIndex choice = new WeightedIndex(0)
|
||||
.add(new int[] {10, 10, 100, 10});
|
||||
|
||||
protected void addTo(Target target) {
|
||||
// get contents into separate buffer
|
||||
addBuffer.copyState(target);
|
||||
addBuffer.clear();
|
||||
item.addTo(addBuffer);
|
||||
String newValue = addBuffer.get();
|
||||
if (DEBUG) System.out.println("Old: " + lastValue + ", New:" + newValue);
|
||||
|
||||
// if not first one, merge with old
|
||||
if (lastValue != null) {
|
||||
mergeBuffer.setLength(0);
|
||||
int lastIndex = 0;
|
||||
int newIndex = 0;
|
||||
// the new length is a random value between old and new.
|
||||
int newLenLimit = (int) pick(target.random, lastValue.length(), newValue.length());
|
||||
|
||||
while (mergeBuffer.length() < newLenLimit
|
||||
&& newIndex < newValue.length()
|
||||
&& lastIndex < lastValue.length()) {
|
||||
int c = choice.toIndex(target.nextDouble());
|
||||
if (c == COPY_NEW || c == COPY_BOTH || c == SKIP) {
|
||||
newIndex = getChar(newValue, newIndex, mergeBuffer, c < LEAST_SKIP);
|
||||
if (mergeBuffer.length() >= newLenLimit) break;
|
||||
}
|
||||
if (c == COPY_LAST || c == COPY_BOTH || c == SKIP) {
|
||||
lastIndex = getChar(lastValue, lastIndex, mergeBuffer, c < LEAST_SKIP);
|
||||
}
|
||||
}
|
||||
newValue = mergeBuffer.toString();
|
||||
}
|
||||
lastValue = newValue;
|
||||
target.append(newValue);
|
||||
if (DEBUG) System.out.println("Result: " + newValue);
|
||||
}
|
||||
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
return indent(depth) + result + "MORPH("
|
||||
+ item.getInternal(depth+1, alreadySeen)
|
||||
+ ")";
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see Pick#match(java.lang.String, Pick.Position)
|
||||
*/
|
||||
public boolean match(String input, Position p) {
|
||||
// TODO Auto-generated method stub
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Add character if we can
|
||||
*/
|
||||
static int getChar(String newValue, int newIndex, StringBuffer mergeBuffer, boolean copy) {
|
||||
if (newIndex >= newValue.length()) return newIndex;
|
||||
int cp = UTF16.charAt(newValue,newIndex);
|
||||
if (copy) UTF16.append(mergeBuffer, cp);
|
||||
return newIndex + UTF16.getCharCount(cp);
|
||||
}
|
||||
|
||||
/*
|
||||
// quoted add
|
||||
appendQuoted(target, addBuffer.toString(), quoteBuffer);
|
||||
// fix buffers
|
||||
StringBuffer swapTemp = addBuffer;
|
||||
addBuffer = source;
|
||||
source = swapTemp;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
static class Quote extends ItemPick {
|
||||
Quote(Pick item) {
|
||||
super(item);
|
||||
}
|
||||
protected void addTo(Target target) {
|
||||
target.quoter.setQuoting(true);
|
||||
item.addTo(target);
|
||||
target.quoter.setQuoting(false);
|
||||
}
|
||||
|
||||
public boolean match(String s, Position p) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
String result = checkName(name, alreadySeen);
|
||||
if (result.startsWith("$")) return result;
|
||||
return indent(depth) + result + "QUOTE(" + item.getInternal(depth+1, alreadySeen)
|
||||
+ ")";
|
||||
}
|
||||
}
|
||||
|
||||
private static class Literal extends FinalPick {
|
||||
public String toString() {
|
||||
return name;
|
||||
}
|
||||
private Literal(String source) {
|
||||
this.name = source;
|
||||
}
|
||||
protected void addTo(Target target) {
|
||||
target.append(name);
|
||||
}
|
||||
public boolean match(String input, Position p) {
|
||||
int len = name.length();
|
||||
if (input.regionMatches(p.index, name, 0, len)) {
|
||||
p.index += len;
|
||||
return true;
|
||||
}
|
||||
p.setMax("literal");
|
||||
return false;
|
||||
}
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
return "'" + name + "'";
|
||||
}
|
||||
}
|
||||
|
||||
public static class Position {
|
||||
public ArrayList failures = new ArrayList();
|
||||
public int index;
|
||||
public int maxInt;
|
||||
public String maxType;
|
||||
public void setMax(String type) {
|
||||
if (index >= maxInt) {
|
||||
maxType = type;
|
||||
}
|
||||
}
|
||||
public String toString() {
|
||||
return "index; " + index
|
||||
+ ", maxInt:" + maxInt
|
||||
+ ", maxType: " + maxType;
|
||||
}
|
||||
/*private static final Object BAD = new Object();
|
||||
private static final Object GOOD = new Object();*/
|
||||
|
||||
public boolean isFailure(Pick pick, int item) {
|
||||
ArrayList val = (ArrayList)failures.get(index);
|
||||
if (val == null) return false;
|
||||
Set set = (Set)val.get(item);
|
||||
if (set == null) return false;
|
||||
return !set.contains(pick);
|
||||
}
|
||||
public void setFailure(Pick pick, int item) {
|
||||
ArrayList val = (ArrayList)failures.get(index);
|
||||
if (val == null) {
|
||||
val = new ArrayList();
|
||||
failures.set(index, val);
|
||||
}
|
||||
Set set = (Set)val.get(item);
|
||||
if (set == null) {
|
||||
set = new HashSet();
|
||||
val.set(item, set);
|
||||
}
|
||||
set.add(pick);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
public static final Pick NOTHING = new Nothing();
|
||||
|
||||
|
||||
private static class Nothing extends FinalPick {
|
||||
protected void addTo(Target target) {}
|
||||
protected boolean match(String input, Position p) {
|
||||
return true;
|
||||
}
|
||||
public String getInternal(int depth, Set alreadySeen) {
|
||||
return indent(depth) + "\u00F8";
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// intermediates
|
||||
|
||||
abstract static class Visitor {
|
||||
Set already = new HashSet();
|
||||
// Note: each visitor should return the Pick that will replace a (or a itself)
|
||||
abstract Pick handle(Pick a);
|
||||
boolean alreadyEntered(Pick item) {
|
||||
boolean result = already.contains(item);
|
||||
already.add(item);
|
||||
return result;
|
||||
}
|
||||
void reset() {
|
||||
already.clear();
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract Pick visit(Visitor visitor);
|
||||
|
||||
static class Replacer extends Visitor {
|
||||
String toReplace;
|
||||
Pick replacement;
|
||||
Replacer(String toReplace, Pick replacement) {
|
||||
this.toReplace = toReplace;
|
||||
this.replacement = replacement;
|
||||
}
|
||||
public Pick handle(Pick a) {
|
||||
if (toReplace.equals(a.name)) {
|
||||
a = replacement;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
abstract private static class FinalPick extends Pick {
|
||||
public Pick visit(Visitor visitor) {
|
||||
return visitor.handle(this);
|
||||
}
|
||||
}
|
||||
|
||||
private abstract static class ItemPick extends Pick {
|
||||
protected Pick item;
|
||||
|
||||
ItemPick (Pick item) {
|
||||
this.item = item;
|
||||
}
|
||||
|
||||
public Pick visit(Visitor visitor) {
|
||||
Pick result = visitor.handle(this);
|
||||
if (visitor.alreadyEntered(this)) return result;
|
||||
if (item != null) item = item.visit(visitor);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private abstract static class ListPick extends Pick {
|
||||
protected Pick[] items = new Pick[0];
|
||||
|
||||
Pick simplify() {
|
||||
if (items.length > 1) return this;
|
||||
if (items.length == 1) return items[0];
|
||||
return null;
|
||||
}
|
||||
|
||||
int size() {
|
||||
return items.length;
|
||||
}
|
||||
|
||||
Pick getLast() {
|
||||
return items[items.length-1];
|
||||
}
|
||||
|
||||
void setLast(Pick newOne) {
|
||||
items[items.length-1] = newOne;
|
||||
}
|
||||
|
||||
protected void addInternal(Pick[] objs) {
|
||||
int lastLen = items.length;
|
||||
items = realloc(items, items.length + objs.length);
|
||||
for (int i = 0; i < objs.length; ++i) {
|
||||
items[lastLen + i] = objs[i];
|
||||
}
|
||||
}
|
||||
|
||||
public Pick visit(Visitor visitor) {
|
||||
Pick result = visitor.handle(this);
|
||||
if (visitor.alreadyEntered(this)) return result;
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
items[i] = items[i].visit(visitor);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple class to distribute a number between 0 (inclusive) and 1 (exclusive) among
|
||||
* a number of indices, where each index is weighted.
|
||||
* Item weights may be zero, but cannot be negative.
|
||||
* @author Davis
|
||||
*/
|
||||
// As in other case, we use an array for runtime speed; don't care about buildspeed.
|
||||
public static class WeightedIndex {
|
||||
private int[] weights = new int[0];
|
||||
private int minCount = 0;
|
||||
private double total;
|
||||
|
||||
public WeightedIndex(int minCount) {
|
||||
this.minCount = minCount;
|
||||
}
|
||||
|
||||
public WeightedIndex add(int count, int itemWeights) {
|
||||
if (count > 0) {
|
||||
int[] newWeights = new int[count];
|
||||
if (itemWeights < 1) itemWeights = 1;
|
||||
Arrays.fill(newWeights, 0, count, itemWeights);
|
||||
add(1, newWeights);
|
||||
}
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
public WeightedIndex add(int[] newWeights) {
|
||||
return add(newWeights.length, newWeights);
|
||||
}
|
||||
|
||||
public WeightedIndex add(int maxCount, int[] newWeights) {
|
||||
if (newWeights == null) newWeights = new int[]{1};
|
||||
int oldLen = weights.length;
|
||||
if (maxCount < newWeights.length) maxCount = newWeights.length;
|
||||
weights = (int[]) realloc(weights, weights.length + maxCount);
|
||||
System.arraycopy(newWeights, 0, weights, oldLen, newWeights.length);
|
||||
int lastWeight = weights[oldLen + newWeights.length-1];
|
||||
for (int i = oldLen + newWeights.length; i < maxCount; ++i) {
|
||||
weights[i] = lastWeight;
|
||||
}
|
||||
total = 0;
|
||||
for (int i = 0; i < weights.length; ++i) {
|
||||
if (weights[i] < 0) {
|
||||
throw new RuntimeException("only positive weights: " + i);
|
||||
}
|
||||
total += weights[i];
|
||||
}
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
// TODO, make this more efficient
|
||||
public int toIndex(double zeroToOne) {
|
||||
double weight = zeroToOne*total;
|
||||
int i;
|
||||
for (i = 0; i < weights.length; ++i) {
|
||||
weight -= weights[i];
|
||||
if (weight <= 0) break;
|
||||
}
|
||||
return i + minCount;
|
||||
}
|
||||
public String toString() {
|
||||
String result = "";
|
||||
for (int i = 0; i < minCount; ++i) {
|
||||
if (result.length() != 0) result += ",";
|
||||
result += "0";
|
||||
}
|
||||
for (int i = 0; i < weights.length; ++i) {
|
||||
if (result.length() != 0) result += ",";
|
||||
result += weights[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
/*
|
||||
private static Pick convert(Object obj) {
|
||||
if (obj instanceof Pick) return (Pick)obj;
|
||||
return new Literal(obj.toString(), false);
|
||||
}
|
||||
*/
|
||||
// Useful statics
|
||||
|
||||
static public int pick(Random random, int start, int end) {
|
||||
return start + (int)(random.nextDouble() * (end + 1 - start));
|
||||
}
|
||||
|
||||
static public double pick(Random random, double start, double end) {
|
||||
return start + (random.nextDouble() * (end + 1 - start));
|
||||
}
|
||||
|
||||
static public boolean pick(Random random, double percent) {
|
||||
return random.nextDouble() <= percent;
|
||||
}
|
||||
|
||||
static public int pick(Random random, UnicodeSet s) {
|
||||
return s.charAt(pick(random, 0,s.size()-1));
|
||||
}
|
||||
|
||||
static public String pick(Random random, String[] source) {
|
||||
return source[pick(random, 0, source.length-1)];
|
||||
}
|
||||
|
||||
// these utilities really ought to be in Java
|
||||
|
||||
public static double[] realloc(double[] source, int newSize) {
|
||||
double[] temp = new double[newSize];
|
||||
if (newSize > source.length) newSize = source.length;
|
||||
if (newSize != 0) System.arraycopy(source,0,temp,0,newSize);
|
||||
return temp;
|
||||
}
|
||||
|
||||
public static int[] realloc(int[] source, int newSize) {
|
||||
int[] temp = new int[newSize];
|
||||
if (newSize > source.length) newSize = source.length;
|
||||
if (newSize != 0) System.arraycopy(source,0,temp,0,newSize);
|
||||
return temp;
|
||||
}
|
||||
|
||||
public static Pick[] realloc(Pick[] source, int newSize) {
|
||||
Pick[] temp = new Pick[newSize];
|
||||
if (newSize > source.length) newSize = source.length;
|
||||
if (newSize != 0) System.arraycopy(source,0,temp,0,newSize);
|
||||
return temp;
|
||||
}
|
||||
|
||||
// test utilities
|
||||
/*private static void append(StringBuffer target, String toAdd, StringBuffer quoteBuffer) {
|
||||
Utility.appendToRule(target, (int)-1, true, false, quoteBuffer); // close previous quote
|
||||
if (DEBUG) System.out.println("\"" + toAdd + "\"");
|
||||
target.append(toAdd);
|
||||
}
|
||||
|
||||
private static void appendQuoted(StringBuffer target, String toAdd, StringBuffer quoteBuffer) {
|
||||
if (DEBUG) System.out.println("\"" + toAdd + "\"");
|
||||
Utility.appendToRule(target, toAdd, false, false, quoteBuffer);
|
||||
}*/
|
||||
|
||||
/*
|
||||
public static abstract class MatchHandler {
|
||||
public abstract void handleString(String source, int start, int limit);
|
||||
public abstract void handleSequence(String source, int start, int limit);
|
||||
public abstract void handleAlternation(String source, int start, int limit);
|
||||
|
||||
}
|
||||
*/
|
||||
/*
|
||||
// redistributes random value
|
||||
// values are still between 0 and 1, but with a different distribution
|
||||
public interface Spread {
|
||||
public double spread(double value);
|
||||
}
|
||||
|
||||
// give the weight for the high end.
|
||||
// values are linearly scaled according to the weight.
|
||||
static public class SimpleSpread implements Spread {
|
||||
static final Spread FLAT = new SimpleSpread(1.0);
|
||||
boolean flat = false;
|
||||
double aa, bb, cc;
|
||||
public SimpleSpread(double maxWeight) {
|
||||
if (maxWeight > 0.999 && maxWeight < 1.001) {
|
||||
flat = true;
|
||||
} else {
|
||||
double q = (maxWeight - 1.0);
|
||||
aa = -1/q;
|
||||
bb = 1/(q*q);
|
||||
cc = (2.0+q)/q;
|
||||
}
|
||||
}
|
||||
public double spread(double value) {
|
||||
if (flat) return value;
|
||||
value = aa + Math.sqrt(bb + cc*value);
|
||||
if (value < 0.0) return 0.0; // catch math gorp
|
||||
if (value >= 1.0) return 1.0;
|
||||
return value;
|
||||
}
|
||||
}
|
||||
static public int pick(Spread spread, Random random, int start, int end) {
|
||||
return start + (int)(spread.spread(random.nextDouble()) * (end + 1 - start));
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
|
||||
}
|
|
@ -1,65 +0,0 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.util;
|
||||
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
public abstract class Quoter {
|
||||
private static boolean DEBUG = false;
|
||||
|
||||
protected boolean quoting = false;
|
||||
protected StringBuffer output = new StringBuffer();
|
||||
|
||||
public void setQuoting(boolean value) {
|
||||
quoting = value;
|
||||
}
|
||||
public boolean isQuoting() {
|
||||
return quoting;
|
||||
}
|
||||
public void clear() {
|
||||
quoting = false;
|
||||
output.setLength(0);
|
||||
}
|
||||
public int length() {
|
||||
return output.length();
|
||||
}
|
||||
public Quoter append(String string) {
|
||||
output.append(string);
|
||||
return this;
|
||||
}
|
||||
public Quoter append(int codepoint) {
|
||||
return append(UTF16.valueOf(codepoint));
|
||||
}
|
||||
// warning, allows access to internals
|
||||
public String toString() {
|
||||
setQuoting(false); // finish quoting
|
||||
return output.toString();
|
||||
}
|
||||
/**
|
||||
* Implements standard ICU rule quoting
|
||||
*/
|
||||
public static class RuleQuoter extends Quoter {
|
||||
private StringBuffer quoteBuffer = new StringBuffer();
|
||||
public void setQuoting(boolean value) {
|
||||
if (quoting == value) return;
|
||||
if (quoting) { // stop quoting
|
||||
Utility.appendToRule(output, (int)-1, true, false, quoteBuffer); // close previous quote
|
||||
}
|
||||
quoting = value;
|
||||
}
|
||||
public Quoter append(String s) {
|
||||
if (DEBUG) System.out.println("\"" + s + "\"");
|
||||
if (quoting) {
|
||||
Utility.appendToRule(output, s, false, false, quoteBuffer);
|
||||
} else {
|
||||
output.append(s);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,244 +0,0 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import com.ibm.icu.dev.util.BNF;
|
||||
import com.ibm.icu.dev.util.Pick;
|
||||
import com.ibm.icu.dev.util.Quoter;
|
||||
import com.ibm.icu.dev.util.Tokenizer;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class TestBNF {
|
||||
|
||||
static final String[] testRules = {
|
||||
"$root = [ab]{3};",
|
||||
|
||||
"$root = [ab]{3,};",
|
||||
|
||||
"$root = [ab]{3,5};",
|
||||
|
||||
"$root = [ab]*;",
|
||||
|
||||
"$root = [ab]?;",
|
||||
|
||||
"$root = [ab]+;",
|
||||
|
||||
"$us = [a-z];" +
|
||||
"$root = [0-9$us];",
|
||||
|
||||
"$root = a $foo b? 25% end 30% | $foo 50%;\r\n" +
|
||||
"$foo = c{1,5} 20%;",
|
||||
|
||||
"$root = [a-z]{1,5}~;",
|
||||
|
||||
"$root = [a-z]{5}~;",
|
||||
|
||||
"$root = '\\' (u | U0010 | U000 $hex) $hex{4} ;\r\n" +
|
||||
"$hex = [0-9A-Fa-f];",
|
||||
};
|
||||
|
||||
static String unicodeSetBNF = "" +
|
||||
"$root = $leaf | '[' $s $root2 $s ']' ;\r\n" +
|
||||
"$root2 = $leaf | '[' $s $root3 $s ']' | ($root3 $s ($op $root3 $s){0,3}) ;\r\n" +
|
||||
"$root3 = $leaf | '[' $s $root4 $s ']' | ($root4 $s ($op $root4 $s){0,3}) ;\r\n" +
|
||||
"$root4 = $leaf | ($leaf $s ($op $leaf $s){0,3}) ;\r\n" +
|
||||
"$op = (('&' | '-') $s)? 70%;" +
|
||||
"$leaf = '[' $s $list $s ']' | $prop;\r\n" +
|
||||
"$list = ($char $s ('-' $s $char $s)? 30%){1,5} ;\r\n" +
|
||||
"$prop = '\\' (p | P) '{' $s $propName $s '}' | '[:' '^'? $s $propName $s ':]';\r\n" +
|
||||
"$needsQuote = [\\-\\][:whitespace:][:control:]] ;\r\n" +
|
||||
"$char = [[\\u0000-\\U00010FFFF]-$needsQuote] | $quoted ;\r\n" +
|
||||
"$quoted = '\\' ('u' | 'U0010' | 'U000' $hex) $hex{4} ;\r\n" +
|
||||
"$hex = [0-9A-Fa-f];\r\n" +
|
||||
"$s = ' '? 20%;\r\n" +
|
||||
"$propName = (whitespace | ws) | (uppercase | uc) | (lowercase | lc) | $category;\r\n" +
|
||||
"$category = ((general | gc) $s '=' $s)? $catvalue;\r\n" +
|
||||
"$catvalue = (C | Other | Cc | Control | Cf | Format | Cn | Unassigned | L | Letter);\r\n";
|
||||
|
||||
public static void main (String[] args) {
|
||||
testTokenizer();
|
||||
for (int i = 0; i < testRules.length; ++i) {
|
||||
testBNF(testRules[i], null, 20);
|
||||
}
|
||||
|
||||
testBNF(unicodeSetBNF, null, 20);
|
||||
//testParser();
|
||||
}
|
||||
|
||||
static void testBNF(String rules, UnicodeSet chars, int count) {
|
||||
BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
|
||||
.addSet("$chars", chars)
|
||||
.addRules(rules)
|
||||
.complete();
|
||||
|
||||
System.out.println("====================================");
|
||||
System.out.println("BNF");
|
||||
System.out.println(rules);
|
||||
System.out.println(bnf.getInternal());
|
||||
for (int i = 0; i < count; ++i) {
|
||||
System.out.println(i + ": " + bnf.next());
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
public static testManual() {
|
||||
Pick p = Pick.maybe(75,Pick.unquoted("a"));
|
||||
testOr(p, 1);
|
||||
p = Pick.or(new String[]{"", "a", "bb", "ccc"});
|
||||
testOr(p, 3);
|
||||
p = Pick.repeat(3, 5, new int[]{20, 30, 20}, "a");
|
||||
testOr(p, 5);
|
||||
p = Pick.codePoint("[a-ce]");
|
||||
testCodePoints(p);
|
||||
p = Pick.codePoint("[a-ce]");
|
||||
testCodePoints(p);
|
||||
p = Pick.string(2, 8, p);
|
||||
testOr(p,10);
|
||||
|
||||
p = Pick.or(new String[]{"", "a", "bb", "ccc"});
|
||||
p = Pick.and(p).and2(p).and2("&");
|
||||
testMatch(p, "abb&");
|
||||
testMatch(p, "bba");
|
||||
|
||||
// testEnglish();
|
||||
}
|
||||
*/
|
||||
|
||||
static void testMatch(Pick p, String source) {
|
||||
Pick.Position pp = new Pick.Position();
|
||||
boolean value = p.match(source, pp);
|
||||
System.out.println("Match: " + value + ", " + pp);
|
||||
}
|
||||
/*
|
||||
static void testParser() {
|
||||
try {
|
||||
Pick.Target target = new Pick.Target();
|
||||
for (int i = 0; i < rules.length; ++i) {
|
||||
target.addRule(rules[i]);
|
||||
}
|
||||
} catch (ParseException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
static class Counts {
|
||||
int[] counts;
|
||||
Counts(int max) {
|
||||
counts = new int[max+1];
|
||||
}
|
||||
void inc(int index) {
|
||||
counts[index]++;
|
||||
}
|
||||
void show() {
|
||||
System.out.println("Printing Counts");
|
||||
for (int i = 0; i < counts.length; ++i) {
|
||||
if (counts[i] == 0) continue;
|
||||
System.out.println(i + ": " + counts[i]);
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
||||
/* static final String[] rules = {
|
||||
"$s = ' ';",
|
||||
"$noun = dog | house | government | wall | street | zebra;",
|
||||
"$adjective = red | glorious | simple | nasty | heavy | clean;",
|
||||
"$article = quickly | oddly | silently | boldly;",
|
||||
"$adjectivePhrase = ($adverb $s)? 50% $adjective* 0% 30% 20% 10%;",
|
||||
"$nounPhrase = $articles $s ($adjectivePhrase $s)? 30% $noun;",
|
||||
"$verb = goes | fishes | walks | sleeps;",
|
||||
"$tverb = carries | lifts | overturns | hits | jumps on;",
|
||||
"$copula = is 30% | seems 10%;",
|
||||
"$sentence1 = $nounPhrase $s $verb $s ($s $adverb)? 30%;",
|
||||
"$sentence2 = $nounPhrase $s $tverb $s $nounPhrase ($s $adverb)? 30%;",
|
||||
"$sentence3 = $nounPhrase $s $copula $s $adjectivePhrase;",
|
||||
"$conj = but | and | or;",
|
||||
"$sentence4 = $sentence1 | $sentence2 | $sentence3 20% | $sentence4 $conj $sentence4 20%;",
|
||||
"$sentence = $sentence4 '.';"};
|
||||
*/
|
||||
/*
|
||||
private static void testEnglish() {
|
||||
Pick s = Pick.unquoted(" ");
|
||||
Pick verbs = Pick.or(new String[]{"goes", "fishes", "walks", "sleeps"});
|
||||
Pick transitive = Pick.or(new String[]{"carries", "lifts", "overturns", "hits", "jumps on"});
|
||||
Pick nouns = Pick.or(new String[]{"dog", "house", "government", "wall", "street", "zebra"});
|
||||
Pick adjectives = Pick.or(new String[]{"red", "glorious", "simple", "nasty", "heavy", "clean"});
|
||||
Pick articles = Pick.or(new String[]{"the", "a"});
|
||||
Pick adverbs = Pick.or(new String[]{"quickly", "oddly", "silently", "boldly"});
|
||||
Pick adjectivePhrase = Pick.and(0.5, Pick.and(adverbs).and2(s)).and2(adjectives);
|
||||
Pick nounPhrase = Pick.and(articles).and2(s)
|
||||
.and2(0.3, Pick.and(adjectivePhrase).and2(s))
|
||||
.and2(nouns);
|
||||
Pick copula = Pick.or(new String[]{"is", "seems"});
|
||||
Pick sentence1 = Pick.and(nounPhrase).and2(s).and2(verbs)
|
||||
.and2(0.3, Pick.and(s).and2(adverbs)).name("s1");
|
||||
Pick sentence2 = Pick.and(nounPhrase).and2(s).and2(transitive).and2(s).and2(nounPhrase)
|
||||
.and2(0.3, Pick.and(s).and2(adverbs)).name("s2");
|
||||
Pick sentence3 = Pick.and(nounPhrase).and2(s).and2(copula).and2(s).and2(adjectivePhrase).name("s3");
|
||||
Pick conj = Pick.or(new String[]{", but", ", and", ", or"});
|
||||
Pick forward = Pick.unquoted("forward");
|
||||
Pick pair = Pick.and(forward).and2(conj).and2(s).and2(forward).name("part");
|
||||
Pick sentenceBase = Pick.or(sentence1).or2(sentence2).or2(sentence3).or2(0.6666, pair).name("sentence");
|
||||
sentenceBase.replace(forward, sentenceBase);
|
||||
Pick sentence = Pick.and(sentenceBase).and2(Pick.unquoted("."));
|
||||
Pick.Target target = Pick.Target.make(sentence);
|
||||
for (int i = 0; i < 50; ++i) {
|
||||
System.out.println(i + ": " + target.next());
|
||||
}
|
||||
}
|
||||
private static void testOr(Pick p, int count) {
|
||||
Pick.Target target = Pick.Target.make(p);
|
||||
Counts counts = new Counts(count + 10);
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
String s = target.next();
|
||||
counts.inc(s.length());
|
||||
}
|
||||
counts.show();
|
||||
}
|
||||
private static void testCodePoints(Pick p) {
|
||||
Pick.Target target = Pick.Target.make(p);
|
||||
Counts counts = new Counts(128);
|
||||
for (int i = 0; i < 10000; ++i) {
|
||||
String s = target.next();
|
||||
counts.inc(s.charAt(0));
|
||||
}
|
||||
counts.show();
|
||||
}
|
||||
*/
|
||||
public static void printRandoms() {
|
||||
BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
|
||||
.addRules("[a-z]{2,5}").complete();
|
||||
System.out.println("Start");
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
String temp = bnf.next();
|
||||
System.out.println(i + ")\t" + temp);
|
||||
}
|
||||
}
|
||||
|
||||
public static void testTokenizer() {
|
||||
Tokenizer t = new Tokenizer();
|
||||
|
||||
String[] samples = {"a'b'c d #abc\r e", "'a '123 321",
|
||||
"\\\\", "a'b", "a'", "abc def%?ghi", "%", "a", "\\ a", "a''''b"};
|
||||
for (int i = 0; i < samples.length; ++i) {
|
||||
t.setSource(samples[i]);
|
||||
System.out.println();
|
||||
System.out.println("Input: " + t.getSource());
|
||||
int type = 0;
|
||||
while (type != Tokenizer.DONE) {
|
||||
type = t.next();
|
||||
System.out.println(t.toString(type, false));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,329 +0,0 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class BNF {
|
||||
private Map map = new HashMap();
|
||||
private Set variables = new HashSet();
|
||||
private Pick pick = null;
|
||||
private Pick.Target target = null;
|
||||
private Tokenizer t;
|
||||
private Quoter quoter;
|
||||
private Random random;
|
||||
|
||||
public String next() {
|
||||
return target.next();
|
||||
}
|
||||
|
||||
public String getInternal() {
|
||||
return pick.getInternal(0, new HashSet());
|
||||
}
|
||||
|
||||
/*
|
||||
+ "weight = integer '%';"
|
||||
+ "range = '{' integer (',' integer?)? '}' weight*;"
|
||||
+ "quote = '@';"
|
||||
+ "star = '*' weight*;"
|
||||
+ "plus = '+' weight*;"
|
||||
+ "maybe = '?' weight?;"
|
||||
+ "quantifier = range | star | maybe | plus;"
|
||||
+ "core = string | unicodeSet | '(' alternation ')';"
|
||||
+ "sequence = (core quantifier*)+;"
|
||||
+ "alternation = sequence (weight? ('|' sequence weight?)+)?;"
|
||||
+ "rule = string '=' alternation;";
|
||||
|
||||
|
||||
* Match 0 or more times
|
||||
+ Match 1 or more times
|
||||
? Match 1 or 0 times
|
||||
{n} Match exactly n times
|
||||
{n,} Match at least n times
|
||||
{n,m} Match at least n but not more than m times
|
||||
|
||||
|
||||
|
||||
*/
|
||||
|
||||
public BNF(Random random, Quoter quoter) {
|
||||
this.random = random;
|
||||
this.quoter = quoter;
|
||||
t = new Tokenizer();
|
||||
}
|
||||
|
||||
public BNF addRules(String rules) {
|
||||
t.setSource(rules);
|
||||
while (addRule()) {
|
||||
}
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
public BNF complete() {
|
||||
// check that the rules match the variables, except for $root in rules
|
||||
Set ruleSet = map.keySet();
|
||||
// add also
|
||||
variables.add("$root");
|
||||
variables.addAll(t.getLookedUpItems());
|
||||
if (!ruleSet.equals(variables)) {
|
||||
String msg = showDiff(variables, ruleSet);
|
||||
if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg;
|
||||
String temp = showDiff(ruleSet, variables);
|
||||
if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp;
|
||||
if (msg.length() == 0) msg = temp;
|
||||
else if (temp.length() != 0) {
|
||||
msg = msg + "; " + temp;
|
||||
}
|
||||
error(msg);
|
||||
}
|
||||
|
||||
if (!ruleSet.equals(variables)) {
|
||||
String msg = showDiff(variables, ruleSet);
|
||||
if (msg.length() != 0) msg = "Missing definitions for: " + msg;
|
||||
String temp = showDiff(ruleSet, variables);
|
||||
if (temp.length() != 0) temp = "Defined but not used: " + temp;
|
||||
if (msg.length() == 0) msg = temp;
|
||||
else if (temp.length() != 0) {
|
||||
msg = msg + "; " + temp;
|
||||
}
|
||||
error(msg);
|
||||
}
|
||||
|
||||
// replace variables by definitions
|
||||
Iterator it = ruleSet.iterator();
|
||||
while (it.hasNext()) {
|
||||
String key = (String) it.next();
|
||||
Pick expression = (Pick) map.get(key);
|
||||
Iterator it2 = ruleSet.iterator();
|
||||
if (false && key.equals("$crlf")) {
|
||||
System.out.println("debug") ;
|
||||
}
|
||||
while (it2.hasNext()) {
|
||||
Object key2 = it2.next();
|
||||
if (key.equals(key2)) continue;
|
||||
Pick expression2 = (Pick) map.get(key2);
|
||||
expression2.replace(key, expression);
|
||||
}
|
||||
}
|
||||
pick = (Pick) map.get("$root");
|
||||
target = Pick.Target.make(pick, random, quoter);
|
||||
// TODO remove temp collections
|
||||
return this;
|
||||
}
|
||||
|
||||
String showDiff(Set a, Set b) {
|
||||
Set temp = new HashSet();
|
||||
temp.addAll(a);
|
||||
temp.removeAll(b);
|
||||
if (temp.size() == 0) return "";
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
Iterator it = temp.iterator();
|
||||
while (it.hasNext()) {
|
||||
if (buffer.length() != 0) buffer.append(", ");
|
||||
buffer.append(it.next().toString());
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
void error(String msg) {
|
||||
throw new IllegalArgumentException(msg
|
||||
+ "\r\n" + t.toString());
|
||||
}
|
||||
|
||||
private boolean addRule() {
|
||||
int type = t.next();
|
||||
if (type == Tokenizer.DONE) return false;
|
||||
if (type != Tokenizer.STRING) error("missing weight");
|
||||
String s = t.getString();
|
||||
if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable");
|
||||
if (t.next() != '=') error("missing =");
|
||||
int startBody = t.index;
|
||||
Pick rule = getAlternation();
|
||||
if (rule == null) error("missing expression");
|
||||
t.addSymbol(s, t.getSource(), startBody, t.index);
|
||||
if (t.next() != ';') error("missing ;");
|
||||
return addPick(s, rule);
|
||||
}
|
||||
|
||||
protected boolean addPick(String s, Pick rule) {
|
||||
Object temp = map.get(s);
|
||||
if (temp != null) error("duplicate variable");
|
||||
if (rule.name == null) rule.name(s);
|
||||
map.put(s, rule);
|
||||
return true;
|
||||
}
|
||||
|
||||
public BNF addSet(String variable, UnicodeSet set) {
|
||||
if (set != null) {
|
||||
String body = set.toString();
|
||||
t.addSymbol(variable, body, 0, body.length());
|
||||
addPick(variable, Pick.codePoint(set));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
int maxRepeat = 99;
|
||||
|
||||
Pick qualify(Pick item) {
|
||||
int[] weights;
|
||||
int type = t.next();
|
||||
switch(type) {
|
||||
case '@':
|
||||
return new Pick.Quote(item);
|
||||
case '~':
|
||||
return new Pick.Morph(item);
|
||||
case '?':
|
||||
int weight = getWeight();
|
||||
if (weight == NO_WEIGHT) weight = 50;
|
||||
weights = new int[] {100-weight, weight};
|
||||
return Pick.repeat(0, 1, weights, item);
|
||||
case '*':
|
||||
weights = getWeights();
|
||||
return Pick.repeat(1, maxRepeat, weights, item);
|
||||
case '+':
|
||||
weights = getWeights();
|
||||
return Pick.repeat(1, maxRepeat, weights, item);
|
||||
case '{':
|
||||
if (t.next() != Tokenizer.NUMBER) error("missing number");
|
||||
int start = (int) t.getNumber();
|
||||
int end = start;
|
||||
type = t.next();
|
||||
if (type == ',') {
|
||||
end = maxRepeat;
|
||||
type = t.next();
|
||||
if (type == Tokenizer.NUMBER) {
|
||||
end = (int)t.getNumber();
|
||||
type = t.next();
|
||||
}
|
||||
}
|
||||
if (type != '}') error("missing }");
|
||||
weights = getWeights();
|
||||
return Pick.repeat(start, end, weights, item);
|
||||
}
|
||||
t.backup();
|
||||
return item;
|
||||
}
|
||||
|
||||
Pick getCore() {
|
||||
int token = t.next();
|
||||
if (token == Tokenizer.STRING) {
|
||||
String s = t.getString();
|
||||
if (s.charAt(0) == '$') variables.add(s);
|
||||
return Pick.string(s);
|
||||
}
|
||||
if (token == Tokenizer.UNICODESET) {
|
||||
return Pick.codePoint(t.getUnicodeSet());
|
||||
}
|
||||
if (token != '(') {
|
||||
t.backup();
|
||||
return null;
|
||||
}
|
||||
Pick temp = getAlternation();
|
||||
token = t.next();
|
||||
if (token != ')') error("missing )");
|
||||
return temp;
|
||||
}
|
||||
|
||||
Pick getSequence() {
|
||||
Pick.Sequence result = null;
|
||||
Pick last = null;
|
||||
while (true) {
|
||||
Pick item = getCore();
|
||||
if (item == null) {
|
||||
if (result != null) return result;
|
||||
if (last != null) return last;
|
||||
error("missing item");
|
||||
}
|
||||
// qualify it as many times as possible
|
||||
Pick oldItem;
|
||||
do {
|
||||
oldItem = item;
|
||||
item = qualify(item);
|
||||
} while (item != oldItem);
|
||||
// add it in
|
||||
if (last == null) {
|
||||
last = item;
|
||||
} else {
|
||||
if (result == null) result = Pick.makeSequence().and2(last);
|
||||
result = result.and2(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for simplicity, we just use recursive descent
|
||||
Pick getAlternation() {
|
||||
Pick.Alternation result = null;
|
||||
Pick last = null;
|
||||
int lastWeight = NO_WEIGHT;
|
||||
while (true) {
|
||||
Pick temp = getSequence();
|
||||
if (temp == null) error("empty alternation");
|
||||
int weight = getWeight();
|
||||
if (weight == NO_WEIGHT) weight = 1;
|
||||
if (last == null) {
|
||||
last = temp;
|
||||
lastWeight = weight;
|
||||
} else {
|
||||
if (result == null) result = Pick.makeAlternation().or2(lastWeight, last);
|
||||
result = result.or2(weight, temp);
|
||||
}
|
||||
int token = t.next();
|
||||
if (token != '|') {
|
||||
t.backup();
|
||||
if (result != null) return result;
|
||||
if (last != null) return last;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final int NO_WEIGHT = Integer.MIN_VALUE;
|
||||
|
||||
int getWeight() {
|
||||
int weight;
|
||||
int token = t.next();
|
||||
if (token != Tokenizer.NUMBER) {
|
||||
t.backup();
|
||||
return NO_WEIGHT;
|
||||
}
|
||||
weight = (int)t.getNumber();
|
||||
token = t.next();
|
||||
if (token != '%') error("missing %");
|
||||
return weight;
|
||||
}
|
||||
|
||||
int[] getWeights() {
|
||||
ArrayList list = new ArrayList();
|
||||
while (true) {
|
||||
int weight = getWeight();
|
||||
if (weight == NO_WEIGHT) break;
|
||||
list.add(new Integer(weight));
|
||||
}
|
||||
if (list.size() == 0) return null;
|
||||
int[] result = new int[list.size()];
|
||||
for (int i = 0; i < list.size(); ++i) {
|
||||
result[i] = ((Integer)list.get(i)).intValue();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public int getMaxRepeat() {
|
||||
return maxRepeat;
|
||||
}
|
||||
|
||||
public BNF setMaxRepeat(int maxRepeat) {
|
||||
this.maxRepeat = maxRepeat;
|
||||
return this;
|
||||
}
|
||||
}
|
|
@ -1,320 +0,0 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.util;
|
||||
|
||||
import java.text.ParsePosition;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.SymbolTable;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeMatcher;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class Tokenizer {
|
||||
protected String source;
|
||||
|
||||
protected StringBuffer buffer = new StringBuffer();
|
||||
protected long number;
|
||||
protected UnicodeSet unicodeSet = null;
|
||||
protected int index;
|
||||
boolean backedup = false;
|
||||
protected int lastIndex = -1;
|
||||
protected int nextIndex;
|
||||
int lastValue = BACKEDUP_TOO_FAR;
|
||||
TokenSymbolTable symbolTable = new TokenSymbolTable();
|
||||
|
||||
private static final char
|
||||
QUOTE = '\'',
|
||||
BSLASH = '\\';
|
||||
private static final UnicodeSet QUOTERS = new UnicodeSet().add(QUOTE).add(BSLASH);
|
||||
private static final UnicodeSet WHITESPACE = new UnicodeSet("[" +
|
||||
"\\u0009-\\u000D\\u0020\\u0085\\u200E\\u200F\\u2028\\u2029" +
|
||||
"]");
|
||||
private static final UnicodeSet SYNTAX = new UnicodeSet("[" +
|
||||
"\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E" +
|
||||
"\\u00A1-\\u00A7\\u00A9\\u00AB-\\u00AC\\u00AE" +
|
||||
"\\u00B0-\\u00B1\\u00B6\\u00B7\\u00BB\\u00BF\\u00D7\\u00F7" +
|
||||
"\\u2010-\\u2027\\u2030-\\u205E\\u2190-\\u2BFF" +
|
||||
"\\u3001\\u3003\\u3008-\\u3020\\u3030" +
|
||||
"\\uFD3E\\uFD3F\\uFE45\\uFE46" +
|
||||
"]").removeAll(QUOTERS).remove('$');
|
||||
private static final UnicodeSet NEWLINE = new UnicodeSet("[\\u000A\\u000D\\u0085\\u2028\\u2029]");
|
||||
//private static final UnicodeSet DECIMAL = new UnicodeSet("[:Nd:]");
|
||||
private static final UnicodeSet NON_STRING = new UnicodeSet()
|
||||
.addAll(WHITESPACE)
|
||||
.addAll(SYNTAX);
|
||||
|
||||
protected UnicodeSet whiteSpace = WHITESPACE;
|
||||
protected UnicodeSet syntax = SYNTAX;
|
||||
private UnicodeSet non_string = NON_STRING;
|
||||
|
||||
private void fixSets() {
|
||||
if (syntax.containsSome(QUOTERS) || syntax.containsSome(whiteSpace)) {
|
||||
syntax = ((UnicodeSet)syntax.clone()).removeAll(QUOTERS).removeAll(whiteSpace);
|
||||
}
|
||||
if (whiteSpace.containsSome(QUOTERS)) {
|
||||
whiteSpace = ((UnicodeSet)whiteSpace.clone()).removeAll(QUOTERS);
|
||||
}
|
||||
non_string = new UnicodeSet(syntax)
|
||||
.addAll(whiteSpace);
|
||||
}
|
||||
|
||||
public Tokenizer setSource(String source) {
|
||||
this.source = source;
|
||||
this.index = 0;
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
public Tokenizer setIndex(int index) {
|
||||
this.index = index;
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
public static final int
|
||||
DONE = -1,
|
||||
NUMBER = -2,
|
||||
STRING = -3,
|
||||
UNICODESET = -4,
|
||||
UNTERMINATED_QUOTE = -5,
|
||||
BACKEDUP_TOO_FAR = -6;
|
||||
|
||||
private static final int
|
||||
//FIRST = 0,
|
||||
//IN_NUMBER = 1,
|
||||
//IN_SPACE = 2,
|
||||
AFTER_QUOTE = 3, // warning: order is important for switch statement
|
||||
IN_STRING = 4,
|
||||
AFTER_BSLASH = 5,
|
||||
IN_QUOTE = 6;
|
||||
|
||||
public String toString(int type, boolean backedupBefore) {
|
||||
String s = backedup ? "@" : "*";
|
||||
switch(type) {
|
||||
case DONE:
|
||||
return s+"Done"+s;
|
||||
case BACKEDUP_TOO_FAR:
|
||||
return s+"Illegal Backup"+s;
|
||||
case UNTERMINATED_QUOTE:
|
||||
return s+"Unterminated Quote=" + getString() + s;
|
||||
case STRING:
|
||||
return s+"s=" + getString() + s;
|
||||
case NUMBER:
|
||||
return s+"n=" + getNumber() + s;
|
||||
case UNICODESET:
|
||||
return s+"n=" + getUnicodeSet() + s;
|
||||
default:
|
||||
return s+"c=" + usf.getName(type,true) + s;
|
||||
}
|
||||
}
|
||||
|
||||
private static final BagFormatter usf = new BagFormatter();
|
||||
|
||||
public void backup() {
|
||||
if (backedup) throw new IllegalArgumentException("backup too far");
|
||||
backedup = true;
|
||||
nextIndex = index;
|
||||
index = lastIndex;
|
||||
}
|
||||
|
||||
/*
|
||||
public int next2() {
|
||||
boolean backedupBefore = backedup;
|
||||
int result = next();
|
||||
System.out.println(toString(result, backedupBefore));
|
||||
return result;
|
||||
}
|
||||
*/
|
||||
|
||||
public int next() {
|
||||
if (backedup) {
|
||||
backedup = false;
|
||||
index = nextIndex;
|
||||
return lastValue;
|
||||
}
|
||||
int cp = 0;
|
||||
boolean inComment = false;
|
||||
// clean off any leading whitespace or comments
|
||||
while (true) {
|
||||
if (index >= source.length()) return lastValue = DONE;
|
||||
cp = nextChar();
|
||||
if (inComment) {
|
||||
if (NEWLINE.contains(cp)) inComment = false;
|
||||
} else {
|
||||
if (cp == '#') inComment = true;
|
||||
else if (!whiteSpace.contains(cp)) break;
|
||||
}
|
||||
}
|
||||
// record the last index in case we have to backup
|
||||
lastIndex = index;
|
||||
|
||||
if (cp == '[') {
|
||||
ParsePosition pos = new ParsePosition(index-1);
|
||||
unicodeSet = new UnicodeSet(source,pos,symbolTable);
|
||||
index = pos.getIndex();
|
||||
return lastValue = UNICODESET;
|
||||
}
|
||||
// get syntax character
|
||||
if (syntax.contains(cp)) return lastValue = cp;
|
||||
|
||||
// get number, if there is one
|
||||
if (UCharacter.getType(cp) == Character.DECIMAL_DIGIT_NUMBER) {
|
||||
number = UCharacter.getNumericValue(cp);
|
||||
while (index < source.length()) {
|
||||
cp = nextChar();
|
||||
if (UCharacter.getType(cp) != Character.DECIMAL_DIGIT_NUMBER) {
|
||||
index -= UTF16.getCharCount(cp); // BACKUP!
|
||||
break;
|
||||
}
|
||||
number *= 10;
|
||||
number += UCharacter.getNumericValue(cp);
|
||||
}
|
||||
return lastValue = NUMBER;
|
||||
}
|
||||
buffer.setLength(0);
|
||||
int status = IN_STRING;
|
||||
main:
|
||||
while (true) {
|
||||
switch (status) {
|
||||
case AFTER_QUOTE: // check for double ''?
|
||||
if (cp == QUOTE) {
|
||||
UTF16.append(buffer, QUOTE);
|
||||
status = IN_QUOTE;
|
||||
break;
|
||||
}
|
||||
// OTHERWISE FALL THROUGH!!!
|
||||
case IN_STRING:
|
||||
if (cp == QUOTE) status = IN_QUOTE;
|
||||
else if (cp == BSLASH) status = AFTER_BSLASH;
|
||||
else if (non_string.contains(cp)) {
|
||||
index -= UTF16.getCharCount(cp); // BACKUP!
|
||||
break main;
|
||||
} else UTF16.append(buffer,cp);
|
||||
break;
|
||||
case IN_QUOTE:
|
||||
if (cp == QUOTE) status = AFTER_QUOTE;
|
||||
else UTF16.append(buffer,cp);
|
||||
break;
|
||||
case AFTER_BSLASH:
|
||||
switch(cp) {
|
||||
case 'n': cp = '\n'; break;
|
||||
case 'r': cp = '\r'; break;
|
||||
case 't': cp = '\t'; break;
|
||||
}
|
||||
UTF16.append(buffer,cp);
|
||||
status = IN_STRING;
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Internal Error");
|
||||
}
|
||||
if (index >= source.length()) break;
|
||||
cp = nextChar();
|
||||
}
|
||||
if (status > IN_STRING) return lastValue = UNTERMINATED_QUOTE;
|
||||
return lastValue = STRING;
|
||||
}
|
||||
|
||||
public String getString() {
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return source.substring(0,index) + "$$$" + source.substring(index);
|
||||
}
|
||||
|
||||
public long getNumber() {
|
||||
return number;
|
||||
}
|
||||
|
||||
public UnicodeSet getUnicodeSet() {
|
||||
return unicodeSet;
|
||||
}
|
||||
|
||||
private int nextChar() {
|
||||
int cp = UTF16.charAt(source,index);
|
||||
index += UTF16.getCharCount(cp);
|
||||
return cp;
|
||||
}
|
||||
public int getIndex() {
|
||||
return index;
|
||||
}
|
||||
public String getSource() {
|
||||
return source;
|
||||
}
|
||||
public UnicodeSet getSyntax() {
|
||||
return syntax;
|
||||
}
|
||||
public UnicodeSet getWhiteSpace() {
|
||||
return whiteSpace;
|
||||
}
|
||||
public void setSyntax(UnicodeSet set) {
|
||||
syntax = set;
|
||||
fixSets();
|
||||
}
|
||||
public void setWhiteSpace(UnicodeSet set) {
|
||||
whiteSpace = set;
|
||||
fixSets();
|
||||
}
|
||||
|
||||
public Set getLookedUpItems() {
|
||||
return symbolTable.itemsLookedUp;
|
||||
}
|
||||
|
||||
public void addSymbol(String var, String value, int start, int limit) {
|
||||
// the limit is after the ';', so remove it
|
||||
--limit;
|
||||
char[] body = new char[limit - start];
|
||||
value.getChars(start, limit, body, 0);
|
||||
symbolTable.add(var, body);
|
||||
}
|
||||
|
||||
public class TokenSymbolTable implements SymbolTable {
|
||||
Map contents = new HashMap();
|
||||
Set itemsLookedUp = new HashSet();
|
||||
|
||||
public void add(String var, char[] body) {
|
||||
// start from 1 to avoid the $
|
||||
contents.put(var.substring(1), body);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
|
||||
*/
|
||||
public char[] lookup(String s) {
|
||||
itemsLookedUp.add('$' + s);
|
||||
return (char[])contents.get(s);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
|
||||
*/
|
||||
public UnicodeMatcher lookupMatcher(int ch) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String, java.text.ParsePosition, int)
|
||||
*/
|
||||
public String parseReference(String text, ParsePosition pos, int limit) {
|
||||
int cp;
|
||||
int start = pos.getIndex();
|
||||
int i;
|
||||
for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(text, i);
|
||||
if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
pos.setIndex(i);
|
||||
return text.substring(start,i);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue