ICU-12450 move com.ibm.icu.dev.util.BNF, Pick, Quoter, Tokenizer to org.unicode.cldr.util

X-SVN-Rev: 38615
This commit is contained in:
Markus Scherer 2016-04-13 15:41:12 +00:00
parent ebb7620ad0
commit c291532c83
5 changed files with 0 additions and 1750 deletions

View file

@ -1,792 +0,0 @@
/*
*******************************************************************************
* Copyright (C) 2002-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.util;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
abstract public class Pick {
private static boolean DEBUG = false;
// for using to get strings
/**
 * Generation context handed to {@code addTo}: bundles the pick being expanded,
 * the random source used for choices, and the quoter that collects the text.
 */
static class Target {
    private Pick pick;
    private Random random;
    private Quoter quoter;

    /** Builds a target that expands {@code pick} with {@code random} into {@code quoter}. */
    public static Target make(Pick pick, Random random, Quoter quoter) {
        Target made = new Target();
        made.quoter = quoter;
        made.random = random;
        made.pick = pick;
        return made;
    }

    /** Clears the quoter, expands the pick into this target, and returns the text. */
    public String next() {
        quoter.clear();
        pick.addTo(this);
        return get();
    }

    /** Returns the quoter's current contents as a string. */
    public String get() {
        return quoter.toString();
    }

    // Only the random source is shared; pick and quoter stay as they are.
    private void copyState(Target other) {
        this.random = other.random;
    }

    private void clear() {
        quoter.clear();
    }

    private Target append(int codepoint) {
        quoter.append(codepoint);
        return this;
    }

    private Target append(String s) {
        quoter.append(s);
        return this;
    }

    // must return value between 0 (inc) and 1 (exc)
    private double nextDouble() {
        return random.nextDouble();
    }
}
// for Building
/**
 * Visits this pick tree with a {@code Replacer} built from the arguments and
 * returns whatever that visit returns.
 */
public Pick replace(String toReplace, Pick replacement) {
    return visit(new Replacer(toReplace, replacement));
}
/** Sets this pick's name and returns this pick so calls can be chained. */
public Pick name(String nameStr) {
    this.name = nameStr;
    return this;
}
/** Creates a new sequence with no items. */
static public Pick.Sequence makeSequence() {
    Sequence fresh = new Sequence();
    return fresh;
}
/** Creates a new alternation with no items. */
static public Pick.Alternation makeAlternation() {
    Alternation fresh = new Alternation();
    return fresh;
}
/*
static public Pick.Sequence and(Object item) {
return new Sequence().and2(item);
}
static public Pick.Sequence and(Object[] items) {
return new Sequence().and2(items);
}
static public Pick.Alternation or(int itemWeight, Object item) {
return new Alternation().or2(itemWeight, item);
}
static public Pick.Alternation or(Object[] items) {
return new Alternation().or2(1, items);
}
static public Pick.Alternation or(int itemWeight, Object[] items) {
return new Alternation().or2(itemWeight, items);
}
static public Pick.Alternation or(int[] itemWeights, Object[] items) {
return new Alternation().or2(itemWeights, items);
}
static public Pick maybe(int percent, Object item) {
return new Repeat(0, 1, new int[]{100-percent, percent}, item);
//return Pick.or(1.0-percent, NOTHING).or2(percent, item);
}
static public Pick repeat(int minCount, int maxCount, int itemWeights, Object item) {
return new Repeat(minCount, maxCount, itemWeights, item);
}
static public Pick codePoint(String source) {
return new CodePoint(new UnicodeSet(source));
}
*/
/** Wraps {@code item} in a {@code Repeat} built from the given counts and weights. */
static public Pick repeat(int minCount, int maxCount, int[] itemWeights, Pick item) {
    Pick repeated = new Repeat(minCount, maxCount, itemWeights, item);
    return repeated;
}
/** Creates a pick backed by the given set of code points. */
static public Pick codePoint(UnicodeSet source) {
    Pick fromSet = new CodePoint(source);
    return fromSet;
}
/** Creates a literal pick for the given text. */
static public Pick string(String source) {
    Pick literal = new Literal(source);
    return literal;
}
/*
static public Pick unquoted(String source) {
return new Literal(source);
}
static public Pick string(int minLength, int maxLength, Pick item) {
return new Morph(item, minLength, maxLength);
}
*/
/**
 * Returns a diagnostic string describing this pick.
 * The subclasses in this file pass {@code depth + 1} and the same
 * {@code alreadySeen} set down to their child picks.
 */
public abstract String getInternal(int depth, Set alreadySeen);
// Internals
/** Name of this pick; set through {@code name(String)}, and used by Literal to hold its text. */
protected String name;
/** Appends one generated expansion of this pick to {@code target}. */
protected abstract void addTo(Target target);
/**
 * Attempts to match {@code input} at {@code p.index} and reports success.
 * NOTE(review): only some subclasses implement this; Morph and Quote always return false.
 */
public abstract boolean match(String input, Position p);
/**
 * A pick whose expansion is the expansion of each of its items in order.
 * Items are appended with the chainable {@code and2} methods.
 */
public static class Sequence extends ListPick {
    // keep private
    private Sequence() {}

    /** Appends one item and returns this sequence for chaining. */
    public Sequence and2 (Pick item) {
        Pick[] single = {item}; // we don't care about perf
        addInternal(single);
        return this;
    }

    /** Appends every element of {@code itemArray} and returns this sequence for chaining. */
    public Sequence and2 (Pick[] itemArray) {
        addInternal(itemArray);
        return this;
    }

    protected void addTo(Target target) {
        int i = 0;
        while (i < items.length) {
            items[i].addTo(target);
            ++i;
        }
    }

    public String getInternal(int depth, Set alreadySeen) {
        String label = checkName(name, alreadySeen);
        if (label.startsWith("$")) {
            return label;
        }
        StringBuffer text = new StringBuffer();
        text.append(indent(depth)).append(label).append("SEQ(");
        for (int i = 0; i < items.length; ++i) {
            if (i > 0) {
                text.append(", ");
            }
            text.append(items[i].getInternal(depth + 1, alreadySeen));
        }
        text.append(")");
        return text.toString();
    }

    /** Matches every item in order; on the first failure the start index is restored. */
    public boolean match(String input, Position p) {
        final int startIndex = p.index;
        boolean allMatched = true;
        for (int i = 0; allMatched && i < items.length; ++i) {
            allMatched = items[i].match(input, p);
        }
        if (!allMatched) {
            p.index = startIndex;
        }
        return allMatched;
    }
}
/**
 * Produces the label used by {@code getInternal} for a named pick: an empty
 * string for no name, the bare name if it was already recorded in
 * {@code alreadySeen}, otherwise the name wrapped as <code>{name=}</code>
 * (and the name is recorded).
 */
String checkName(String nameStr, Set alreadySeen) {
    if (nameStr == null) {
        return "";
    }
    // Set.add reports whether the name was newly inserted.
    boolean firstSighting = alreadySeen.add(nameStr);
    return firstSighting ? "{" + nameStr + "=}" : nameStr;
}
/**
 * A pick that expands exactly one of its items, selected through a
 * {@code WeightedIndex} of per-item integer weights. Items are appended with
 * the chainable {@code or2} methods.
 */
public static class Alternation extends ListPick {
    private WeightedIndex weightedIndex = new WeightedIndex(0);

    // keep private
    private Alternation() {}

    /** Appends all of {@code newItems}, each with weight 1. */
    public Alternation or2 (Pick[] newItems) {
        return or2(1, newItems);
    }

    /** Appends a single item with the given weight. */
    public Alternation or2 (int itemWeight, Pick item) {
        Pick[] single = {item}; // we don't care about perf
        return or2(itemWeight, single);
    }

    /** Appends all of {@code newItems}, each with the same weight. */
    public Alternation or2 (int itemWeight, Pick[] newItems) {
        int[] uniform = new int[newItems.length];
        Arrays.fill(uniform, itemWeight);
        return or2(uniform, newItems); // we don't care about perf
    }

    /**
     * Appends {@code newItems} with the parallel array of weights.
     * @throws ArrayIndexOutOfBoundsException if the two arrays differ in length
     */
    public Alternation or2 (int[] itemWeights, Pick[] newItems) {
        if (newItems.length != itemWeights.length) {
            String message =
                "or lengths must be equal: " + newItems.length + " != " + itemWeights.length;
            throw new ArrayIndexOutOfBoundsException(message);
        }
        addInternal(newItems);
        weightedIndex.add(itemWeights);
        return this; // for chaining
    }

    protected void addTo(Target target) {
        int chosen = weightedIndex.toIndex(target.nextDouble());
        items[chosen].addTo(target);
    }

    public String getInternal(int depth, Set alreadySeen) {
        String label = checkName(name, alreadySeen);
        if (label.startsWith("$")) {
            return label;
        }
        StringBuffer text = new StringBuffer();
        text.append(indent(depth)).append(label).append("OR(");
        for (int i = 0; i < items.length; ++i) {
            if (i > 0) {
                text.append(", ");
            }
            text.append(items[i].getInternal(depth + 1, alreadySeen));
            text.append("/").append(weightedIndex.weights[i]);
        }
        return text.append(")").toString();
    }

    // take first matching option
    public boolean match(String input, Position p) {
        final int optionCount = weightedIndex.weights.length;
        for (int i = 0; i < optionCount; ++i) {
            if (p.isFailure(this, i)) {
                continue;
            }
            if (items[i].match(input, p)) {
                return true;
            }
            p.setFailure(this, i);
        }
        return false;
    }
}
/** Returns a CR-LF line break followed by one space per level of {@code depth}. */
private static String indent(int depth) {
    StringBuffer pad = new StringBuffer("\r\n");
    int remaining = depth;
    while (remaining > 0) {
        pad.append(" ");
        --remaining;
    }
    return pad.toString();
}
/**
 * A pick that expands its item a random number of times. The repeat count is
 * drawn from a {@code WeightedIndex} whose first slot corresponds to
 * {@code minCount}.
 */
private static class Repeat extends ItemPick {
    WeightedIndex weightedIndex;
    int minCount = 0;

    private Repeat(int minCount, int maxCount, int[] itemWeights, Pick item) {
        super(item);
        // The parameter shadows the field; without this assignment match()
        // would always treat the minimum as 0.
        this.minCount = minCount;
        weightedIndex = new WeightedIndex(minCount).add(maxCount-minCount+1, itemWeights);
    }

    /** Expands the item as many times as the weighted index selects. */
    protected void addTo(Target target) {
        for (int i = weightedIndex.toIndex(target.nextDouble()); i > 0; --i) {
            item.addTo(target);
        }
    }

    public String getInternal(int depth, Set alreadySeen) {
        String result = checkName(name, alreadySeen);
        if (result.startsWith("$")) return result;
        result = indent(depth) + result + "REPEAT(" + weightedIndex
            + "; "+ item.getInternal(depth+1, alreadySeen)
            + ")";
        return result;
    }

    /**
     * Matches the item as many times as possible, up to the largest count that
     * {@code addTo} can generate, and succeeds if at least {@code minCount}
     * repetitions matched. On failure {@code p.index} is restored, as
     * {@code Sequence.match} does.
     */
    // match longest, e.g. up to just before a failure
    public boolean match(String input, Position p) {
        final int originalIndex = p.index;
        // Slot k of the weighted index stands for (minCount + k) repetitions,
        // so the largest count is minCount + slots - 1.
        final int maxRepeat = minCount + weightedIndex.weights.length - 1;
        int count = 0;
        for (int i = 0; i < maxRepeat; ++i) {
            if (p.isFailure(this,i)) break;
            if (!item.match(input, p)) {
                p.setFailure(this,i);
                break;
            }
            count++;
        }
        if (count >= minCount) {
            return true;
        }
        p.index = originalIndex;
        return false;
    }
}
/** A pick that generates, or matches, a single code point taken from a {@code UnicodeSet}. */
private static class CodePoint extends FinalPick {
    private UnicodeSet source;

    private CodePoint(UnicodeSet source) {
        this.source = source;
    }

    /** Appends one randomly chosen member of the set to {@code target}. */
    protected void addTo(Target target) {
        target.append(source.charAt(pick(target.random,0,source.size()-1)));
    }

    /**
     * Succeeds, advancing {@code p.index} past the code point, when the code
     * point at {@code p.index} is in the set. At end of input it fails instead
     * of reading past the end of the string.
     */
    public boolean match(String s, Position p) {
        if (p.index < s.length()) {
            int cp = UTF16.charAt(s, p.index);
            if (source.contains(cp)) {
                p.index += UTF16.getCharCount(cp);
                return true;
            }
        }
        p.setMax("codePoint");
        return false;
    }

    public String getInternal(int depth, Set alreadySeen) {
        String result = checkName(name, alreadySeen);
        if (result.startsWith("$")) return result;
        return source.toString();
    }
}
/**
 * A pick whose output drifts gradually: each expansion of the wrapped item is
 * merged, code point by code point, with the value produced on the previous
 * call.
 */
static class Morph extends ItemPick {
    Morph(Pick item) {
        super(item);
    }

    private String lastValue = null;
    private Target addBuffer = Target.make(this, null, new Quoter.RuleQuoter());
    private StringBuffer mergeBuffer = new StringBuffer();

    // Merge actions. The values must be exactly the indices 0..3 that `choice`
    // can return; values at or above LEAST_SKIP advance without copying.
    private static final int COPY_NEW = 0, COPY_BOTH = 1, COPY_LAST = 2, SKIP = 3,
        LEAST_SKIP = 3;
    // give weights to the above. make sure we delete about the same as we insert
    private static final WeightedIndex choice = new WeightedIndex(0)
        .add(new int[] {10, 10, 100, 10});

    protected void addTo(Target target) {
        // get contents into separate buffer
        addBuffer.copyState(target);
        addBuffer.clear();
        item.addTo(addBuffer);
        String newValue = addBuffer.get();
        if (DEBUG) System.out.println("Old: " + lastValue + ", New:" + newValue);

        // if not first one, merge with old
        if (lastValue != null) {
            mergeBuffer.setLength(0);
            int lastIndex = 0;
            int newIndex = 0;
            // the new length is a random value between old and new.
            int newLenLimit = pick(target.random, lastValue.length(), newValue.length());

            while (mergeBuffer.length() < newLenLimit
                    && newIndex < newValue.length()
                    && lastIndex < lastValue.length()) {
                int c = choice.toIndex(target.nextDouble());
                if (c == COPY_NEW || c == COPY_BOTH || c == SKIP) {
                    newIndex = getChar(newValue, newIndex, mergeBuffer, c < LEAST_SKIP);
                    if (mergeBuffer.length() >= newLenLimit) break;
                }
                if (c == COPY_LAST || c == COPY_BOTH || c == SKIP) {
                    lastIndex = getChar(lastValue, lastIndex, mergeBuffer, c < LEAST_SKIP);
                }
            }
            newValue = mergeBuffer.toString();
        }
        lastValue = newValue;
        target.append(newValue);
        if (DEBUG) System.out.println("Result: " + newValue);
    }

    public String getInternal(int depth, Set alreadySeen) {
        String result = checkName(name, alreadySeen);
        if (result.startsWith("$")) return result;
        return indent(depth) + result + "MORPH("
            + item.getInternal(depth+1, alreadySeen)
            + ")";
    }

    /* (non-Javadoc)
     * @see Pick#match(java.lang.String, Pick.Position)
     */
    public boolean match(String input, Position p) {
        // TODO Auto-generated method stub
        return false;
    }
}
/**
 * Steps over the code point at {@code newIndex} in {@code newValue},
 * appending it to {@code mergeBuffer} when {@code copy} is true.
 * Returns the index after that code point, or {@code newIndex} unchanged
 * when it is already at or past the end of the string.
 */
static int getChar(String newValue, int newIndex, StringBuffer mergeBuffer, boolean copy) {
    if (newIndex < newValue.length()) {
        int codePoint = UTF16.charAt(newValue, newIndex);
        if (copy) {
            UTF16.append(mergeBuffer, codePoint);
        }
        newIndex += UTF16.getCharCount(codePoint);
    }
    return newIndex;
}
/*
// quoted add
appendQuoted(target, addBuffer.toString(), quoteBuffer);
// fix buffers
StringBuffer swapTemp = addBuffer;
addBuffer = source;
source = swapTemp;
}
}
*/
/** A pick that expands its item with the target's quoter switched into quoting mode. */
static class Quote extends ItemPick {
    Quote(Pick item) {
        super(item);
    }

    /** Turns quoting on, expands the item, then turns quoting off again. */
    protected void addTo(Target target) {
        Quoter out = target.quoter;
        out.setQuoting(true);
        item.addTo(target);
        target.quoter.setQuoting(false);
    }

    /** Matching is not implemented for quoted picks; always returns false. */
    public boolean match(String s, Position p) {
        return false;
    }

    public String getInternal(int depth, Set alreadySeen) {
        String label = checkName(name, alreadySeen);
        if (label.startsWith("$")) {
            return label;
        }
        String inner = item.getInternal(depth + 1, alreadySeen);
        return indent(depth) + label + "QUOTE(" + inner + ")";
    }
}
/** A pick for a fixed piece of text; the text is stored in the inherited {@code name} field. */
private static class Literal extends FinalPick {
    private Literal(String source) {
        this.name = source;
    }

    public String toString() {
        return name;
    }

    protected void addTo(Target target) {
        target.append(name);
    }

    /** Succeeds, advancing {@code p.index}, when the input at {@code p.index} starts with the text. */
    public boolean match(String input, Position p) {
        final int textLength = name.length();
        boolean found = input.regionMatches(p.index, name, 0, textLength);
        if (!found) {
            p.setMax("literal");
            return false;
        }
        p.index += textLength;
        return true;
    }

    public String getInternal(int depth, Set alreadySeen) {
        return "'" + name + "'";
    }
}
/**
 * Mutable cursor used by {@code match}: the current input index, the furthest
 * index at which a leaf pick reported a failure (with a label for that leaf),
 * and a table of (index, item, pick) combinations already known to fail.
 */
public static class Position {
    /** Failure table: element i is null or a list, indexed by item number, of Sets of picks. */
    public ArrayList failures = new ArrayList();
    public int index;
    public int maxInt;
    public String maxType;

    /** Records {@code type} and the current index when the index is at least as far as any seen so far. */
    public void setMax(String type) {
        if (index >= maxInt) {
            maxInt = index;
            maxType = type;
        }
    }

    public String toString() {
        return "index; " + index
            + ", maxInt:" + maxInt
            + ", maxType: " + maxType;
    }

    /** Returns true if {@code setFailure(pick, item)} was called earlier at the current index. */
    public boolean isFailure(Pick pick, int item) {
        ArrayList val = (ArrayList) getOrNull(failures, index);
        if (val == null) return false;
        Set set = (Set) getOrNull(val, item);
        if (set == null) return false;
        return set.contains(pick);
    }

    /** Remembers that {@code pick} failed for {@code item} at the current index. */
    public void setFailure(Pick pick, int item) {
        ArrayList val = (ArrayList) getOrNull(failures, index);
        if (val == null) {
            val = new ArrayList();
            setGrowing(failures, index, val);
        }
        Set set = (Set) getOrNull(val, item);
        if (set == null) {
            set = new HashSet();
            setGrowing(val, item, set);
        }
        set.add(pick);
    }

    // ArrayList.get throws for slots that were never filled; treat them as null.
    private static Object getOrNull(ArrayList list, int i) {
        return (i >= 0 && i < list.size()) ? list.get(i) : null;
    }

    // ArrayList.set throws beyond size(); pad with nulls up to the slot first.
    private static void setGrowing(ArrayList list, int i, Object value) {
        while (list.size() <= i) {
            list.add(null);
        }
        list.set(i, value);
    }
}
/*
public static final Pick NOTHING = new Nothing();
private static class Nothing extends FinalPick {
protected void addTo(Target target) {}
protected boolean match(String input, Position p) {
return true;
}
public String getInternal(int depth, Set alreadySeen) {
return indent(depth) + "\u00F8";
}
}
*/
// intermediates
/**
 * Base class for operations applied to every node of a pick tree. It keeps
 * the set of nodes already entered so that a traversal can stop on revisits.
 */
abstract static class Visitor {
    Set already = new HashSet();

    // Note: each visitor should return the Pick that will replace a (or a itself)
    abstract Pick handle(Pick a);

    /** Marks {@code item} as entered and reports whether it had been entered before. */
    boolean alreadyEntered(Pick item) {
        // Set.add returns false exactly when the element was already present.
        boolean newlyAdded = already.add(item);
        return !newlyAdded;
    }

    /** Forgets all entered nodes. */
    void reset() {
        already.clear();
    }
}
/**
 * Applies {@code visitor} to this pick and returns the pick that should stand
 * in its place. The implementations in this file return
 * {@code visitor.handle(this)}, and the container picks also visit their children.
 */
protected abstract Pick visit(Visitor visitor);
/** Visitor that substitutes {@code replacement} for any pick whose name equals {@code toReplace}. */
static class Replacer extends Visitor {
    String toReplace;
    Pick replacement;

    Replacer(String toReplace, Pick replacement) {
        this.replacement = replacement;
        this.toReplace = toReplace;
    }

    /** Returns the replacement when {@code a} carries the target name, otherwise {@code a} itself. */
    public Pick handle(Pick a) {
        return toReplace.equals(a.name) ? replacement : a;
    }
}
/** Base for picks without children: visiting one only hands it to the visitor. */
abstract private static class FinalPick extends Pick {
    public Pick visit(Visitor visitor) {
        Pick handled = visitor.handle(this);
        return handled;
    }
}
/** Base for picks that wrap exactly one child pick. */
private abstract static class ItemPick extends Pick {
    protected Pick item;

    ItemPick (Pick item) {
        this.item = item;
    }

    /**
     * Hands this pick to the visitor, then (the first time this node is
     * entered) replaces the child with the result of visiting it.
     */
    public Pick visit(Visitor visitor) {
        Pick handled = visitor.handle(this);
        boolean seenBefore = visitor.alreadyEntered(this);
        if (!seenBefore && item != null) {
            item = item.visit(visitor);
        }
        return handled;
    }
}
/** Base for picks that hold an ordered array of child picks. */
private abstract static class ListPick extends Pick {
    protected Pick[] items = new Pick[0];

    /** Returns this pick if it has several items, the sole item if it has one, else null. */
    Pick simplify() {
        switch (items.length) {
        case 0:
            return null;
        case 1:
            return items[0];
        default:
            return this;
        }
    }

    int size() {
        return items.length;
    }

    Pick getLast() {
        int lastSlot = items.length - 1;
        return items[lastSlot];
    }

    void setLast(Pick newOne) {
        int lastSlot = items.length - 1;
        items[lastSlot] = newOne;
    }

    /** Appends all of {@code objs} to the end of the item array. */
    protected void addInternal(Pick[] objs) {
        final int oldCount = items.length;
        items = realloc(items, oldCount + objs.length);
        System.arraycopy(objs, 0, items, oldCount, objs.length);
    }

    /**
     * Hands this pick to the visitor, then (the first time this node is
     * entered) replaces each child with the result of visiting it.
     */
    public Pick visit(Visitor visitor) {
        Pick handled = visitor.handle(this);
        if (!visitor.alreadyEntered(this)) {
            for (int i = 0; i < items.length; ++i) {
                items[i] = items[i].visit(visitor);
            }
        }
        return handled;
    }
}
/**
* Simple class to distribute a number between 0 (inclusive) and 1 (exclusive) among
* a number of indices, where each index is weighted.
* Item weights may be zero, but cannot be negative.
* @author Davis
*/
// As in other case, we use an array for runtime speed; don't care about buildspeed.
public static class WeightedIndex {
private int[] weights = new int[0];
private int minCount = 0;
private double total;
public WeightedIndex(int minCount) {
this.minCount = minCount;
}
public WeightedIndex add(int count, int itemWeights) {
if (count > 0) {
int[] newWeights = new int[count];
if (itemWeights < 1) itemWeights = 1;
Arrays.fill(newWeights, 0, count, itemWeights);
add(1, newWeights);
}
return this; // for chaining
}
public WeightedIndex add(int[] newWeights) {
return add(newWeights.length, newWeights);
}
public WeightedIndex add(int maxCount, int[] newWeights) {
if (newWeights == null) newWeights = new int[]{1};
int oldLen = weights.length;
if (maxCount < newWeights.length) maxCount = newWeights.length;
weights = (int[]) realloc(weights, weights.length + maxCount);
System.arraycopy(newWeights, 0, weights, oldLen, newWeights.length);
int lastWeight = weights[oldLen + newWeights.length-1];
for (int i = oldLen + newWeights.length; i < maxCount; ++i) {
weights[i] = lastWeight;
}
total = 0;
for (int i = 0; i < weights.length; ++i) {
if (weights[i] < 0) {
throw new RuntimeException("only positive weights: " + i);
}
total += weights[i];
}
return this; // for chaining
}
// TODO, make this more efficient
public int toIndex(double zeroToOne) {
double weight = zeroToOne*total;
int i;
for (i = 0; i < weights.length; ++i) {
weight -= weights[i];
if (weight <= 0) break;
}
return i + minCount;
}
public String toString() {
String result = "";
for (int i = 0; i < minCount; ++i) {
if (result.length() != 0) result += ",";
result += "0";
}
for (int i = 0; i < weights.length; ++i) {
if (result.length() != 0) result += ",";
result += weights[i];
}
return result;
}
}
/*
private static Pick convert(Object obj) {
if (obj instanceof Pick) return (Pick)obj;
return new Literal(obj.toString(), false);
}
*/
// Useful statics
/** Returns {@code start} plus a random offset scaled by the size of the inclusive range start..end. */
static public int pick(Random random, int start, int end) {
    int span = end + 1 - start;
    int offset = (int)(random.nextDouble() * span);
    return start + offset;
}
/** Returns {@code start} plus a random fraction of {@code end + 1 - start}. */
static public double pick(Random random, double start, double end) {
    double span = end + 1 - start;
    double offset = random.nextDouble() * span;
    return start + offset;
}
/** Returns true when the next random double (in [0, 1)) is at most {@code percent}. */
static public boolean pick(Random random, double percent) {
    double roll = random.nextDouble();
    return roll <= percent;
}
/** Returns the element of {@code s} at a randomly picked position between 0 and {@code s.size()-1}. */
static public int pick(Random random, UnicodeSet s) {
    int lastPosition = s.size() - 1;
    int position = pick(random, 0, lastPosition);
    return s.charAt(position);
}
/** Returns a randomly picked element of {@code source}. */
static public String pick(Random random, String[] source) {
    int lastSlot = source.length - 1;
    int slot = pick(random, 0, lastSlot);
    return source[slot];
}
// these utilities really ought to be in Java
/**
 * Returns a new array of length {@code newSize} holding the leading elements
 * of {@code source}; extra slots are zero and surplus elements are dropped.
 */
public static double[] realloc(double[] source, int newSize) {
    double[] resized = new double[newSize];
    int keep = Math.min(newSize, source.length);
    if (keep != 0) {
        System.arraycopy(source, 0, resized, 0, keep);
    }
    return resized;
}
/**
 * Returns a new array of length {@code newSize} holding the leading elements
 * of {@code source}; extra slots are zero and surplus elements are dropped.
 */
public static int[] realloc(int[] source, int newSize) {
    int[] resized = new int[newSize];
    int keep = Math.min(newSize, source.length);
    if (keep != 0) {
        System.arraycopy(source, 0, resized, 0, keep);
    }
    return resized;
}
/**
 * Returns a new {@code Pick[]} of length {@code newSize} holding the leading
 * elements of {@code source}; extra slots are null and surplus elements are dropped.
 */
public static Pick[] realloc(Pick[] source, int newSize) {
    Pick[] resized = new Pick[newSize];
    int keep = Math.min(newSize, source.length);
    if (keep != 0) {
        System.arraycopy(source, 0, resized, 0, keep);
    }
    return resized;
}
// test utilities
/*private static void append(StringBuffer target, String toAdd, StringBuffer quoteBuffer) {
Utility.appendToRule(target, (int)-1, true, false, quoteBuffer); // close previous quote
if (DEBUG) System.out.println("\"" + toAdd + "\"");
target.append(toAdd);
}
private static void appendQuoted(StringBuffer target, String toAdd, StringBuffer quoteBuffer) {
if (DEBUG) System.out.println("\"" + toAdd + "\"");
Utility.appendToRule(target, toAdd, false, false, quoteBuffer);
}*/
/*
public static abstract class MatchHandler {
public abstract void handleString(String source, int start, int limit);
public abstract void handleSequence(String source, int start, int limit);
public abstract void handleAlternation(String source, int start, int limit);
}
*/
/*
// redistributes random value
// values are still between 0 and 1, but with a different distribution
public interface Spread {
public double spread(double value);
}
// give the weight for the high end.
// values are linearly scaled according to the weight.
static public class SimpleSpread implements Spread {
static final Spread FLAT = new SimpleSpread(1.0);
boolean flat = false;
double aa, bb, cc;
public SimpleSpread(double maxWeight) {
if (maxWeight > 0.999 && maxWeight < 1.001) {
flat = true;
} else {
double q = (maxWeight - 1.0);
aa = -1/q;
bb = 1/(q*q);
cc = (2.0+q)/q;
}
}
public double spread(double value) {
if (flat) return value;
value = aa + Math.sqrt(bb + cc*value);
if (value < 0.0) return 0.0; // catch math gorp
if (value >= 1.0) return 1.0;
return value;
}
}
static public int pick(Spread spread, Random random, int start, int end) {
return start + (int)(spread.spread(random.nextDouble()) * (end + 1 - start));
}
*/
}

View file

@ -1,65 +0,0 @@
/*
*******************************************************************************
* Copyright (C) 2002-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.util;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UTF16;
/**
 * Accumulates output text in a buffer and tracks a "quoting" flag. The base
 * class only stores the flag; {@link RuleQuoter} uses it to quote appended text.
 */
public abstract class Quoter {
    private static boolean DEBUG = false;

    protected boolean quoting = false;
    protected StringBuffer output = new StringBuffer();

    public void setQuoting(boolean value) {
        this.quoting = value;
    }

    public boolean isQuoting() {
        return this.quoting;
    }

    /** Turns quoting off and empties the output buffer. */
    public void clear() {
        output.setLength(0);
        quoting = false;
    }

    public int length() {
        return output.length();
    }

    /** Appends {@code string} to the output and returns this quoter for chaining. */
    public Quoter append(String string) {
        output.append(string);
        return this;
    }

    /** Appends the string form of {@code codepoint} through {@link #append(String)}. */
    public Quoter append(int codepoint) {
        String asString = UTF16.valueOf(codepoint);
        return append(asString);
    }

    // warning, allows access to internals
    /** Ends quoting (a side effect) and returns the accumulated output. */
    public String toString() {
        setQuoting(false); // finish quoting
        return output.toString();
    }

    /**
     * Implements standard ICU rule quoting
     */
    public static class RuleQuoter extends Quoter {
        private StringBuffer quoteBuffer = new StringBuffer();

        /** Changes the quoting flag; when quoting is switched off the pending quote is closed. */
        public void setQuoting(boolean value) {
            if (quoting != value) {
                if (quoting) { // stop quoting
                    Utility.appendToRule(output, -1, true, false, quoteBuffer); // close previous quote
                }
                quoting = value;
            }
        }

        /** Appends {@code s}, through {@code Utility.appendToRule} while quoting and verbatim otherwise. */
        public Quoter append(String s) {
            if (DEBUG) System.out.println("\"" + s + "\"");
            if (!quoting) {
                output.append(s);
            } else {
                Utility.appendToRule(output, s, false, false, quoteBuffer);
            }
            return this;
        }
    }
}

View file

@ -1,244 +0,0 @@
/*
*******************************************************************************
* Copyright (C) 2002-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.test.util;
import java.util.Random;
import com.ibm.icu.dev.util.BNF;
import com.ibm.icu.dev.util.Pick;
import com.ibm.icu.dev.util.Quoter;
import com.ibm.icu.dev.util.Tokenizer;
import com.ibm.icu.text.UnicodeSet;
public class TestBNF {
// Sample grammars; main() passes each entry to testBNF, which prints the
// grammar, its internal form, and a number of generated strings.
static final String[] testRules = {
"$root = [ab]{3};",
"$root = [ab]{3,};",
"$root = [ab]{3,5};",
"$root = [ab]*;",
"$root = [ab]?;",
"$root = [ab]+;",
"$us = [a-z];" +
"$root = [0-9$us];",
"$root = a $foo b? 25% end 30% | $foo 50%;\r\n" +
"$foo = c{1,5} 20%;",
"$root = [a-z]{1,5}~;",
"$root = [a-z]{5}~;",
"$root = '\\' (u | U0010 | U000 $hex) $hex{4} ;\r\n" +
"$hex = [0-9A-Fa-f];",
};
// A larger multi-rule grammar; main() passes it to testBNF after the entries
// of testRules. Judging by its name and rules it appears to describe
// UnicodeSet pattern syntax.
// NOTE(review): the $char rule contains "\\U00010FFFF", which has nine hex
// digits after \U; the other rules here build \U escapes with eight -- confirm
// whether "\\U0010FFFF" was intended.
static String unicodeSetBNF = "" +
"$root = $leaf | '[' $s $root2 $s ']' ;\r\n" +
"$root2 = $leaf | '[' $s $root3 $s ']' | ($root3 $s ($op $root3 $s){0,3}) ;\r\n" +
"$root3 = $leaf | '[' $s $root4 $s ']' | ($root4 $s ($op $root4 $s){0,3}) ;\r\n" +
"$root4 = $leaf | ($leaf $s ($op $leaf $s){0,3}) ;\r\n" +
"$op = (('&' | '-') $s)? 70%;" +
"$leaf = '[' $s $list $s ']' | $prop;\r\n" +
"$list = ($char $s ('-' $s $char $s)? 30%){1,5} ;\r\n" +
"$prop = '\\' (p | P) '{' $s $propName $s '}' | '[:' '^'? $s $propName $s ':]';\r\n" +
"$needsQuote = [\\-\\][:whitespace:][:control:]] ;\r\n" +
"$char = [[\\u0000-\\U00010FFFF]-$needsQuote] | $quoted ;\r\n" +
"$quoted = '\\' ('u' | 'U0010' | 'U000' $hex) $hex{4} ;\r\n" +
"$hex = [0-9A-Fa-f];\r\n" +
"$s = ' '? 20%;\r\n" +
"$propName = (whitespace | ws) | (uppercase | uc) | (lowercase | lc) | $category;\r\n" +
"$category = ((general | gc) $s '=' $s)? $catvalue;\r\n" +
"$catvalue = (C | Other | Cc | Control | Cf | Format | Cn | Unassigned | L | Letter);\r\n";
/** Runs the tokenizer demo, then every sample grammar and the UnicodeSet grammar, 20 strings each. */
public static void main (String[] args) {
    testTokenizer();
    int ruleIndex = 0;
    while (ruleIndex < testRules.length) {
        testBNF(testRules[ruleIndex], null, 20);
        ++ruleIndex;
    }
    testBNF(unicodeSetBNF, null, 20);
    //testParser();
}
/**
 * Builds a BNF from {@code rules} (plus an optional {@code $chars} set),
 * prints the rules and the internal form, then prints {@code count}
 * generated strings.
 */
static void testBNF(String rules, UnicodeSet chars, int count) {
    BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
        .addSet("$chars", chars)
        .addRules(rules)
        .complete();
    String internalForm = bnf.getInternal();
    System.out.println("====================================");
    System.out.println("BNF");
    System.out.println(rules);
    System.out.println(internalForm);
    int i = 0;
    while (i < count) {
        System.out.println(i + ": " + bnf.next());
        ++i;
    }
}
/*
public static testManual() {
Pick p = Pick.maybe(75,Pick.unquoted("a"));
testOr(p, 1);
p = Pick.or(new String[]{"", "a", "bb", "ccc"});
testOr(p, 3);
p = Pick.repeat(3, 5, new int[]{20, 30, 20}, "a");
testOr(p, 5);
p = Pick.codePoint("[a-ce]");
testCodePoints(p);
p = Pick.codePoint("[a-ce]");
testCodePoints(p);
p = Pick.string(2, 8, p);
testOr(p,10);
p = Pick.or(new String[]{"", "a", "bb", "ccc"});
p = Pick.and(p).and2(p).and2("&");
testMatch(p, "abb&");
testMatch(p, "bba");
// testEnglish();
}
*/
/** Matches {@code source} against {@code p} from a fresh position and prints the outcome. */
static void testMatch(Pick p, String source) {
    Pick.Position position = new Pick.Position();
    boolean matched = p.match(source, position);
    System.out.println("Match: " + matched + ", " + position);
}
/*
static void testParser() {
try {
Pick.Target target = new Pick.Target();
for (int i = 0; i < rules.length; ++i) {
target.addRule(rules[i]);
}
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
*/
/** A small histogram over the indices 0..max. */
static class Counts {
    int[] counts;

    Counts(int max) {
        this.counts = new int[max+1];
    }

    /** Adds one to the tally at {@code index}. */
    void inc(int index) {
        counts[index] += 1;
    }

    /** Prints every non-zero tally as "index: count", followed by a blank line. */
    void show() {
        System.out.println("Printing Counts");
        for (int i = 0; i < counts.length; ++i) {
            if (counts[i] != 0) {
                System.out.println(i + ": " + counts[i]);
            }
        }
        System.out.println();
    }
}
/* static final String[] rules = {
"$s = ' ';",
"$noun = dog | house | government | wall | street | zebra;",
"$adjective = red | glorious | simple | nasty | heavy | clean;",
"$article = quickly | oddly | silently | boldly;",
"$adjectivePhrase = ($adverb $s)? 50% $adjective* 0% 30% 20% 10%;",
"$nounPhrase = $articles $s ($adjectivePhrase $s)? 30% $noun;",
"$verb = goes | fishes | walks | sleeps;",
"$tverb = carries | lifts | overturns | hits | jumps on;",
"$copula = is 30% | seems 10%;",
"$sentence1 = $nounPhrase $s $verb $s ($s $adverb)? 30%;",
"$sentence2 = $nounPhrase $s $tverb $s $nounPhrase ($s $adverb)? 30%;",
"$sentence3 = $nounPhrase $s $copula $s $adjectivePhrase;",
"$conj = but | and | or;",
"$sentence4 = $sentence1 | $sentence2 | $sentence3 20% | $sentence4 $conj $sentence4 20%;",
"$sentence = $sentence4 '.';"};
*/
/*
private static void testEnglish() {
Pick s = Pick.unquoted(" ");
Pick verbs = Pick.or(new String[]{"goes", "fishes", "walks", "sleeps"});
Pick transitive = Pick.or(new String[]{"carries", "lifts", "overturns", "hits", "jumps on"});
Pick nouns = Pick.or(new String[]{"dog", "house", "government", "wall", "street", "zebra"});
Pick adjectives = Pick.or(new String[]{"red", "glorious", "simple", "nasty", "heavy", "clean"});
Pick articles = Pick.or(new String[]{"the", "a"});
Pick adverbs = Pick.or(new String[]{"quickly", "oddly", "silently", "boldly"});
Pick adjectivePhrase = Pick.and(0.5, Pick.and(adverbs).and2(s)).and2(adjectives);
Pick nounPhrase = Pick.and(articles).and2(s)
.and2(0.3, Pick.and(adjectivePhrase).and2(s))
.and2(nouns);
Pick copula = Pick.or(new String[]{"is", "seems"});
Pick sentence1 = Pick.and(nounPhrase).and2(s).and2(verbs)
.and2(0.3, Pick.and(s).and2(adverbs)).name("s1");
Pick sentence2 = Pick.and(nounPhrase).and2(s).and2(transitive).and2(s).and2(nounPhrase)
.and2(0.3, Pick.and(s).and2(adverbs)).name("s2");
Pick sentence3 = Pick.and(nounPhrase).and2(s).and2(copula).and2(s).and2(adjectivePhrase).name("s3");
Pick conj = Pick.or(new String[]{", but", ", and", ", or"});
Pick forward = Pick.unquoted("forward");
Pick pair = Pick.and(forward).and2(conj).and2(s).and2(forward).name("part");
Pick sentenceBase = Pick.or(sentence1).or2(sentence2).or2(sentence3).or2(0.6666, pair).name("sentence");
sentenceBase.replace(forward, sentenceBase);
Pick sentence = Pick.and(sentenceBase).and2(Pick.unquoted("."));
Pick.Target target = Pick.Target.make(sentence);
for (int i = 0; i < 50; ++i) {
System.out.println(i + ": " + target.next());
}
}
private static void testOr(Pick p, int count) {
Pick.Target target = Pick.Target.make(p);
Counts counts = new Counts(count + 10);
for (int i = 0; i < 1000; ++i) {
String s = target.next();
counts.inc(s.length());
}
counts.show();
}
private static void testCodePoints(Pick p) {
Pick.Target target = Pick.Target.make(p);
Counts counts = new Counts(128);
for (int i = 0; i < 10000; ++i) {
String s = target.next();
counts.inc(s.charAt(0));
}
counts.show();
}
*/
/**
 * Prints 100 strings generated from a grammar of two to five lowercase
 * ASCII letters.
 */
public static void printRandoms() {
    // A rule must have the form "$variable = expression;" (see BNF.addRule),
    // and complete() looks up "$root"; a bare expression is rejected.
    BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
        .addRules("$root = [a-z]{2,5};").complete();
    System.out.println("Start");
    for (int i = 0; i < 100; ++i) {
        String temp = bnf.next();
        System.out.println(i + ")\t" + temp);
    }
}
/** Tokenizes a fixed list of sample inputs and prints every token, including the final DONE. */
public static void testTokenizer() {
    String[] samples = {"a'b'c d #abc\r e", "'a '123 321",
        "\\\\", "a'b", "a'", "abc def%?ghi", "%", "a", "\\ a", "a''''b"};
    Tokenizer t = new Tokenizer();
    for (int sampleIndex = 0; sampleIndex < samples.length; ++sampleIndex) {
        t.setSource(samples[sampleIndex]);
        System.out.println();
        System.out.println("Input: " + t.getSource());
        for (int type = 0; type != Tokenizer.DONE; ) {
            type = t.next();
            System.out.println(t.toString(type, false));
        }
    }
}
}

View file

@ -1,329 +0,0 @@
/*
*******************************************************************************
* Copyright (C) 2002-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import com.ibm.icu.text.UnicodeSet;
public class BNF {
private Map map = new HashMap();
private Set variables = new HashSet();
private Pick pick = null;
private Pick.Target target = null;
private Tokenizer t;
private Quoter quoter;
private Random random;
/** Returns the next generated string from the target built by {@code complete()}. */
public String next() {
    String generated = target.next();
    return generated;
}
/** Returns the diagnostic form of the root pick, starting at depth 0 with no names seen. */
public String getInternal() {
    Set seenNames = new HashSet();
    return pick.getInternal(0, seenNames);
}
/*
+ "weight = integer '%';"
+ "range = '{' integer (',' integer?)? '}' weight*;"
+ "quote = '@';"
+ "star = '*' weight*;"
+ "plus = '+' weight*;"
+ "maybe = '?' weight?;"
+ "quantifier = range | star | maybe | plus;"
+ "core = string | unicodeSet | '(' alternation ')';"
+ "sequence = (core quantifier*)+;"
+ "alternation = sequence (weight? ('|' sequence weight?)+)?;"
+ "rule = string '=' alternation;";
* Match 0 or more times
+ Match 1 or more times
? Match 1 or 0 times
{n} Match exactly n times
{n,} Match at least n times
{n,m} Match at least n but not more than m times
*/
/** Creates an empty grammar that will generate with {@code random} and write through {@code quoter}. */
public BNF(Random random, Quoter quoter) {
    this.t = new Tokenizer();
    this.quoter = quoter;
    this.random = random;
}
/** Parses every rule in {@code rules} and returns this grammar for chaining. */
public BNF addRules(String rules) {
    t.setSource(rules);
    boolean more;
    do {
        more = addRule();
    } while (more);
    return this; // for chaining
}
/**
 * Finishes the grammar: checks that the set of defined rules equals the set
 * of referenced variables plus {@code $root}, substitutes each rule's
 * definition into the other rules, and builds the generation target from
 * {@code $root}.
 *
 * @return this grammar, for chaining
 * @throws IllegalArgumentException (via {@code error}) if a variable is
 *         referenced but not defined, or defined but not referenced
 */
public BNF complete() {
    // check that the rules match the variables, except for $root in rules
    Set ruleSet = map.keySet();
    // add also
    variables.add("$root");
    variables.addAll(t.getLookedUpItems());
    if (!ruleSet.equals(variables)) {
        String msg = showDiff(variables, ruleSet);
        if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg;
        String temp = showDiff(ruleSet, variables);
        if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp;
        if (msg.length() == 0) msg = temp;
        else if (temp.length() != 0) {
            msg = msg + "; " + temp;
        }
        // error() always throws, so nothing below runs on a mismatch.
        error(msg);
    }
    // replace variables by definitions
    Iterator it = ruleSet.iterator();
    while (it.hasNext()) {
        String key = (String) it.next();
        Pick expression = (Pick) map.get(key);
        Iterator it2 = ruleSet.iterator();
        while (it2.hasNext()) {
            Object key2 = it2.next();
            if (key.equals(key2)) continue;
            Pick expression2 = (Pick) map.get(key2);
            expression2.replace(key, expression);
        }
    }
    pick = (Pick) map.get("$root");
    target = Pick.Target.make(pick, random, quoter);
    // TODO remove temp collections
    return this;
}
/**
 * Lists the elements that are in a but not in b.
 *
 * @param a the set whose extra elements are wanted
 * @param b the set to subtract
 * @return the toString() forms of the elements of a that are absent from b,
 *         joined by ", " in HashSet iteration order; "" when there are none
 */
String showDiff(Set a, Set b) {
    Set onlyInA = new HashSet();
    onlyInA.addAll(a);
    onlyInA.removeAll(b);
    StringBuffer joined = new StringBuffer();
    for (Iterator iter = onlyInA.iterator(); iter.hasNext();) {
        // separator goes in front of every element once something was written
        if (joined.length() > 0) {
            joined.append(", ");
        }
        joined.append(iter.next().toString());
    }
    return joined.toString();
}
/**
 * Reports a grammar error. Always throws.
 *
 * @param msg description of the problem
 * @throws IllegalArgumentException always; the message is msg followed by a
 *         CR LF and the tokenizer's toString()
 */
void error(String msg) {
    final String detail = msg
        + "\r\n" + t.toString();
    throw new IllegalArgumentException(detail);
}
/**
 * Parses one rule of the form <code>$name = alternation ;</code> from the
 * tokenizer and registers it.
 *
 * @return false when the input is exhausted, otherwise the result of addPick()
 * @throws IllegalArgumentException (via error()) on any syntax problem
 */
private boolean addRule() {
    int type = t.next();
    if (type == Tokenizer.DONE) return false;
    // A rule has to start with its variable name; the previous message
    // ("missing weight") described an unrelated token.
    if (type != Tokenizer.STRING) error("missing variable name");
    String s = t.getString();
    if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable");
    if (t.next() != '=') error("missing =");
    int startBody = t.index;
    Pick rule = getAlternation();
    if (rule == null) error("missing expression");
    // Record the raw rule body so UnicodeSet patterns can refer to it as a
    // variable; addSymbol() itself drops the final character before t.index.
    t.addSymbol(s, t.getSource(), startBody, t.index);
    if (t.next() != ';') error("missing ;");
    return addPick(s, rule);
}
/**
 * Registers a rule under the given variable name.
 *
 * @param s    the variable name used as the map key
 * @param rule the expression for that variable; it is given the name s if it
 *             has no name yet
 * @return always true
 * @throws IllegalArgumentException (via error()) if s already maps to a
 *         non-null rule
 */
protected boolean addPick(String s, Pick rule) {
    final boolean alreadyDefined = map.get(s) != null;
    if (alreadyDefined) {
        error("duplicate variable");
    }
    if (null == rule.name) {
        rule.name(s);
    }
    map.put(s, rule);
    return true;
}
/**
 * Defines a variable directly from a UnicodeSet, both as a symbol usable
 * inside UnicodeSet patterns and as a rule that picks a code point from it.
 *
 * @param variable the variable name
 * @param set      the set to bind; when null nothing is added
 * @return this, for chaining
 */
public BNF addSet(String variable, UnicodeSet set) {
    if (set == null) {
        return this;
    }
    String body = set.toString();
    // Tokenizer.addSymbol() always discards the character just before the
    // limit (it expects the limit to lie after a rule's ';'). Passing
    // length() + 1 keeps the complete pattern, including the closing ']'.
    t.addSymbol(variable, body, 0, body.length() + 1);
    addPick(variable, Pick.codePoint(set));
    return this;
}
// Upper repeat bound used by qualify() for '*', '+' and open-ended "{n,}" quantifiers.
int maxRepeat = 99;
/**
 * Applies at most one postfix qualifier to an item, according to the next
 * token:
 * <pre>
 *   @        wrap in Pick.Quote
 *   ~        wrap in Pick.Morph
 *   ?        repeat 0..1 times; optional weight (default 50)
 *   *        repeat 0..maxRepeat times; optional weights
 *   +        repeat 1..maxRepeat times; optional weights
 *   {n}      repeat exactly n times
 *   {n,}     repeat n..maxRepeat times
 *   {n,m}    repeat n..m times
 * </pre>
 * When the next token is none of these it is pushed back and the item is
 * returned unchanged, which is how callers detect that no qualifier is left.
 *
 * @param item the expression to qualify
 * @return the qualified expression, or item itself if no qualifier followed
 * @throws IllegalArgumentException (via error()) on a malformed "{...}" range
 */
Pick qualify(Pick item) {
    int[] weights;
    int type = t.next();
    switch(type) {
    case '@':
        return new Pick.Quote(item);
    case '~':
        return new Pick.Morph(item);
    case '?':
        int weight = getWeight();
        if (weight == NO_WEIGHT) weight = 50;
        weights = new int[] {100-weight, weight};
        return Pick.repeat(0, 1, weights, item);
    case '*':
        weights = getWeights();
        // '*' means zero or more; a minimum of 1 would make it identical to '+'
        return Pick.repeat(0, maxRepeat, weights, item);
    case '+':
        weights = getWeights();
        return Pick.repeat(1, maxRepeat, weights, item);
    case '{':
        if (t.next() != Tokenizer.NUMBER) error("missing number");
        int start = (int) t.getNumber();
        int end = start; // "{n}": exactly n
        type = t.next();
        if (type == ',') {
            end = maxRepeat; // "{n,}": open-ended
            type = t.next();
            if (type == Tokenizer.NUMBER) {
                end = (int)t.getNumber(); // "{n,m}"
                type = t.next();
            }
        }
        if (type != '}') error("missing }");
        weights = getWeights();
        return Pick.repeat(start, end, weights, item);
    }
    t.backup();
    return item;
}
/**
 * Parses one core item: a string literal or variable reference, a UnicodeSet,
 * or a parenthesized alternation.
 *
 * @return the parsed item, or null (with the token pushed back) when the next
 *         token does not start a core item
 * @throws IllegalArgumentException (via error()) if a '(' is not closed
 */
Pick getCore() {
    int token = t.next();
    if (token == Tokenizer.STRING) {
        String s = t.getString();
        // startsWith() rather than charAt(0): the tokenizer can return an
        // empty string (for example from ''), which must not throw here
        if (s.startsWith("$")) variables.add(s);
        return Pick.string(s);
    }
    if (token == Tokenizer.UNICODESET) {
        return Pick.codePoint(t.getUnicodeSet());
    }
    if (token != '(') {
        t.backup();
        return null;
    }
    Pick temp = getAlternation();
    token = t.next();
    if (token != ')') error("missing )");
    return temp;
}
/**
 * Parses one or more core items, each followed by any number of qualifiers.
 *
 * @return the single item when only one was found, otherwise a Pick.Sequence
 *         holding all items in order
 * @throws IllegalArgumentException (via error()) if no item is present
 */
Pick getSequence() {
    Pick.Sequence sequence = null;
    Pick first = null;
    for (;;) {
        Pick current = getCore();
        if (current == null) {
            if (sequence != null) return sequence;
            if (first != null) return first;
            error("missing item");
        }
        // keep applying qualifiers until qualify() hands back its argument
        while (true) {
            Pick before = current;
            current = qualify(before);
            if (current == before) break;
        }
        if (first == null) {
            // hold the first item back; a lone item is returned unwrapped
            first = current;
            continue;
        }
        if (sequence == null) {
            sequence = Pick.makeSequence().and2(first);
        }
        sequence = sequence.and2(current);
    }
}
/**
 * Parses sequences separated by '|', each optionally followed by a weight
 * (default 1). Implemented as plain recursive descent.
 *
 * @return the single sequence when there is no '|', otherwise a
 *         Pick.Alternation over all weighted branches
 * @throws IllegalArgumentException (via error()) on a syntax problem
 */
Pick getAlternation() {
    Pick.Alternation alternatives = null;
    Pick first = null;
    int firstWeight = NO_WEIGHT;
    for (;;) {
        Pick branch = getSequence();
        if (branch == null) error("empty alternation");
        int branchWeight = getWeight();
        if (branchWeight == NO_WEIGHT) {
            branchWeight = 1;
        }
        if (first == null) {
            // hold the first branch back; a lone branch is returned unwrapped
            first = branch;
            firstWeight = branchWeight;
        } else {
            if (alternatives == null) {
                alternatives = Pick.makeAlternation().or2(firstWeight, first);
            }
            alternatives = alternatives.or2(branchWeight, branch);
        }
        if (t.next() == '|') {
            continue;
        }
        // not a separator: give the token back and finish
        t.backup();
        if (alternatives != null) return alternatives;
        if (first != null) return first;
    }
}
// Sentinel returned by getWeight() when the next token is not a number.
private static final int NO_WEIGHT = Integer.MIN_VALUE;
/**
 * Parses an optional weight of the form <code>number '%'</code>.
 *
 * @return the number, or NO_WEIGHT (with the token pushed back) when the next
 *         token is not a number
 * @throws IllegalArgumentException (via error()) if the number is not
 *         followed by '%'
 */
int getWeight() {
    if (t.next() != Tokenizer.NUMBER) {
        t.backup();
        return NO_WEIGHT;
    }
    final int percent = (int) t.getNumber();
    if (t.next() != '%') {
        error("missing %");
    }
    return percent;
}
/**
 * Parses zero or more consecutive weights.
 *
 * @return the weights in the order read, or null when there are none
 */
int[] getWeights() {
    ArrayList collected = new ArrayList();
    for (int w = getWeight(); w != NO_WEIGHT; w = getWeight()) {
        collected.add(Integer.valueOf(w));
    }
    final int count = collected.size();
    if (count == 0) {
        return null;
    }
    int[] weights = new int[count];
    for (int i = 0; i < count; ++i) {
        weights[i] = ((Integer) collected.get(i)).intValue();
    }
    return weights;
}
/**
 * @return the upper repeat bound currently used for '*', '+' and "{n,}"
 */
public int getMaxRepeat() {
    return this.maxRepeat;
}
/**
 * Sets the upper repeat bound that qualify() uses for '*', '+' and "{n,}".
 *
 * @param maxRepeat the new bound
 * @return this, for chaining
 */
public BNF setMaxRepeat(int maxRepeat) {
    final BNF self = this;
    self.maxRepeat = maxRepeat;
    return self;
}
}

View file

@ -1,320 +0,0 @@
/*
*******************************************************************************
* Copyright (C) 2002-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.util;
import java.text.ParsePosition;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeMatcher;
import com.ibm.icu.text.UnicodeSet;
public class Tokenizer {
protected String source;
protected StringBuffer buffer = new StringBuffer();
protected long number;
protected UnicodeSet unicodeSet = null;
protected int index;
boolean backedup = false;
protected int lastIndex = -1;
protected int nextIndex;
int lastValue = BACKEDUP_TOO_FAR;
TokenSymbolTable symbolTable = new TokenSymbolTable();
private static final char
QUOTE = '\'',
BSLASH = '\\';
private static final UnicodeSet QUOTERS = new UnicodeSet().add(QUOTE).add(BSLASH);
private static final UnicodeSet WHITESPACE = new UnicodeSet("[" +
"\\u0009-\\u000D\\u0020\\u0085\\u200E\\u200F\\u2028\\u2029" +
"]");
private static final UnicodeSet SYNTAX = new UnicodeSet("[" +
"\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E" +
"\\u00A1-\\u00A7\\u00A9\\u00AB-\\u00AC\\u00AE" +
"\\u00B0-\\u00B1\\u00B6\\u00B7\\u00BB\\u00BF\\u00D7\\u00F7" +
"\\u2010-\\u2027\\u2030-\\u205E\\u2190-\\u2BFF" +
"\\u3001\\u3003\\u3008-\\u3020\\u3030" +
"\\uFD3E\\uFD3F\\uFE45\\uFE46" +
"]").removeAll(QUOTERS).remove('$');
private static final UnicodeSet NEWLINE = new UnicodeSet("[\\u000A\\u000D\\u0085\\u2028\\u2029]");
//private static final UnicodeSet DECIMAL = new UnicodeSet("[:Nd:]");
private static final UnicodeSet NON_STRING = new UnicodeSet()
.addAll(WHITESPACE)
.addAll(SYNTAX);
protected UnicodeSet whiteSpace = WHITESPACE;
protected UnicodeSet syntax = SYNTAX;
private UnicodeSet non_string = NON_STRING;
private void fixSets() {
if (syntax.containsSome(QUOTERS) || syntax.containsSome(whiteSpace)) {
syntax = ((UnicodeSet)syntax.clone()).removeAll(QUOTERS).removeAll(whiteSpace);
}
if (whiteSpace.containsSome(QUOTERS)) {
whiteSpace = ((UnicodeSet)whiteSpace.clone()).removeAll(QUOTERS);
}
non_string = new UnicodeSet(syntax)
.addAll(whiteSpace);
}
public Tokenizer setSource(String source) {
this.source = source;
this.index = 0;
return this; // for chaining
}
public Tokenizer setIndex(int index) {
this.index = index;
return this; // for chaining
}
public static final int
DONE = -1,
NUMBER = -2,
STRING = -3,
UNICODESET = -4,
UNTERMINATED_QUOTE = -5,
BACKEDUP_TOO_FAR = -6;
private static final int
//FIRST = 0,
//IN_NUMBER = 1,
//IN_SPACE = 2,
AFTER_QUOTE = 3, // warning: order is important for switch statement
IN_STRING = 4,
AFTER_BSLASH = 5,
IN_QUOTE = 6;
public String toString(int type, boolean backedupBefore) {
String s = backedup ? "@" : "*";
switch(type) {
case DONE:
return s+"Done"+s;
case BACKEDUP_TOO_FAR:
return s+"Illegal Backup"+s;
case UNTERMINATED_QUOTE:
return s+"Unterminated Quote=" + getString() + s;
case STRING:
return s+"s=" + getString() + s;
case NUMBER:
return s+"n=" + getNumber() + s;
case UNICODESET:
return s+"n=" + getUnicodeSet() + s;
default:
return s+"c=" + usf.getName(type,true) + s;
}
}
private static final BagFormatter usf = new BagFormatter();
public void backup() {
if (backedup) throw new IllegalArgumentException("backup too far");
backedup = true;
nextIndex = index;
index = lastIndex;
}
/*
public int next2() {
boolean backedupBefore = backedup;
int result = next();
System.out.println(toString(result, backedupBefore));
return result;
}
*/
public int next() {
if (backedup) {
backedup = false;
index = nextIndex;
return lastValue;
}
int cp = 0;
boolean inComment = false;
// clean off any leading whitespace or comments
while (true) {
if (index >= source.length()) return lastValue = DONE;
cp = nextChar();
if (inComment) {
if (NEWLINE.contains(cp)) inComment = false;
} else {
if (cp == '#') inComment = true;
else if (!whiteSpace.contains(cp)) break;
}
}
// record the last index in case we have to backup
lastIndex = index;
if (cp == '[') {
ParsePosition pos = new ParsePosition(index-1);
unicodeSet = new UnicodeSet(source,pos,symbolTable);
index = pos.getIndex();
return lastValue = UNICODESET;
}
// get syntax character
if (syntax.contains(cp)) return lastValue = cp;
// get number, if there is one
if (UCharacter.getType(cp) == Character.DECIMAL_DIGIT_NUMBER) {
number = UCharacter.getNumericValue(cp);
while (index < source.length()) {
cp = nextChar();
if (UCharacter.getType(cp) != Character.DECIMAL_DIGIT_NUMBER) {
index -= UTF16.getCharCount(cp); // BACKUP!
break;
}
number *= 10;
number += UCharacter.getNumericValue(cp);
}
return lastValue = NUMBER;
}
buffer.setLength(0);
int status = IN_STRING;
main:
while (true) {
switch (status) {
case AFTER_QUOTE: // check for double ''?
if (cp == QUOTE) {
UTF16.append(buffer, QUOTE);
status = IN_QUOTE;
break;
}
// OTHERWISE FALL THROUGH!!!
case IN_STRING:
if (cp == QUOTE) status = IN_QUOTE;
else if (cp == BSLASH) status = AFTER_BSLASH;
else if (non_string.contains(cp)) {
index -= UTF16.getCharCount(cp); // BACKUP!
break main;
} else UTF16.append(buffer,cp);
break;
case IN_QUOTE:
if (cp == QUOTE) status = AFTER_QUOTE;
else UTF16.append(buffer,cp);
break;
case AFTER_BSLASH:
switch(cp) {
case 'n': cp = '\n'; break;
case 'r': cp = '\r'; break;
case 't': cp = '\t'; break;
}
UTF16.append(buffer,cp);
status = IN_STRING;
break;
default: throw new IllegalArgumentException("Internal Error");
}
if (index >= source.length()) break;
cp = nextChar();
}
if (status > IN_STRING) return lastValue = UNTERMINATED_QUOTE;
return lastValue = STRING;
}
public String getString() {
return buffer.toString();
}
public String toString() {
return source.substring(0,index) + "$$$" + source.substring(index);
}
public long getNumber() {
return number;
}
public UnicodeSet getUnicodeSet() {
return unicodeSet;
}
private int nextChar() {
int cp = UTF16.charAt(source,index);
index += UTF16.getCharCount(cp);
return cp;
}
public int getIndex() {
return index;
}
public String getSource() {
return source;
}
public UnicodeSet getSyntax() {
return syntax;
}
public UnicodeSet getWhiteSpace() {
return whiteSpace;
}
public void setSyntax(UnicodeSet set) {
syntax = set;
fixSets();
}
public void setWhiteSpace(UnicodeSet set) {
whiteSpace = set;
fixSets();
}
public Set getLookedUpItems() {
return symbolTable.itemsLookedUp;
}
public void addSymbol(String var, String value, int start, int limit) {
// the limit is after the ';', so remove it
--limit;
char[] body = new char[limit - start];
value.getChars(start, limit, body, 0);
symbolTable.add(var, body);
}
public class TokenSymbolTable implements SymbolTable {
Map contents = new HashMap();
Set itemsLookedUp = new HashSet();
public void add(String var, char[] body) {
// start from 1 to avoid the $
contents.put(var.substring(1), body);
}
/* (non-Javadoc)
* @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
*/
public char[] lookup(String s) {
itemsLookedUp.add('$' + s);
return (char[])contents.get(s);
}
/* (non-Javadoc)
* @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
*/
public UnicodeMatcher lookupMatcher(int ch) {
// TODO Auto-generated method stub
return null;
}
/* (non-Javadoc)
* @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String, java.text.ParsePosition, int)
*/
public String parseReference(String text, ParsePosition pos, int limit) {
int cp;
int start = pos.getIndex();
int i;
for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(text, i);
if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
break;
}
}
pos.setIndex(i);
return text.substring(start,i);
}
}
}