ICU-5410 Add Clover:off

X-SVN-Rev: 20493
This commit is contained in:
George Rhoten 2006-10-05 20:30:05 +00:00
parent 348c2eb1ff
commit e9cbf49aaa
4 changed files with 1571 additions and 1594 deletions

View file

@ -39,7 +39,7 @@ class RBBINode {
static final int opLParen = 15;
static final int nodeTypeLimit = 16; // For Assertion checking only.
static String [] nodeTypeNames = {
static final String [] nodeTypeNames = {
"setRef",
"uset",
"varRef",
@ -56,7 +56,7 @@ class RBBINode {
"opBreak",
"opReverse",
"opLParen"
};
};
// enum OpPrecedence {
static final int precZero = 0;
@ -101,174 +101,174 @@ class RBBINode {
static int gLastSerial;
RBBINode(int t) {
Assert.assrt(t<nodeTypeLimit);
fSerialNum = ++gLastSerial;
fType = t;
Assert.assrt(t < nodeTypeLimit);
fSerialNum = ++gLastSerial;
fType = t;
fFirstPosSet = new HashSet();
fLastPosSet = new HashSet();
fFollowPos = new HashSet();
if (t==opCat) {fPrecedence = precOpCat;}
else if (t==opOr) {fPrecedence = precOpOr;}
else if (t==opStart) {fPrecedence = precStart;}
else if (t==opLParen) {fPrecedence = precLParen;}
else fPrecedence = precZero;
}
RBBINode(RBBINode other) {
fSerialNum = ++gLastSerial;
fType = other.fType;
fInputSet = other.fInputSet;
fPrecedence = other.fPrecedence;
fText = other.fText;
fFirstPos = other.fFirstPos;
fLastPos = other.fLastPos;
fNullable = other.fNullable;
fVal = other.fVal;
fFirstPosSet = new HashSet(other.fFirstPosSet);
fLastPosSet = new HashSet(other.fLastPosSet);
fFollowPos = new HashSet(other.fFollowPos);
}
//-------------------------------------------------------------------------
//
// cloneTree Make a copy of the subtree rooted at this node.
// Discard any variable references encountered along the way,
// and replace with copies of the variable's definitions.
// Used to replicate the expression underneath variable
// references in preparation for generating the DFA tables.
//
//-------------------------------------------------------------------------
RBBINode cloneTree() {
RBBINode n;
if (fType == RBBINode.varRef) {
// If the current node is a variable reference, skip over it
// and clone the definition of the variable instead.
n = fLeftChild.cloneTree();
} else if (fType == RBBINode.uset) {
n = this;
} else {
n = new RBBINode(this);
if (fLeftChild != null) {
n.fLeftChild = fLeftChild.cloneTree();
n.fLeftChild.fParent = n;
}
if (fRightChild != null) {
n.fRightChild = fRightChild.cloneTree();
n.fRightChild.fParent = n;
}
fFirstPosSet = new HashSet();
fLastPosSet = new HashSet();
fFollowPos = new HashSet();
if (t == opCat) {
fPrecedence = precOpCat;
} else if (t == opOr) {
fPrecedence = precOpOr;
} else if (t == opStart) {
fPrecedence = precStart;
} else if (t == opLParen) {
fPrecedence = precLParen;
} else {
fPrecedence = precZero;
}
return n;
}
}
// Copy constructor. The new node receives its own serial number and takes
// over the scalar/reference fields of "other" as-is. The three position
// sets are duplicated into new HashSet instances, so later changes to one
// node's sets do not show up in the other. Parent and child links are not
// assigned here.
RBBINode(RBBINode other) {
    // Every node, copied or not, draws the next value of the shared counter.
    this.fSerialNum = ++gLastSerial;

    // Node identity and payload.
    this.fType = other.fType;
    this.fPrecedence = other.fPrecedence;
    this.fVal = other.fVal;
    this.fText = other.fText;
    this.fInputSet = other.fInputSet;

    // Position bookkeeping carried over from the source node.
    this.fNullable = other.fNullable;
    this.fFirstPos = other.fFirstPos;
    this.fLastPos = other.fLastPos;

    // Independent copies of the position sets.
    this.fFirstPosSet = new HashSet(other.fFirstPosSet);
    this.fLastPosSet = new HashSet(other.fLastPosSet);
    this.fFollowPos = new HashSet(other.fFollowPos);
}
//-------------------------------------------------------------------------
//
// cloneTree Make a copy of the subtree rooted at this node.
// Discard any variable references encountered along the way,
// and replace with copies of the variable's definitions.
// Used to replicate the expression underneath variable
// references in preparation for generating the DFA tables.
//
//-------------------------------------------------------------------------
RBBINode cloneTree() {
    // A variable reference is never copied itself: the clone of its
    // definition (held as the left child) takes its place.
    if (fType == RBBINode.varRef) {
        return fLeftChild.cloneTree();
    }

    // uset nodes are not duplicated; the very same node is handed back.
    if (fType == RBBINode.uset) {
        return this;
    }

    // Any other node: copy the node, then clone and re-parent each subtree.
    RBBINode copy = new RBBINode(this);
    if (fLeftChild != null) {
        copy.fLeftChild = fLeftChild.cloneTree();
        copy.fLeftChild.fParent = copy;
    }
    if (fRightChild != null) {
        copy.fRightChild = fRightChild.cloneTree();
        copy.fRightChild.fParent = copy;
    }
    return copy;
}
//-------------------------------------------------------------------------
//
// flattenVariables Walk a parse tree, replacing any variable
// references with a copy of the variable's definition.
// Aside from variables, the tree is not changed.
//
// Return the root of the tree. If the root was not a variable
// reference, it remains unchanged - the root we started with
// is the root we return. If, however, the root was a variable
// reference, the root of the newly cloned replacement tree will
// be returned, and the original tree deleted.
//
// This function works by recursively walking the tree
// without doing anything until a variable reference is
// found, then calling cloneTree() at that point. Any
// nested references are handled by cloneTree(), not here.
//
//-------------------------------------------------------------------------
RBBINode flattenVariables() {
if (fType == varRef) {
RBBINode retNode = fLeftChild.cloneTree();
// delete this;
return retNode;
}
//-------------------------------------------------------------------------
//
// flattenVariables Walk a parse tree, replacing any variable
// references with a copy of the variable's definition.
// Aside from variables, the tree is not changed.
//
// Return the root of the tree. If the root was not a variable
// reference, it remains unchanged - the root we started with
// is the root we return. If, however, the root was a variable
// reference, the root of the newly cloned replacement tree will
// be returned, and the original tree deleted.
//
// This function works by recursively walking the tree
// without doing anything until a variable reference is
// found, then calling cloneTree() at that point. Any
// nested references are handled by cloneTree(), not here.
//
//-------------------------------------------------------------------------
RBBINode flattenVariables() {
    // A variable reference is replaced wholesale by a clone of its
    // definition; cloneTree() takes care of any references nested below.
    if (fType == varRef) {
        return fLeftChild.cloneTree();
    }

    // Otherwise this node stays put. Recurse into each child, storing
    // whatever comes back (the child itself or its replacement) and
    // pointing its parent link at this node.
    if (fLeftChild != null) {
        RBBINode newLeft = fLeftChild.flattenVariables();
        fLeftChild = newLeft;
        newLeft.fParent = this;
    }
    if (fRightChild != null) {
        RBBINode newRight = fRightChild.flattenVariables();
        fRightChild = newRight;
        newRight.fParent = this;
    }
    return this;
}
if (fLeftChild != null) {
fLeftChild = fLeftChild.flattenVariables();
fLeftChild.fParent = this;
}
if (fRightChild != null) {
fRightChild = fRightChild.flattenVariables();
fRightChild.fParent = this;
}
return this;
}
//-------------------------------------------------------------------------
//
// flattenSets Walk the parse tree, replacing any nodes of type setRef
// with a copy of the expression tree for the set. A set's
// equivalent expression tree is precomputed and saved as
// the left child of the uset node.
//
//-------------------------------------------------------------------------
void flattenSets() {
Assert.assrt(fType != setRef);
//-------------------------------------------------------------------------
//
// flattenSets Walk the parse tree, replacing any nodes of type setRef
// with a copy of the expression tree for the set. A set's
// equivalent expression tree is precomputed and saved as
// the left child of the uset node.
//
//-------------------------------------------------------------------------
void flattenSets() {
Assert.assrt(fType != setRef);
if (fLeftChild != null) {
if (fLeftChild.fType == setRef) {
RBBINode setRefNode = fLeftChild;
RBBINode usetNode = setRefNode.fLeftChild;
RBBINode replTree = usetNode.fLeftChild;
fLeftChild = replTree.cloneTree();
fLeftChild.fParent = this;
} else {
fLeftChild.flattenSets();
}
}
if (fLeftChild != null) {
if (fLeftChild.fType==setRef) {
RBBINode setRefNode = fLeftChild;
RBBINode usetNode = setRefNode.fLeftChild;
RBBINode replTree = usetNode.fLeftChild;
fLeftChild = replTree.cloneTree();
fLeftChild.fParent = this;
} else {
fLeftChild.flattenSets();
}
}
if (fRightChild != null) {
if (fRightChild.fType == setRef) {
RBBINode setRefNode = fRightChild;
RBBINode usetNode = setRefNode.fLeftChild;
RBBINode replTree = usetNode.fLeftChild;
fRightChild = replTree.cloneTree();
fRightChild.fParent = this;
// delete setRefNode;
} else {
fRightChild.flattenSets();
}
}
}
if (fRightChild != null) {
if (fRightChild.fType==setRef) {
RBBINode setRefNode = fRightChild;
RBBINode usetNode = setRefNode.fLeftChild;
RBBINode replTree = usetNode.fLeftChild;
fRightChild = replTree.cloneTree();
fRightChild.fParent = this;
// delete setRefNode;
} else {
fRightChild.flattenSets();
}
}
}
//-------------------------------------------------------------------------
//
// findNodes() Locate all the nodes of the specified type, starting
// at the specified root.
//
//-------------------------------------------------------------------------
void findNodes(List dest, int kind) {
if (fType == kind) {
dest.add(this);
}
if (fLeftChild != null) {
fLeftChild.findNodes(dest, kind);
}
if (fRightChild != null) {
fRightChild.findNodes(dest, kind);
}
}
//-------------------------------------------------------------------------
//
// findNodes() Locate all the nodes of the specified type, starting
// at the specified root.
//
//-------------------------------------------------------------------------
void findNodes(List dest, int kind) {
    // Pre-order walk: this node first, then the left subtree, then the right.
    if (kind == fType) {
        dest.add(this);
    }
    RBBINode[] children = { fLeftChild, fRightChild };
    for (int c = 0; c < children.length; c++) {
        RBBINode child = children[c];
        if (child != null) {
            child.findNodes(dest, kind);
        }
    }
}
//-------------------------------------------------------------------------
//
// print. Print out a single node, for debugging.
// print. Print out a single node, for debugging.
//
//-------------------------------------------------------------------------
///CLOVER:OFF
static void printNode(RBBINode n) {
if (n==null) {
@ -288,44 +288,52 @@ class RBBINode {
}
System.out.println("");
}
///CLOVER:ON
// Print a String in a fixed field size.
// Debugging function.
static void printString(String s, int minWidth)
{
for (int i=minWidth; i<0; i++) {
// negative width means pad leading spaces, not fixed width.
System.out.print(' ');
}
for (int i=s.length(); i<minWidth; i++) {
System.out.print(' ');
}
System.out.print(s);
}
//
// Print an int in a fixed size field.
// Debugging function.
//
static void printInt(int i, int minWidth) {
String s = Integer.toString(i);
printString(s, Math.max(minWidth, s.length()+1));
}
static void printHex(int i, int minWidth) {
String s = Integer.toString(i, 16);
String leadingZeroes = "00000".substring(0, Math.max(0, 5-s.length()));
s = leadingZeroes+s;
printString(s, minWidth);
}
// Debugging function.
///CLOVER:OFF
static void printString(String s, int minWidth) {
    // A negative width asks for exactly |minWidth| leading spaces,
    // regardless of how long the string is.
    int leading = minWidth;
    while (leading < 0) {
        System.out.print(' ');
        leading++;
    }

    // A positive width right-justifies: pad with spaces until the string
    // would end at column minWidth. Longer strings get no padding.
    int column = s.length();
    while (column < minWidth) {
        System.out.print(' ');
        column++;
    }

    System.out.print(s);
}
///CLOVER:ON
// -------------------------------------------------------------------------
//
// print. Print out the tree of nodes rooted at "this"
//
// -------------------------------------------------------------------------
//
// Print an int in a fixed size field.
// Debugging function.
//
///CLOVER:OFF
static void printInt(int i, int minWidth) {
    String digits = Integer.toString(i);
    // The field is always at least one column wider than the number, so
    // there is a separating space even when minWidth is too small.
    int width = digits.length() + 1;
    if (minWidth > width) {
        width = minWidth;
    }
    printString(digits, width);
}
///CLOVER:ON
///CLOVER:OFF
// Print an int as hexadecimal, zero-padded on the left to at least five
// characters, then laid out by printString() using minWidth.
// Debugging function.
static void printHex(int i, int minWidth) {
    StringBuffer hex = new StringBuffer(Integer.toString(i, 16));
    // Left-pad with zeroes up to five characters; longer values are kept whole.
    while (hex.length() < 5) {
        hex.insert(0, '0');
    }
    printString(hex.toString(), minWidth);
}
///CLOVER:ON
// -------------------------------------------------------------------------
//
// print. Print out the tree of nodes rooted at "this"
//
// -------------------------------------------------------------------------
///CLOVER:OFF
void printTree(boolean printHeading) {
if (printHeading) {
System.out.println( "-------------------------------------------------------------------");
@ -344,5 +352,6 @@ class RBBINode {
}
}
}
///CLOVER:ON
}

File diff suppressed because it is too large Load diff

View file

@ -13,8 +13,6 @@ import java.io.OutputStream;
import java.io.IOException;
import com.ibm.icu.impl.Assert;
import com.ibm.icu.impl.CharTrie;
import com.ibm.icu.impl.Trie;
import com.ibm.icu.impl.IntTrieBuilder;
//
@ -117,7 +115,7 @@ class RBBISetBuilder {
}
}
};
}
}
@ -149,29 +147,28 @@ class RBBISetBuilder {
}
//------------------------------------------------------------------------
//------------------------------------------------------------------------
//
// build Build the list of non-overlapping character ranges
// from the Unicode Sets.
//
//------------------------------------------------------------------------
void build() {
RBBINode usetNode;
RangeDescriptor rlRange;
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("usets")>=0) {printSets();}
// Initialize the process by creating a single range encompassing all characters
// that is in no sets.
//
// build Build the list of non-overlapping character ranges
// from the Unicode Sets.
fRangeList = new RangeDescriptor();
fRangeList.fStartChar = 0;
fRangeList.fEndChar = 0x10ffff;
//
// Find the set of non-overlapping ranges of characters
//
//------------------------------------------------------------------------
void build() {
RBBINode usetNode;
RangeDescriptor rlRange;
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("usets")>=0) {printSets();}
// Initialize the process by creating a single range encompassing all characters
// that is in no sets.
//
fRangeList = new RangeDescriptor();
fRangeList.fStartChar = 0;
fRangeList.fEndChar = 0x10ffff;
//
// Find the set of non-overlapping ranges of characters
//
Iterator ni = fRB.fUSetNodes.iterator();
while (ni.hasNext()) {
usetNode = (RBBINode)ni.next();
@ -189,274 +186,316 @@ class RBBISetBuilder {
int inputSetRangeEnd = inputSet.getRangeEnd(inputSetRangeIndex);
// skip over ranges from the range list that are completely
// below the current range from the input unicode set.
while (rlRange.fEndChar < inputSetRangeBegin) {
rlRange = rlRange.fNext;
}
// If the start of the range from the range list is before with
// the start of the range from the unicode set, split the range list range
// in two, with one part being before (wholly outside of) the unicode set
// and the other containing the rest.
// Then continue the loop; the post-split current range will then be skipped
// over
if (rlRange.fStartChar < inputSetRangeBegin) {
rlRange.split(inputSetRangeBegin);
continue;
}
// Same thing at the end of the ranges...
// If the end of the range from the range list doesn't coincide with
// the end of the range from the unicode set, split the range list
// range in two. The first part of the split range will be
// wholly inside the Unicode set.
if (rlRange.fEndChar > inputSetRangeEnd) {
rlRange.split(inputSetRangeEnd+1);
}
// The current rlRange is now entirely within the UnicodeSet range.
// Add this unicode set to the list of sets for this rlRange
if (rlRange.fIncludesSets.indexOf(usetNode) == -1) {
rlRange.fIncludesSets.add(usetNode);
}
// Advance over ranges that we are finished with.
if (inputSetRangeEnd == rlRange.fEndChar) {
inputSetRangeIndex++;
}
// below the current range from the input unicode set.
while (rlRange.fEndChar < inputSetRangeBegin) {
rlRange = rlRange.fNext;
}
}
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("range")>=0) { printRanges();}
//
// Group the above ranges, with each group consisting of one or more
// ranges that are in exactly the same set of original UnicodeSets.
// The groups are numbered, and these group numbers are the set of
// input symbols recognized by the run-time state machine.
//
// Numbering: # 0 (state table column 0) is unused.
// # 1 is reserved - table column 1 is for end-of-input
// # 2 is reserved - table column 2 is for beginning-in-input
// # 3 is the first range list.
//
RangeDescriptor rlSearchRange;
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange.fNext) {
if (rlRange.fIncludesSets.equals(rlSearchRange.fIncludesSets)) {
rlRange.fNum = rlSearchRange.fNum;
break;
}
// If the start of the range from the range list is before
// the start of the range from the unicode set, split the range list range
// in two, with one part being before (wholly outside of) the unicode set
// and the other containing the rest.
// Then continue the loop; the post-split current range will then be skipped
// over
if (rlRange.fStartChar < inputSetRangeBegin) {
rlRange.split(inputSetRangeBegin);
continue;
}
if (rlRange.fNum == 0) {
fGroupCount ++;
rlRange.fNum = fGroupCount+2;
rlRange.setDictionaryFlag();
addValToSets(rlRange.fIncludesSets, fGroupCount+2);
// Same thing at the end of the ranges...
// If the end of the range from the range list doesn't coincide with
// the end of the range from the unicode set, split the range list
// range in two. The first part of the split range will be
// wholly inside the Unicode set.
if (rlRange.fEndChar > inputSetRangeEnd) {
rlRange.split(inputSetRangeEnd+1);
}
// The current rlRange is now entirely within the UnicodeSet range.
// Add this unicode set to the list of sets for this rlRange
if (rlRange.fIncludesSets.indexOf(usetNode) == -1) {
rlRange.fIncludesSets.add(usetNode);
}
}
// Handle input sets that contain the special string {eof}.
// Column 1 of the state table is reserved for EOF on input.
// Column 2 is reserved for before-the-start-input.
// (This column can be optimized away later if there are no rule
// references to {bof}.)
// Add this column value (1 or 2) to the equivalent expression
// subtree for each UnicodeSet that contains the string {eof}
// Because {bof} and {eof} are not a characters in the normal sense,
// they doesn't affect the computation of ranges or TRIE.
String eofString = "eof";
String bofString = "bof";
ni = fRB.fUSetNodes.iterator();
while (ni.hasNext()) {
usetNode = (RBBINode )ni.next();
UnicodeSet inputSet = usetNode.fInputSet;
if (inputSet.contains(eofString)) {
addValToSet(usetNode, 1);
// Advance over ranges that we are finished with.
if (inputSetRangeEnd == rlRange.fEndChar) {
inputSetRangeIndex++;
}
if (inputSet.contains(bofString)) {
addValToSet(usetNode, 2);
fSawBOF = true;
}
}
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("rgroup")>=0) {printRangeGroups();}
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("esets")>=0) {printSets();}
//IntTrieBuilder(int aliasdata[], int maxdatalength,
// int initialvalue, int leadunitvalue,
// boolean latin1linear)
fTrie = new IntTrieBuilder(null, // Data array (utrie will allocate one)
100000, // Max Data Length
0, // Initial value for all code points
0, // Lead Surrogate unit value,
true); // Keep Latin 1 in separately.
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
fTrie.setRange(rlRange.fStartChar, rlRange.fEndChar+1, rlRange.fNum, true);
rlRange = rlRange.fNext;
}
}
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("range")>=0) { printRanges();}
//-----------------------------------------------------------------------------------
//
// RBBIDataManipulate A little internal class needed only to wrap of the
// getFoldedValue() function needed for Trie table creation.
// Group the above ranges, with each group consisting of one or more
// ranges that are in exactly the same set of original UnicodeSets.
// The groups are numbered, and these group numbers are the set of
// input symbols recognized by the run-time state machine.
//
//-----------------------------------------------------------------------------------
class RBBIDataManipulate implements IntTrieBuilder.DataManipulate {
public int getFoldedValue(int start, int offset) {
int value;
int limit;
boolean [] inBlockZero = new boolean[1];
limit = start + 0x400;
while(start<limit) {
value = fTrie.getValue(start, inBlockZero);
if (inBlockZero[0]) {
start += IntTrieBuilder.DATA_BLOCK_LENGTH;
} else if (value != 0) {
return offset | 0x08000;
} else {
++start;
}
}
return 0;
}
}
RBBIDataManipulate dm = new RBBIDataManipulate();
//-----------------------------------------------------------------------------------
// Numbering: # 0 (state table column 0) is unused.
// # 1 is reserved - table column 1 is for end-of-input
// # 2 is reserved - table column 2 is for beginning-in-input
// # 3 is the first range list.
//
// getTrieSize() Return the size that will be required to serialize the Trie.
//
//-----------------------------------------------------------------------------------
int getTrieSize() {
int size = 0;
try {
// The trie serialize function returns the size of the data written.
// null output stream says give size only, don't actually write anything.
size = fTrie.serialize(null, true, dm );
} catch (IOException e) {
Assert.assrt (false);
}
return size;
}
//-----------------------------------------------------------------------------------
//
// serializeTrie() Write the serialized trie to an output stream
//
//-----------------------------------------------------------------------------------
void serializeTrie(OutputStream os) throws IOException {
fTrie.serialize(os, true, dm );
}
//------------------------------------------------------------------------
//
// addValToSets Add a runtime-mapped input value to each uset from a
// list of uset nodes. (val corresponds to a state table column.)
// For each of the original Unicode sets - which correspond
// directly to uset nodes - a logically equivalent expression
// is constructed in terms of the remapped runtime input
// symbol set. This function adds one runtime input symbol to
// a list of sets.
//
// The "logically equivalent expression" is the tree for an
// or-ing together of all of the symbols that go into the set.
//
//------------------------------------------------------------------------
void addValToSets(List sets, int val) {
int ix;
for (ix=0; ix<sets.size(); ix++) {
RBBINode usetNode = (RBBINode )sets.get(ix);
addValToSet(usetNode, val);
}
}
void addValToSet(RBBINode usetNode, int val) {
RBBINode leafNode = new RBBINode(RBBINode.leafChar);
leafNode.fVal = val;
if (usetNode.fLeftChild == null) {
usetNode.fLeftChild = leafNode;
leafNode.fParent = usetNode;
} else {
// There are already input symbols present for this set.
// Set up an OR node, with the previous stuff as the left child
// and the new value as the right child.
RBBINode orNode = new RBBINode(RBBINode.opOr);
orNode.fLeftChild = usetNode.fLeftChild;
orNode.fRightChild = leafNode;
orNode.fLeftChild.fParent = orNode;
orNode.fRightChild.fParent = orNode;
usetNode.fLeftChild = orNode;
orNode.fParent = usetNode;
}
}
//------------------------------------------------------------------------
//
// getNumCharCategories
//
//------------------------------------------------------------------------
int getNumCharCategories() {
return fGroupCount + 3;
}
//------------------------------------------------------------------------
//
// sawBOF
//
//------------------------------------------------------------------------
boolean sawBOF() {
return fSawBOF;
}
//------------------------------------------------------------------------
//
// getFirstChar Given a runtime RBBI character category, find
// the first UChar32 that is in the set of chars
// in the category.
//------------------------------------------------------------------------
int getFirstChar(int category) {
RangeDescriptor rlRange;
int retVal = -1;
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
if (rlRange.fNum == category) {
retVal = rlRange.fStartChar;
RangeDescriptor rlSearchRange;
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange.fNext) {
if (rlRange.fIncludesSets.equals(rlSearchRange.fIncludesSets)) {
rlRange.fNum = rlSearchRange.fNum;
break;
}
}
return retVal;
if (rlRange.fNum == 0) {
fGroupCount ++;
rlRange.fNum = fGroupCount+2;
rlRange.setDictionaryFlag();
addValToSets(rlRange.fIncludesSets, fGroupCount+2);
}
}
// Handle input sets that contain the special string {eof}.
// Column 1 of the state table is reserved for EOF on input.
// Column 2 is reserved for before-the-start-input.
// (This column can be optimized away later if there are no rule
// references to {bof}.)
// Add this column value (1 or 2) to the equivalent expression
// subtree for each UnicodeSet that contains the string {eof} or {bof}.
// Because {bof} and {eof} are not characters in the normal sense,
// they don't affect the computation of ranges or TRIE.
String eofString = "eof";
String bofString = "bof";
ni = fRB.fUSetNodes.iterator();
while (ni.hasNext()) {
usetNode = (RBBINode )ni.next();
UnicodeSet inputSet = usetNode.fInputSet;
if (inputSet.contains(eofString)) {
addValToSet(usetNode, 1);
}
if (inputSet.contains(bofString)) {
addValToSet(usetNode, 2);
fSawBOF = true;
}
}
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("rgroup")>=0) {printRangeGroups();}
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("esets")>=0) {printSets();}
//------------------------------------------------------------------------
//
// printRanges A debugging function.
// dump out all of the range definitions.
//
//------------------------------------------------------------------------
void printRanges() {
RangeDescriptor rlRange;
int i;
System.out.print("\n\n Nonoverlapping Ranges ...\n");
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
System.out.print(" " + rlRange.fNum + " " + (int)rlRange.fStartChar + "-" + (int)rlRange.fEndChar);
//IntTrieBuilder(int aliasdata[], int maxdatalength,
// int initialvalue, int leadunitvalue,
// boolean latin1linear)
fTrie = new IntTrieBuilder(null, // Data array (utrie will allocate one)
100000, // Max Data Length
0, // Initial value for all code points
0, // Lead Surrogate unit value,
true); // Keep Latin 1 in separately.
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
fTrie.setRange(rlRange.fStartChar, rlRange.fEndChar+1, rlRange.fNum, true);
}
}
//-----------------------------------------------------------------------------------
//
// RBBIDataManipulate A little internal class needed only to wrap the
// getFoldedValue() function needed for Trie table creation.
//
//-----------------------------------------------------------------------------------
class RBBIDataManipulate implements IntTrieBuilder.DataManipulate {
    // Scans the 0x400 values beginning at "start". Returns offset with bit
    // 0x8000 set as soon as a non-zero value is seen, or 0 when every value
    // examined was zero.
    public int getFoldedValue(int start, int offset) {
        // One-element array used as an out-parameter by fTrie.getValue().
        final boolean[] inBlockZero = new boolean[1];
        final int limit = start + 0x400;

        int pos = start;
        while (pos < limit) {
            int value = fTrie.getValue(pos, inBlockZero);
            if (inBlockZero[0]) {
                // Flag set by getValue(): jump ahead a whole data block
                // instead of probing it value by value.
                pos += IntTrieBuilder.DATA_BLOCK_LENGTH;
                continue;
            }
            if (value != 0) {
                return offset | 0x08000;
            }
            pos++;
        }
        return 0;
    }
}
RBBIDataManipulate dm = new RBBIDataManipulate();
//-----------------------------------------------------------------------------------
//
// getTrieSize() Return the size that will be required to serialize the Trie.
//
//-----------------------------------------------------------------------------------
int getTrieSize() {
    try {
        // With a null output stream, serialize() writes nothing and only
        // reports the size the data would occupy.
        return fTrie.serialize(null, true, dm );
    } catch (IOException e) {
        // No stream is involved, so this is treated as an assertion failure.
        // If the assertion call returns, the size is reported as 0.
        Assert.assrt (false);
        return 0;
    }
}
//-----------------------------------------------------------------------------------
//
// serializeTrie() Write the serialized trie to an output stream
//
//-----------------------------------------------------------------------------------
void serializeTrie(OutputStream os) throws IOException {
// Writes fTrie to the given stream, using the shared "dm" instance as the
// folded-value callback. Any IOException from the write is passed on to
// the caller. getTrieSize() makes the same call with a null stream to
// obtain the size without writing.
// NOTE(review): the meaning of the "true" argument is defined by
// IntTrieBuilder.serialize() - confirm against that class.
fTrie.serialize(os, true, dm );
}
//------------------------------------------------------------------------
//
// addValToSets Add a runtime-mapped input value to each uset from a
// list of uset nodes. (val corresponds to a state table column.)
// For each of the original Unicode sets - which correspond
// directly to uset nodes - a logically equivalent expression
// is constructed in terms of the remapped runtime input
// symbol set. This function adds one runtime input symbol to
// a list of sets.
//
// The "logically equivalent expression" is the tree for an
// or-ing together of all of the symbols that go into the set.
//
//------------------------------------------------------------------------
void addValToSets(List sets, int val) {
    // Apply the same runtime input value to every uset node in the list,
    // in list order.
    for (Iterator it = sets.iterator(); it.hasNext();) {
        addValToSet((RBBINode) it.next(), val);
    }
}
// Add one runtime input value to a single uset node by growing the
// expression tree that hangs off the node's left child.
void addValToSet(RBBINode usetNode, int val) {
    RBBINode leaf = new RBBINode(RBBINode.leafChar);
    leaf.fVal = val;

    RBBINode existing = usetNode.fLeftChild;
    if (existing == null) {
        // First value for this set: the leaf alone is the whole expression.
        usetNode.fLeftChild = leaf;
        leaf.fParent = usetNode;
        return;
    }

    // The set already has an expression. Put an OR node on top of it, with
    // the previous expression on the left and the new leaf on the right.
    RBBINode orNode = new RBBINode(RBBINode.opOr);
    orNode.fLeftChild = existing;
    orNode.fRightChild = leaf;
    existing.fParent = orNode;
    leaf.fParent = orNode;

    usetNode.fLeftChild = orNode;
    orNode.fParent = usetNode;
}
//------------------------------------------------------------------------
//
// getNumCharCategories
//
//------------------------------------------------------------------------
int getNumCharCategories() {
// build() increments fGroupCount once per distinct range group and numbers
// that group fGroupCount+2, so the highest category in use is
// fGroupCount+2. Adding 3 counts categories 0, 1 and 2 as well, which
// build()'s numbering leaves unused/reserved ahead of the first group.
return fGroupCount + 3;
}
//------------------------------------------------------------------------
//
// sawBOF
//
//------------------------------------------------------------------------
boolean sawBOF() {
// fSawBOF is set to true by build() when at least one input set contains
// the string "bof".
return fSawBOF;
}
//------------------------------------------------------------------------
//
// getFirstChar Given a runtime RBBI character category, find
// the first UChar32 that is in the set of chars
// in the category.
//------------------------------------------------------------------------
int getFirstChar(int category) {
    // Walk the range list in order; the first range numbered with the
    // requested category supplies the answer.
    RangeDescriptor range = fRangeList;
    while (range != null) {
        if (range.fNum == category) {
            return range.fStartChar;
        }
        range = range.fNext;
    }
    // No range carries this category.
    return -1;
}
//------------------------------------------------------------------------
//
// printRanges A debugging function.
// dump out all of the range definitions.
//
//------------------------------------------------------------------------
///CLOVER:OFF
void printRanges() {
    System.out.print("\n\n Nonoverlapping Ranges ...\n");

    RangeDescriptor range = fRangeList;
    while (range != null) {
        // One line per range: its number, its bounds, then the names of the
        // sets that include it.
        System.out.print(" " + range.fNum + " " + (int)range.fStartChar + "-" + (int)range.fEndChar);

        for (int ix = 0; ix < range.fIncludesSets.size(); ix++) {
            RBBINode usetNode = (RBBINode) range.fIncludesSets.get(ix);

            // A set is named only when its grandparent node is a variable
            // reference; every other set is reported as "anon".
            RBBINode setRef = usetNode.fParent;
            RBBINode owner = (setRef == null) ? null : setRef.fParent;
            String setName = "anon";
            if (owner != null && owner.fType == RBBINode.varRef) {
                setName = owner.fText;
            }

            System.out.print(setName);
            System.out.print(" ");
        }

        System.out.println("");
        range = range.fNext;
    }
}
///CLOVER:ON
//------------------------------------------------------------------------
//
// printRangeGroups A debugging function.
// dump out all of the range groups.
//
//------------------------------------------------------------------------
///CLOVER:OFF
void printRangeGroups() {
RangeDescriptor rlRange;
RangeDescriptor tRange;
int i;
int lastPrintedGroupNum = 0;
System.out.print("\nRanges grouped by Unicode Set Membership...\n");
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
int groupNum = rlRange.fNum & 0xbfff;
if (groupNum > lastPrintedGroupNum) {
lastPrintedGroupNum = groupNum;
if (groupNum<10) {System.out.print(" ");}
System.out.print(groupNum + " ");
if ((rlRange.fNum & 0x4000) != 0) { System.out.print(" <DICT> ");}
for (i=0; i<rlRange.fIncludesSets.size(); i++) {
RBBINode usetNode = (RBBINode )rlRange.fIncludesSets.get(i);
@ -468,105 +507,65 @@ class RBBISetBuilder {
setName = varRef.fText;
}
}
System.out.print(setName); System.out.print(" ");
System.out.print(setName); System.out.print(" ");
}
System.out.println("");
}
}
//------------------------------------------------------------------------
//
// printRangeGroups A debugging function.
// dump out all of the range groups.
//
//------------------------------------------------------------------------
void printRangeGroups() {
RangeDescriptor rlRange;
RangeDescriptor tRange;
int i;
int lastPrintedGroupNum = 0;
System.out.print("\nRanges grouped by Unicode Set Membership...\n");
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
int groupNum = rlRange.fNum & 0xbfff;
if (groupNum > lastPrintedGroupNum) {
lastPrintedGroupNum = groupNum;
if (groupNum<10) {System.out.print(" ");}
System.out.print(groupNum + " ");
if ((rlRange.fNum & 0x4000) != 0) { System.out.print(" <DICT> ");}
for (i=0; i<rlRange.fIncludesSets.size(); i++) {
RBBINode usetNode = (RBBINode )rlRange.fIncludesSets.get(i);
String setName = "anon";
RBBINode setRef = usetNode.fParent;
if (setRef != null) {
RBBINode varRef = setRef.fParent;
if (varRef != null && varRef.fType == RBBINode.varRef) {
setName = varRef.fText;
}
i = 0;
for (tRange = rlRange; tRange != null; tRange = tRange.fNext) {
if (tRange.fNum == rlRange.fNum) {
if (i++ % 5 == 0) {
System.out.print("\n ");
}
System.out.print(setName); System.out.print(" ");
}
i = 0;
for (tRange = rlRange; tRange != null; tRange = tRange.fNext) {
if (tRange.fNum == rlRange.fNum) {
if (i++ % 5 == 0) {
System.out.print("\n ");
}
RBBINode.printHex((int)tRange.fStartChar, -1);
System.out.print("-");
RBBINode.printHex((int)tRange.fEndChar, 0);
}
}
System.out.print("\n");
}
}
System.out.print("\n");
}
//------------------------------------------------------------------------
//
// printSets A debugging function.
// dump out all of the set definitions.
//
//------------------------------------------------------------------------
void printSets() {
int i;
System.out.print("\n\nUnicode Sets List\n------------------\n");
for (i=0; i<fRB.fUSetNodes.size(); i++) {
RBBINode usetNode;
RBBINode setRef;
RBBINode varRef;
String setName;
usetNode = (RBBINode )fRB.fUSetNodes.get(i);
//System.out.print(" " + i + " ");
RBBINode.printInt(2, i);
setName = "anonymous";
setRef = usetNode.fParent;
if (setRef != null) {
varRef = setRef.fParent;
if (varRef != null && varRef.fType == RBBINode.varRef) {
setName = varRef.fText;
RBBINode.printHex((int)tRange.fStartChar, -1);
System.out.print("-");
RBBINode.printHex((int)tRange.fEndChar, 0);
}
}
System.out.print(" " + setName);
System.out.print(" ");
System.out.print(usetNode.fText);
System.out.print("\n");
if (usetNode.fLeftChild != null) {
usetNode.fLeftChild.printTree(true);
}
}
System.out.print("\n");
}
///CLOVER:ON
//------------------------------------------------------------------------
//
// printSets A debugging function.
// dump out all of the set definitions.
//
//------------------------------------------------------------------------
///CLOVER:OFF
void printSets() {
    // Debug dump of every node in fRB.fUSetNodes: index, name, text, and
    // (when present) the tree hanging off the node's left child.
    System.out.print("\n\nUnicode Sets List\n------------------\n");
    for (int idx = 0; idx < fRB.fUSetNodes.size(); idx++) {
        RBBINode usetNode = (RBBINode) fRB.fUSetNodes.get(idx);
        RBBINode.printInt(2, idx);

        // Use the variable's text as the name when the node's grandparent is a
        // variable reference; fall back to "anonymous" otherwise.
        String label = "anonymous";
        RBBINode parent = usetNode.fParent;
        RBBINode grandparent = (parent == null) ? null : parent.fParent;
        if (grandparent != null && grandparent.fType == RBBINode.varRef) {
            label = grandparent.fText;
        }

        System.out.print(" " + label);
        System.out.print(" ");
        System.out.print(usetNode.fText);
        System.out.print("\n");

        RBBINode subtree = usetNode.fLeftChild;
        if (subtree != null) {
            subtree.printTree(true);
        }
    }
    System.out.print("\n");
}
///CLOVER:ON
}

View file

@ -29,193 +29,176 @@ class RBBISymbolTable implements SymbolTable{
// One symbol table record; instances are stored in fHashTable keyed by 'key'.
static class RBBISymbolTableEntry {
// The variable name under which this entry is stored.
String key;
// The RBBI node associated with the variable name.
RBBINode val;
};
}
RBBISymbolTable(RBBIRuleScanner rs, String rules) {
    // Remember the scanner and the rule text, and start with an empty table.
    fRuleScanner = rs;
    fRules = rules;
    // Single U+FFFF character, handed out by lookup() as the stand-in for a set.
    ffffString = "\uffff";
    fHashTable = new HashMap();
}
//
// RBBISymbolTable::lookup This function from the abstract symbol table interface
// looks up a variable name and returns a UnicodeString
// containing the substitution text.
//
// The variable name does NOT include the leading $.
//
public char[] lookup(String s)
{
RBBISymbolTableEntry el;
RBBINode varRefNode;
RBBINode exprNode;
RBBINode usetNode;
String retString;
el = (RBBISymbolTableEntry)fHashTable.get(s);
if (el == null) {
return null;
RBBISymbolTable(RBBIRuleScanner rs, String rules) {
    // Remember the scanner and the rule text, and start with an empty table.
    fRuleScanner = rs;
    fRules = rules;
    // Single U+FFFF character, handed out by lookup() as the stand-in for a set.
    ffffString = "\uffff";
    fHashTable = new HashMap();
}
// Walk through any chain of variable assignments that ultimately resolve to a Set Ref.
varRefNode = el.val;
while (varRefNode.fLeftChild.fType == RBBINode.varRef) {
varRefNode = varRefNode.fLeftChild;
}
exprNode = varRefNode.fLeftChild; // Root node of expression for variable
if (exprNode.fType == RBBINode.setRef) {
// The $variable refers to a single UnicodeSet
// return the ffffString, which will subsequently be interpreted as a
// stand-in character for the set by RBBISymbolTable::lookupMatcher()
usetNode = exprNode.fLeftChild;
fCachedSetLookup = usetNode.fInputSet;
retString = ffffString;
}
else
{
// The variable refers to something other than just a set.
// This is an error in the rules being compiled. $Variables inside of UnicodeSets
// must refer only to another set, not to some random non-set expression.
// Note: single characters are represented as sets, so they are ok.
fRuleScanner.error(RBBIRuleBuilder.U_BRK_MALFORMED_SET);
retString = exprNode.fText;
fCachedSetLookup = null;
}
return retString.toCharArray();
}
//
// RBBISymbolTable::lookup This function from the abstract symbol table interface
// looks up a variable name and returns a UnicodeString
// containing the substitution text.
//
// The variable name does NOT include the leading $.
//
public char[] lookup(String s) {
RBBISymbolTableEntry el;
RBBINode varRefNode;
RBBINode exprNode;
RBBINode usetNode;
String retString;
//
// RBBISymbolTable::lookupMatcher This function from the abstract symbol table
// interface maps a single stand-in character to a
// pointer to a Unicode Set. The Unicode Set code uses this
// mechanism to get all references to the same $variable
// name to refer to a single common Unicode Set instance.
//
// This implementation cheats a little, and does not maintain a map of stand-in chars
// to sets. Instead, it takes advantage of the fact that the UnicodeSet
// constructor will always call this function right after calling lookup(),
// and we just need to remember what set to return between these two calls.
public UnicodeMatcher lookupMatcher(int ch)
{
    // Only the 0xffff stand-in character maps to a set; anything else yields null.
    if (ch != 0xffff) {
        return null;
    }
    // Hand back the cached set once, clearing the cache as it is returned.
    UnicodeSet cached = fCachedSetLookup;
    fCachedSetLookup = null;
    return cached;
}
//
// RBBISymbolTable::parseReference This function from the abstract symbol table interface
// looks for a $variable name in the source text.
// It does not look it up, only scans for it.
// It is used by the UnicodeSet parser.
//
public String parseReference( String text, ParsePosition pos, int limit)
{
int start = pos.getIndex();
int i = start;
String result = "";
while (i < limit) {
int c = UTF16.charAt(text, i);
if ((i==start && !UCharacter.isUnicodeIdentifierStart(c)) || !UCharacter.isUnicodeIdentifierPart(c)) {
break;
el = (RBBISymbolTableEntry) fHashTable.get(s);
if (el == null) {
return null;
}
i += UTF16.getCharCount(c);
}
if (i == start) { // No valid name chars
return result; // Indicate failure with empty string
}
pos.setIndex(i);
result = text.substring(start, i);
return result;
}
// Walk through any chain of variable assignments that ultimately resolve to a Set Ref.
varRefNode = el.val;
while (varRefNode.fLeftChild.fType == RBBINode.varRef) {
varRefNode = varRefNode.fLeftChild;
}
//
// RBBISymbolTable::lookupNode Given a key (a variable name), return the
// corresponding RBBI Node. If there is no entry
// in the table for this name, return NULL.
//
RBBINode lookupNode(String key) {
    // Fetch the table entry for this name; a missing entry yields null.
    RBBISymbolTableEntry entry = (RBBISymbolTableEntry) fHashTable.get(key);
    return (entry == null) ? null : entry.val;
}
//
// RBBISymbolTable::addEntry Add a new entry to the symbol table.
// Indicate an error if the name already exists -
// this will only occur in the case of duplicate
// variable assignments.
//
void addEntry (String key, RBBINode val) {
RBBISymbolTableEntry e;
e = (RBBISymbolTableEntry )fHashTable.get(key);
if (e != null) {
fRuleScanner.error(RBBIRuleBuilder.U_BRK_VARIABLE_REDFINITION);
return;
exprNode = varRefNode.fLeftChild; // Root node of expression for variable
if (exprNode.fType == RBBINode.setRef) {
// The $variable refers to a single UnicodeSet
// return the ffffString, which will subsequently be interpreted as a
// stand-in character for the set by RBBISymbolTable::lookupMatcher()
usetNode = exprNode.fLeftChild;
fCachedSetLookup = usetNode.fInputSet;
retString = ffffString;
} else {
// The variable refers to something other than just a set.
// This is an error in the rules being compiled. $Variables inside of UnicodeSets
// must refer only to another set, not to some random non-set expression.
// Note: single characters are represented as sets, so they are ok.
fRuleScanner.error(RBBIRuleBuilder.U_BRK_MALFORMED_SET);
retString = exprNode.fText;
fCachedSetLookup = null;
}
return retString.toCharArray();
}
e = new RBBISymbolTableEntry();
e.key = key;
e.val = val;
fHashTable.put(e.key, e);
}
//
// RBBISymbolTable::print Debugging function, dump out the symbol table contents.
//
void rbbiSymtablePrint() {
System.out.print("Variable Definitions\n" +
"Name Node Val String Val\n" +
"----------------------------------------------------------------------\n");
int pos = -1;
RBBISymbolTableEntry [] syms = new RBBISymbolTableEntry[0];
Collection t = fHashTable.values();
syms = (RBBISymbolTableEntry[]) t.toArray(syms);
for (int i=0; i<syms.length; i++) {
RBBISymbolTableEntry s = syms[i];
System.out.print(" " + s.key + " "); // TODO: format output into columns.
System.out.print(" " + s.val + " ");
System.out.print(s.val.fLeftChild.fText);
System.out.print("\n");
//
// RBBISymbolTable::lookupMatcher This function from the abstract symbol table
// interface maps a single stand-in character to a
// pointer to a Unicode Set. The Unicode Set code uses this
// mechanism to get all references to the same $variable
// name to refer to a single common Unicode Set instance.
//
// This implementation cheats a little, and does not maintain a map of stand-in chars
// to sets. Instead, it takes advantage of the fact that the UnicodeSet
// constructor will always call this function right after calling lookup(),
// and we just need to remember what set to return between these two calls.
public UnicodeMatcher lookupMatcher(int ch) {
    // Only the 0xffff stand-in character maps to a set; anything else yields null.
    if (ch != 0xffff) {
        return null;
    }
    // Hand back the cached set once, clearing the cache as it is returned.
    UnicodeSet cached = fCachedSetLookup;
    fCachedSetLookup = null;
    return cached;
}
System.out.println("\nParsed Variable Definitions\n");
pos = -1;
for (int i=0; i<syms.length; i++) {
RBBISymbolTableEntry s = syms[i];
System.out.print(s.key);
s.val.fLeftChild.printTree(true);
System.out.print("\n");
//
// RBBISymbolTable::parseReference This function from the abstract symbol table interface
// looks for a $variable name in the source text.
// It does not look it up, only scans for it.
// It is used by the UnicodeSet parser.
//
public String parseReference(String text, ParsePosition pos, int limit) {
    final int start = pos.getIndex();
    int cursor = start;

    // Advance over code points while they form a valid identifier: the first
    // one must be an identifier start, and every one must be an identifier part.
    while (cursor < limit) {
        int cp = UTF16.charAt(text, cursor);
        boolean startOk = (cursor != start) || UCharacter.isUnicodeIdentifierStart(cp);
        if (!(startOk && UCharacter.isUnicodeIdentifierPart(cp))) {
            break;
        }
        cursor += UTF16.getCharCount(cp);
    }

    // No valid name characters: signal failure with an empty string and
    // leave the parse position untouched.
    if (cursor == start) {
        return "";
    }

    pos.setIndex(cursor);
    return text.substring(start, cursor);
}
}
//
// RBBISymbolTable::lookupNode Given a key (a variable name), return the
// corresponding RBBI Node. If there is no entry
// in the table for this name, return NULL.
//
RBBINode lookupNode(String key) {
    // Fetch the table entry for this name; a missing entry yields null.
    RBBISymbolTableEntry entry = (RBBISymbolTableEntry) fHashTable.get(key);
    return (entry == null) ? null : entry.val;
}
//
// RBBISymbolTable::addEntry Add a new entry to the symbol table.
// Indicate an error if the name already exists -
// this will only occur in the case of duplicate
// variable assignments.
//
void addEntry(String key, RBBINode val) {
    RBBISymbolTableEntry existing = (RBBISymbolTableEntry) fHashTable.get(key);
    if (existing == null) {
        // First definition of this name: record it.
        RBBISymbolTableEntry fresh = new RBBISymbolTableEntry();
        fresh.key = key;
        fresh.val = val;
        fHashTable.put(fresh.key, fresh);
    } else {
        // The name is already in the table: report the redefinition and
        // leave the existing entry as it is.
        fRuleScanner.error(RBBIRuleBuilder.U_BRK_VARIABLE_REDFINITION);
    }
}
//
// RBBISymbolTable::print Debugging function, dump out the symbol table contents.
//
///CLOVER:OFF
void rbbiSymtablePrint() {
    // Debug dump in two passes over a snapshot of the table's entries:
    // first a summary line per entry, then each entry's parsed tree.
    String banner = "Variable Definitions\n"
            + "Name Node Val String Val\n"
            + "----------------------------------------------------------------------\n";
    System.out.print(banner);

    Collection entries = fHashTable.values();
    RBBISymbolTableEntry[] syms =
            (RBBISymbolTableEntry[]) entries.toArray(new RBBISymbolTableEntry[0]);

    for (int row = 0; row < syms.length; row++) {
        RBBISymbolTableEntry entry = syms[row];
        System.out.print(" " + entry.key + " "); // TODO: format output into columns.
        System.out.print(" " + entry.val + " ");
        System.out.print(entry.val.fLeftChild.fText);
        System.out.print("\n");
    }

    System.out.println("\nParsed Variable Definitions\n");
    for (int row = 0; row < syms.length; row++) {
        RBBISymbolTableEntry entry = syms[row];
        System.out.print(entry.key);
        entry.val.fLeftChild.printTree(true);
        System.out.print("\n");
    }
}
///CLOVER:ON
}