mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-5410 Add Clover:off
X-SVN-Rev: 20493
This commit is contained in:
parent
348c2eb1ff
commit
e9cbf49aaa
4 changed files with 1571 additions and 1594 deletions
|
@ -39,7 +39,7 @@ class RBBINode {
|
|||
static final int opLParen = 15;
|
||||
static final int nodeTypeLimit = 16; // For Assertion checking only.
|
||||
|
||||
static String [] nodeTypeNames = {
|
||||
static final String [] nodeTypeNames = {
|
||||
"setRef",
|
||||
"uset",
|
||||
"varRef",
|
||||
|
@ -56,7 +56,7 @@ class RBBINode {
|
|||
"opBreak",
|
||||
"opReverse",
|
||||
"opLParen"
|
||||
};
|
||||
};
|
||||
|
||||
// enum OpPrecedence {
|
||||
static final int precZero = 0;
|
||||
|
@ -101,174 +101,174 @@ class RBBINode {
|
|||
static int gLastSerial;
|
||||
|
||||
RBBINode(int t) {
|
||||
Assert.assrt(t<nodeTypeLimit);
|
||||
fSerialNum = ++gLastSerial;
|
||||
fType = t;
|
||||
Assert.assrt(t < nodeTypeLimit);
|
||||
fSerialNum = ++gLastSerial;
|
||||
fType = t;
|
||||
|
||||
fFirstPosSet = new HashSet();
|
||||
fLastPosSet = new HashSet();
|
||||
fFollowPos = new HashSet();
|
||||
if (t==opCat) {fPrecedence = precOpCat;}
|
||||
else if (t==opOr) {fPrecedence = precOpOr;}
|
||||
else if (t==opStart) {fPrecedence = precStart;}
|
||||
else if (t==opLParen) {fPrecedence = precLParen;}
|
||||
else fPrecedence = precZero;
|
||||
}
|
||||
|
||||
|
||||
RBBINode(RBBINode other) {
|
||||
fSerialNum = ++gLastSerial;
|
||||
fType = other.fType;
|
||||
fInputSet = other.fInputSet;
|
||||
fPrecedence = other.fPrecedence;
|
||||
fText = other.fText;
|
||||
fFirstPos = other.fFirstPos;
|
||||
fLastPos = other.fLastPos;
|
||||
fNullable = other.fNullable;
|
||||
fVal = other.fVal;
|
||||
fFirstPosSet = new HashSet(other.fFirstPosSet);
|
||||
fLastPosSet = new HashSet(other.fLastPosSet);
|
||||
fFollowPos = new HashSet(other.fFollowPos);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// cloneTree Make a copy of the subtree rooted at this node.
|
||||
// Discard any variable references encountered along the way,
|
||||
// and replace with copies of the variable's definitions.
|
||||
// Used to replicate the expression underneath variable
|
||||
// references in preparation for generating the DFA tables.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
RBBINode cloneTree() {
|
||||
RBBINode n;
|
||||
|
||||
if (fType == RBBINode.varRef) {
|
||||
// If the current node is a variable reference, skip over it
|
||||
// and clone the definition of the variable instead.
|
||||
n = fLeftChild.cloneTree();
|
||||
} else if (fType == RBBINode.uset) {
|
||||
n = this;
|
||||
} else {
|
||||
n = new RBBINode(this);
|
||||
if (fLeftChild != null) {
|
||||
n.fLeftChild = fLeftChild.cloneTree();
|
||||
n.fLeftChild.fParent = n;
|
||||
}
|
||||
if (fRightChild != null) {
|
||||
n.fRightChild = fRightChild.cloneTree();
|
||||
n.fRightChild.fParent = n;
|
||||
}
|
||||
fFirstPosSet = new HashSet();
|
||||
fLastPosSet = new HashSet();
|
||||
fFollowPos = new HashSet();
|
||||
if (t == opCat) {
|
||||
fPrecedence = precOpCat;
|
||||
} else if (t == opOr) {
|
||||
fPrecedence = precOpOr;
|
||||
} else if (t == opStart) {
|
||||
fPrecedence = precStart;
|
||||
} else if (t == opLParen) {
|
||||
fPrecedence = precLParen;
|
||||
} else {
|
||||
fPrecedence = precZero;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
RBBINode(RBBINode other) {
|
||||
fSerialNum = ++gLastSerial;
|
||||
fType = other.fType;
|
||||
fInputSet = other.fInputSet;
|
||||
fPrecedence = other.fPrecedence;
|
||||
fText = other.fText;
|
||||
fFirstPos = other.fFirstPos;
|
||||
fLastPos = other.fLastPos;
|
||||
fNullable = other.fNullable;
|
||||
fVal = other.fVal;
|
||||
fFirstPosSet = new HashSet(other.fFirstPosSet);
|
||||
fLastPosSet = new HashSet(other.fLastPosSet);
|
||||
fFollowPos = new HashSet(other.fFollowPos);
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// cloneTree Make a copy of the subtree rooted at this node.
|
||||
// Discard any variable references encountered along the way,
|
||||
// and replace with copies of the variable's definitions.
|
||||
// Used to replicate the expression underneath variable
|
||||
// references in preparation for generating the DFA tables.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
RBBINode cloneTree() {
|
||||
RBBINode n;
|
||||
|
||||
if (fType == RBBINode.varRef) {
|
||||
// If the current node is a variable reference, skip over it
|
||||
// and clone the definition of the variable instead.
|
||||
n = fLeftChild.cloneTree();
|
||||
} else if (fType == RBBINode.uset) {
|
||||
n = this;
|
||||
} else {
|
||||
n = new RBBINode(this);
|
||||
if (fLeftChild != null) {
|
||||
n.fLeftChild = fLeftChild.cloneTree();
|
||||
n.fLeftChild.fParent = n;
|
||||
}
|
||||
if (fRightChild != null) {
|
||||
n.fRightChild = fRightChild.cloneTree();
|
||||
n.fRightChild.fParent = n;
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// flattenVariables Walk a parse tree, replacing any variable
|
||||
// references with a copy of the variable's definition.
|
||||
// Aside from variables, the tree is not changed.
|
||||
//
|
||||
// Return the root of the tree. If the root was not a variable
|
||||
// reference, it remains unchanged - the root we started with
|
||||
// is the root we return. If, however, the root was a variable
|
||||
// reference, the root of the newly cloned replacement tree will
|
||||
// be returned, and the original tree deleted.
|
||||
//
|
||||
// This function works by recursively walking the tree
|
||||
// without doing anything until a variable reference is
|
||||
// found, then calling cloneTree() at that point. Any
|
||||
// nested references are handled by cloneTree(), not here.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
RBBINode flattenVariables() {
|
||||
if (fType == varRef) {
|
||||
RBBINode retNode = fLeftChild.cloneTree();
|
||||
// delete this;
|
||||
return retNode;
|
||||
}
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// flattenVariables Walk a parse tree, replacing any variable
|
||||
// references with a copy of the variable's definition.
|
||||
// Aside from variables, the tree is not changed.
|
||||
//
|
||||
// Return the root of the tree. If the root was not a variable
|
||||
// reference, it remains unchanged - the root we started with
|
||||
// is the root we return. If, however, the root was a variable
|
||||
// reference, the root of the newly cloned replacement tree will
|
||||
// be returned, and the original tree deleted.
|
||||
//
|
||||
// This function works by recursively walking the tree
|
||||
// without doing anything until a variable reference is
|
||||
// found, then calling cloneTree() at that point. Any
|
||||
// nested references are handled by cloneTree(), not here.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
RBBINode flattenVariables() {
|
||||
if (fType == varRef) {
|
||||
RBBINode retNode = fLeftChild.cloneTree();
|
||||
// delete this;
|
||||
return retNode;
|
||||
}
|
||||
|
||||
if (fLeftChild != null) {
|
||||
fLeftChild = fLeftChild.flattenVariables();
|
||||
fLeftChild.fParent = this;
|
||||
}
|
||||
if (fRightChild != null) {
|
||||
fRightChild = fRightChild.flattenVariables();
|
||||
fRightChild.fParent = this;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
if (fLeftChild != null) {
|
||||
fLeftChild = fLeftChild.flattenVariables();
|
||||
fLeftChild.fParent = this;
|
||||
}
|
||||
if (fRightChild != null) {
|
||||
fRightChild = fRightChild.flattenVariables();
|
||||
fRightChild.fParent = this;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// flattenSets Walk the parse tree, replacing any nodes of type setRef
|
||||
// with a copy of the expression tree for the set. A set's
|
||||
// equivalent expression tree is precomputed and saved as
|
||||
// the left child of the uset node.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
void flattenSets() {
|
||||
Assert.assrt(fType != setRef);
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// flattenSets Walk the parse tree, replacing any nodes of type setRef
|
||||
// with a copy of the expression tree for the set. A set's
|
||||
// equivalent expression tree is precomputed and saved as
|
||||
// the left child of the uset node.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
void flattenSets() {
|
||||
Assert.assrt(fType != setRef);
|
||||
if (fLeftChild != null) {
|
||||
if (fLeftChild.fType == setRef) {
|
||||
RBBINode setRefNode = fLeftChild;
|
||||
RBBINode usetNode = setRefNode.fLeftChild;
|
||||
RBBINode replTree = usetNode.fLeftChild;
|
||||
fLeftChild = replTree.cloneTree();
|
||||
fLeftChild.fParent = this;
|
||||
} else {
|
||||
fLeftChild.flattenSets();
|
||||
}
|
||||
}
|
||||
|
||||
if (fLeftChild != null) {
|
||||
if (fLeftChild.fType==setRef) {
|
||||
RBBINode setRefNode = fLeftChild;
|
||||
RBBINode usetNode = setRefNode.fLeftChild;
|
||||
RBBINode replTree = usetNode.fLeftChild;
|
||||
fLeftChild = replTree.cloneTree();
|
||||
fLeftChild.fParent = this;
|
||||
} else {
|
||||
fLeftChild.flattenSets();
|
||||
}
|
||||
}
|
||||
if (fRightChild != null) {
|
||||
if (fRightChild.fType == setRef) {
|
||||
RBBINode setRefNode = fRightChild;
|
||||
RBBINode usetNode = setRefNode.fLeftChild;
|
||||
RBBINode replTree = usetNode.fLeftChild;
|
||||
fRightChild = replTree.cloneTree();
|
||||
fRightChild.fParent = this;
|
||||
// delete setRefNode;
|
||||
} else {
|
||||
fRightChild.flattenSets();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fRightChild != null) {
|
||||
if (fRightChild.fType==setRef) {
|
||||
RBBINode setRefNode = fRightChild;
|
||||
RBBINode usetNode = setRefNode.fLeftChild;
|
||||
RBBINode replTree = usetNode.fLeftChild;
|
||||
fRightChild = replTree.cloneTree();
|
||||
fRightChild.fParent = this;
|
||||
// delete setRefNode;
|
||||
} else {
|
||||
fRightChild.flattenSets();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// findNodes() Locate all the nodes of the specified type, starting
|
||||
// at the specified root.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
void findNodes(List dest, int kind) {
|
||||
if (fType == kind) {
|
||||
dest.add(this);
|
||||
}
|
||||
if (fLeftChild != null) {
|
||||
fLeftChild.findNodes(dest, kind);
|
||||
}
|
||||
if (fRightChild != null) {
|
||||
fRightChild.findNodes(dest, kind);
|
||||
}
|
||||
}
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// findNodes() Locate all the nodes of the specified type, starting
|
||||
// at the specified root.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
void findNodes(List dest, int kind) {
|
||||
if (fType == kind) {
|
||||
dest.add(this);
|
||||
}
|
||||
if (fLeftChild != null) {
|
||||
fLeftChild.findNodes(dest, kind);
|
||||
}
|
||||
if (fRightChild != null) {
|
||||
fRightChild.findNodes(dest, kind);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// print. Print out a single node, for debugging.
|
||||
// print. Print out a single node, for debugging.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
///CLOVER:OFF
|
||||
static void printNode(RBBINode n) {
|
||||
|
||||
if (n==null) {
|
||||
|
@ -288,44 +288,52 @@ class RBBINode {
|
|||
}
|
||||
System.out.println("");
|
||||
}
|
||||
///CLOVER:ON
|
||||
|
||||
|
||||
// Print a String in a fixed field size.
|
||||
// Debugging function.
|
||||
static void printString(String s, int minWidth)
|
||||
{
|
||||
for (int i=minWidth; i<0; i++) {
|
||||
// negative width means pad leading spaces, not fixed width.
|
||||
System.out.print(' ');
|
||||
}
|
||||
for (int i=s.length(); i<minWidth; i++) {
|
||||
System.out.print(' ');
|
||||
}
|
||||
System.out.print(s);
|
||||
}
|
||||
|
||||
//
|
||||
// Print an int in a fixed size field.
|
||||
// Debugging function.
|
||||
//
|
||||
static void printInt(int i, int minWidth) {
|
||||
String s = Integer.toString(i);
|
||||
printString(s, Math.max(minWidth, s.length()+1));
|
||||
}
|
||||
|
||||
static void printHex(int i, int minWidth) {
|
||||
String s = Integer.toString(i, 16);
|
||||
String leadingZeroes = "00000".substring(0, Math.max(0, 5-s.length()));
|
||||
s = leadingZeroes+s;
|
||||
printString(s, minWidth);
|
||||
}
|
||||
|
||||
// Debugging function.
|
||||
///CLOVER:OFF
|
||||
static void printString(String s, int minWidth) {
|
||||
for (int i = minWidth; i < 0; i++) {
|
||||
// negative width means pad leading spaces, not fixed width.
|
||||
System.out.print(' ');
|
||||
}
|
||||
for (int i = s.length(); i < minWidth; i++) {
|
||||
System.out.print(' ');
|
||||
}
|
||||
System.out.print(s);
|
||||
}
|
||||
///CLOVER:ON
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
//
|
||||
// print. Print out the tree of nodes rooted at "this"
|
||||
//
|
||||
// -------------------------------------------------------------------------
|
||||
//
|
||||
// Print an int in a fixed size field.
|
||||
// Debugging function.
|
||||
//
|
||||
///CLOVER:OFF
|
||||
static void printInt(int i, int minWidth) {
|
||||
String s = Integer.toString(i);
|
||||
printString(s, Math.max(minWidth, s.length() + 1));
|
||||
}
|
||||
///CLOVER:ON
|
||||
|
||||
///CLOVER:OFF
|
||||
static void printHex(int i, int minWidth) {
|
||||
String s = Integer.toString(i, 16);
|
||||
String leadingZeroes = "00000"
|
||||
.substring(0, Math.max(0, 5 - s.length()));
|
||||
s = leadingZeroes + s;
|
||||
printString(s, minWidth);
|
||||
}
|
||||
///CLOVER:ON
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
//
|
||||
// print. Print out the tree of nodes rooted at "this"
|
||||
//
|
||||
// -------------------------------------------------------------------------
|
||||
///CLOVER:OFF
|
||||
void printTree(boolean printHeading) {
|
||||
if (printHeading) {
|
||||
System.out.println( "-------------------------------------------------------------------");
|
||||
|
@ -344,5 +352,6 @@ class RBBINode {
|
|||
}
|
||||
}
|
||||
}
|
||||
///CLOVER:ON
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -13,8 +13,6 @@ import java.io.OutputStream;
|
|||
import java.io.IOException;
|
||||
|
||||
import com.ibm.icu.impl.Assert;
|
||||
import com.ibm.icu.impl.CharTrie;
|
||||
import com.ibm.icu.impl.Trie;
|
||||
import com.ibm.icu.impl.IntTrieBuilder;
|
||||
|
||||
//
|
||||
|
@ -117,7 +115,7 @@ class RBBISetBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -149,29 +147,28 @@ class RBBISetBuilder {
|
|||
}
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// build Build the list of non-overlapping character ranges
|
||||
// from the Unicode Sets.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void build() {
|
||||
RBBINode usetNode;
|
||||
RangeDescriptor rlRange;
|
||||
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("usets")>=0) {printSets();}
|
||||
|
||||
// Initialize the process by creating a single range encompassing all characters
|
||||
// that is in no sets.
|
||||
//
|
||||
// build Build the list of non-overlapping character ranges
|
||||
// from the Unicode Sets.
|
||||
fRangeList = new RangeDescriptor();
|
||||
fRangeList.fStartChar = 0;
|
||||
fRangeList.fEndChar = 0x10ffff;
|
||||
|
||||
//
|
||||
// Find the set of non-overlapping ranges of characters
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void build() {
|
||||
RBBINode usetNode;
|
||||
RangeDescriptor rlRange;
|
||||
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("usets")>=0) {printSets();}
|
||||
|
||||
// Initialize the process by creating a single range encompassing all characters
|
||||
// that is in no sets.
|
||||
//
|
||||
fRangeList = new RangeDescriptor();
|
||||
fRangeList.fStartChar = 0;
|
||||
fRangeList.fEndChar = 0x10ffff;
|
||||
|
||||
//
|
||||
// Find the set of non-overlapping ranges of characters
|
||||
//
|
||||
Iterator ni = fRB.fUSetNodes.iterator();
|
||||
while (ni.hasNext()) {
|
||||
usetNode = (RBBINode)ni.next();
|
||||
|
@ -189,274 +186,316 @@ class RBBISetBuilder {
|
|||
int inputSetRangeEnd = inputSet.getRangeEnd(inputSetRangeIndex);
|
||||
|
||||
// skip over ranges from the range list that are completely
|
||||
// below the current range from the input unicode set.
|
||||
while (rlRange.fEndChar < inputSetRangeBegin) {
|
||||
rlRange = rlRange.fNext;
|
||||
}
|
||||
|
||||
// If the start of the range from the range list is before
|
||||
// the start of the range from the unicode set, split the range list range
|
||||
// in two, with one part being before (wholly outside of) the unicode set
|
||||
// and the other containing the rest.
|
||||
// Then continue the loop; the post-split current range will then be skipped
|
||||
// over
|
||||
if (rlRange.fStartChar < inputSetRangeBegin) {
|
||||
rlRange.split(inputSetRangeBegin);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Same thing at the end of the ranges...
|
||||
// If the end of the range from the range list doesn't coincide with
|
||||
// the end of the range from the unicode set, split the range list
|
||||
// range in two. The first part of the split range will be
|
||||
// wholly inside the Unicode set.
|
||||
if (rlRange.fEndChar > inputSetRangeEnd) {
|
||||
rlRange.split(inputSetRangeEnd+1);
|
||||
}
|
||||
|
||||
// The current rlRange is now entirely within the UnicodeSet range.
|
||||
// Add this unicode set to the list of sets for this rlRange
|
||||
if (rlRange.fIncludesSets.indexOf(usetNode) == -1) {
|
||||
rlRange.fIncludesSets.add(usetNode);
|
||||
}
|
||||
|
||||
// Advance over ranges that we are finished with.
|
||||
if (inputSetRangeEnd == rlRange.fEndChar) {
|
||||
inputSetRangeIndex++;
|
||||
}
|
||||
// below the current range from the input unicode set.
|
||||
while (rlRange.fEndChar < inputSetRangeBegin) {
|
||||
rlRange = rlRange.fNext;
|
||||
}
|
||||
}
|
||||
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("range")>=0) { printRanges();}
|
||||
|
||||
//
|
||||
// Group the above ranges, with each group consisting of one or more
|
||||
// ranges that are in exactly the same set of original UnicodeSets.
|
||||
// The groups are numbered, and these group numbers are the set of
|
||||
// input symbols recognized by the run-time state machine.
|
||||
//
|
||||
// Numbering: # 0 (state table column 0) is unused.
|
||||
// # 1 is reserved - table column 1 is for end-of-input
|
||||
// # 2 is reserved - table column 2 is for beginning-of-input
|
||||
// # 3 is the first range list.
|
||||
//
|
||||
RangeDescriptor rlSearchRange;
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange.fNext) {
|
||||
if (rlRange.fIncludesSets.equals(rlSearchRange.fIncludesSets)) {
|
||||
rlRange.fNum = rlSearchRange.fNum;
|
||||
break;
|
||||
}
|
||||
// If the start of the range from the range list is before
|
||||
// the start of the range from the unicode set, split the range list range
|
||||
// in two, with one part being before (wholly outside of) the unicode set
|
||||
// and the other containing the rest.
|
||||
// Then continue the loop; the post-split current range will then be skipped
|
||||
// over
|
||||
if (rlRange.fStartChar < inputSetRangeBegin) {
|
||||
rlRange.split(inputSetRangeBegin);
|
||||
continue;
|
||||
}
|
||||
if (rlRange.fNum == 0) {
|
||||
fGroupCount ++;
|
||||
rlRange.fNum = fGroupCount+2;
|
||||
rlRange.setDictionaryFlag();
|
||||
addValToSets(rlRange.fIncludesSets, fGroupCount+2);
|
||||
|
||||
// Same thing at the end of the ranges...
|
||||
// If the end of the range from the range list doesn't coincide with
|
||||
// the end of the range from the unicode set, split the range list
|
||||
// range in two. The first part of the split range will be
|
||||
// wholly inside the Unicode set.
|
||||
if (rlRange.fEndChar > inputSetRangeEnd) {
|
||||
rlRange.split(inputSetRangeEnd+1);
|
||||
}
|
||||
|
||||
// The current rlRange is now entirely within the UnicodeSet range.
|
||||
// Add this unicode set to the list of sets for this rlRange
|
||||
if (rlRange.fIncludesSets.indexOf(usetNode) == -1) {
|
||||
rlRange.fIncludesSets.add(usetNode);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle input sets that contain the special string {eof}.
|
||||
// Column 1 of the state table is reserved for EOF on input.
|
||||
// Column 2 is reserved for before-the-start-input.
|
||||
// (This column can be optimized away later if there are no rule
|
||||
// references to {bof}.)
|
||||
// Add this column value (1 or 2) to the equivalent expression
|
||||
// subtree for each UnicodeSet that contains the string {eof}
|
||||
// Because {bof} and {eof} are not characters in the normal sense,
|
||||
// they don't affect the computation of ranges or TRIE.
|
||||
|
||||
String eofString = "eof";
|
||||
String bofString = "bof";
|
||||
|
||||
ni = fRB.fUSetNodes.iterator();
|
||||
while (ni.hasNext()) {
|
||||
usetNode = (RBBINode )ni.next();
|
||||
UnicodeSet inputSet = usetNode.fInputSet;
|
||||
if (inputSet.contains(eofString)) {
|
||||
addValToSet(usetNode, 1);
|
||||
// Advance over ranges that we are finished with.
|
||||
if (inputSetRangeEnd == rlRange.fEndChar) {
|
||||
inputSetRangeIndex++;
|
||||
}
|
||||
if (inputSet.contains(bofString)) {
|
||||
addValToSet(usetNode, 2);
|
||||
fSawBOF = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("rgroup")>=0) {printRangeGroups();}
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("esets")>=0) {printSets();}
|
||||
|
||||
|
||||
//IntTrieBuilder(int aliasdata[], int maxdatalength,
|
||||
// int initialvalue, int leadunitvalue,
|
||||
// boolean latin1linear)
|
||||
|
||||
fTrie = new IntTrieBuilder(null, // Data array (utrie will allocate one)
|
||||
100000, // Max Data Length
|
||||
0, // Initial value for all code points
|
||||
0, // Lead Surrogate unit value,
|
||||
true); // Keep Latin 1 in separately.
|
||||
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
fTrie.setRange(rlRange.fStartChar, rlRange.fEndChar+1, rlRange.fNum, true);
|
||||
rlRange = rlRange.fNext;
|
||||
}
|
||||
}
|
||||
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("range")>=0) { printRanges();}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// RBBIDataManipulate A little internal class needed only to wrap the
|
||||
// getFoldedValue() function needed for Trie table creation.
|
||||
// Group the above ranges, with each group consisting of one or more
|
||||
// ranges that are in exactly the same set of original UnicodeSets.
|
||||
// The groups are numbered, and these group numbers are the set of
|
||||
// input symbols recognized by the run-time state machine.
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
class RBBIDataManipulate implements IntTrieBuilder.DataManipulate {
|
||||
public int getFoldedValue(int start, int offset) {
|
||||
int value;
|
||||
int limit;
|
||||
boolean [] inBlockZero = new boolean[1];
|
||||
|
||||
limit = start + 0x400;
|
||||
while(start<limit) {
|
||||
value = fTrie.getValue(start, inBlockZero);
|
||||
if (inBlockZero[0]) {
|
||||
start += IntTrieBuilder.DATA_BLOCK_LENGTH;
|
||||
} else if (value != 0) {
|
||||
return offset | 0x08000;
|
||||
} else {
|
||||
++start;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
RBBIDataManipulate dm = new RBBIDataManipulate();
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
// Numbering: # 0 (state table column 0) is unused.
|
||||
// # 1 is reserved - table column 1 is for end-of-input
|
||||
// # 2 is reserved - table column 2 is for beginning-of-input
|
||||
// # 3 is the first range list.
|
||||
//
|
||||
// getTrieSize() Return the size that will be required to serialize the Trie.
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
int getTrieSize() {
|
||||
int size = 0;
|
||||
try {
|
||||
// The trie serialize function returns the size of the data written.
|
||||
// null output stream says give size only, don't actually write anything.
|
||||
size = fTrie.serialize(null, true, dm );
|
||||
} catch (IOException e) {
|
||||
Assert.assrt (false);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// serializeTrie() Write the serialized trie to an output stream
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
void serializeTrie(OutputStream os) throws IOException {
|
||||
fTrie.serialize(os, true, dm );
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// addValToSets Add a runtime-mapped input value to each uset from a
|
||||
// list of uset nodes. (val corresponds to a state table column.)
|
||||
// For each of the original Unicode sets - which correspond
|
||||
// directly to uset nodes - a logically equivalent expression
|
||||
// is constructed in terms of the remapped runtime input
|
||||
// symbol set. This function adds one runtime input symbol to
|
||||
// a list of sets.
|
||||
//
|
||||
// The "logically equivalent expression" is the tree for an
|
||||
// or-ing together of all of the symbols that go into the set.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void addValToSets(List sets, int val) {
|
||||
int ix;
|
||||
|
||||
for (ix=0; ix<sets.size(); ix++) {
|
||||
RBBINode usetNode = (RBBINode )sets.get(ix);
|
||||
addValToSet(usetNode, val);
|
||||
}
|
||||
}
|
||||
|
||||
void addValToSet(RBBINode usetNode, int val) {
|
||||
RBBINode leafNode = new RBBINode(RBBINode.leafChar);
|
||||
leafNode.fVal = val;
|
||||
if (usetNode.fLeftChild == null) {
|
||||
usetNode.fLeftChild = leafNode;
|
||||
leafNode.fParent = usetNode;
|
||||
} else {
|
||||
// There are already input symbols present for this set.
|
||||
// Set up an OR node, with the previous stuff as the left child
|
||||
// and the new value as the right child.
|
||||
RBBINode orNode = new RBBINode(RBBINode.opOr);
|
||||
orNode.fLeftChild = usetNode.fLeftChild;
|
||||
orNode.fRightChild = leafNode;
|
||||
orNode.fLeftChild.fParent = orNode;
|
||||
orNode.fRightChild.fParent = orNode;
|
||||
usetNode.fLeftChild = orNode;
|
||||
orNode.fParent = usetNode;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// getNumCharCategories
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
int getNumCharCategories() {
|
||||
return fGroupCount + 3;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// sawBOF
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
boolean sawBOF() {
|
||||
return fSawBOF;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// getFirstChar Given a runtime RBBI character category, find
|
||||
// the first UChar32 that is in the set of chars
|
||||
// in the category.
|
||||
//------------------------------------------------------------------------
|
||||
int getFirstChar(int category) {
|
||||
RangeDescriptor rlRange;
|
||||
int retVal = -1;
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
if (rlRange.fNum == category) {
|
||||
retVal = rlRange.fStartChar;
|
||||
RangeDescriptor rlSearchRange;
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange.fNext) {
|
||||
if (rlRange.fIncludesSets.equals(rlSearchRange.fIncludesSets)) {
|
||||
rlRange.fNum = rlSearchRange.fNum;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return retVal;
|
||||
if (rlRange.fNum == 0) {
|
||||
fGroupCount ++;
|
||||
rlRange.fNum = fGroupCount+2;
|
||||
rlRange.setDictionaryFlag();
|
||||
addValToSets(rlRange.fIncludesSets, fGroupCount+2);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle input sets that contain the special string {eof}.
|
||||
// Column 1 of the state table is reserved for EOF on input.
|
||||
// Column 2 is reserved for before-the-start-input.
|
||||
// (This column can be optimized away later if there are no rule
|
||||
// references to {bof}.)
|
||||
// Add this column value (1 or 2) to the equivalent expression
|
||||
// subtree for each UnicodeSet that contains the string {eof}
|
||||
// Because {bof} and {eof} are not characters in the normal sense,
|
||||
// they don't affect the computation of ranges or TRIE.
|
||||
|
||||
String eofString = "eof";
|
||||
String bofString = "bof";
|
||||
|
||||
ni = fRB.fUSetNodes.iterator();
|
||||
while (ni.hasNext()) {
|
||||
usetNode = (RBBINode )ni.next();
|
||||
UnicodeSet inputSet = usetNode.fInputSet;
|
||||
if (inputSet.contains(eofString)) {
|
||||
addValToSet(usetNode, 1);
|
||||
}
|
||||
if (inputSet.contains(bofString)) {
|
||||
addValToSet(usetNode, 2);
|
||||
fSawBOF = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("rgroup")>=0) {printRangeGroups();}
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("esets")>=0) {printSets();}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// printRanges A debugging function.
|
||||
// dump out all of the range definitions.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void printRanges() {
|
||||
RangeDescriptor rlRange;
|
||||
int i;
|
||||
|
||||
System.out.print("\n\n Nonoverlapping Ranges ...\n");
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
System.out.print(" " + rlRange.fNum + " " + (int)rlRange.fStartChar + "-" + (int)rlRange.fEndChar);
|
||||
//IntTrieBuilder(int aliasdata[], int maxdatalength,
|
||||
// int initialvalue, int leadunitvalue,
|
||||
// boolean latin1linear)
|
||||
|
||||
fTrie = new IntTrieBuilder(null, // Data array (utrie will allocate one)
|
||||
100000, // Max Data Length
|
||||
0, // Initial value for all code points
|
||||
0, // Lead Surrogate unit value,
|
||||
true); // Keep Latin 1 in separately.
|
||||
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
fTrie.setRange(rlRange.fStartChar, rlRange.fEndChar+1, rlRange.fNum, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// RBBIDataManipulate A little internal class needed only to wrap the
|
||||
// getFoldedValue() function needed for Trie table creation.
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
class RBBIDataManipulate implements IntTrieBuilder.DataManipulate {
|
||||
public int getFoldedValue(int start, int offset) {
|
||||
int value;
|
||||
int limit;
|
||||
boolean [] inBlockZero = new boolean[1];
|
||||
|
||||
limit = start + 0x400;
|
||||
while(start<limit) {
|
||||
value = fTrie.getValue(start, inBlockZero);
|
||||
if (inBlockZero[0]) {
|
||||
start += IntTrieBuilder.DATA_BLOCK_LENGTH;
|
||||
} else if (value != 0) {
|
||||
return offset | 0x08000;
|
||||
} else {
|
||||
++start;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
// Fold-value callback instance passed to fTrie.serialize() by getTrieSize() and serializeTrie().
RBBIDataManipulate dm = new RBBIDataManipulate();
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// getTrieSize() Return the size that will be required to serialize the Trie.
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
int getTrieSize() {
|
||||
int size = 0;
|
||||
try {
|
||||
// The trie serialize function returns the size of the data written.
|
||||
// null output stream says give size only, don't actually write anything.
|
||||
size = fTrie.serialize(null, true, dm );
|
||||
} catch (IOException e) {
|
||||
Assert.assrt (false);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// serializeTrie() Write the serialized trie to an output stream
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
void serializeTrie(OutputStream os) throws IOException {
|
||||
fTrie.serialize(os, true, dm );
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// addValToSets Add a runtime-mapped input value to each uset from a
|
||||
// list of uset nodes. (val corresponds to a state table column.)
|
||||
// For each of the original Unicode sets - which correspond
|
||||
// directly to uset nodes - a logically equivalent expression
|
||||
// is constructed in terms of the remapped runtime input
|
||||
// symbol set. This function adds one runtime input symbol to
|
||||
// a list of sets.
|
||||
//
|
||||
// The "logically equivalent expression" is the tree for an
|
||||
// or-ing together of all of the symbols that go into the set.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void addValToSets(List sets, int val) {
|
||||
int ix;
|
||||
|
||||
for (ix=0; ix<sets.size(); ix++) {
|
||||
RBBINode usetNode = (RBBINode )sets.get(ix);
|
||||
addValToSet(usetNode, val);
|
||||
}
|
||||
}
|
||||
|
||||
void addValToSet(RBBINode usetNode, int val) {
|
||||
RBBINode leafNode = new RBBINode(RBBINode.leafChar);
|
||||
leafNode.fVal = val;
|
||||
if (usetNode.fLeftChild == null) {
|
||||
usetNode.fLeftChild = leafNode;
|
||||
leafNode.fParent = usetNode;
|
||||
} else {
|
||||
// There are already input symbols present for this set.
|
||||
// Set up an OR node, with the previous stuff as the left child
|
||||
// and the new value as the right child.
|
||||
RBBINode orNode = new RBBINode(RBBINode.opOr);
|
||||
orNode.fLeftChild = usetNode.fLeftChild;
|
||||
orNode.fRightChild = leafNode;
|
||||
orNode.fLeftChild.fParent = orNode;
|
||||
orNode.fRightChild.fParent = orNode;
|
||||
usetNode.fLeftChild = orNode;
|
||||
orNode.fParent = usetNode;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// getNumCharCategories
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
int getNumCharCategories() {
|
||||
return fGroupCount + 3;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// sawBOF
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
boolean sawBOF() {
|
||||
return fSawBOF;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// getFirstChar Given a runtime RBBI character category, find
|
||||
// the first UChar32 that is in the set of chars
|
||||
// in the category.
|
||||
//------------------------------------------------------------------------
|
||||
int getFirstChar(int category) {
|
||||
RangeDescriptor rlRange;
|
||||
int retVal = -1;
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
if (rlRange.fNum == category) {
|
||||
retVal = rlRange.fStartChar;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// printRanges A debugging function.
|
||||
// dump out all of the range definitions.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
///CLOVER:OFF
|
||||
void printRanges() {
|
||||
RangeDescriptor rlRange;
|
||||
int i;
|
||||
|
||||
System.out.print("\n\n Nonoverlapping Ranges ...\n");
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
System.out.print(" " + rlRange.fNum + " " + (int)rlRange.fStartChar + "-" + (int)rlRange.fEndChar);
|
||||
|
||||
for (i=0; i<rlRange.fIncludesSets.size(); i++) {
|
||||
RBBINode usetNode = (RBBINode )rlRange.fIncludesSets.get(i);
|
||||
String setName = "anon";
|
||||
RBBINode setRef = usetNode.fParent;
|
||||
if (setRef != null) {
|
||||
RBBINode varRef = setRef.fParent;
|
||||
if (varRef != null && varRef.fType == RBBINode.varRef) {
|
||||
setName = varRef.fText;
|
||||
}
|
||||
}
|
||||
System.out.print(setName); System.out.print(" ");
|
||||
}
|
||||
System.out.println("");
|
||||
}
|
||||
}
|
||||
///CLOVER:ON
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// printRangeGroups A debugging function.
|
||||
// dump out all of the range groups.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
///CLOVER:OFF
|
||||
void printRangeGroups() {
|
||||
RangeDescriptor rlRange;
|
||||
RangeDescriptor tRange;
|
||||
int i;
|
||||
int lastPrintedGroupNum = 0;
|
||||
|
||||
System.out.print("\nRanges grouped by Unicode Set Membership...\n");
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
int groupNum = rlRange.fNum & 0xbfff;
|
||||
if (groupNum > lastPrintedGroupNum) {
|
||||
lastPrintedGroupNum = groupNum;
|
||||
if (groupNum<10) {System.out.print(" ");}
|
||||
System.out.print(groupNum + " ");
|
||||
|
||||
if ((rlRange.fNum & 0x4000) != 0) { System.out.print(" <DICT> ");}
|
||||
|
||||
for (i=0; i<rlRange.fIncludesSets.size(); i++) {
|
||||
RBBINode usetNode = (RBBINode )rlRange.fIncludesSets.get(i);
|
||||
|
@ -468,105 +507,65 @@ class RBBISetBuilder {
|
|||
setName = varRef.fText;
|
||||
}
|
||||
}
|
||||
System.out.print(setName); System.out.print(" ");
|
||||
System.out.print(setName); System.out.print(" ");
|
||||
}
|
||||
System.out.println("");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// printRangeGroups A debugging function.
|
||||
// dump out all of the range groups.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void printRangeGroups() {
|
||||
RangeDescriptor rlRange;
|
||||
RangeDescriptor tRange;
|
||||
int i;
|
||||
int lastPrintedGroupNum = 0;
|
||||
|
||||
System.out.print("\nRanges grouped by Unicode Set Membership...\n");
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
int groupNum = rlRange.fNum & 0xbfff;
|
||||
if (groupNum > lastPrintedGroupNum) {
|
||||
lastPrintedGroupNum = groupNum;
|
||||
if (groupNum<10) {System.out.print(" ");}
|
||||
System.out.print(groupNum + " ");
|
||||
|
||||
if ((rlRange.fNum & 0x4000) != 0) { System.out.print(" <DICT> ");}
|
||||
|
||||
for (i=0; i<rlRange.fIncludesSets.size(); i++) {
|
||||
RBBINode usetNode = (RBBINode )rlRange.fIncludesSets.get(i);
|
||||
String setName = "anon";
|
||||
RBBINode setRef = usetNode.fParent;
|
||||
if (setRef != null) {
|
||||
RBBINode varRef = setRef.fParent;
|
||||
if (varRef != null && varRef.fType == RBBINode.varRef) {
|
||||
setName = varRef.fText;
|
||||
}
|
||||
i = 0;
|
||||
for (tRange = rlRange; tRange != null; tRange = tRange.fNext) {
|
||||
if (tRange.fNum == rlRange.fNum) {
|
||||
if (i++ % 5 == 0) {
|
||||
System.out.print("\n ");
|
||||
}
|
||||
System.out.print(setName); System.out.print(" ");
|
||||
}
|
||||
|
||||
i = 0;
|
||||
for (tRange = rlRange; tRange != null; tRange = tRange.fNext) {
|
||||
if (tRange.fNum == rlRange.fNum) {
|
||||
if (i++ % 5 == 0) {
|
||||
System.out.print("\n ");
|
||||
}
|
||||
RBBINode.printHex((int)tRange.fStartChar, -1);
|
||||
System.out.print("-");
|
||||
RBBINode.printHex((int)tRange.fEndChar, 0);
|
||||
}
|
||||
}
|
||||
System.out.print("\n");
|
||||
}
|
||||
}
|
||||
System.out.print("\n");
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// printSets A debugging function.
|
||||
// dump out all of the set definitions.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void printSets() {
|
||||
int i;
|
||||
System.out.print("\n\nUnicode Sets List\n------------------\n");
|
||||
for (i=0; i<fRB.fUSetNodes.size(); i++) {
|
||||
RBBINode usetNode;
|
||||
RBBINode setRef;
|
||||
RBBINode varRef;
|
||||
String setName;
|
||||
|
||||
usetNode = (RBBINode )fRB.fUSetNodes.get(i);
|
||||
|
||||
//System.out.print(" " + i + " ");
|
||||
RBBINode.printInt(2, i);
|
||||
setName = "anonymous";
|
||||
setRef = usetNode.fParent;
|
||||
if (setRef != null) {
|
||||
varRef = setRef.fParent;
|
||||
if (varRef != null && varRef.fType == RBBINode.varRef) {
|
||||
setName = varRef.fText;
|
||||
RBBINode.printHex((int)tRange.fStartChar, -1);
|
||||
System.out.print("-");
|
||||
RBBINode.printHex((int)tRange.fEndChar, 0);
|
||||
}
|
||||
}
|
||||
System.out.print(" " + setName);
|
||||
System.out.print(" ");
|
||||
System.out.print(usetNode.fText);
|
||||
System.out.print("\n");
|
||||
if (usetNode.fLeftChild != null) {
|
||||
usetNode.fLeftChild.printTree(true);
|
||||
}
|
||||
}
|
||||
System.out.print("\n");
|
||||
}
|
||||
///CLOVER:ON
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// printSets A debugging function.
|
||||
// dump out all of the set definitions.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
///CLOVER:OFF
|
||||
void printSets() {
|
||||
int i;
|
||||
System.out.print("\n\nUnicode Sets List\n------------------\n");
|
||||
for (i=0; i<fRB.fUSetNodes.size(); i++) {
|
||||
RBBINode usetNode;
|
||||
RBBINode setRef;
|
||||
RBBINode varRef;
|
||||
String setName;
|
||||
|
||||
usetNode = (RBBINode )fRB.fUSetNodes.get(i);
|
||||
|
||||
//System.out.print(" " + i + " ");
|
||||
RBBINode.printInt(2, i);
|
||||
setName = "anonymous";
|
||||
setRef = usetNode.fParent;
|
||||
if (setRef != null) {
|
||||
varRef = setRef.fParent;
|
||||
if (varRef != null && varRef.fType == RBBINode.varRef) {
|
||||
setName = varRef.fText;
|
||||
}
|
||||
}
|
||||
System.out.print(" " + setName);
|
||||
System.out.print(" ");
|
||||
System.out.print(usetNode.fText);
|
||||
System.out.print("\n");
|
||||
if (usetNode.fLeftChild != null) {
|
||||
usetNode.fLeftChild.printTree(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
System.out.print("\n");
|
||||
}
|
||||
///CLOVER:ON
|
||||
}
|
||||
|
|
|
@ -29,193 +29,176 @@ class RBBISymbolTable implements SymbolTable{
|
|||
static class RBBISymbolTableEntry {
|
||||
String key;
|
||||
RBBINode val;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
RBBISymbolTable(RBBIRuleScanner rs, String rules) {
    // Start with an empty table and the one-character U+FFFF stand-in string.
    fHashTable = new HashMap();
    ffffString = "\uffff";

    // Remember the scanner (used for error reporting) and the rule source text.
    fRuleScanner = rs;
    fRules = rules;
}
|
||||
|
||||
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::lookup This function from the abstract symbol table interface
|
||||
// looks up a variable name and returns a UnicodeString
|
||||
// containing the substitution text.
|
||||
//
|
||||
// The variable name does NOT include the leading $.
|
||||
//
|
||||
public char[] lookup(String s)
|
||||
{
|
||||
RBBISymbolTableEntry el;
|
||||
RBBINode varRefNode;
|
||||
RBBINode exprNode;
|
||||
|
||||
RBBINode usetNode;
|
||||
String retString;
|
||||
|
||||
el = (RBBISymbolTableEntry)fHashTable.get(s);
|
||||
if (el == null) {
|
||||
return null;
|
||||
RBBISymbolTable(RBBIRuleScanner rs, String rules) {
    // Start with an empty table and the one-character U+FFFF stand-in string.
    fHashTable = new HashMap();
    ffffString = "\uffff";

    // Remember the scanner (used for error reporting) and the rule source text.
    fRuleScanner = rs;
    fRules = rules;
}
|
||||
|
||||
// Walk through any chain of variable assignments that ultimately resolve to a Set Ref.
|
||||
varRefNode = el.val;
|
||||
while (varRefNode.fLeftChild.fType == RBBINode.varRef) {
|
||||
varRefNode = varRefNode.fLeftChild;
|
||||
}
|
||||
|
||||
exprNode = varRefNode.fLeftChild; // Root node of expression for variable
|
||||
if (exprNode.fType == RBBINode.setRef) {
|
||||
// The $variable refers to a single UnicodeSet
|
||||
// return the ffffString, which will subsequently be interpreted as a
|
||||
// stand-in character for the set by RBBISymbolTable::lookupMatcher()
|
||||
usetNode = exprNode.fLeftChild;
|
||||
fCachedSetLookup = usetNode.fInputSet;
|
||||
retString = ffffString;
|
||||
}
|
||||
else
|
||||
{
|
||||
// The variable refers to something other than just a set.
|
||||
// This is an error in the rules being compiled. $Variables inside of UnicodeSets
|
||||
// must refer only to another set, not to some random non-set expression.
|
||||
// Note: single characters are represented as sets, so they are ok.
|
||||
fRuleScanner.error(RBBIRuleBuilder.U_BRK_MALFORMED_SET);
|
||||
retString = exprNode.fText;
|
||||
fCachedSetLookup = null;
|
||||
}
|
||||
return retString.toCharArray();
|
||||
}
|
||||
//
|
||||
// RBBISymbolTable::lookup This function from the abstract symbol table interface
|
||||
// looks up a variable name and returns a UnicodeString
|
||||
// containing the substitution text.
|
||||
//
|
||||
// The variable name does NOT include the leading $.
|
||||
//
|
||||
public char[] lookup(String s) {
|
||||
RBBISymbolTableEntry el;
|
||||
RBBINode varRefNode;
|
||||
RBBINode exprNode;
|
||||
|
||||
RBBINode usetNode;
|
||||
String retString;
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::lookupMatcher This function from the abstract symbol table
|
||||
// interface maps a single stand-in character to a
|
||||
// pointer to a Unicode Set. The Unicode Set code uses this
|
||||
// mechanism to get all references to the same $variable
|
||||
// name to refer to a single common Unicode Set instance.
|
||||
//
|
||||
// This implementation cheats a little, and does not maintain a map of stand-in chars
|
||||
// to sets. Instead, it takes advantage of the fact that the UnicodeSet
|
||||
// constructor will always call this function right after calling lookup(),
|
||||
// and we just need to remember what set to return between these two calls.
|
||||
public UnicodeMatcher lookupMatcher(int ch)
|
||||
{
|
||||
UnicodeSet retVal = null;
|
||||
if (ch == 0xffff) {
|
||||
retVal = fCachedSetLookup;
|
||||
fCachedSetLookup = null;
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
//
|
||||
// RBBISymbolTable::parseReference This function from the abstract symbol table interface
|
||||
// looks for a $variable name in the source text.
|
||||
// It does not look it up, only scans for it.
|
||||
// It is used by the UnicodeSet parser.
|
||||
//
|
||||
public String parseReference( String text, ParsePosition pos, int limit)
|
||||
{
|
||||
int start = pos.getIndex();
|
||||
int i = start;
|
||||
String result = "";
|
||||
while (i < limit) {
|
||||
int c = UTF16.charAt(text, i);
|
||||
if ((i==start && !UCharacter.isUnicodeIdentifierStart(c)) || !UCharacter.isUnicodeIdentifierPart(c)) {
|
||||
break;
|
||||
el = (RBBISymbolTableEntry) fHashTable.get(s);
|
||||
if (el == null) {
|
||||
return null;
|
||||
}
|
||||
i += UTF16.getCharCount(c);
|
||||
}
|
||||
if (i == start) { // No valid name chars
|
||||
return result; // Indicate failure with empty string
|
||||
}
|
||||
pos.setIndex(i);
|
||||
result = text.substring(start, i);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Walk through any chain of variable assignments that ultimately resolve to a Set Ref.
|
||||
varRefNode = el.val;
|
||||
while (varRefNode.fLeftChild.fType == RBBINode.varRef) {
|
||||
varRefNode = varRefNode.fLeftChild;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::lookupNode Given a key (a variable name), return the
|
||||
// corresponding RBBI Node. If there is no entry
|
||||
// in the table for this name, return NULL.
|
||||
//
|
||||
RBBINode lookupNode(String key) {
|
||||
|
||||
RBBINode retNode = null;
|
||||
RBBISymbolTableEntry el;
|
||||
|
||||
el = (RBBISymbolTableEntry)fHashTable.get(key);
|
||||
if (el != null) {
|
||||
retNode = el.val;
|
||||
}
|
||||
return retNode;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::addEntry Add a new entry to the symbol table.
|
||||
// Indicate an error if the name already exists -
|
||||
// this will only occur in the case of duplicate
|
||||
// variable assignments.
|
||||
//
|
||||
void addEntry (String key, RBBINode val) {
|
||||
RBBISymbolTableEntry e;
|
||||
e = (RBBISymbolTableEntry )fHashTable.get(key);
|
||||
if (e != null) {
|
||||
fRuleScanner.error(RBBIRuleBuilder.U_BRK_VARIABLE_REDFINITION);
|
||||
return;
|
||||
exprNode = varRefNode.fLeftChild; // Root node of expression for variable
|
||||
if (exprNode.fType == RBBINode.setRef) {
|
||||
// The $variable refers to a single UnicodeSet
|
||||
// return the ffffString, which will subsequently be interpreted as a
|
||||
// stand-in character for the set by RBBISymbolTable::lookupMatcher()
|
||||
usetNode = exprNode.fLeftChild;
|
||||
fCachedSetLookup = usetNode.fInputSet;
|
||||
retString = ffffString;
|
||||
} else {
|
||||
// The variable refers to something other than just a set.
|
||||
// This is an error in the rules being compiled. $Variables inside of UnicodeSets
|
||||
// must refer only to another set, not to some random non-set expression.
|
||||
// Note: single characters are represented as sets, so they are ok.
|
||||
fRuleScanner.error(RBBIRuleBuilder.U_BRK_MALFORMED_SET);
|
||||
retString = exprNode.fText;
|
||||
fCachedSetLookup = null;
|
||||
}
|
||||
return retString.toCharArray();
|
||||
}
|
||||
|
||||
e = new RBBISymbolTableEntry();
|
||||
e.key = key;
|
||||
e.val = val;
|
||||
fHashTable.put(e.key, e);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::print Debugging function, dump out the symbol table contents.
|
||||
//
|
||||
void rbbiSymtablePrint() {
|
||||
System.out.print("Variable Definitions\n" +
|
||||
"Name Node Val String Val\n" +
|
||||
"----------------------------------------------------------------------\n");
|
||||
|
||||
int pos = -1;
|
||||
RBBISymbolTableEntry [] syms = new RBBISymbolTableEntry[0];
|
||||
Collection t = fHashTable.values();
|
||||
syms = (RBBISymbolTableEntry[]) t.toArray(syms);
|
||||
|
||||
for (int i=0; i<syms.length; i++) {
|
||||
RBBISymbolTableEntry s = syms[i];
|
||||
|
||||
System.out.print(" " + s.key + " "); // TODO: format output into columns.
|
||||
System.out.print(" " + s.val + " ");
|
||||
System.out.print(s.val.fLeftChild.fText);
|
||||
System.out.print("\n");
|
||||
//
|
||||
// RBBISymbolTable::lookupMatcher This function from the abstract symbol table
|
||||
// interface maps a single stand-in character to a
|
||||
// pointer to a Unicode Set. The Unicode Set code uses this
|
||||
// mechanism to get all references to the same $variable
|
||||
// name to refer to a single common Unicode Set instance.
|
||||
//
|
||||
// This implementation cheats a little, and does not maintain a map of stand-in chars
|
||||
// to sets. Instead, it takes advantage of the fact that the UnicodeSet
|
||||
// constructor will always call this function right after calling lookup(),
|
||||
// and we just need to remember what set to return between these two calls.
|
||||
public UnicodeMatcher lookupMatcher(int ch) {
|
||||
UnicodeSet retVal = null;
|
||||
if (ch == 0xffff) {
|
||||
retVal = fCachedSetLookup;
|
||||
fCachedSetLookup = null;
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
System.out.println("\nParsed Variable Definitions\n");
|
||||
pos = -1;
|
||||
for (int i=0; i<syms.length; i++) {
|
||||
RBBISymbolTableEntry s = syms[i];
|
||||
System.out.print(s.key);
|
||||
s.val.fLeftChild.printTree(true);
|
||||
System.out.print("\n");
|
||||
//
|
||||
// RBBISymbolTable::parseReference This function from the abstract symbol table interface
|
||||
// looks for a $variable name in the source text.
|
||||
// It does not look it up, only scans for it.
|
||||
// It is used by the UnicodeSet parser.
|
||||
//
|
||||
public String parseReference(String text, ParsePosition pos, int limit) {
|
||||
int start = pos.getIndex();
|
||||
int i = start;
|
||||
String result = "";
|
||||
while (i < limit) {
|
||||
int c = UTF16.charAt(text, i);
|
||||
if ((i == start && !UCharacter.isUnicodeIdentifierStart(c))
|
||||
|| !UCharacter.isUnicodeIdentifierPart(c)) {
|
||||
break;
|
||||
}
|
||||
i += UTF16.getCharCount(c);
|
||||
}
|
||||
if (i == start) { // No valid name chars
|
||||
return result; // Indicate failure with empty string
|
||||
}
|
||||
pos.setIndex(i);
|
||||
result = text.substring(start, i);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::lookupNode Given a key (a variable name), return the
|
||||
// corresponding RBBI Node. If there is no entry
|
||||
// in the table for this name, return NULL.
|
||||
//
|
||||
RBBINode lookupNode(String key) {
|
||||
|
||||
RBBINode retNode = null;
|
||||
RBBISymbolTableEntry el;
|
||||
|
||||
el = (RBBISymbolTableEntry) fHashTable.get(key);
|
||||
if (el != null) {
|
||||
retNode = el.val;
|
||||
}
|
||||
return retNode;
|
||||
}
|
||||
|
||||
//
|
||||
// RBBISymbolTable::addEntry Add a new entry to the symbol table.
|
||||
// Indicate an error if the name already exists -
|
||||
// this will only occur in the case of duplicate
|
||||
// variable assignments.
|
||||
//
|
||||
void addEntry(String key, RBBINode val) {
|
||||
RBBISymbolTableEntry e;
|
||||
e = (RBBISymbolTableEntry) fHashTable.get(key);
|
||||
if (e != null) {
|
||||
fRuleScanner.error(RBBIRuleBuilder.U_BRK_VARIABLE_REDFINITION);
|
||||
return;
|
||||
}
|
||||
|
||||
e = new RBBISymbolTableEntry();
|
||||
e.key = key;
|
||||
e.val = val;
|
||||
fHashTable.put(e.key, e);
|
||||
}
|
||||
|
||||
//
|
||||
// RBBISymbolTable::print Debugging function, dump out the symbol table contents.
|
||||
//
|
||||
///CLOVER:OFF
|
||||
void rbbiSymtablePrint() {
|
||||
System.out
|
||||
.print("Variable Definitions\n"
|
||||
+ "Name Node Val String Val\n"
|
||||
+ "----------------------------------------------------------------------\n");
|
||||
|
||||
RBBISymbolTableEntry[] syms = new RBBISymbolTableEntry[0];
|
||||
Collection t = fHashTable.values();
|
||||
syms = (RBBISymbolTableEntry[]) t.toArray(syms);
|
||||
|
||||
for (int i = 0; i < syms.length; i++) {
|
||||
RBBISymbolTableEntry s = syms[i];
|
||||
|
||||
System.out.print(" " + s.key + " "); // TODO: format output into columns.
|
||||
System.out.print(" " + s.val + " ");
|
||||
System.out.print(s.val.fLeftChild.fText);
|
||||
System.out.print("\n");
|
||||
}
|
||||
|
||||
System.out.println("\nParsed Variable Definitions\n");
|
||||
for (int i = 0; i < syms.length; i++) {
|
||||
RBBISymbolTableEntry s = syms[i];
|
||||
System.out.print(s.key);
|
||||
s.val.fLeftChild.printTree(true);
|
||||
System.out.print("\n");
|
||||
}
|
||||
}
|
||||
///CLOVER:ON
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue