mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-16 10:17:23 +00:00
ICU-3295 rbbi rt port to Java.
X-SVN-Rev: 14986
This commit is contained in:
parent
c957f85632
commit
77136a5b24
3 changed files with 513 additions and 30 deletions
|
@ -219,7 +219,7 @@ RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
|
|||
const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that;
|
||||
if (fText == that2.fText ||
|
||||
(fText != NULL && that2.fText != NULL && *that2.fText == *fText)) {
|
||||
if (that2.fData == fData ||
|
||||
if (that2.fData == fData ||
|
||||
(fData != NULL && that2.fData != NULL && *that2.fData == *fData)) {
|
||||
r = TRUE;
|
||||
}
|
||||
|
@ -475,12 +475,12 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
|
|||
// otherwise, set our internal iteration position (temporarily)
|
||||
// to the position passed in. If this is the _beginning_ position,
|
||||
// then we can just use next() to get our return value
|
||||
|
||||
|
||||
int32_t result = 0;
|
||||
|
||||
if (fData->fSafeRevTable != NULL) {
|
||||
// new rule syntax
|
||||
/// todo synwee
|
||||
/// todo synwee
|
||||
fText->setIndex(offset);
|
||||
// move forward one codepoint to prepare for moving back to a
|
||||
// safe point.
|
||||
|
@ -500,9 +500,9 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
|
|||
fText->previous32();
|
||||
// handle next will give result >= offset
|
||||
handleNext(fData->fSafeFwdTable);
|
||||
// previous will give result 0 or 1 boundary away from offset,
|
||||
// previous will give result 0 or 1 boundary away from offset,
|
||||
// most of the time
|
||||
// we have to
|
||||
// we have to
|
||||
int32_t oldresult = previous();
|
||||
while (oldresult > offset) {
|
||||
int32_t result = previous();
|
||||
|
@ -584,9 +584,9 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
|
|||
// handle previous will give result <= offset
|
||||
handlePrevious(fData->fSafeRevTable);
|
||||
|
||||
// next will give result 0 or 1 boundary away from offset,
|
||||
// next will give result 0 or 1 boundary away from offset,
|
||||
// most of the time
|
||||
// we have to
|
||||
// we have to
|
||||
int32_t oldresult = next();
|
||||
while (oldresult < offset) {
|
||||
int32_t result = next();
|
||||
|
@ -779,8 +779,8 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
|
|||
}
|
||||
|
||||
if (row->fLookAhead != 0) {
|
||||
if (lookaheadStatus != 0
|
||||
&& row->fAccepting == lookaheadStatus) {
|
||||
if (lookaheadStatus != 0
|
||||
&& row->fAccepting == lookaheadStatus) {
|
||||
// Lookahead match is completed. Set the result accordingly, but only
|
||||
// if no other rule has matched further in the mean time.
|
||||
result = lookaheadResult;
|
||||
|
@ -817,7 +817,7 @@ continueOn:
|
|||
// We have advanced through the string until it is certain that no
|
||||
// longer match is possible, no matter what characters follow.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The state machine is done. Check whether it found a match...
|
||||
|
@ -839,6 +839,13 @@ continueOn:
|
|||
return result;
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
//
|
||||
// handlePrevious(void) This is the variant used with old style rules
|
||||
// (Overshoot to a safe point, then move forward)
|
||||
//
|
||||
//----------------------------------------------------------------
|
||||
int32_t RuleBasedBreakIterator::handlePrevious(void) {
|
||||
if (fText == NULL || fData == NULL) {
|
||||
return 0;
|
||||
|
@ -991,10 +998,10 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
int32_t state = START_STATE;
|
||||
int32_t category;
|
||||
int32_t lastCategory = 0;
|
||||
UBool hasPassedStartText = !fText->hasPrevious();
|
||||
UBool hasPassedStartText = !fText->hasPrevious();
|
||||
UChar32 c = fText->previous32();
|
||||
// previous character
|
||||
int32_t result = fText->getIndex();
|
||||
int32_t result = fText->getIndex();
|
||||
int32_t lookaheadStatus = 0;
|
||||
int32_t lookaheadResult = 0;
|
||||
int32_t lookaheadTagIdx = 0;
|
||||
|
@ -1017,7 +1024,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
// loop until we reach the beginning of the text or transition to state 0
|
||||
for (;;) {
|
||||
// if (c == CharacterIterator::DONE && fText->hasPrevious()==FALSE) {
|
||||
if (hasPassedStartText) {
|
||||
if (hasPassedStartText) {
|
||||
// if we have already considered the start of the text
|
||||
if (row->fLookAhead != 0 && lookaheadResult == 0) {
|
||||
result = 0;
|
||||
|
@ -1052,7 +1059,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
state = row->fNextState[category];
|
||||
row = (RBBIStateTableRow *)
|
||||
(statetable->fTableData + (state * statetable->fRowLen));
|
||||
|
||||
|
||||
if (row->fAccepting == -1) {
|
||||
// Match found, common case, could have lookahead so we move on to check it
|
||||
result = fText->getIndex();
|
||||
|
@ -1061,8 +1068,8 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
}
|
||||
|
||||
if (row->fLookAhead != 0) {
|
||||
if (lookaheadStatus != 0
|
||||
&& row->fAccepting == lookaheadStatus) {
|
||||
if (lookaheadStatus != 0
|
||||
&& row->fAccepting == lookaheadStatus) {
|
||||
// Lookahead match is completed. Set the result accordingly, but only
|
||||
// if no other rule has matched further in the mean time.
|
||||
result = lookaheadResult;
|
||||
|
@ -1085,7 +1092,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
}
|
||||
category = lastCategory;
|
||||
fText->setIndex(result);
|
||||
|
||||
|
||||
goto continueOn;
|
||||
}
|
||||
|
||||
|
@ -1105,12 +1112,12 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
lookaheadStatus = 0; // clear out any pending look-ahead matches.
|
||||
|
||||
continueOn:
|
||||
if (state == STOP_STATE) {
|
||||
if (state == STOP_STATE) {
|
||||
break;
|
||||
}
|
||||
|
||||
// then advance one character backwards
|
||||
hasPassedStartText = !fText->hasPrevious();
|
||||
hasPassedStartText = !fText->hasPrevious();
|
||||
c = fText->previous32();
|
||||
}
|
||||
|
||||
|
@ -1186,7 +1193,7 @@ int32_t RuleBasedBreakIterator::getRuleStatus() const {
|
|||
|
||||
|
||||
int32_t RuleBasedBreakIterator::getRuleStatusVec(
|
||||
int32_t *fillInVec, int32_t capacity, UErrorCode &status)
|
||||
int32_t *fillInVec, int32_t capacity, UErrorCode &status)
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return 0;
|
||||
|
@ -1197,7 +1204,7 @@ int32_t RuleBasedBreakIterator::getRuleStatusVec(
|
|||
int32_t numVals = fData->fRuleStatusTable[fLastRuleStatusIndex];
|
||||
int32_t numValsToCopy = numVals;
|
||||
if (numVals > capacity) {
|
||||
status = U_BUFFER_OVERFLOW_ERROR;
|
||||
status = U_BUFFER_OVERFLOW_ERROR;
|
||||
numValsToCopy = capacity;
|
||||
}
|
||||
int i;
|
||||
|
|
284
icu4j/src/com/ibm/icu/text/RBBIDataWrapper.java
Normal file
284
icu4j/src/com/ibm/icu/text/RBBIDataWrapper.java
Normal file
|
@ -0,0 +1,284 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
|
||||
import com.ibm.icu.util.RangeValueIterator;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UCharacterCategory;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.Trie;
|
||||
import com.ibm.icu.impl.CharTrie;
|
||||
|
||||
/**
|
||||
* <p>Internal class used for Rule Based Break Iterators</p>
|
||||
* <p>This class provides access to the compiled break rule data, as
|
||||
* it is stored in a .brk file.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
public class RBBIDataWrapper {
|
||||
//
|
||||
// These fields are the ready-to-use compiled rule data, as
|
||||
// read from the file.
|
||||
//
|
||||
public RBBIDataHeader fHeader;
|
||||
public short fFTable[];
|
||||
public short fRTable[];
|
||||
public short fSFTable[];
|
||||
public short fSRTable[];
|
||||
public CharTrie fTrie;
|
||||
public String fRuleSource;
|
||||
public int fStatusTable[];
|
||||
|
||||
// Data Header. A struct-like class with the fields from the RBBI data file header.
|
||||
//
|
||||
static class RBBIDataHeader {
|
||||
int fMagic; // == 0xbla0
|
||||
int fVersion; // == 1
|
||||
int fLength; // Total length in bytes of this RBBI Data,
|
||||
// including all sections, not just the header.
|
||||
int fCatCount; // Number of character categories.
|
||||
|
||||
//
|
||||
// Offsets and sizes of each of the subsections within the RBBI data.
|
||||
// All offsets are bytes from the start of the RBBIDataHeader.
|
||||
// All sizes are in bytes.
|
||||
//
|
||||
int fFTable; // forward state transition table.
|
||||
int fFTableLen;
|
||||
int fRTable; // Offset to the reverse state transition table.
|
||||
int fRTableLen;
|
||||
int fSFTable; // safe point forward transition table
|
||||
int fSFTableLen;
|
||||
int fSRTable; // safe point reverse transition table
|
||||
int fSRTableLen;
|
||||
int fTrie; // Offset to Trie data for character categories
|
||||
int fTrieLen;
|
||||
int fRuleSource; // Offset to the source for for the break
|
||||
int fRuleSourceLen; // rules. Stored UChar *.
|
||||
int fStatusTable; // Offset to the table of rule status values
|
||||
int fStatusTableLen;
|
||||
|
||||
public RBBIDataHeader() {
|
||||
fMagic = 0;
|
||||
};
|
||||
};
|
||||
|
||||
static class TrieFoldingFunc implements Trie.DataManipulate {
|
||||
public int getFoldingOffset(int data) {
|
||||
if ((data & 0x8000) == 0) {
|
||||
return data & 0x7fff;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
static TrieFoldingFunc fTrieFoldingFunc;
|
||||
|
||||
|
||||
RBBIDataWrapper() {
|
||||
};
|
||||
|
||||
static RBBIDataWrapper get(String name) throws IOException {
|
||||
String fullName = "data/" + name;
|
||||
InputStream is = ICUData.getRequiredStream(fullName);
|
||||
return get(is);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get an RBBIDataWrapper from an InputStream onto a pre-compiled set
|
||||
* of RBBI rules.
|
||||
*/
|
||||
static RBBIDataWrapper get(InputStream is) throws IOException {
|
||||
int i;
|
||||
|
||||
DataInputStream dis = new DataInputStream(is);
|
||||
RBBIDataWrapper This = new RBBIDataWrapper();
|
||||
|
||||
// Seek past the ICU data header.
|
||||
// TODO: verify that it looks good.
|
||||
dis.skip(0x80);
|
||||
|
||||
// Read in the RBBI data header...
|
||||
This.fHeader = new RBBIDataHeader();
|
||||
This.fHeader.fMagic = dis.readInt();
|
||||
This.fHeader.fVersion = dis.readInt();
|
||||
This.fHeader.fLength = dis.readInt();
|
||||
This.fHeader.fCatCount = dis.readInt();
|
||||
This.fHeader.fFTable = dis.readInt();
|
||||
This.fHeader.fFTableLen = dis.readInt();
|
||||
This.fHeader.fRTable = dis.readInt();
|
||||
This.fHeader.fRTableLen = dis.readInt();
|
||||
This.fHeader.fSFTable = dis.readInt();
|
||||
This.fHeader.fSFTableLen = dis.readInt();
|
||||
This.fHeader.fSRTable = dis.readInt();
|
||||
This.fHeader.fSRTableLen = dis.readInt();
|
||||
This.fHeader.fTrie = dis.readInt();
|
||||
This.fHeader.fTrieLen = dis.readInt();
|
||||
This.fHeader.fRuleSource = dis.readInt();
|
||||
This.fHeader.fRuleSourceLen = dis.readInt();
|
||||
This.fHeader.fStatusTable = dis.readInt();
|
||||
This.fHeader.fStatusTableLen = dis.readInt();
|
||||
dis.skip(6 * 4); // uint32_t fReserved[6];
|
||||
|
||||
|
||||
if (This.fHeader.fMagic != 0xb1a0) {
|
||||
throw new IOException("Break Iterator Rule Data Magic Number Incorrect");
|
||||
}
|
||||
|
||||
// Current position in input stream.
|
||||
int pos = 24 * 4; // offset of end of header, which has 24 fields, all int32_t (4 bytes)
|
||||
|
||||
//
|
||||
// Read in the Forward state transition table as an array of shorts.
|
||||
//
|
||||
|
||||
// Quick Sanity Check
|
||||
if (This.fHeader.fFTable < pos || This.fHeader.fFTable > This.fHeader.fLength) {
|
||||
throw new IOException("Break iterator Rule data corrupt");
|
||||
}
|
||||
|
||||
// Skip over any padding preceding this table
|
||||
dis.skip(This.fHeader.fFTable - pos);
|
||||
pos = This.fHeader.fFTable;
|
||||
|
||||
This.fFTable = new short[This.fHeader.fFTableLen / 2];
|
||||
for ( i=0; i<This.fFTable.length; i++) {
|
||||
This.fFTable[i] = dis.readShort();
|
||||
pos += 2;
|
||||
}
|
||||
|
||||
//
|
||||
// Read in the Reverse state table
|
||||
//
|
||||
|
||||
// Skip over any padding in the file
|
||||
dis.skip(This.fHeader.fRTable - pos);
|
||||
pos = This.fHeader.fRTable;
|
||||
|
||||
// Create & fill the table itself.
|
||||
This.fRTable = new short[This.fHeader.fRTableLen / 2];
|
||||
for (i=0; i<This.fRTable.length; i++) {
|
||||
This.fRTable[i] = dis.readShort();
|
||||
pos += 2;
|
||||
}
|
||||
|
||||
//
|
||||
// Read in the Safe Forward state table
|
||||
//
|
||||
if (This.fHeader.fSFTableLen > 0) {
|
||||
// Skip over any padding in the file
|
||||
dis.skip(This.fHeader.fSFTable - pos);
|
||||
pos = This.fHeader.fSFTable;
|
||||
|
||||
// Create & fill the table itself.
|
||||
This.fSFTable = new short[This.fHeader.fSFTableLen / 2];
|
||||
for (i=0; i<This.fSFTable.length; i++) {
|
||||
This.fSFTable[i] = dis.readShort();
|
||||
pos += 2;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Read in the Safe Reverse state table
|
||||
//
|
||||
if (This.fHeader.fSRTableLen > 0) {
|
||||
// Skip over any padding in the file
|
||||
dis.skip(This.fHeader.fSRTable - pos);
|
||||
pos = This.fHeader.fSRTable;
|
||||
|
||||
// Create & fill the table itself.
|
||||
This.fSRTable = new short[This.fHeader.fSRTableLen / 2];
|
||||
for (i=0; i<This.fSRTable.length; i++) {
|
||||
This.fSRTable[i] = dis.readShort();
|
||||
pos += 2;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Unserialize the Character categories TRIE
|
||||
// Because we can't be absolutely certain where the Trie deserialize will
|
||||
// leave the input stream, leave position unchanged.
|
||||
// The seek to the start of the next item following the TRIE will get us
|
||||
// back in sync.
|
||||
//
|
||||
dis.skip(This.fHeader.fTrie - pos);
|
||||
pos = This.fHeader.fTrie;
|
||||
dis.mark(This.fHeader.fTrieLen+100);
|
||||
This.fTrie = new CharTrie(dis, fTrieFoldingFunc);
|
||||
dis.reset();
|
||||
|
||||
//
|
||||
// Read the Rule Status Table
|
||||
//
|
||||
if (pos > This.fHeader.fStatusTable) {
|
||||
throw new IOException("Break iterator Rule data corrupt");
|
||||
}
|
||||
dis.skip(This.fHeader.fStatusTable - pos);
|
||||
pos = This.fHeader.fStatusTable;
|
||||
This.fStatusTable = new int[This.fHeader.fStatusTableLen / 4];
|
||||
for (i=0; i<This.fStatusTable.length; i++) {
|
||||
This.fStatusTable[i] = dis.readInt();
|
||||
pos += 4;
|
||||
}
|
||||
|
||||
//
|
||||
// Put the break rule source into a String
|
||||
//
|
||||
if (pos > This.fHeader.fRuleSource) {
|
||||
throw new IOException("Break iterator Rule data corrupt");
|
||||
}
|
||||
dis.skip(This.fHeader.fRuleSource - pos);
|
||||
pos = This.fHeader.fRuleSource;
|
||||
StringBuffer sb = new StringBuffer(This.fHeader.fRuleSourceLen / 2);
|
||||
for (i=0; i<This.fHeader.fRuleSourceLen; i+=2) {
|
||||
sb.append(dis.readChar());
|
||||
pos += 2;
|
||||
}
|
||||
This.fRuleSource = sb.toString();
|
||||
|
||||
|
||||
return This;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Debug function to display the break iterator data. */
|
||||
void dump() {
|
||||
System.out.println("RBBI Data Wrapper dump ...");
|
||||
System.out.println("Source Rules: " + fRuleSource);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
String s;
|
||||
if (args.length == 0) {
|
||||
s = "icudt28b_char.brk";
|
||||
} else {
|
||||
s = args[0];
|
||||
}
|
||||
System.out.println("RBBIDataWrapper.main(" + s + ") ");
|
||||
try {
|
||||
RBBIDataWrapper This = RBBIDataWrapper.get(s);
|
||||
This.dump();
|
||||
}
|
||||
catch (Exception e) {
|
||||
System.out.println("Exception: " + e.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -16,6 +16,42 @@ import java.text.StringCharacterIterator;
|
|||
* Window - Preferences - Java - Code Generation - Code and Comments
|
||||
*/
|
||||
public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
||||
|
||||
private static final int START_STATE = 1; // The state number of the starting state
|
||||
private static final int STOP_STATE = 0; // The state-transition value indicating "stop"
|
||||
|
||||
/**
|
||||
* The character iterator through which this BreakIterator accesses the text
|
||||
* @internal
|
||||
*/
|
||||
private CharacterIterator fText;
|
||||
|
||||
/**
|
||||
* The rule data for this BreakIterator instance
|
||||
* @internal
|
||||
*/
|
||||
private RBBIDataWrapper fData;
|
||||
|
||||
/** Index of the Rule {tag} values for the most recent match.
|
||||
* @internal
|
||||
*/
|
||||
private int fLastRuleStatusIndex;
|
||||
|
||||
/**
|
||||
* Rule tag value valid flag.
|
||||
* Some iterator operations don't intrinsically set the correct tag value.
|
||||
* This flag lets us lazily compute the value if we are ever asked for it.
|
||||
* @internal
|
||||
*/
|
||||
private boolean fLastStatusIndexValid;
|
||||
|
||||
/**
|
||||
* Debugging flag. Trace operation of state machine when true.
|
||||
* @internal
|
||||
*/
|
||||
public static boolean fTrace;
|
||||
|
||||
|
||||
//=======================================================================
|
||||
// boilerplate
|
||||
//=======================================================================
|
||||
|
@ -42,11 +78,16 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the description used to create this iterator
|
||||
* Returns the description (rules) used to create this iterator.
|
||||
* (In ICU4C, the same function is RuleBasedBreakIterator::getRules())
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public String toString() {
|
||||
return ""; // TODO:
|
||||
String retStr = null;
|
||||
if (fData != null) {
|
||||
retStr = fData.fRuleSource;
|
||||
}
|
||||
return retStr;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -70,8 +111,16 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
public int first() {
|
||||
return 0; // TODO;
|
||||
fLastRuleStatusIndex = 0;
|
||||
fLastStatusIndexValid = true;
|
||||
if (fText == null) {
|
||||
return BreakIterator.DONE;
|
||||
}
|
||||
fText.first();
|
||||
return fText.getIndex();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the current iteration position to the end of the text.
|
||||
* (i.e., the CharacterIterator's ending offset).
|
||||
|
@ -79,8 +128,26 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
public int last() {
|
||||
return 0; // TODO:
|
||||
if (fText == null) {
|
||||
fLastRuleStatusIndex = 0;
|
||||
fLastStatusIndexValid = true;
|
||||
return BreakIterator.DONE;
|
||||
}
|
||||
|
||||
// I'm not sure why, but t.last() returns the offset of the last character,
|
||||
// rather than the past-the-end offset
|
||||
//
|
||||
// (It's so a loop like for(p=it.last(); p!=DONE; p=it.previous()) ...
|
||||
// will work correctly.)
|
||||
|
||||
|
||||
fLastStatusIndexValid = false;
|
||||
int pos = fText.getEndIndex();
|
||||
fText.setIndex(pos);
|
||||
return pos;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Advances the iterator either forward or backward the specified number of steps.
|
||||
* Negative values move backward, and positive values move forward. This is
|
||||
|
@ -92,23 +159,88 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
public int next(int n) {
|
||||
return 0; // TODO:
|
||||
int result = current();
|
||||
while (n > 0) {
|
||||
result = handleNext();
|
||||
--n;
|
||||
}
|
||||
while (n < 0) {
|
||||
result = previous();
|
||||
++n;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Advances the iterator to the next boundary position.
|
||||
* @return The position of the first boundary after this one.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public int next() {
|
||||
return 0; // TODO:
|
||||
return handleNext();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Advances the iterator backwards, to the last boundary preceding this one.
|
||||
* Moves the iterator backwards, to the last boundary preceding this one.
|
||||
* @return The position of the last boundary position preceding this one.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public int previous() {
|
||||
return 0; // TODO:
|
||||
// if we're already sitting at the beginning of the text, return DONE
|
||||
if (fText == null || current() == fText.getBeginIndex()) {
|
||||
fLastRuleStatusIndex = 0;
|
||||
fLastStatusIndexValid = true;
|
||||
return BreakIterator.DONE;
|
||||
}
|
||||
|
||||
if (fData.fSRTable != null || fData.fSFTable != null) {
|
||||
return handlePrevious(fData.fRTable);
|
||||
}
|
||||
|
||||
// old rule syntax
|
||||
// set things up. handlePrevious() will back us up to some valid
|
||||
// break position before the current position (we back our internal
|
||||
// iterator up one step to prevent handlePrevious() from returning
|
||||
// the current position), but not necessarily the last one before
|
||||
// where we started
|
||||
|
||||
int start = current();
|
||||
|
||||
CIPrevious32(fText);
|
||||
int lastResult = handlePrevious();
|
||||
int result = lastResult;
|
||||
int lastTag = 0;
|
||||
boolean breakTagValid = false;
|
||||
|
||||
// iterate forward from the known break position until we pass our
|
||||
// starting point. The last break position before the starting
|
||||
// point is our return value
|
||||
|
||||
for (;;) {
|
||||
result = handleNext();
|
||||
if (result == BreakIterator.DONE || result >= start) {
|
||||
break;
|
||||
}
|
||||
lastResult = result;
|
||||
lastTag = fLastRuleStatusIndex;
|
||||
breakTagValid = true;
|
||||
}
|
||||
|
||||
// fLastBreakTag wants to have the value for section of text preceding
|
||||
// the result position that we are to return (in lastResult.) If
|
||||
// the backwards rules overshot and the above loop had to do two or more
|
||||
// handleNext()s to move up to the desired return position, we will have a valid
|
||||
// tag value. But, if handlePrevious() took us to exactly the correct result positon,
|
||||
// we wont have a tag value for that position, which is only set by handleNext().
|
||||
|
||||
// set the current iteration position to be the last break position
|
||||
// before where we started, and then return that value
|
||||
fText.setIndex(lastResult);
|
||||
fLastRuleStatusIndex = lastTag; // for use by getRuleStatus()
|
||||
fLastStatusIndexValid = breakTagValid;
|
||||
return lastResult;
|
||||
}
|
||||
/**
|
||||
* Sets the iterator to refer to the first boundary position following
|
||||
|
@ -218,7 +350,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
public CharacterIterator getText() {
|
||||
return new StringCharacterIterator("");
|
||||
return fText;
|
||||
}
|
||||
|
||||
|
||||
|
@ -229,6 +361,66 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
public void setText(CharacterIterator newText) {
|
||||
fText = newText;
|
||||
this.first();
|
||||
}
|
||||
|
||||
|
||||
private static int CINext32(CharacterIterator ci) {
|
||||
int retVal = 0;
|
||||
char cLead = ci.next();
|
||||
retVal = (int)cLead;
|
||||
if (UTF16.isLeadSurrogate(cLead)) {
|
||||
char cTrail = ci.next();
|
||||
if (UTF16.isTrailSurrogate(cTrail)) {
|
||||
retVal = ((int)cLead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10 +
|
||||
((int)cTrail - UTF16.TRAIL_SURROGATE_MIN_VALUE);
|
||||
} else {
|
||||
ci.previous();
|
||||
}
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
private static int CIPrevious32(CharacterIterator ci) {
|
||||
int retVal = 0;
|
||||
char cTrail = ci.previous();
|
||||
retVal = (int)cTrail;
|
||||
if (UTF16.isTrailSurrogate(cTrail)) {
|
||||
char cLead = ci.previous();
|
||||
if (UTF16.isLeadSurrogate(cLead)) {
|
||||
retVal = ((int)cLead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10 +
|
||||
((int)cTrail - UTF16.TRAIL_SURROGATE_MIN_VALUE);
|
||||
} else {
|
||||
ci.next();
|
||||
}
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
/**
|
||||
* Internal implementation of next() for RBBI.
|
||||
* @internal
|
||||
*/
|
||||
private int handleNext() {
|
||||
// TODO:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
private int handlePrevious() {
|
||||
// TODO:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
private int handlePrevious(short statetable[]) {
|
||||
// TODO:
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue