mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-20 20:19:32 +00:00
ICU-12507 rbbi, switch impl from UTrie to UTrie2.
X-SVN-Rev: 40270
This commit is contained in:
commit
1292197198
12 changed files with 138 additions and 277 deletions
|
@ -1078,7 +1078,7 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
|
|||
// Note: the 16 in UTRIE_GET16 refers to the size of the data being returned,
|
||||
// not the size of the character going in, which is a UChar32.
|
||||
//
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
category = UTRIE2_GET16(fData->fTrie, c);
|
||||
|
||||
// Check the dictionary bit in the character's category.
|
||||
// Counter is only used by dictionary based iterators (subclasses).
|
||||
|
@ -1275,7 +1275,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
// Note: the 16 in UTRIE_GET16 refers to the size of the data being returned,
|
||||
// not the size of the character going in, which is a UChar32.
|
||||
//
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
category = UTRIE2_GET16(fData->fTrie, c);
|
||||
|
||||
// Check the dictionary bit in the character's category.
|
||||
// Counter is only used by dictionary based iterators (subclasses).
|
||||
|
@ -1510,26 +1510,6 @@ BreakIterator * RuleBasedBreakIterator::createBufferClone(void * /*stackBuffer*
|
|||
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------
|
||||
//
|
||||
// isDictionaryChar Return true if the category lookup for this char
|
||||
// indicates that it is in the set of dictionary lookup
|
||||
// chars.
|
||||
//
|
||||
// This function is intended for use by dictionary based
|
||||
// break iterators.
|
||||
//
|
||||
//-------------------------------------------------------------------------------
|
||||
/*UBool RuleBasedBreakIterator::isDictionaryChar(UChar32 c) {
|
||||
if (fData == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
uint16_t category;
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
return (category & 0x4000) != 0;
|
||||
}*/
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------
|
||||
//
|
||||
// checkDictionary This function handles all processing of characters in
|
||||
|
@ -1569,7 +1549,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
|
|||
int32_t foundBreakCount = 0;
|
||||
UChar32 c = utext_current32(fText);
|
||||
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
category = UTRIE2_GET16(fData->fTrie, c);
|
||||
|
||||
// Is the character we're starting on a dictionary character? If so, we
|
||||
// need to back up to include the entire run; otherwise the results of
|
||||
|
@ -1581,7 +1561,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
|
|||
do {
|
||||
utext_next32(fText); // TODO: recast to work directly with postincrement.
|
||||
c = utext_current32(fText);
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
category = UTRIE2_GET16(fData->fTrie, c);
|
||||
} while (c != U_SENTINEL && (category & 0x4000));
|
||||
// Back up to the last dictionary character
|
||||
rangeEnd = (int32_t)UTEXT_GETNATIVEINDEX(fText);
|
||||
|
@ -1597,7 +1577,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
|
|||
else {
|
||||
do {
|
||||
c = UTEXT_PREVIOUS32(fText);
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
category = UTRIE2_GET16(fData->fTrie, c);
|
||||
}
|
||||
while (c != U_SENTINEL && (category & 0x4000));
|
||||
// Back up to the last dictionary character
|
||||
|
@ -1611,7 +1591,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
|
|||
}
|
||||
rangeStart = (int32_t)UTEXT_GETNATIVEINDEX(fText);;
|
||||
}
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
category = UTRIE2_GET16(fData->fTrie, c);
|
||||
}
|
||||
|
||||
// Loop through the text, looking for ranges of dictionary characters.
|
||||
|
@ -1622,13 +1602,13 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
|
|||
if (reverse) {
|
||||
utext_setNativeIndex(fText, rangeStart);
|
||||
c = utext_current32(fText);
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
category = UTRIE2_GET16(fData->fTrie, c);
|
||||
}
|
||||
while(U_SUCCESS(status)) {
|
||||
while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) < rangeEnd && (category & 0x4000) == 0) {
|
||||
utext_next32(fText); // TODO: tweak for post-increment operation
|
||||
c = utext_current32(fText);
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
category = UTRIE2_GET16(fData->fTrie, c);
|
||||
}
|
||||
if (current >= rangeEnd) {
|
||||
break;
|
||||
|
@ -1646,7 +1626,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
|
|||
|
||||
// Reload the loop variables for the next go-round
|
||||
c = utext_current32(fText);
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
category = UTRIE2_GET16(fData->fTrie, c);
|
||||
}
|
||||
|
||||
// If we found breaks, build a new break cache. The first and last entries must
|
||||
|
|
|
@ -23,23 +23,6 @@
|
|||
#include "uassert.h"
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// Trie access folding function. Copied as-is from properties code in uchar.c
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
U_CDECL_BEGIN
|
||||
static int32_t U_CALLCONV
|
||||
getFoldingOffset(uint32_t data) {
|
||||
/* if bit 15 is set, then the folding offset is in bits 14..0 of the 16-bit trie result */
|
||||
if(data&0x8000) {
|
||||
return (int32_t)(data&0x7fff);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
@ -71,9 +54,8 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
|
|||
dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk "
|
||||
dh->info.dataFormat[1] == 0x72 &&
|
||||
dh->info.dataFormat[2] == 0x6b &&
|
||||
dh->info.dataFormat[3] == 0x20)
|
||||
// Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is
|
||||
// validated when checking that.
|
||||
dh->info.dataFormat[3] == 0x20 &&
|
||||
isDataVersionAcceptable(dh->info.formatVersion))
|
||||
) {
|
||||
status = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
|
@ -84,6 +66,11 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
|
|||
fUDataMem = udm;
|
||||
}
|
||||
|
||||
UBool RBBIDataWrapper::isDataVersionAcceptable(const UVersionInfo version) {
|
||||
return RBBI_DATA_FORMAT_VERSION[0] == version[0];
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// init(). Does most of the work of construction, shared between the
|
||||
|
@ -98,6 +85,7 @@ void RBBIDataWrapper::init0() {
|
|||
fSafeRevTable = NULL;
|
||||
fRuleSource = NULL;
|
||||
fRuleStatusTable = NULL;
|
||||
fTrie = NULL;
|
||||
fUDataMem = NULL;
|
||||
fRefCount = 0;
|
||||
fDontFreeData = TRUE;
|
||||
|
@ -108,8 +96,7 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
|
|||
return;
|
||||
}
|
||||
fHeader = data;
|
||||
if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3)
|
||||
{
|
||||
if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) {
|
||||
status = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
@ -132,15 +119,14 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
|
|||
}
|
||||
|
||||
|
||||
utrie_unserialize(&fTrie,
|
||||
(uint8_t *)data + fHeader->fTrie,
|
||||
fHeader->fTrieLen,
|
||||
&status);
|
||||
fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
|
||||
(uint8_t *)data + fHeader->fTrie,
|
||||
fHeader->fTrieLen,
|
||||
NULL, // *actual length
|
||||
&status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
fTrie.getFoldingOffset=getFoldingOffset;
|
||||
|
||||
|
||||
fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource);
|
||||
fRuleString.setTo(TRUE, fRuleSource, -1);
|
||||
|
@ -165,6 +151,8 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
|
|||
//-----------------------------------------------------------------------------
|
||||
RBBIDataWrapper::~RBBIDataWrapper() {
|
||||
U_ASSERT(fRefCount == 0);
|
||||
utrie2_close(fTrie);
|
||||
fTrie = NULL;
|
||||
if (fUDataMem) {
|
||||
udata_close(fUDataMem);
|
||||
} else if (!fDontFreeData) {
|
||||
|
@ -323,7 +311,7 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
|
|||
pInfo->dataFormat[1]==0x72 &&
|
||||
pInfo->dataFormat[2]==0x6b &&
|
||||
pInfo->dataFormat[3]==0x20 &&
|
||||
pInfo->formatVersion[0]==3 )) {
|
||||
RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) {
|
||||
udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
|
||||
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
||||
pInfo->dataFormat[2], pInfo->dataFormat[3],
|
||||
|
@ -344,17 +332,11 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
|
|||
//
|
||||
// Get the RBBI Data Header, and check that it appears to be OK.
|
||||
//
|
||||
// Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
|
||||
// an int32_t with a value of 1. Starting with ICU 3.4,
|
||||
// RBBI's fDataFormat matches the dataFormat field from the
|
||||
// UDataInfo header, four int8_t bytes. The value is {3,1,0,0}
|
||||
//
|
||||
const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
|
||||
RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
|
||||
if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
|
||||
rbbiDH->fFormatVersion[0] != 3 ||
|
||||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader))
|
||||
{
|
||||
!RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) ||
|
||||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) {
|
||||
udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
|
||||
*status=U_UNSUPPORTED_ERROR;
|
||||
return 0;
|
||||
|
@ -451,8 +433,8 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
|
|||
}
|
||||
|
||||
// Trie table for character categories
|
||||
utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
|
||||
outBytes+ds->readUInt32(rbbiDH->fTrie), status);
|
||||
utrie2_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
|
||||
outBytes+ds->readUInt32(rbbiDH->fTrie), status);
|
||||
|
||||
// Source Rules Text. It's UChar data
|
||||
ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
|
||||
|
|
|
@ -51,22 +51,23 @@ ubrk_swap(const UDataSwapper *ds,
|
|||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uversion.h"
|
||||
#include "umutex.h"
|
||||
#include "utrie.h"
|
||||
#include "utrie2.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// The current RBBI data format version.
|
||||
static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {4, 0, 0, 0};
|
||||
|
||||
/*
|
||||
* The following structs map exactly onto the raw data from ICU common data file.
|
||||
*/
|
||||
struct RBBIDataHeader {
|
||||
uint32_t fMagic; /* == 0xb1a0 */
|
||||
uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
|
||||
UVersionInfo fFormatVersion; /* Data Format. Same as the value in struct UDataInfo */
|
||||
/* if there is one associated with this data. */
|
||||
/* (version originates in rbbi, is copied to UDataInfo) */
|
||||
/* For ICU 3.2 and earlier, this field was */
|
||||
/* uint32_t fVersion */
|
||||
/* with a value of 1. */
|
||||
uint32_t fLength; /* Total length in bytes of this RBBI Data, */
|
||||
/* including all sections, not just the header. */
|
||||
uint32_t fCatCount; /* Number of character categories. */
|
||||
|
@ -152,6 +153,8 @@ public:
|
|||
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
|
||||
~RBBIDataWrapper();
|
||||
|
||||
static UBool isDataVersionAcceptable(const UVersionInfo version);
|
||||
|
||||
void init0();
|
||||
void init(const RBBIDataHeader *data, UErrorCode &status);
|
||||
RBBIDataWrapper *addReference();
|
||||
|
@ -181,7 +184,7 @@ public:
|
|||
/* number of int32_t values in the rule status table. Used to sanity check indexing */
|
||||
int32_t fStatusMaxIdx;
|
||||
|
||||
UTrie fTrie;
|
||||
UTrie2 *fTrie;
|
||||
|
||||
private:
|
||||
u_atomic_int32_t fRefCount;
|
||||
|
|
|
@ -177,10 +177,10 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
|||
|
||||
|
||||
data->fMagic = 0xb1a0;
|
||||
data->fFormatVersion[0] = 3;
|
||||
data->fFormatVersion[1] = 1;
|
||||
data->fFormatVersion[2] = 0;
|
||||
data->fFormatVersion[3] = 0;
|
||||
data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
|
||||
data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
|
||||
data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
|
||||
data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
|
||||
data->fLength = totalSize;
|
||||
data->fCatCount = fSetBuilder->getNumCharCategories();
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/uniset.h"
|
||||
#include "utrie.h"
|
||||
#include "utrie2.h"
|
||||
#include "uvector.h"
|
||||
#include "uassert.h"
|
||||
#include "cmemory.h"
|
||||
|
@ -44,43 +44,6 @@
|
|||
#include "rbbisetb.h"
|
||||
#include "rbbinode.h"
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// getFoldedRBBIValue Call-back function used during building of Trie table.
|
||||
// Folding value: just store the offset (16 bits)
|
||||
// if there is any non-0 entry.
|
||||
// (It'd really be nice if the Trie builder would provide a
|
||||
// simple default, so this function could go away from here.)
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
/* folding value: just store the offset (16 bits) if there is any non-0 entry */
|
||||
U_CDECL_BEGIN
|
||||
static uint32_t U_CALLCONV
|
||||
getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
|
||||
uint32_t value;
|
||||
UChar32 limit;
|
||||
UBool inBlockZero;
|
||||
|
||||
limit=start+0x400;
|
||||
while(start<limit) {
|
||||
value=utrie_get32(trie, start, &inBlockZero);
|
||||
if(inBlockZero) {
|
||||
start+=UTRIE_DATA_BLOCK_LENGTH;
|
||||
} else if(value!=0) {
|
||||
return (uint32_t)(offset|0x8000);
|
||||
} else {
|
||||
++start;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
@ -116,7 +79,7 @@ RBBISetBuilder::~RBBISetBuilder()
|
|||
delete r;
|
||||
}
|
||||
|
||||
utrie_close(fTrie);
|
||||
utrie2_close(fTrie);
|
||||
}
|
||||
|
||||
|
||||
|
@ -287,33 +250,30 @@ void RBBISetBuilder::build() {
|
|||
// Build the Trie table for mapping UChar32 values to the corresponding
|
||||
// range group number
|
||||
//
|
||||
fTrie = utrie_open(NULL, // Pre-existing trie to be filled in
|
||||
NULL, // Data array (utrie will allocate one)
|
||||
100000, // Max Data Length
|
||||
0, // Initial value for all code points
|
||||
0, // Lead surrogate unit value
|
||||
TRUE); // Keep Latin 1 in separately
|
||||
|
||||
fTrie = utrie2_open(0, // Initial value for all code points
|
||||
0, // errorValue
|
||||
fStatus);
|
||||
|
||||
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
|
||||
utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE);
|
||||
utrie2_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar, rlRange->fNum, TRUE, fStatus);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// getTrieSize() Return the size that will be required to serialize the Trie.
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
int32_t RBBISetBuilder::getTrieSize() /*const*/ {
|
||||
fTrieSize = utrie_serialize(fTrie,
|
||||
NULL, // Buffer
|
||||
0, // Capacity
|
||||
getFoldedRBBIValue,
|
||||
TRUE, // Reduce to 16 bits
|
||||
fStatus);
|
||||
utrie2_freeze(fTrie, UTRIE2_16_VALUE_BITS, fStatus);
|
||||
fTrieSize = utrie2_serialize(fTrie,
|
||||
NULL, // Buffer
|
||||
0, // Capacity
|
||||
fStatus);
|
||||
if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
|
||||
*fStatus = U_ZERO_ERROR;
|
||||
}
|
||||
// RBBIDebugPrintf("Trie table size is %d\n", trieSize);
|
||||
return fTrieSize;
|
||||
}
|
||||
|
@ -327,12 +287,10 @@ int32_t RBBISetBuilder::getTrieSize() /*const*/ {
|
|||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
void RBBISetBuilder::serializeTrie(uint8_t *where) {
|
||||
utrie_serialize(fTrie,
|
||||
where, // Buffer
|
||||
fTrieSize, // Capacity
|
||||
getFoldedRBBIValue,
|
||||
TRUE, // Reduce to 16 bits
|
||||
fStatus);
|
||||
utrie2_serialize(fTrie,
|
||||
where, // Buffer
|
||||
fTrieSize, // Capacity
|
||||
fStatus);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
|
|
@ -15,10 +15,9 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "rbbirb.h"
|
||||
#include "utrie2.h"
|
||||
#include "uvector.h"
|
||||
|
||||
struct UNewTrie;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//
|
||||
|
@ -109,7 +108,7 @@ private:
|
|||
|
||||
RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
|
||||
|
||||
UNewTrie *fTrie; // The mapping TRIE that is the end result of processing
|
||||
UTrie2 *fTrie; // The mapping TRIE that is the end result of processing
|
||||
uint32_t fTrieSize; // the Unicode Sets.
|
||||
|
||||
// Groups correspond to character categories -
|
||||
|
|
|
@ -32,8 +32,6 @@
|
|||
#include "unicode/uchriter.h"
|
||||
|
||||
|
||||
struct UTrie;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/** @internal */
|
||||
|
|
|
@ -13,10 +13,9 @@ import java.io.IOException;
|
|||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
import com.ibm.icu.impl.CharTrie;
|
||||
import com.ibm.icu.impl.ICUBinary;
|
||||
import com.ibm.icu.impl.ICUBinary.Authenticate;
|
||||
import com.ibm.icu.impl.Trie;
|
||||
import com.ibm.icu.impl.Trie2;
|
||||
|
||||
/**
|
||||
* <p>Internal class used for Rule Based Break Iterators</p>
|
||||
|
@ -33,20 +32,20 @@ final class RBBIDataWrapper {
|
|||
short fRTable[];
|
||||
short fSFTable[];
|
||||
short fSRTable[];
|
||||
CharTrie fTrie;
|
||||
Trie2 fTrie;
|
||||
String fRuleSource;
|
||||
int fStatusTable[];
|
||||
|
||||
private boolean isBigEndian;
|
||||
|
||||
static final int DATA_FORMAT = 0x42726b20; // "Brk "
|
||||
static final int FORMAT_VERSION = 0x03010000; // 3.1
|
||||
static final int DATA_FORMAT = 0x42726b20; // "Brk "
|
||||
static final int FORMAT_VERSION = 0x04000000; // 4.0.0.0
|
||||
|
||||
private static final class IsAcceptable implements Authenticate {
|
||||
// @Override when we switch to Java 6
|
||||
@Override
|
||||
public boolean isDataVersionAcceptable(byte version[]) {
|
||||
return version[0] == (FORMAT_VERSION >>> 24);
|
||||
int intVersion = (version[0] << 24) + (version[1] << 16) + (version[2] << 8) + version[3];
|
||||
return intVersion == FORMAT_VERSION;
|
||||
}
|
||||
}
|
||||
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
|
||||
|
@ -105,7 +104,6 @@ final class RBBIDataWrapper {
|
|||
*/
|
||||
final static class RBBIDataHeader {
|
||||
int fMagic; // == 0xb1a0
|
||||
int fVersion; // == 1 (for ICU 3.2 and earlier.
|
||||
byte[] fFormatVersion; // For ICU 3.4 and later.
|
||||
int fLength; // Total length in bytes of this RBBI Data,
|
||||
// including all sections, not just the header.
|
||||
|
@ -147,19 +145,6 @@ final class RBBIDataWrapper {
|
|||
return ROW_DATA + state * (fHeader.fCatCount + 4);
|
||||
}
|
||||
|
||||
static class TrieFoldingFunc implements Trie.DataManipulate {
|
||||
@Override
|
||||
public int getFoldingOffset(int data) {
|
||||
if ((data & 0x8000) != 0) {
|
||||
return data & 0x7fff;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
static TrieFoldingFunc fTrieFoldingFunc = new TrieFoldingFunc();
|
||||
|
||||
|
||||
RBBIDataWrapper() {
|
||||
}
|
||||
|
||||
|
@ -176,10 +161,6 @@ final class RBBIDataWrapper {
|
|||
// Read in the RBBI data header...
|
||||
This.fHeader = new RBBIDataHeader();
|
||||
This.fHeader.fMagic = bytes.getInt();
|
||||
// Read the same 4 bytes as an int and as a byte array: The data format could be
|
||||
// the old fVersion=1 (TODO: probably not with a real ICU data header?)
|
||||
// or the new fFormatVersion=3.x.
|
||||
This.fHeader.fVersion = bytes.getInt(bytes.position());
|
||||
This.fHeader.fFormatVersion[0] = bytes.get();
|
||||
This.fHeader.fFormatVersion[1] = bytes.get();
|
||||
This.fHeader.fFormatVersion[2] = bytes.get();
|
||||
|
@ -203,10 +184,7 @@ final class RBBIDataWrapper {
|
|||
ICUBinary.skipBytes(bytes, 6 * 4); // uint32_t fReserved[6];
|
||||
|
||||
|
||||
if (This.fHeader.fMagic != 0xb1a0 ||
|
||||
! (This.fHeader.fVersion == 1 || // ICU 3.2 and earlier
|
||||
This.fHeader.fFormatVersion[0] == 3) // ICU 3.4
|
||||
) {
|
||||
if (This.fHeader.fMagic != 0xb1a0 || !IS_ACCEPTABLE.isDataVersionAcceptable(This.fHeader.fFormatVersion)) {
|
||||
throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
|
||||
}
|
||||
|
||||
|
@ -286,7 +264,7 @@ final class RBBIDataWrapper {
|
|||
// as we don't go more than 100 bytes past the
|
||||
// end of the TRIE.
|
||||
|
||||
This.fTrie = new CharTrie(bytes, fTrieFoldingFunc); // Deserialize the TRIE, leaving buffer
|
||||
This.fTrie = Trie2.createFromSerialized(bytes); // Deserialize the TRIE, leaving buffer
|
||||
// at an unknown position, preceding the
|
||||
// padding between TRIE and following section.
|
||||
|
||||
|
@ -461,7 +439,7 @@ final class RBBIDataWrapper {
|
|||
out.println("\nCharacter Categories");
|
||||
out.println("--------------------");
|
||||
for (char32 = 0; char32<=0x10ffff; char32++) {
|
||||
category = fTrie.getCodePointValue(char32);
|
||||
category = fTrie.get(char32);
|
||||
category &= ~0x4000; // Mask off dictionary bit.
|
||||
if (category < 0 || category > fHeader.fCatCount) {
|
||||
out.println("Error, bad category " + Integer.toHexString(category) +
|
||||
|
|
|
@ -25,17 +25,17 @@ class RBBIRuleBuilder {
|
|||
// This is the main class for building (compiling) break rules into the tables
|
||||
// required by the runtime RBBI engine.
|
||||
//
|
||||
|
||||
|
||||
String fDebugEnv; // controls debug trace output
|
||||
String fRules; // The rule string that we are compiling
|
||||
RBBIRuleScanner fScanner; // The scanner.
|
||||
|
||||
|
||||
|
||||
//
|
||||
// There are four separate parse trees generated, one for each of the
|
||||
// forward rules, reverse rules, safe forward rules and safe reverse rules.
|
||||
// This array references the root of each of the trees.
|
||||
//
|
||||
//
|
||||
RBBINode[] fTreeRoots = new RBBINode[4];
|
||||
static final int fForwardTree = 0; // Indexes into the above fTreeRoots array
|
||||
static final int fReverseTree = 1; // for each of the trees.
|
||||
|
@ -69,7 +69,7 @@ class RBBIRuleBuilder {
|
|||
// Map Value is the runtime array index.
|
||||
|
||||
List<Integer> fRuleStatusVals; // List of Integer objects. Has same layout as the
|
||||
// runtime array of status (tag) values -
|
||||
// runtime array of status (tag) values -
|
||||
// number of values in group 1
|
||||
// first status value in group 1
|
||||
// 2nd status value in group 1
|
||||
|
@ -84,50 +84,50 @@ class RBBIRuleBuilder {
|
|||
//
|
||||
static final int U_BRK_ERROR_START = 0x10200;
|
||||
/**< Start of codes indicating Break Iterator failures */
|
||||
|
||||
|
||||
static final int U_BRK_INTERNAL_ERROR = 0x10201;
|
||||
/**< An internal error (bug) was detected. */
|
||||
|
||||
|
||||
static final int U_BRK_HEX_DIGITS_EXPECTED = 0x10202;
|
||||
/**< Hex digits expected as part of a escaped char in a rule. */
|
||||
|
||||
|
||||
static final int U_BRK_SEMICOLON_EXPECTED = 0x10203;
|
||||
/**< Missing ';' at the end of a RBBI rule. */
|
||||
|
||||
|
||||
static final int U_BRK_RULE_SYNTAX = 0x10204;
|
||||
/**< Syntax error in RBBI rule. */
|
||||
|
||||
|
||||
static final int U_BRK_UNCLOSED_SET = 0x10205;
|
||||
/**< UnicodeSet within an RBBI rule missing a closing ']'. */
|
||||
|
||||
|
||||
static final int U_BRK_ASSIGN_ERROR = 0x10206;
|
||||
/**< Syntax error in RBBI rule assignment statement. */
|
||||
|
||||
|
||||
static final int U_BRK_VARIABLE_REDFINITION = 0x10207;
|
||||
/**< RBBI rule $Variable redefined. */
|
||||
|
||||
|
||||
static final int U_BRK_MISMATCHED_PAREN = 0x10208;
|
||||
/**< Mis-matched parentheses in an RBBI rule. */
|
||||
|
||||
|
||||
static final int U_BRK_NEW_LINE_IN_QUOTED_STRING = 0x10209;
|
||||
/**< Missing closing quote in an RBBI rule. */
|
||||
|
||||
|
||||
static final int U_BRK_UNDEFINED_VARIABLE = 0x1020a;
|
||||
/**< Use of an undefined $Variable in an RBBI rule. */
|
||||
|
||||
|
||||
static final int U_BRK_INIT_ERROR = 0x1020b;
|
||||
/**< Initialization failure. Probable missing ICU Data. */
|
||||
|
||||
|
||||
static final int U_BRK_RULE_EMPTY_SET = 0x1020c;
|
||||
/**< Rule contains an empty Unicode Set. */
|
||||
|
||||
|
||||
static final int U_BRK_UNRECOGNIZED_OPTION = 0x1020d;
|
||||
/**< !!option in RBBI rules not recognized. */
|
||||
|
||||
|
||||
static final int U_BRK_MALFORMED_RULE_TAG = 0x1020e;
|
||||
/**< The {nnn} tag on a rule is malformed */
|
||||
static final int U_BRK_MALFORMED_SET = 0x1020f;
|
||||
|
||||
|
||||
static final int U_BRK_ERROR_LIMIT = 0x10210;
|
||||
/**< This must always be the last value to indicate the limit for Break Iterator failures */
|
||||
|
||||
|
@ -196,7 +196,7 @@ class RBBIRuleBuilder {
|
|||
//
|
||||
int[] header = new int[RBBIDataWrapper.DH_SIZE]; // sizeof struct RBBIDataHeader
|
||||
header[RBBIDataWrapper.DH_MAGIC] = 0xb1a0;
|
||||
header[RBBIDataWrapper.DH_FORMATVERSION] = 0x03010000; // uint8_t fFormatVersion[4];
|
||||
header[RBBIDataWrapper.DH_FORMATVERSION] = RBBIDataWrapper.FORMAT_VERSION;
|
||||
header[RBBIDataWrapper.DH_LENGTH] = totalSize; // fLength, the total size of all rule sections.
|
||||
header[RBBIDataWrapper.DH_CATCOUNT] = fSetBuilder.getNumCharCategories(); // fCatCount.
|
||||
header[RBBIDataWrapper.DH_FTABLE] = headerSize; // fFTable
|
||||
|
|
|
@ -14,7 +14,8 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
import com.ibm.icu.impl.Assert;
|
||||
import com.ibm.icu.impl.IntTrieBuilder;
|
||||
import com.ibm.icu.impl.Trie2Writable;
|
||||
import com.ibm.icu.impl.Trie2_16;
|
||||
|
||||
//
|
||||
// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules
|
||||
|
@ -49,14 +50,14 @@ class RBBISetBuilder {
|
|||
RangeDescriptor() {
|
||||
fIncludesSets = new ArrayList<RBBINode>();
|
||||
}
|
||||
|
||||
|
||||
RangeDescriptor(RangeDescriptor other) {
|
||||
fStartChar = other.fStartChar;
|
||||
fEndChar = other.fEndChar;
|
||||
fNum = other.fNum;
|
||||
fIncludesSets = new ArrayList<RBBINode>(other.fIncludesSets);
|
||||
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
//
|
||||
// RangeDescriptor::split()
|
||||
|
@ -65,20 +66,20 @@ class RBBISetBuilder {
|
|||
void split(int where) {
|
||||
Assert.assrt(where>fStartChar && where<=fEndChar);
|
||||
RangeDescriptor nr = new RangeDescriptor(this);
|
||||
|
||||
|
||||
// RangeDescriptor copy constructor copies all fields.
|
||||
// Only need to update those that are different after the split.
|
||||
nr.fStartChar = where;
|
||||
this.fEndChar = where-1;
|
||||
nr.fNext = this.fNext;
|
||||
this.fNext = nr;
|
||||
|
||||
|
||||
// TODO: fIncludesSets is not updated. Check it out.
|
||||
// Probably because they haven't been populated yet,
|
||||
// Probably because they haven't been populated yet,
|
||||
// but still sloppy.
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
//
|
||||
// RangeDescriptor::setDictionaryFlag
|
||||
|
@ -95,11 +96,11 @@ class RBBISetBuilder {
|
|||
// TODO: a faster way would be to find the set node for
|
||||
// "dictionary" just once, rather than looking it
|
||||
// up by name every time.
|
||||
//
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
void setDictionaryFlag() {
|
||||
int i;
|
||||
|
||||
|
||||
for (i=0; i<this.fIncludesSets.size(); i++) {
|
||||
RBBINode usetNode = fIncludesSets.get(i);
|
||||
String setName = "";
|
||||
|
@ -119,12 +120,13 @@ class RBBISetBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
RBBIRuleBuilder fRB; // The RBBI Rule Compiler that owns us.
|
||||
RangeDescriptor fRangeList; // Head of the linked list of RangeDescriptors
|
||||
|
||||
IntTrieBuilder fTrie; // The mapping TRIE that is the end result of processing
|
||||
Trie2Writable fTrie; // The mapping TRIE that is the end result of processing
|
||||
// the Unicode Sets.
|
||||
Trie2_16 fFrozenTrie;
|
||||
|
||||
// Groups correspond to character categories -
|
||||
// groups of ranges that are in the same original UnicodeSets.
|
||||
|
@ -135,8 +137,8 @@ class RBBISetBuilder {
|
|||
int fGroupCount;
|
||||
|
||||
boolean fSawBOF;
|
||||
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// RBBISetBuilder Constructor
|
||||
|
@ -162,7 +164,7 @@ class RBBISetBuilder {
|
|||
// Initialize the process by creating a single range encompassing all characters
|
||||
// that is in no sets.
|
||||
//
|
||||
fRangeList = new RangeDescriptor();
|
||||
fRangeList = new RangeDescriptor();
|
||||
fRangeList.fStartChar = 0;
|
||||
fRangeList.fEndChar = 0x10ffff;
|
||||
|
||||
|
@ -245,7 +247,7 @@ class RBBISetBuilder {
|
|||
}
|
||||
if (rlRange.fNum == 0) {
|
||||
fGroupCount ++;
|
||||
rlRange.fNum = fGroupCount+2;
|
||||
rlRange.fNum = fGroupCount+2;
|
||||
rlRange.setDictionaryFlag();
|
||||
addValToSets(rlRange.fIncludesSets, fGroupCount+2);
|
||||
}
|
||||
|
@ -260,7 +262,7 @@ class RBBISetBuilder {
|
|||
// subtree for each UnicodeSet that contains the string {eof}
|
||||
// Because {bof} and {eof} are not characters in the normal sense,
|
||||
// they don't affect the computation of ranges or TRIE.
|
||||
|
||||
|
||||
String eofString = "eof";
|
||||
String bofString = "bof";
|
||||
|
||||
|
@ -279,67 +281,26 @@ class RBBISetBuilder {
|
|||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("rgroup")>=0) {printRangeGroups();}
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("esets")>=0) {printSets();}
|
||||
|
||||
fTrie = new Trie2Writable(0, // Initial value for all code points
|
||||
0); // Error value.
|
||||
|
||||
//IntTrieBuilder(int aliasdata[], int maxdatalength,
|
||||
// int initialvalue, int leadunitvalue,
|
||||
// boolean latin1linear)
|
||||
|
||||
fTrie = new IntTrieBuilder(null, // Data array (utrie will allocate one)
|
||||
100000, // Max Data Length
|
||||
0, // Initial value for all code points
|
||||
0, // Lead Surrogate unit value,
|
||||
true); // Keep Latin 1 in separately.
|
||||
|
||||
for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
|
||||
fTrie.setRange(rlRange.fStartChar, rlRange.fEndChar+1, rlRange.fNum, true);
|
||||
fTrie.setRange(rlRange.fStartChar, rlRange.fEndChar, rlRange.fNum, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// RBBIDataManipulate A little internal class needed only to wrap the
|
||||
// getFoldedValue() function needed for Trie table creation.
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
class RBBIDataManipulate implements IntTrieBuilder.DataManipulate {
|
||||
public int getFoldedValue(int start, int offset) {
|
||||
int value;
|
||||
int limit;
|
||||
boolean [] inBlockZero = new boolean[1];
|
||||
|
||||
limit = start + 0x400;
|
||||
while(start<limit) {
|
||||
value = fTrie.getValue(start, inBlockZero);
|
||||
if (inBlockZero[0]) {
|
||||
start += IntTrieBuilder.DATA_BLOCK_LENGTH;
|
||||
} else if (value != 0) {
|
||||
return offset | 0x08000;
|
||||
} else {
|
||||
++start;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
RBBIDataManipulate dm = new RBBIDataManipulate();
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// getTrieSize() Return the size that will be required to serialize the Trie.
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
int getTrieSize() {
|
||||
int size = 0;
|
||||
try {
|
||||
// The trie serialize function returns the size of the data written.
|
||||
// null output stream says give size only, don't actually write anything.
|
||||
size = fTrie.serialize(null, true, dm );
|
||||
} catch (IOException e) {
|
||||
Assert.assrt (false);
|
||||
if (fFrozenTrie == null) {
|
||||
fFrozenTrie = fTrie.toTrie2_16();
|
||||
fTrie = null;
|
||||
}
|
||||
return size;
|
||||
return fFrozenTrie.getSerializedLength();
|
||||
}
|
||||
|
||||
|
||||
|
@ -349,7 +310,11 @@ class RBBISetBuilder {
|
|||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
void serializeTrie(OutputStream os) throws IOException {
|
||||
fTrie.serialize(os, true, dm );
|
||||
if (fFrozenTrie == null) {
|
||||
fFrozenTrie = fTrie.toTrie2_16();
|
||||
fTrie = null;
|
||||
}
|
||||
fFrozenTrie.serialize(os);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
@ -416,7 +381,7 @@ class RBBISetBuilder {
|
|||
//------------------------------------------------------------------------
|
||||
//
|
||||
// getFirstChar Given a runtime RBBI character category, find
|
||||
// the first UChar32 that is in the set of chars
|
||||
// the first UChar32 that is in the set of chars
|
||||
// in the category.
|
||||
//------------------------------------------------------------------------
|
||||
int getFirstChar(int category) {
|
||||
|
|
|
@ -24,10 +24,10 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
import com.ibm.icu.impl.Assert;
|
||||
import com.ibm.icu.impl.CharTrie;
|
||||
import com.ibm.icu.impl.CharacterIteration;
|
||||
import com.ibm.icu.impl.ICUBinary;
|
||||
import com.ibm.icu.impl.ICUDebug;
|
||||
import com.ibm.icu.impl.Trie2;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
|
@ -495,7 +495,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
DictionaryBreakEngine.DequeI breaks = new DictionaryBreakEngine.DequeI();
|
||||
int foundBreakCount = 0;
|
||||
int c = CharacterIteration.current32(fText);
|
||||
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||
category = (short)fRData.fTrie.get(c);
|
||||
|
||||
// Is the character we're starting on a dictionary character? If so, we
|
||||
// need to back up to include the entire run; otherwise the results of
|
||||
|
@ -507,7 +507,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
do {
|
||||
CharacterIteration.next32(fText);
|
||||
c = CharacterIteration.current32(fText);
|
||||
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||
category = (short)fRData.fTrie.get(c);
|
||||
} while (c != CharacterIteration.DONE32 && ((category & 0x4000)) != 0);
|
||||
|
||||
// Back up to the last dictionary character
|
||||
|
@ -524,7 +524,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
else {
|
||||
do {
|
||||
c = CharacterIteration.previous32(fText);
|
||||
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||
category = (short)fRData.fTrie.get(c);
|
||||
}
|
||||
while (c != CharacterIteration.DONE32 && ((category & 0x4000) != 0));
|
||||
// Back up to the last dictionary character
|
||||
|
@ -538,7 +538,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
}
|
||||
rangeStart = fText.getIndex();
|
||||
}
|
||||
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||
category = (short)fRData.fTrie.get(c);
|
||||
}
|
||||
|
||||
|
||||
|
@ -550,14 +550,14 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
if (reverse) {
|
||||
fText.setIndex(rangeStart);
|
||||
c = CharacterIteration.current32(fText);
|
||||
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||
category = (short)fRData.fTrie.get(c);
|
||||
}
|
||||
LanguageBreakEngine lbe = null;
|
||||
while(true) {
|
||||
while((current = fText.getIndex()) < rangeEnd && (category & 0x4000) == 0) {
|
||||
CharacterIteration.next32(fText);
|
||||
c = CharacterIteration.current32(fText);
|
||||
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||
category = (short)fRData.fTrie.get(c);
|
||||
}
|
||||
if (current >= rangeEnd) {
|
||||
break;
|
||||
|
@ -577,7 +577,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
|
||||
// Reload the loop variables for the next go-round
|
||||
c = CharacterIteration.current32(fText);
|
||||
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||
category = (short)fRData.fTrie.get(c);
|
||||
}
|
||||
|
||||
// If we found breaks, build a new break cache. The first and last entries must
|
||||
|
@ -1285,7 +1285,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
|
||||
// caches for quicker access
|
||||
CharacterIterator text = fText;
|
||||
CharTrie trie = fRData.fTrie;
|
||||
Trie2 trie = fRData.fTrie;
|
||||
|
||||
// Set up the starting char
|
||||
int c = text.current();
|
||||
|
@ -1338,7 +1338,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
// look up the current character's character category, which tells us
|
||||
// which column in the state table to look at.
|
||||
//
|
||||
category = (short) trie.getCodePointValue(c);
|
||||
category = (short) trie.get(c);
|
||||
|
||||
// Check the dictionary bit in the character's category.
|
||||
// Counter is only used by dictionary based iterators (subclasses).
|
||||
|
@ -1483,10 +1483,8 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
mainLoop: for (;;) {
|
||||
if (c == DONE32) {
|
||||
// Reached end of input string.
|
||||
if (mode == RBBI_END || fRData.fHeader.fVersion == 1) {
|
||||
// Either this is the old (ICU 3.2 and earlier) format data which
|
||||
// does not support explicit support for matching {eof}, or
|
||||
// we have already done the {eof} iteration. Now is the time
|
||||
if (mode == RBBI_END) {
|
||||
// We have already done the {eof} iteration. Now is the time
|
||||
// to unconditionally bail out.
|
||||
if (result == initialPosition) {
|
||||
// Ran off start, no match found.
|
||||
|
@ -1504,7 +1502,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
// look up the current character's category, which tells us
|
||||
// which column in the state table to look at.
|
||||
//
|
||||
category = (short) fRData.fTrie.getCodePointValue(c);
|
||||
category = (short) fRData.fTrie.get(c);
|
||||
|
||||
// Check the dictionary bit in the character's category.
|
||||
// Counter is only used by dictionary based iterators (subclasses).
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d4b1866a85ceb079d912a3283e5ec6a7d6988df8c0e56e98fd67def82c35dcf3
|
||||
size 12225515
|
||||
oid sha256:f0d65ed59329e1eaae1813db0fa8e1236a3b58ddfa5e7e1ff33d4bea7eef3c31
|
||||
size 12226292
|
||||
|
|
Loading…
Add table
Reference in a new issue