mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 09:21:03 +00:00
ICU-3064 updated trie java port
X-SVN-Rev: 12880
This commit is contained in:
parent
95cf50134e
commit
0e57d10a60
5 changed files with 694 additions and 726 deletions
File diff suppressed because it is too large
Load diff
|
@ -5,8 +5,8 @@
|
|||
******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/IntTrieBuilder.java,v $
|
||||
* $Date: 2002/10/31 01:09:18 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2003/08/20 00:19:19 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -14,6 +14,7 @@
|
|||
package com.ibm.icu.impl;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
|
@ -31,7 +32,7 @@ import java.util.Arrays;
|
|||
* <LI>Smaller memory footprint.
|
||||
* </UL>
|
||||
* This is a direct port from the ICU4C version
|
||||
* @version $Revision: 1.3 $
|
||||
* @version $Revision: 1.4 $
|
||||
* @author Syn Wee Quek
|
||||
*/
|
||||
public class IntTrieBuilder extends TrieBuilder
|
||||
|
@ -47,6 +48,7 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
m_data_ = new int[m_dataCapacity_];
|
||||
System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
|
||||
m_initialValue_ = table.m_initialValue_;
|
||||
m_leadUnitValue_ = table.m_leadUnitValue_;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -58,10 +60,11 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
* @return updated table
|
||||
*/
|
||||
public IntTrieBuilder(int aliasdata[], int maxdatalength,
|
||||
int initialvalue, boolean latin1linear)
|
||||
int initialvalue, int leadunitvalue,
|
||||
boolean latin1linear)
|
||||
{
|
||||
super();
|
||||
if (maxdatalength < DATA_BLOCK_LENGTH_ || (latin1linear
|
||||
if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear
|
||||
&& maxdatalength < 1024)) {
|
||||
throw new IllegalArgumentException(
|
||||
"Argument maxdatalength is too small");
|
||||
|
@ -75,7 +78,7 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
}
|
||||
|
||||
// preallocate and reset the first data block (block index 0)
|
||||
int j = DATA_BLOCK_LENGTH_;
|
||||
int j = DATA_BLOCK_LENGTH;
|
||||
|
||||
if (latin1linear) {
|
||||
// preallocate and reset the first block (number 0) and Latin-1
|
||||
|
@ -87,7 +90,7 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
// do this at least for trie->index[0] even if that block is
|
||||
// only partly used for Latin-1
|
||||
m_index_[i ++] = j;
|
||||
j += DATA_BLOCK_LENGTH_;
|
||||
j += DATA_BLOCK_LENGTH;
|
||||
} while (i < (256 >> SHIFT_));
|
||||
}
|
||||
|
||||
|
@ -95,6 +98,7 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
// reset the initially allocated blocks to the initial value
|
||||
Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
|
||||
m_initialValue_ = initialvalue;
|
||||
m_leadUnitValue_ = leadunitvalue;
|
||||
m_dataCapacity_ = maxdatalength;
|
||||
m_isLatin1Linear_ = latin1linear;
|
||||
m_isCompacted_ = false;
|
||||
|
@ -246,13 +250,135 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
triedatamanipulate);
|
||||
}
|
||||
|
||||
// public methods -------------------------------------------------
|
||||
/**
|
||||
* Set a value in a range of code points [start..limit[.
|
||||
* All code points c with start <= c < limit will get the value if
|
||||
* overwrite is true or if the old value is the initial value.
|
||||
* @param start the first code point to get the value
|
||||
* @param limit one past the last code point to get the value
|
||||
* @param value the value
|
||||
* @param overwrite flag for whether old non-initial values are to be
|
||||
* overwritten
|
||||
* @return false if a failure occurred (illegal argument or data array
|
||||
* overrun)
|
||||
*/
|
||||
public boolean setRange(int start, int limit, int value,
|
||||
boolean overwrite)
|
||||
{
|
||||
// repeat value in [start..limit[
|
||||
// mark index values for repeat-data blocks by setting bit 31 of the
|
||||
// index values; fill around existing values if any, if(overwrite)
|
||||
|
||||
// valid, uncompacted trie and valid indexes?
|
||||
if (m_isCompacted_ || start < UCharacter.MIN_VALUE
|
||||
|| start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE
|
||||
|| limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (start == limit) {
|
||||
return true; // nothing to do
|
||||
}
|
||||
|
||||
if ((start & MASK_) != 0) {
|
||||
// set partial block at [start..following block boundary[
|
||||
int block = getDataBlock(start);
|
||||
if (block < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
|
||||
if (nextStart <= limit) {
|
||||
fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
|
||||
value, overwrite);
|
||||
start = nextStart;
|
||||
}
|
||||
else {
|
||||
fillBlock(block, start & MASK_, limit & MASK_,
|
||||
value, overwrite);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// number of positions in the last, partial block
|
||||
int rest = limit & MASK_;
|
||||
|
||||
// round down limit to a block boundary
|
||||
limit &= ~MASK_;
|
||||
|
||||
// iterate over all-value blocks
|
||||
int repeatBlock = 0;
|
||||
if (value == m_initialValue_) {
|
||||
// repeatBlock = 0; assigned above
|
||||
}
|
||||
else {
|
||||
repeatBlock = -1;
|
||||
}
|
||||
while (start < limit) {
|
||||
// get index value
|
||||
int block = m_index_[start >> SHIFT_];
|
||||
if (block > 0) {
|
||||
// already allocated, fill in value
|
||||
fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
|
||||
}
|
||||
else if (m_data_[-block] != value && (block == 0 || overwrite)) {
|
||||
// set the repeatBlock instead of the current block 0 or range
|
||||
// block
|
||||
if (repeatBlock >= 0) {
|
||||
m_index_[start >> SHIFT_] = -repeatBlock;
|
||||
}
|
||||
else {
|
||||
// create and set and fill the repeatBlock
|
||||
repeatBlock = getDataBlock(start);
|
||||
if (repeatBlock < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// set the negative block number to indicate that it is a
|
||||
// repeat block
|
||||
m_index_[start >> SHIFT_] = -repeatBlock;
|
||||
fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true);
|
||||
}
|
||||
}
|
||||
|
||||
start += DATA_BLOCK_LENGTH;
|
||||
}
|
||||
|
||||
if (rest > 0) {
|
||||
// set partial block at [last block boundary..limit[
|
||||
int block = getDataBlock(start);
|
||||
if (block < 0) {
|
||||
return false;
|
||||
}
|
||||
fillBlock(block, 0, rest, value, overwrite);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// protected data member ------------------------------------------------
|
||||
|
||||
protected int m_data_[];
|
||||
protected int m_initialValue_;
|
||||
|
||||
|
||||
// private data member ------------------------------------------------
|
||||
|
||||
private int m_leadUnitValue_;
|
||||
|
||||
// private methods ------------------------------------------------------
|
||||
|
||||
private int allocDataBlock()
|
||||
{
|
||||
int newBlock = m_dataLength_;
|
||||
int newTop = newBlock + DATA_BLOCK_LENGTH;
|
||||
if (newTop > m_dataCapacity_) {
|
||||
// out of memory in the data array
|
||||
return -1;
|
||||
}
|
||||
m_dataLength_ = newTop;
|
||||
return newBlock;
|
||||
}
|
||||
|
||||
/**
|
||||
* No error checking for illegal arguments.
|
||||
* @param ch codepoint to look for
|
||||
|
@ -267,18 +393,16 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
}
|
||||
|
||||
// allocate a new data block
|
||||
int newBlock = m_dataLength_;
|
||||
int newTop = newBlock + DATA_BLOCK_LENGTH_;
|
||||
if (newTop > m_dataCapacity_) {
|
||||
int newBlock = allocDataBlock();
|
||||
if (newBlock < 0) {
|
||||
// out of memory in the data array
|
||||
return -1;
|
||||
}
|
||||
m_dataLength_ = newTop;
|
||||
m_index_[ch] = newBlock;
|
||||
|
||||
// copy-on-write for a block from a setRange()
|
||||
Arrays.fill(m_data_, newBlock, newBlock + DATA_BLOCK_LENGTH_,
|
||||
m_initialValue_);
|
||||
System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock,
|
||||
DATA_BLOCK_LENGTH << 2);
|
||||
return newBlock;
|
||||
}
|
||||
|
||||
|
@ -307,34 +431,34 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
|
||||
// if Latin-1 is preallocated and linear, then do not compact Latin-1
|
||||
// data
|
||||
int overlapStart = DATA_BLOCK_LENGTH_;
|
||||
int overlapStart = DATA_BLOCK_LENGTH;
|
||||
if (m_isLatin1Linear_ && SHIFT_ <= 8) {
|
||||
overlapStart += 256;
|
||||
}
|
||||
|
||||
int newStart = DATA_BLOCK_LENGTH_;
|
||||
int newStart = DATA_BLOCK_LENGTH;
|
||||
int prevEnd = newStart - 1;
|
||||
for (int start = newStart; start < m_dataLength_;) {
|
||||
// start: index of first entry of current block
|
||||
// prevEnd: index to last entry of previous block
|
||||
// newStart: index where the current block is to be moved
|
||||
// skip blocks that are not used
|
||||
if (m_map_[start >> SHIFT_] < 0) {
|
||||
if (m_map_[start >>> SHIFT_] < 0) {
|
||||
// advance start to the next block
|
||||
start += DATA_BLOCK_LENGTH_;
|
||||
start += DATA_BLOCK_LENGTH;
|
||||
// leave prevEnd and newStart with the previous block!
|
||||
continue;
|
||||
}
|
||||
// search for an identical block
|
||||
if (start >= overlapStart) {
|
||||
int i = findSameDataBlock(m_data_, newStart, start,
|
||||
overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH_);
|
||||
overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
|
||||
if (i >= 0) {
|
||||
// found an identical block, set the other block's index
|
||||
// value for the current block
|
||||
m_map_[start >> SHIFT_] = i;
|
||||
m_map_[start >>> SHIFT_] = i;
|
||||
// advance start to the next block
|
||||
start += DATA_BLOCK_LENGTH_;
|
||||
start += DATA_BLOCK_LENGTH;
|
||||
// leave prevEnd and newStart with the previous block!
|
||||
continue;
|
||||
}
|
||||
|
@ -347,7 +471,7 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
if (x == m_data_[prevEnd] && overlap && start >= overlapStart)
|
||||
{
|
||||
// overlap by at least one
|
||||
for (i = 1; i < DATA_BLOCK_LENGTH_
|
||||
for (i = 1; i < DATA_BLOCK_LENGTH
|
||||
&& x == m_data_[start + i]
|
||||
&& x == m_data_[prevEnd - i]; ++ i)
|
||||
{
|
||||
|
@ -358,23 +482,23 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
}
|
||||
if (i > 0) {
|
||||
// some overlap
|
||||
m_map_[start >> SHIFT_] = newStart - i;
|
||||
m_map_[start >>> SHIFT_] = newStart - i;
|
||||
// move the non-overlapping indexes to their new positions
|
||||
start += i;
|
||||
for (i = DATA_BLOCK_LENGTH_ - i; i > 0; -- i) {
|
||||
for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) {
|
||||
m_data_[newStart ++] = m_data_[start ++];
|
||||
}
|
||||
}
|
||||
else if (newStart < start) {
|
||||
// no overlap, just move the indexes to their new positions
|
||||
m_map_[start >> SHIFT_] = newStart;
|
||||
for (i = DATA_BLOCK_LENGTH_; i > 0; -- i) {
|
||||
m_map_[start >>> SHIFT_] = newStart;
|
||||
for (i = DATA_BLOCK_LENGTH; i > 0; -- i) {
|
||||
m_data_[newStart ++] = m_data_[start ++];
|
||||
}
|
||||
}
|
||||
else { // no overlap && newStart==start
|
||||
m_map_[start >> SHIFT_] = start;
|
||||
newStart += DATA_BLOCK_LENGTH_;
|
||||
m_map_[start >>> SHIFT_] = start;
|
||||
newStart += DATA_BLOCK_LENGTH;
|
||||
start = newStart;
|
||||
}
|
||||
|
||||
|
@ -382,7 +506,7 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
}
|
||||
// now adjust the index (stage 1) table
|
||||
for (int i = 0; i < m_indexLength_; ++ i) {
|
||||
m_index_[i] = m_map_[m_index_[i] >>> SHIFT_];
|
||||
m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
|
||||
}
|
||||
m_dataLength_ = newStart;
|
||||
}
|
||||
|
@ -398,16 +522,16 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
int otherBlock, int step)
|
||||
{
|
||||
// ensure that we do not even partially get past dataLength
|
||||
dataLength -= DATA_BLOCK_LENGTH_;
|
||||
dataLength -= DATA_BLOCK_LENGTH;
|
||||
|
||||
for (int block = 0; block <= dataLength; block += step) {
|
||||
int i = 0;
|
||||
for (i = 0; i < DATA_BLOCK_LENGTH_; ++ i) {
|
||||
for (i = 0; i < DATA_BLOCK_LENGTH; ++ i) {
|
||||
if (data[block + i] != data[otherBlock + i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == DATA_BLOCK_LENGTH_) {
|
||||
if (i == DATA_BLOCK_LENGTH) {
|
||||
return block;
|
||||
}
|
||||
}
|
||||
|
@ -433,16 +557,33 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
|
||||
SURROGATE_BLOCK_COUNT_);
|
||||
|
||||
// to protect the copied lead surrogate values,
|
||||
// mark all their indexes as repeat blocks
|
||||
// (causes copy-on-write)
|
||||
for (char c = 0xd800; c <= 0xdbff; ++ c) {
|
||||
int block = index[c >> SHIFT_];
|
||||
if (block > 0) {
|
||||
index[c >> SHIFT_] =- block;
|
||||
// set all values for lead surrogate code *units* to leadUnitValue
|
||||
// so that by default runtime lookups will find no data for associated
|
||||
// supplementary code points, unless there is data for such code points
|
||||
// which will result in a non-zero folding value below that is set for
|
||||
// the respective lead units
|
||||
// the above saved the indexes for surrogate code *points*
|
||||
// fill the indexes with simplified code from utrie_setRange32()
|
||||
int block = 0;
|
||||
if (m_leadUnitValue_ == m_initialValue_) {
|
||||
// leadUnitValue == initialValue, use all-initial-value block
|
||||
// block = 0; assigned above
|
||||
}
|
||||
else {
|
||||
// create and fill the repeatBlock
|
||||
block = allocDataBlock();
|
||||
if (block < 0) {
|
||||
// data table overflow
|
||||
throw new InternalError("Internal error: Out of memory space");
|
||||
}
|
||||
fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
|
||||
// negative block number to indicate that it is a repeat block
|
||||
block = -block;
|
||||
}
|
||||
|
||||
for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) {
|
||||
m_index_[c] = block;
|
||||
}
|
||||
|
||||
// Fold significant index values into the area just after the BMP
|
||||
// indexes.
|
||||
// In case the first lead surrogate has significant data,
|
||||
|
@ -457,13 +598,16 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
// there is data, treat the full block for a lead surrogate
|
||||
c &= ~0x3ff;
|
||||
// is there an identical index block?
|
||||
int block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
|
||||
// get a folded value for [c..c+0x400[ and, if 0, set it for
|
||||
// the lead surrogate
|
||||
block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
|
||||
|
||||
// get a folded value for [c..c+0x400[ and,
|
||||
// if different from the value for the lead surrogate code
|
||||
// point, set it for the lead surrogate code unit
|
||||
|
||||
int value = manipulate.getFoldedValue(c,
|
||||
block + SURROGATE_BLOCK_COUNT_);
|
||||
if (value != 0) {
|
||||
if (!setValue(0xd7c0 + (c >> 10), value)) {
|
||||
if (value != getValue(UTF16.getLeadSurrogate(c))) {
|
||||
if (!setValue(UTF16.getLeadSurrogate(c), value)) {
|
||||
// data table overflow
|
||||
throw new ArrayIndexOutOfBoundsException(
|
||||
"Data table overflow");
|
||||
|
@ -480,7 +624,7 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
c += 0x400;
|
||||
}
|
||||
else {
|
||||
c += DATA_BLOCK_LENGTH_;
|
||||
c += DATA_BLOCK_LENGTH;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -505,5 +649,28 @@ public class IntTrieBuilder extends TrieBuilder
|
|||
indexLength += SURROGATE_BLOCK_COUNT_;
|
||||
m_indexLength_ = indexLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
private void fillBlock(int block, int start, int limit, int value,
|
||||
boolean overwrite)
|
||||
{
|
||||
limit += block;
|
||||
block += start;
|
||||
if (overwrite) {
|
||||
while (block < limit) {
|
||||
m_data_[block ++] = value;
|
||||
}
|
||||
}
|
||||
else {
|
||||
while (block < limit) {
|
||||
if (m_data_[block] == m_initialValue_) {
|
||||
m_data_[block] = value;
|
||||
}
|
||||
++ block;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -5,8 +5,8 @@
|
|||
******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/TrieBuilder.java,v $
|
||||
* $Date: 2002/09/06 01:50:43 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2003/08/20 00:19:20 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -31,7 +31,7 @@ import java.util.Arrays;
|
|||
* <LI>Smaller memory footprint.
|
||||
* </UL>
|
||||
* This is a direct port from the ICU4C version
|
||||
* @version $Revision: 1.8 $
|
||||
* @version $Revision: 1.9 $
|
||||
* @author Syn Wee Quek
|
||||
*/
|
||||
public class TrieBuilder
|
||||
|
@ -42,7 +42,7 @@ public class TrieBuilder
|
|||
* Number of data values in a stage 2 (data array) block. 2, 4, 8, ..,
|
||||
* 0x200
|
||||
*/
|
||||
public static final int DATA_BLOCK_LENGTH_ = 1 << Trie.INDEX_STAGE_1_SHIFT_;
|
||||
public static final int DATA_BLOCK_LENGTH = 1 << Trie.INDEX_STAGE_1_SHIFT_;
|
||||
|
||||
// public class declaration ----------------------------------------
|
||||
|
||||
|
@ -141,7 +141,7 @@ public class TrieBuilder
|
|||
protected static final int SURROGATE_BLOCK_COUNT_ = 1 << (10 - SHIFT_);
|
||||
/**
|
||||
* Mask for getting the lower bits from the input index.
|
||||
* DATA_BLOCK_LENGTH_ - 1.
|
||||
* DATA_BLOCK_LENGTH - 1.
|
||||
*/
|
||||
protected static final int MASK_ = Trie.INDEX_STAGE_3_MASK_;
|
||||
/**
|
||||
|
@ -253,10 +253,10 @@ public class TrieBuilder
|
|||
|
||||
/**
|
||||
* Maximum length of the build-time data (stage 2) array.
|
||||
* The maximum length is 0x110000 + DATA_BLOCK_LENGTH_ + 0x400.
|
||||
* The maximum length is 0x110000 + DATA_BLOCK_LENGTH + 0x400.
|
||||
* (Number of Unicode code points + one all-initial-value block +
|
||||
* possible duplicate entries for 1024 lead surrogates.)
|
||||
*/
|
||||
private static final int MAX_BUILD_TIME_DATA_LENGTH_ =
|
||||
0x110000 + DATA_BLOCK_LENGTH_ + 0x400;
|
||||
0x110000 + DATA_BLOCK_LENGTH + 0x400;
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/TrieIterator.java,v $
|
||||
* $Date: 2002/11/16 01:49:26 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2003/08/20 00:19:19 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -131,7 +131,7 @@ public class TrieIterator implements RangeValueIterator
|
|||
}
|
||||
if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE &&
|
||||
calculateNextBMPElement(element)) {
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
calculateNextSupplementaryElement(element);
|
||||
return true;
|
||||
|
@ -268,22 +268,26 @@ public class TrieIterator implements RangeValueIterator
|
|||
m_nextCodepoint_ ++;
|
||||
m_nextBlockIndex_ ++;
|
||||
|
||||
if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) {
|
||||
setResult(element, m_currentCodepoint_, m_nextCodepoint_,
|
||||
currentValue);
|
||||
m_currentCodepoint_ = m_nextCodepoint_;
|
||||
return;
|
||||
if (UTF16.getTrailSurrogate(m_nextCodepoint_)
|
||||
!= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
|
||||
// this piece is only called when we are in the middle of a lead
|
||||
// surrogate block
|
||||
if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) {
|
||||
setResult(element, m_currentCodepoint_, m_nextCodepoint_,
|
||||
currentValue);
|
||||
m_currentCodepoint_ = m_nextCodepoint_;
|
||||
return;
|
||||
}
|
||||
// we have cleared one block
|
||||
m_nextIndex_ ++;
|
||||
m_nextTrailIndexOffset_ ++;
|
||||
if (!checkTrailBlock(currentBlock, currentValue)) {
|
||||
setResult(element, m_currentCodepoint_, m_nextCodepoint_,
|
||||
currentValue);
|
||||
m_currentCodepoint_ = m_nextCodepoint_;
|
||||
return;
|
||||
}
|
||||
}
|
||||
// we have cleared one block
|
||||
m_nextIndex_ ++;
|
||||
m_nextTrailIndexOffset_ ++;
|
||||
if (!checkTrailBlock(currentBlock, currentValue)) {
|
||||
setResult(element, m_currentCodepoint_, m_nextCodepoint_,
|
||||
currentValue);
|
||||
m_currentCodepoint_ = m_nextCodepoint_;
|
||||
return;
|
||||
}
|
||||
|
||||
int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_);
|
||||
// enumerate supplementary code points
|
||||
while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) {
|
||||
|
@ -293,10 +297,25 @@ public class TrieIterator implements RangeValueIterator
|
|||
Trie.INDEX_STAGE_2_SHIFT_;
|
||||
if (leadBlock == m_trie_.m_dataOffset_) {
|
||||
// no entries for a whole block of lead surrogates
|
||||
if (currentValue != m_initialValue_) {
|
||||
m_nextValue_ = m_initialValue_;
|
||||
m_nextBlock_ = 0;
|
||||
m_nextBlockIndex_ = 0;
|
||||
setResult(element, m_currentCodepoint_, m_nextCodepoint_,
|
||||
currentValue);
|
||||
m_currentCodepoint_ = m_nextCodepoint_;
|
||||
return;
|
||||
}
|
||||
|
||||
nextLead += DATA_BLOCK_LENGTH_;
|
||||
// number of total affected supplementary codepoints in one
|
||||
// block
|
||||
m_nextCodepoint_ += DATA_BLOCK_SUPPLEMENTARY_LENGTH_;
|
||||
// this is not a simple addition of
|
||||
// DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider
|
||||
// that we might have moved some of the codepoints
|
||||
m_nextCodepoint_ = UCharacterProperty.getRawSupplementary(
|
||||
(char)nextLead,
|
||||
(char)UTF16.TRAIL_SURROGATE_MIN_VALUE);
|
||||
continue;
|
||||
}
|
||||
if (m_trie_.m_dataManipulate_ == null) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java,v $
|
||||
* $Date: 2003/07/16 05:52:08 $
|
||||
* $Revision: 1.22 $
|
||||
* $Date: 2003/08/20 00:20:37 $
|
||||
* $Revision: 1.23 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -848,7 +848,7 @@ final class CollationParsedRuleBuilder
|
|||
boolean inBlockZero = m_mapping_.isInZeroBlock(cp);
|
||||
int tag = getCETag(value);
|
||||
if (inBlockZero == true) {
|
||||
cp += TrieBuilder.DATA_BLOCK_LENGTH_;
|
||||
cp += TrieBuilder.DATA_BLOCK_LENGTH;
|
||||
}
|
||||
else if (!(isSpecial(value) && (tag == CE_IMPLICIT_TAG_
|
||||
|| tag == CE_NOT_FOUND_TAG_))) {
|
||||
|
@ -882,10 +882,10 @@ final class CollationParsedRuleBuilder
|
|||
m_expansions_ = new Vector();
|
||||
// Do your own mallocs for the structure, array and have linear
|
||||
// Latin 1
|
||||
m_mapping_ = new IntTrieBuilder(null, 0x100000,
|
||||
RuleBasedCollator.CE_SPECIAL_FLAG_
|
||||
| (CE_NOT_FOUND_TAG_ << 24),
|
||||
true);
|
||||
int trieinitialvalue = RuleBasedCollator.CE_SPECIAL_FLAG_
|
||||
| (CE_NOT_FOUND_TAG_ << 24);
|
||||
m_mapping_ = new IntTrieBuilder(null, 0x100000, trieinitialvalue,
|
||||
trieinitialvalue, true);
|
||||
m_prefixLookup_ = new Hashtable();
|
||||
// uhash_open(prefixLookupHash, prefixLookupComp);
|
||||
m_contractions_ = new ContractionTable(m_mapping_);
|
||||
|
|
Loading…
Add table
Reference in a new issue