mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-8167 rename ByteTrie to BytesTrie, DictTrie to StringTrie, etc.
X-SVN-Rev: 29265
This commit is contained in:
parent
a0575a248c
commit
62306a038c
28 changed files with 908 additions and 909 deletions
|
@ -1,6 +1,6 @@
|
|||
#******************************************************************************
|
||||
#
|
||||
# Copyright (C) 1999-2010, International Business Machines
|
||||
# Copyright (C) 1999-2011, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
#******************************************************************************
|
||||
|
@ -85,7 +85,7 @@ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
|
|||
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
|
||||
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
|
||||
ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \
|
||||
bytestream.o stringpiece.o bytetrie.o \
|
||||
bytestream.o stringpiece.o bytestrie.o \
|
||||
ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
|
||||
utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
|
||||
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytetrie.cpp
|
||||
* file name: bytestrie.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
|
@ -16,13 +16,13 @@
|
|||
#include "unicode/bytestream.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "uassert.h"
|
||||
#include "bytetrie.h"
|
||||
#include "bytestrie.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// lead byte already shifted right by 1.
|
||||
int32_t
|
||||
ByteTrie::readValue(const uint8_t *pos, int32_t leadByte) {
|
||||
BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
|
||||
int32_t value;
|
||||
if(leadByte<kMinTwoByteValueLead) {
|
||||
value=leadByte-kMinOneByteValueLead;
|
||||
|
@ -39,7 +39,7 @@ ByteTrie::readValue(const uint8_t *pos, int32_t leadByte) {
|
|||
}
|
||||
|
||||
const uint8_t *
|
||||
ByteTrie::jumpByDelta(const uint8_t *pos) {
|
||||
BytesTrie::jumpByDelta(const uint8_t *pos) {
|
||||
int32_t delta=*pos++;
|
||||
if(delta<kMinTwoByteDeltaLead) {
|
||||
// nothing to do
|
||||
|
@ -58,20 +58,20 @@ ByteTrie::jumpByDelta(const uint8_t *pos) {
|
|||
return pos+delta;
|
||||
}
|
||||
|
||||
UDictTrieResult
|
||||
ByteTrie::current() const {
|
||||
UStringTrieResult
|
||||
BytesTrie::current() const {
|
||||
const uint8_t *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
} else {
|
||||
int32_t node;
|
||||
return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
|
||||
valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
UDictTrieResult
|
||||
ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
|
||||
UStringTrieResult
|
||||
BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
|
||||
// Branch according to the current byte.
|
||||
if(length==0) {
|
||||
length=*pos++;
|
||||
|
@ -93,12 +93,12 @@ ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
|
|||
// and divides length by 2.
|
||||
do {
|
||||
if(inByte==*pos++) {
|
||||
UDictTrieResult result;
|
||||
UStringTrieResult result;
|
||||
int32_t node=*pos;
|
||||
U_ASSERT(node>=kMinValueLead);
|
||||
if(node&kValueIsFinal) {
|
||||
// Leave the final value for getValue() to read.
|
||||
result=UDICTTRIE_HAS_FINAL_VALUE;
|
||||
result=USTRINGTRIE_FINAL_VALUE;
|
||||
} else {
|
||||
// Use the non-final value as the jump delta.
|
||||
++pos;
|
||||
|
@ -122,7 +122,7 @@ ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
|
|||
// end readValue()
|
||||
pos+=delta;
|
||||
node=*pos;
|
||||
result= node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
}
|
||||
pos_=pos;
|
||||
return result;
|
||||
|
@ -133,15 +133,15 @@ ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
|
|||
if(inByte==*pos++) {
|
||||
pos_=pos;
|
||||
int32_t node=*pos;
|
||||
return node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
} else {
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
}
|
||||
|
||||
UDictTrieResult
|
||||
ByteTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
|
||||
UStringTrieResult
|
||||
BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
|
||||
for(;;) {
|
||||
int32_t node=*pos++;
|
||||
if(node<kMinLinearMatch) {
|
||||
|
@ -153,7 +153,7 @@ ByteTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
|
|||
remainingMatchLength_=--length;
|
||||
pos_=pos;
|
||||
return (length<0 && (node=*pos)>=kMinValueLead) ?
|
||||
valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
} else {
|
||||
// No match.
|
||||
break;
|
||||
|
@ -169,14 +169,14 @@ ByteTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
|
|||
}
|
||||
}
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
|
||||
UDictTrieResult
|
||||
ByteTrie::next(int32_t inByte) {
|
||||
UStringTrieResult
|
||||
BytesTrie::next(int32_t inByte) {
|
||||
const uint8_t *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
|
||||
if(length>=0) {
|
||||
|
@ -186,24 +186,24 @@ ByteTrie::next(int32_t inByte) {
|
|||
pos_=pos;
|
||||
int32_t node;
|
||||
return (length<0 && (node=*pos)>=kMinValueLead) ?
|
||||
valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
} else {
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
}
|
||||
return nextImpl(pos, inByte);
|
||||
}
|
||||
|
||||
UDictTrieResult
|
||||
ByteTrie::next(const char *s, int32_t sLength) {
|
||||
UStringTrieResult
|
||||
BytesTrie::next(const char *s, int32_t sLength) {
|
||||
if(sLength<0 ? *s==0 : sLength==0) {
|
||||
// Empty input.
|
||||
return current();
|
||||
}
|
||||
const uint8_t *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
|
||||
for(;;) {
|
||||
|
@ -217,7 +217,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
|
|||
pos_=pos;
|
||||
int32_t node;
|
||||
return (length<0 && (node=*pos)>=kMinValueLead) ?
|
||||
valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
}
|
||||
if(length<0) {
|
||||
remainingMatchLength_=length;
|
||||
|
@ -225,7 +225,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
|
|||
}
|
||||
if(inByte!=*pos) {
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
++pos;
|
||||
--length;
|
||||
|
@ -237,7 +237,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
|
|||
pos_=pos;
|
||||
int32_t node;
|
||||
return (length<0 && (node=*pos)>=kMinValueLead) ?
|
||||
valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
}
|
||||
inByte=*s++;
|
||||
--sLength;
|
||||
|
@ -247,7 +247,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
|
|||
}
|
||||
if(inByte!=*pos) {
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
++pos;
|
||||
--length;
|
||||
|
@ -256,9 +256,9 @@ ByteTrie::next(const char *s, int32_t sLength) {
|
|||
for(;;) {
|
||||
int32_t node=*pos++;
|
||||
if(node<kMinLinearMatch) {
|
||||
UDictTrieResult result=branchNext(pos, node, inByte);
|
||||
if(result==UDICTTRIE_NO_MATCH) {
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
UStringTrieResult result=branchNext(pos, node, inByte);
|
||||
if(result==USTRINGTRIE_NO_MATCH) {
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
// Fetch the next input byte, if there is one.
|
||||
if(sLength<0) {
|
||||
|
@ -272,10 +272,10 @@ ByteTrie::next(const char *s, int32_t sLength) {
|
|||
inByte=*s++;
|
||||
--sLength;
|
||||
}
|
||||
if(result==UDICTTRIE_HAS_FINAL_VALUE) {
|
||||
if(result==USTRINGTRIE_FINAL_VALUE) {
|
||||
// No further matching bytes.
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
|
||||
} else if(node<kMinValueLead) {
|
||||
|
@ -283,7 +283,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
|
|||
length=node-kMinLinearMatch; // Actual match length minus 1.
|
||||
if(inByte!=*pos) {
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
++pos;
|
||||
--length;
|
||||
|
@ -291,7 +291,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
|
|||
} else if(node&kValueIsFinal) {
|
||||
// No further matching bytes.
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
} else {
|
||||
// Skip intermediate value.
|
||||
pos=skipValue(pos, node);
|
||||
|
@ -303,8 +303,8 @@ ByteTrie::next(const char *s, int32_t sLength) {
|
|||
}
|
||||
|
||||
const uint8_t *
|
||||
ByteTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
|
||||
UBool haveUniqueValue, int32_t &uniqueValue) {
|
||||
BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
|
||||
UBool haveUniqueValue, int32_t &uniqueValue) {
|
||||
while(length>kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison byte
|
||||
if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
|
||||
|
@ -340,7 +340,7 @@ ByteTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
|
|||
}
|
||||
|
||||
UBool
|
||||
ByteTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
|
||||
BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
|
||||
for(;;) {
|
||||
int32_t node=*pos++;
|
||||
if(node<kMinLinearMatch) {
|
||||
|
@ -375,7 +375,7 @@ ByteTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &un
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrie::getNextBytes(ByteSink &out) const {
|
||||
BytesTrie::getNextBytes(ByteSink &out) const {
|
||||
const uint8_t *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
return 0;
|
||||
|
@ -408,7 +408,7 @@ ByteTrie::getNextBytes(ByteSink &out) const {
|
|||
}
|
||||
|
||||
void
|
||||
ByteTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
|
||||
BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
|
||||
while(length>kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison byte
|
||||
getNextBranchBytes(jumpByDelta(pos), length>>1, out);
|
||||
|
@ -423,7 +423,7 @@ ByteTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out)
|
|||
}
|
||||
|
||||
void
|
||||
ByteTrie::append(ByteSink &out, int c) {
|
||||
BytesTrie::append(ByteSink &out, int c) {
|
||||
char ch=(char)c;
|
||||
out.Append(&ch, 1);
|
||||
}
|
|
@ -1,9 +1,9 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytetrie.h
|
||||
* file name: bytestrie.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
|
@ -12,55 +12,54 @@
|
|||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __BYTETRIE_H__
|
||||
#define __BYTETRIE_H__
|
||||
#ifndef __BYTESTRIE_H__
|
||||
#define __BYTESTRIE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Dictionary trie for mapping arbitrary byte sequences
|
||||
* to integer values.
|
||||
* \brief C++ API: Trie for mapping byte sequences to integer values.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "uassert.h"
|
||||
#include "udicttrie.h"
|
||||
#include "ustringtrie.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ByteSink;
|
||||
class ByteTrieBuilder;
|
||||
class ByteTrieIterator;
|
||||
class BytesTrieBuilder;
|
||||
class BytesTrieIterator;
|
||||
|
||||
/**
|
||||
* Light-weight, non-const reader class for a ByteTrie.
|
||||
* Light-weight, non-const reader class for a BytesTrie.
|
||||
* Traverses a byte-serialized data structure with minimal state,
|
||||
* for mapping byte sequences to non-negative integer values.
|
||||
*/
|
||||
class U_COMMON_API ByteTrie : public UMemory {
|
||||
class U_COMMON_API BytesTrie : public UMemory {
|
||||
public:
|
||||
ByteTrie(const void *trieBytes)
|
||||
BytesTrie(const void *trieBytes)
|
||||
: bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
|
||||
pos_(bytes_), remainingMatchLength_(-1) {}
|
||||
|
||||
/**
|
||||
* Resets this trie to its initial state.
|
||||
*/
|
||||
ByteTrie &reset() {
|
||||
BytesTrie &reset() {
|
||||
pos_=bytes_;
|
||||
remainingMatchLength_=-1;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* ByteTrie state object, for saving a trie's current state
|
||||
* BytesTrie state object, for saving a trie's current state
|
||||
* and resetting the trie back to this state later.
|
||||
*/
|
||||
class State : public UMemory {
|
||||
public:
|
||||
State() { bytes=NULL; }
|
||||
private:
|
||||
friend class ByteTrie;
|
||||
friend class BytesTrie;
|
||||
|
||||
const uint8_t *bytes;
|
||||
const uint8_t *pos;
|
||||
|
@ -71,7 +70,7 @@ public:
|
|||
* Saves the state of this trie.
|
||||
* @see resetToState
|
||||
*/
|
||||
const ByteTrie &saveState(State &state) const {
|
||||
const BytesTrie &saveState(State &state) const {
|
||||
state.bytes=bytes_;
|
||||
state.pos=pos_;
|
||||
state.remainingMatchLength=remainingMatchLength_;
|
||||
|
@ -85,7 +84,7 @@ public:
|
|||
* @see saveState
|
||||
* @see reset
|
||||
*/
|
||||
ByteTrie &resetToState(const State &state) {
|
||||
BytesTrie &resetToState(const State &state) {
|
||||
if(bytes_==state.bytes && bytes_!=NULL) {
|
||||
pos_=state.pos;
|
||||
remainingMatchLength_=state.remainingMatchLength;
|
||||
|
@ -98,14 +97,14 @@ public:
|
|||
* and whether another input byte can continue a matching byte sequence.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
UDictTrieResult current() const;
|
||||
UStringTrieResult current() const;
|
||||
|
||||
/**
|
||||
* Traverses the trie from the initial state for this input byte.
|
||||
* Equivalent to reset().next(inByte).
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
inline UDictTrieResult first(int32_t inByte) {
|
||||
inline UStringTrieResult first(int32_t inByte) {
|
||||
remainingMatchLength_=-1;
|
||||
return nextImpl(bytes_, inByte);
|
||||
}
|
||||
|
@ -114,7 +113,7 @@ public:
|
|||
* Traverses the trie from the current state for this input byte.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
UDictTrieResult next(int32_t inByte);
|
||||
UStringTrieResult next(int32_t inByte);
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this byte sequence.
|
||||
|
@ -122,19 +121,20 @@ public:
|
|||
* \code
|
||||
* Result result=current();
|
||||
* for(each c in s)
|
||||
* if((result=next(c))==UDICTTRIE_NO_MATCH) return UDICTTRIE_NO_MATCH;
|
||||
* if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
|
||||
* result=next(c);
|
||||
* return result;
|
||||
* \endcode
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
UDictTrieResult next(const char *s, int32_t length);
|
||||
UStringTrieResult next(const char *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Returns a matching byte sequence's value if called immediately after
|
||||
* current()/first()/next() returned UDICTTRIE_HAS_VALUE or UDICTTRIE_HAS_FINAL_VALUE.
|
||||
* current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
|
||||
* getValue() can be called multiple times.
|
||||
*
|
||||
* Do not call getValue() after UDICTTRIE_NO_MATCH or UDICTTRIE_NO_VALUE!
|
||||
* Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
|
||||
*/
|
||||
inline int32_t getValue() const {
|
||||
const uint8_t *pos=pos_;
|
||||
|
@ -159,7 +159,7 @@ public:
|
|||
|
||||
/**
|
||||
* Finds each byte which continues the byte sequence from the current state.
|
||||
* That is, each byte b for which it would be next(b)!=UDICTTRIE_NO_MATCH now.
|
||||
* That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now.
|
||||
* @param out Each next byte is appended to this object.
|
||||
* (Only uses the out.Append(s, length) method.)
|
||||
* @return the number of bytes which continue the byte sequence from here
|
||||
|
@ -167,8 +167,8 @@ public:
|
|||
int32_t getNextBytes(ByteSink &out) const;
|
||||
|
||||
private:
|
||||
friend class ByteTrieBuilder;
|
||||
friend class ByteTrieIterator;
|
||||
friend class BytesTrieBuilder;
|
||||
friend class BytesTrieIterator;
|
||||
|
||||
inline void stop() {
|
||||
pos_=NULL;
|
||||
|
@ -212,15 +212,15 @@ private:
|
|||
return pos;
|
||||
}
|
||||
|
||||
static inline UDictTrieResult valueResult(int32_t node) {
|
||||
return (UDictTrieResult)(UDICTTRIE_HAS_VALUE-(node&kValueIsFinal));
|
||||
static inline UStringTrieResult valueResult(int32_t node) {
|
||||
return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node&kValueIsFinal));
|
||||
}
|
||||
|
||||
// Handles a branch node for both next(byte) and next(string).
|
||||
UDictTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);
|
||||
UStringTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);
|
||||
|
||||
// Requires remainingMatchLength_<0.
|
||||
UDictTrieResult nextImpl(const uint8_t *pos, int32_t inByte);
|
||||
UStringTrieResult nextImpl(const uint8_t *pos, int32_t inByte);
|
||||
|
||||
// Helper functions for hasUniqueValue().
|
||||
// Recursively finds a unique value (or whether there is not a unique one)
|
||||
|
@ -236,7 +236,7 @@ private:
|
|||
static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out);
|
||||
static void append(ByteSink &out, int c);
|
||||
|
||||
// ByteTrie data structure
|
||||
// BytesTrie data structure
|
||||
//
|
||||
// The trie consists of a series of byte-serialized nodes for incremental
|
||||
// string/byte sequence matching. The root node is at the beginning of the trie data.
|
||||
|
@ -315,7 +315,7 @@ private:
|
|||
static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff
|
||||
static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff
|
||||
|
||||
// Fixed value referencing the ByteTrie bytes.
|
||||
// Fixed value referencing the BytesTrie bytes.
|
||||
const uint8_t *bytes_;
|
||||
|
||||
// Iterator variables.
|
||||
|
@ -328,4 +328,4 @@ private:
|
|||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __BYTETRIE_H__
|
||||
#endif // __BYTESTRIE_H__
|
|
@ -400,7 +400,7 @@
|
|||
<ClCompile Include="servslkf.cpp" />
|
||||
<ClCompile Include="usprep.cpp" />
|
||||
<ClCompile Include="bytestream.cpp" />
|
||||
<ClCompile Include="bytetrie.cpp" />
|
||||
<ClCompile Include="bytestrie.cpp" />
|
||||
<ClCompile Include="chariter.cpp" />
|
||||
<ClCompile Include="charstr.cpp" />
|
||||
<ClCompile Include="cstring.c" />
|
||||
|
@ -557,6 +557,7 @@
|
|||
<ClInclude Include="uhash.h" />
|
||||
<ClInclude Include="ulist.h" />
|
||||
<ClInclude Include="ustrenum.h" />
|
||||
<ClInclude Include="ustringtrie.h" />
|
||||
<ClInclude Include="utrie.h" />
|
||||
<ClInclude Include="utrie2.h" />
|
||||
<ClInclude Include="utrie2_impl.h" />
|
||||
|
@ -1366,7 +1367,7 @@
|
|||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
<ClInclude Include="bytetrie.h" />
|
||||
<ClInclude Include="bytestrie.h" />
|
||||
<CustomBuild Include="unicode\chariter.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002-2010, International Business Machines
|
||||
* Copyright (c) 2002-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
|
@ -165,7 +165,7 @@ int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t
|
|||
if(valueMapIndex==0) {
|
||||
return 0; // The property does not have named values.
|
||||
}
|
||||
++valueMapIndex; // Skip the ByteTrie offset.
|
||||
++valueMapIndex; // Skip the BytesTrie offset.
|
||||
int32_t numRanges=valueMaps[valueMapIndex++];
|
||||
if(numRanges<0x10) {
|
||||
// Ranges of values.
|
||||
|
@ -214,11 +214,11 @@ const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
|
|||
return nameGroup;
|
||||
}
|
||||
|
||||
UBool PropNameData::containsName(ByteTrie &trie, const char *name) {
|
||||
UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
|
||||
if(name==NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
UDictTrieResult result=UDICTTRIE_NO_VALUE;
|
||||
UStringTrieResult result=USTRINGTRIE_NO_VALUE;
|
||||
char c;
|
||||
while((c=*name++)!=0) {
|
||||
c=uprv_invCharToLowercaseAscii(c);
|
||||
|
@ -226,12 +226,12 @@ UBool PropNameData::containsName(ByteTrie &trie, const char *name) {
|
|||
if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
|
||||
continue;
|
||||
}
|
||||
if(!UDICTTRIE_RESULT_HAS_NEXT(result)) {
|
||||
if(!USTRINGTRIE_HAS_NEXT(result)) {
|
||||
return FALSE;
|
||||
}
|
||||
result=trie.next((uint8_t)c);
|
||||
}
|
||||
return UDICTTRIE_RESULT_HAS_VALUE(result);
|
||||
return USTRINGTRIE_HAS_VALUE(result);
|
||||
}
|
||||
|
||||
const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
|
||||
|
@ -254,8 +254,8 @@ const char *PropNameData::getPropertyValueName(int32_t property, int32_t value,
|
|||
return getName(nameGroups+nameGroupOffset, nameChoice);
|
||||
}
|
||||
|
||||
int32_t PropNameData::getPropertyOrValueEnum(int32_t byteTrieOffset, const char *alias) {
|
||||
ByteTrie trie(byteTries+byteTrieOffset);
|
||||
int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
|
||||
BytesTrie trie(bytesTries+bytesTrieOffset);
|
||||
if(containsName(trie, alias)) {
|
||||
return trie.getValue();
|
||||
} else {
|
||||
|
@ -277,7 +277,7 @@ int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias)
|
|||
return UCHAR_INVALID_CODE; // The property does not have named values.
|
||||
}
|
||||
// valueMapIndex is the start of the property's valueMap,
|
||||
// where the first word is the ByteTrie offset.
|
||||
// where the first word is the BytesTrie offset.
|
||||
return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002-2010, International Business Machines
|
||||
* Copyright (c) 2002-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
|
@ -14,7 +14,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "bytetrie.h"
|
||||
#include "bytestrie.h"
|
||||
#include "udataswp.h"
|
||||
#include "uprops.h"
|
||||
|
||||
|
@ -106,13 +106,13 @@ private:
|
|||
static int32_t findProperty(int32_t property);
|
||||
static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value);
|
||||
static const char *getName(const char *nameGroup, int32_t nameIndex);
|
||||
static UBool containsName(ByteTrie &trie, const char *name);
|
||||
static UBool containsName(BytesTrie &trie, const char *name);
|
||||
|
||||
static int32_t getPropertyOrValueEnum(int32_t byteTrieOffset, const char *alias);
|
||||
static int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias);
|
||||
|
||||
static const int32_t indexes[];
|
||||
static const int32_t valueMaps[];
|
||||
static const uint8_t byteTries[];
|
||||
static const uint8_t bytesTries[];
|
||||
static const char nameGroups[];
|
||||
};
|
||||
|
||||
|
@ -164,7 +164,7 @@ private:
|
|||
* If the valueMapIndex is 0, then the property does not have named values.
|
||||
*
|
||||
* For each property's value map:
|
||||
* int32_t byteTrieOffset; -- Offset into byteTries[] for name->value mapping.
|
||||
* int32_t bytesTrieOffset; -- Offset into bytesTries[] for name->value mapping.
|
||||
* int32_t numRanges;
|
||||
* If numRanges is in the range 1..15, then that many ranges of values follow.
|
||||
* Per range:
|
||||
|
@ -181,12 +181,12 @@ private:
|
|||
*
|
||||
* For both properties and property values, ranges are sorted by their start/limit values.
|
||||
*
|
||||
* uint8_t byteTries[];
|
||||
* uint8_t bytesTries[];
|
||||
*
|
||||
* This is a sequence of ByteTrie structures, byte-serialized tries for
|
||||
* This is a sequence of BytesTrie structures, byte-serialized tries for
|
||||
* mapping from names/aliases to values.
|
||||
* The first one maps from property names/aliases to UProperty enum constants.
|
||||
* The following ones are indexed by property value map byteTrieOffsets
|
||||
* The following ones are indexed by property value map bytesTrieOffsets
|
||||
* for mapping each property's names/aliases to their property values.
|
||||
*
|
||||
* char nameGroups[];
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
* file name: propname_data.h
|
||||
*
|
||||
* machine-generated on: 2010-12-31
|
||||
* machine-generated on: 2011-01-05
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_FROM_PROPNAME_CPP
|
||||
|
@ -80,7 +80,7 @@ const int32_t PropNameData::valueMaps[989]={
|
|||
0x2eb1,0x2f20,0x2ec6,0x2e97,0x2f0a,0x2f72,0x2f4a,0x2f5e,0x2f82,0x2f93,0x2ef2,0x2edc,0x2f35
|
||||
};
|
||||
|
||||
const uint8_t PropNameData::byteTries[10229]={
|
||||
const uint8_t PropNameData::bytesTries[10229]={
|
||||
0,0x15,0x6d,0xc3,0x16,0x73,0xc1,0xea,0x76,0x5f,0x76,0x68,0x77,0x90,0x78,1,
|
||||
0x64,0x50,0x69,0x10,0x64,1,0x63,0x30,0x73,0x62,0x13,0x74,0x61,0x72,0x74,0x63,
|
||||
0x60,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x61,0x13,0x69,0x67,0x69,0x74,0x81,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: ustringtrie.h
|
||||
|
@ -12,8 +12,8 @@
|
|||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UDICTTRIE_H__
|
||||
#define __UDICTTRIE_H__
|
||||
#ifndef __USTRINGTRIE_H__
|
||||
#define __USTRINGTRIE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
|
@ -23,61 +23,61 @@
|
|||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* Return values for ByteTrie::next(), UCharTrie::next() and similar methods.
|
||||
* @see UDICTTRIE_RESULT_MATCHES
|
||||
* @see UDICTTRIE_RESULT_HAS_VALUE
|
||||
* @see UDICTTRIE_RESULT_HAS_NEXT
|
||||
* Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
|
||||
* @see USTRINGTRIE_MATCHES
|
||||
* @see USTRINGTRIE_HAS_VALUE
|
||||
* @see USTRINGTRIE_HAS_NEXT
|
||||
*/
|
||||
enum UDictTrieResult {
|
||||
enum UStringTrieResult {
|
||||
/**
|
||||
* The input unit(s) did not continue a matching string.
|
||||
*/
|
||||
UDICTTRIE_NO_MATCH,
|
||||
USTRINGTRIE_NO_MATCH,
|
||||
/**
|
||||
* The input unit(s) continued a matching string
|
||||
* but there is no value for the string so far.
|
||||
* (It is a prefix of a longer string.)
|
||||
*/
|
||||
UDICTTRIE_NO_VALUE,
|
||||
USTRINGTRIE_NO_VALUE,
|
||||
/**
|
||||
* The input unit(s) continued a matching string
|
||||
* and there is a value for the string so far.
|
||||
* This value will be returned by getValue().
|
||||
* No further input byte/unit can continue a matching string.
|
||||
*/
|
||||
UDICTTRIE_HAS_FINAL_VALUE,
|
||||
USTRINGTRIE_FINAL_VALUE,
|
||||
/**
|
||||
* The input unit(s) continued a matching string
|
||||
* and there is a value for the string so far.
|
||||
* This value will be returned by getValue().
|
||||
* Another input byte/unit can continue a matching string.
|
||||
*/
|
||||
UDICTTRIE_HAS_VALUE
|
||||
USTRINGTRIE_INTERMEDIATE_VALUE
|
||||
};
|
||||
|
||||
/**
|
||||
* Same as (result!=UDICTTRIE_NO_MATCH).
|
||||
* @param result A result from ByteTrie::first(), UCharTrie::next() etc.
|
||||
* Same as (result!=USTRINGTRIE_NO_MATCH).
|
||||
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
|
||||
* @return true if the input bytes/units so far are part of a matching string/byte sequence.
|
||||
*/
|
||||
#define UDICTTRIE_RESULT_MATCHES(result) ((result)!=UDICTTRIE_NO_MATCH)
|
||||
#define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH)
|
||||
|
||||
/**
|
||||
* Equivalent to (result==UDICTTRIE_HAS_VALUE || result==UDICTTRIE_HAS_FINAL_VALUE) but
|
||||
* Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but
|
||||
* this macro evaluates result exactly once.
|
||||
* @param result A result from ByteTrie::first(), UCharTrie::next() etc.
|
||||
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
|
||||
* @return true if there is a value for the input bytes/units so far.
|
||||
* @see ByteTrie::getValue
|
||||
* @see UCharTrie::getValue
|
||||
* @see BytesTrie::getValue
|
||||
* @see UCharsTrie::getValue
|
||||
*/
|
||||
#define UDICTTRIE_RESULT_HAS_VALUE(result) ((result)>=UDICTTRIE_HAS_FINAL_VALUE)
|
||||
#define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE)
|
||||
|
||||
/**
|
||||
* Equivalent to (result==UDICTTRIE_NO_VALUE || result==UDICTTRIE_HAS_VALUE) but
|
||||
* Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but
|
||||
* this macro evaluates result exactly once.
|
||||
* @param result A result from ByteTrie::first(), UCharTrie::next() etc.
|
||||
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
|
||||
* @return true if another input byte/unit can continue a matching string.
|
||||
*/
|
||||
#define UDICTTRIE_RESULT_HAS_NEXT(result) ((result)&1)
|
||||
#define USTRINGTRIE_HAS_NEXT(result) ((result)&1)
|
||||
|
||||
#endif /* __UDICTTRIE_H__ */
|
||||
#endif /* __USTRINGTRIE_H__ */
|
|
@ -1,6 +1,6 @@
|
|||
#******************************************************************************
|
||||
#
|
||||
# Copyright (C) 1999-2010, International Business Machines
|
||||
# Copyright (C) 1999-2011, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
#******************************************************************************
|
||||
|
@ -50,7 +50,7 @@ sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \
|
|||
tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o \
|
||||
tsmthred.o tsnmfmt.o tsputil.o tstnrapi.o tstnorm.o tzbdtest.o \
|
||||
tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o strcase.o transtst.o strtest.o thcoll.o \
|
||||
bytetrietest.o uchartrietest.o \
|
||||
bytestrietest.o ucharstrietest.o \
|
||||
itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o \
|
||||
testutil.o transrt.o trnserr.o normconf.o sfwdchit.o \
|
||||
jamotest.o srchtest.o reptest.o regextst.o \
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytetrietest.cpp
|
||||
|
@ -16,9 +16,9 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "bytetrie.h"
|
||||
#include "bytetriebuilder.h"
|
||||
#include "bytetrieiterator.h"
|
||||
#include "bytestrie.h"
|
||||
#include "bytestriebuilder.h"
|
||||
#include "bytestrieiterator.h"
|
||||
#include "intltest.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
@ -28,10 +28,10 @@ struct StringAndValue {
|
|||
int32_t value;
|
||||
};
|
||||
|
||||
class ByteTrieTest : public IntlTest {
|
||||
class BytesTrieTest : public IntlTest {
|
||||
public:
|
||||
ByteTrieTest() {}
|
||||
virtual ~ByteTrieTest();
|
||||
BytesTrieTest() {}
|
||||
virtual ~BytesTrieTest();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
|
||||
void TestBuilder();
|
||||
|
@ -45,7 +45,7 @@ public:
|
|||
void TestValuesForState();
|
||||
void TestCompact();
|
||||
|
||||
StringPiece buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBuildOption buildOption);
|
||||
StringPiece buildMonthsTrie(BytesTrieBuilder &builder, UStringTrieBuildOption buildOption);
|
||||
void TestHasUniqueValue();
|
||||
void TestGetNextBytes();
|
||||
void TestIteratorFromBranch();
|
||||
|
@ -55,27 +55,27 @@ public:
|
|||
void TestTruncatingIteratorFromLinearMatchLong();
|
||||
|
||||
void checkData(const StringAndValue data[], int32_t dataLength);
|
||||
void checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption);
|
||||
void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
|
||||
StringPiece buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
ByteTrieBuilder &builder, UDictTrieBuildOption buildOption);
|
||||
BytesTrieBuilder &builder, UStringTrieBuildOption buildOption);
|
||||
void checkFirst(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNext(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextWithState(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextString(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(ByteTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(BytesTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
};
|
||||
|
||||
extern IntlTest *createByteTrieTest() {
|
||||
return new ByteTrieTest();
|
||||
extern IntlTest *createBytesTrieTest() {
|
||||
return new BytesTrieTest();
|
||||
}
|
||||
|
||||
ByteTrieTest::~ByteTrieTest() {
|
||||
BytesTrieTest::~BytesTrieTest() {
|
||||
}
|
||||
|
||||
void ByteTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
|
||||
void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
|
||||
if(exec) {
|
||||
logln("TestSuite ByteTrieTest: ");
|
||||
logln("TestSuite BytesTrieTest: ");
|
||||
}
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(TestBuilder);
|
||||
|
@ -98,36 +98,36 @@ void ByteTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
|
|||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestBuilder() {
|
||||
void BytesTrieTest::TestBuilder() {
|
||||
IcuTestErrorCode errorCode(*this, "TestBuilder()");
|
||||
ByteTrieBuilder builder;
|
||||
builder.build(UDICTTRIE_BUILD_FAST, errorCode);
|
||||
BytesTrieBuilder builder;
|
||||
builder.build(USTRINGTRIE_BUILD_FAST, errorCode);
|
||||
if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
|
||||
errln("ByteTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
|
||||
errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
|
||||
return;
|
||||
}
|
||||
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(UDICTTRIE_BUILD_FAST, errorCode);
|
||||
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
|
||||
if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
errln("ByteTrieBuilder.build() did not detect duplicates");
|
||||
errln("BytesTrieBuilder.build() did not detect duplicates");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestEmpty() {
|
||||
void BytesTrieTest::TestEmpty() {
|
||||
static const StringAndValue data[]={
|
||||
{ "", 0 }
|
||||
};
|
||||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void ByteTrieTest::Test_a() {
|
||||
void BytesTrieTest::Test_a() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", 1 }
|
||||
};
|
||||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void ByteTrieTest::Test_a_ab() {
|
||||
void BytesTrieTest::Test_a_ab() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", 1 },
|
||||
{ "ab", 100 }
|
||||
|
@ -135,7 +135,7 @@ void ByteTrieTest::Test_a_ab() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestShortestBranch() {
|
||||
void BytesTrieTest::TestShortestBranch() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", 1000 },
|
||||
{ "b", 2000 }
|
||||
|
@ -143,7 +143,7 @@ void ByteTrieTest::TestShortestBranch() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestBranches() {
|
||||
void BytesTrieTest::TestBranches() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", 0x10 },
|
||||
{ "cc", 0x40 },
|
||||
|
@ -166,7 +166,7 @@ void ByteTrieTest::TestBranches() {
|
|||
}
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestLongSequence() {
|
||||
void BytesTrieTest::TestLongSequence() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", -1 },
|
||||
// sequence of linear-match nodes
|
||||
|
@ -182,7 +182,7 @@ void ByteTrieTest::TestLongSequence() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestLongBranch() {
|
||||
void BytesTrieTest::TestLongBranch() {
|
||||
// Split-branch and interesting compact-integer values.
|
||||
static const StringAndValue data[]={
|
||||
{ "a", -2 },
|
||||
|
@ -210,7 +210,7 @@ void ByteTrieTest::TestLongBranch() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestValuesForState() {
|
||||
void BytesTrieTest::TestValuesForState() {
|
||||
// Check that saveState() and resetToState() interact properly
|
||||
// with next() and current().
|
||||
static const StringAndValue data[]={
|
||||
|
@ -224,7 +224,7 @@ void ByteTrieTest::TestValuesForState() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestCompact() {
|
||||
void BytesTrieTest::TestCompact() {
|
||||
// Duplicate trailing strings and values provide opportunities for compacting.
|
||||
static const StringAndValue data[]={
|
||||
{ "+", 0 },
|
||||
|
@ -251,7 +251,7 @@ void ByteTrieTest::TestCompact() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
StringPiece ByteTrieTest::buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBuildOption buildOption) {
|
||||
StringPiece BytesTrieTest::buildMonthsTrie(BytesTrieBuilder &builder, UStringTrieBuildOption buildOption) {
|
||||
// All types of nodes leading to the same value,
|
||||
// for code coverage of recursive functions.
|
||||
// In particular, we need a lot of branches on some single level
|
||||
|
@ -291,13 +291,13 @@ StringPiece ByteTrieTest::buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBui
|
|||
return buildTrie(data, LENGTHOF(data), builder, buildOption);
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestHasUniqueValue() {
|
||||
ByteTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST);
|
||||
void BytesTrieTest::TestHasUniqueValue() {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST);
|
||||
if(sp.empty()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
ByteTrie trie(sp.data());
|
||||
BytesTrie trie(sp.data());
|
||||
int32_t uniqueValue;
|
||||
if(trie.hasUniqueValue(uniqueValue)) {
|
||||
errln("unique value at root");
|
||||
|
@ -314,7 +314,7 @@ void ByteTrieTest::TestHasUniqueValue() {
|
|||
if(trie.hasUniqueValue(uniqueValue)) {
|
||||
errln("unique value after \"ju\"");
|
||||
}
|
||||
if(trie.next('n')!=UDICTTRIE_HAS_VALUE || 6!=trie.getValue()) {
|
||||
if(trie.next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) {
|
||||
errln("not normal value 6 after \"jun\"");
|
||||
}
|
||||
// hasUniqueValue() after getValue()
|
||||
|
@ -329,13 +329,13 @@ void ByteTrieTest::TestHasUniqueValue() {
|
|||
}
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestGetNextBytes() {
|
||||
ByteTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL);
|
||||
void BytesTrieTest::TestGetNextBytes() {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL);
|
||||
if(sp.empty()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
ByteTrie trie(sp.data());
|
||||
BytesTrie trie(sp.data());
|
||||
char buffer[40];
|
||||
CheckedArrayByteSink sink(buffer, LENGTHOF(buffer));
|
||||
int32_t count=trie.getNextBytes(sink);
|
||||
|
@ -352,7 +352,7 @@ void ByteTrieTest::TestGetNextBytes() {
|
|||
errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
|
||||
}
|
||||
// getNextBytes() after getValue()
|
||||
trie.getValue(); // next() had returned UDICTTRIE_HAS_VALUE.
|
||||
trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
count=trie.getNextBytes(sink.Reset());
|
||||
if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
|
||||
|
@ -380,20 +380,20 @@ void ByteTrieTest::TestGetNextBytes() {
|
|||
}
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestIteratorFromBranch() {
|
||||
ByteTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST);
|
||||
void BytesTrieTest::TestIteratorFromBranch() {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST);
|
||||
if(sp.empty()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
ByteTrie trie(sp.data());
|
||||
BytesTrie trie(sp.data());
|
||||
// Go to a branch node.
|
||||
trie.next('j');
|
||||
trie.next('a');
|
||||
trie.next('n');
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
|
||||
ByteTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
|
||||
BytesTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
|
||||
|
@ -431,13 +431,13 @@ void ByteTrieTest::TestIteratorFromBranch() {
|
|||
checkIterator(iter.reset(), data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestIteratorFromLinearMatch() {
|
||||
ByteTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL);
|
||||
void BytesTrieTest::TestIteratorFromLinearMatch() {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL);
|
||||
if(sp.empty()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
ByteTrie trie(sp.data());
|
||||
BytesTrie trie(sp.data());
|
||||
// Go into a linear-match node.
|
||||
trie.next('j');
|
||||
trie.next('a');
|
||||
|
@ -445,8 +445,8 @@ void ByteTrieTest::TestIteratorFromLinearMatch() {
|
|||
trie.next('u');
|
||||
trie.next('a');
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
|
||||
ByteTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
|
||||
BytesTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
|
||||
|
@ -461,15 +461,15 @@ void ByteTrieTest::TestIteratorFromLinearMatch() {
|
|||
checkIterator(iter.reset(), data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestTruncatingIteratorFromRoot() {
|
||||
ByteTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST);
|
||||
void BytesTrieTest::TestTruncatingIteratorFromRoot() {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST);
|
||||
if(sp.empty()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
|
||||
ByteTrieIterator iter(sp.data(), 4, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
|
||||
BytesTrieIterator iter(sp.data(), 4, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
|
||||
|
@ -508,25 +508,25 @@ void ByteTrieTest::TestTruncatingIteratorFromRoot() {
|
|||
checkIterator(iter.reset(), data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
||||
void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
||||
static const StringAndValue data[]={
|
||||
{ "abcdef", 10 },
|
||||
{ "abcdepq", 200 },
|
||||
{ "abcdeyz", 3000 }
|
||||
};
|
||||
ByteTrieBuilder builder;
|
||||
StringPiece sp=buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST);
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST);
|
||||
if(sp.empty()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
ByteTrie trie(sp.data());
|
||||
BytesTrie trie(sp.data());
|
||||
// Go into a linear-match node.
|
||||
trie.next('a');
|
||||
trie.next('b');
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
|
||||
// Truncate within the linear-match node.
|
||||
ByteTrieIterator iter(trie, 2, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
|
||||
BytesTrieIterator iter(trie, 2, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
static const StringAndValue expected[]={
|
||||
|
@ -538,26 +538,26 @@ void ByteTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
|||
checkIterator(iter.reset(), expected, LENGTHOF(expected));
|
||||
}
|
||||
|
||||
void ByteTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
||||
void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
||||
static const StringAndValue data[]={
|
||||
{ "abcdef", 10 },
|
||||
{ "abcdepq", 200 },
|
||||
{ "abcdeyz", 3000 }
|
||||
};
|
||||
ByteTrieBuilder builder;
|
||||
StringPiece sp=buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST);
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST);
|
||||
if(sp.empty()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
ByteTrie trie(sp.data());
|
||||
BytesTrie trie(sp.data());
|
||||
// Go into a linear-match node.
|
||||
trie.next('a');
|
||||
trie.next('b');
|
||||
trie.next('c');
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
|
||||
// Truncate after the linear-match node.
|
||||
ByteTrieIterator iter(trie, 3, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
|
||||
BytesTrieIterator iter(trie, 3, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
static const StringAndValue expected[]={
|
||||
|
@ -571,15 +571,15 @@ void ByteTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
|||
checkIterator(iter.reset(), expected, LENGTHOF(expected));
|
||||
}
|
||||
|
||||
void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
|
||||
void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
|
||||
logln("checkData(dataLength=%d, fast)", (int)dataLength);
|
||||
checkData(data, dataLength, UDICTTRIE_BUILD_FAST);
|
||||
checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
|
||||
logln("checkData(dataLength=%d, small)", (int)dataLength);
|
||||
checkData(data, dataLength, UDICTTRIE_BUILD_SMALL);
|
||||
checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
|
||||
}
|
||||
|
||||
void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption) {
|
||||
ByteTrieBuilder builder;
|
||||
void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildTrie(data, dataLength, builder, buildOption);
|
||||
if(sp.empty()) {
|
||||
return; // buildTrie() reported an error
|
||||
|
@ -591,8 +591,8 @@ void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UD
|
|||
checkIterator(sp, data, dataLength);
|
||||
}
|
||||
|
||||
StringPiece ByteTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
ByteTrieBuilder &builder, UDictTrieBuildOption buildOption) {
|
||||
StringPiece BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
BytesTrieBuilder &builder, UStringTrieBuildOption buildOption) {
|
||||
IcuTestErrorCode errorCode(*this, "buildTrie()");
|
||||
// Add the items to the trie builder in an interesting (not trivial, not random) order.
|
||||
int32_t index, step;
|
||||
|
@ -624,20 +624,20 @@ StringPiece ByteTrieTest::buildTrie(const StringAndValue data[], int32_t dataLen
|
|||
return sp;
|
||||
}
|
||||
|
||||
void ByteTrieTest::checkFirst(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
ByteTrie trie(trieBytes.data());
|
||||
void BytesTrieTest::checkFirst(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
BytesTrie trie(trieBytes.data());
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
int c=(uint8_t)*data[i].s;
|
||||
if(c==0) {
|
||||
continue; // skip empty string
|
||||
}
|
||||
UDictTrieResult firstResult=trie.first(c);
|
||||
int32_t firstValue=UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1;
|
||||
UDictTrieResult nextResult=trie.next((uint8_t)data[i].s[1]);
|
||||
UStringTrieResult firstResult=trie.first(c);
|
||||
int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
|
||||
UStringTrieResult nextResult=trie.next((uint8_t)data[i].s[1]);
|
||||
if(firstResult!=trie.reset().next(c) ||
|
||||
firstResult!=trie.current() ||
|
||||
firstValue!=(UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
|
||||
firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
|
||||
nextResult!=trie.next((uint8_t)data[i].s[1])
|
||||
) {
|
||||
errln("trie.first(%c)!=trie.reset().next(same) for %s",
|
||||
|
@ -646,14 +646,14 @@ void ByteTrieTest::checkFirst(const StringPiece &trieBytes,
|
|||
}
|
||||
}
|
||||
|
||||
void ByteTrieTest::checkNext(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
ByteTrie trie(trieBytes.data());
|
||||
ByteTrie::State state;
|
||||
void BytesTrieTest::checkNext(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
BytesTrie trie(trieBytes.data());
|
||||
BytesTrie::State state;
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
|
||||
UDictTrieResult result;
|
||||
if( !UDICTTRIE_RESULT_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
|
||||
UStringTrieResult result;
|
||||
if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
|
||||
result!=trie.current()
|
||||
) {
|
||||
errln("trie does not seem to contain %s", data[i].s);
|
||||
|
@ -669,20 +669,20 @@ void ByteTrieTest::checkNext(const StringPiece &trieBytes,
|
|||
stringLength=strlen(data[i].s);
|
||||
result=trie.current();
|
||||
for(int32_t j=0; j<stringLength; ++j) {
|
||||
if(!UDICTTRIE_RESULT_HAS_NEXT(result)) {
|
||||
if(!USTRINGTRIE_HAS_NEXT(result)) {
|
||||
errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
|
||||
break;
|
||||
}
|
||||
if(result==UDICTTRIE_HAS_VALUE) {
|
||||
if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
|
||||
trie.getValue();
|
||||
if(trie.current()!=UDICTTRIE_HAS_VALUE) {
|
||||
errln("trie.getValue().current()!=UDICTTRIE_HAS_VALUE before end of %s (at index %d)", data[i].s, j);
|
||||
if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
|
||||
errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
result=trie.next(data[i].s[j]);
|
||||
if(!UDICTTRIE_RESULT_MATCHES(result)) {
|
||||
errln("trie.next()=UDICTTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
|
||||
if(!USTRINGTRIE_MATCHES(result)) {
|
||||
errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
|
||||
break;
|
||||
}
|
||||
if(result!=trie.current()) {
|
||||
|
@ -690,7 +690,7 @@ void ByteTrieTest::checkNext(const StringPiece &trieBytes,
|
|||
break;
|
||||
}
|
||||
}
|
||||
if(!UDICTTRIE_RESULT_HAS_VALUE(result)) {
|
||||
if(!USTRINGTRIE_HAS_VALUE(result)) {
|
||||
errln("trie.next()!=hasValue at the end of %s", data[i].s);
|
||||
continue;
|
||||
}
|
||||
|
@ -708,18 +708,18 @@ void ByteTrieTest::checkNext(const StringPiece &trieBytes,
|
|||
break;
|
||||
}
|
||||
}
|
||||
if((result==UDICTTRIE_HAS_VALUE)!=nextContinues) {
|
||||
errln("(trie.current()==UDICTTRIE_HAS_VALUE) contradicts "
|
||||
"(trie.next(some UChar)!=UDICTTRIE_NO_MATCH) after end of %s", data[i].s);
|
||||
if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
|
||||
errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
|
||||
"(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
|
||||
}
|
||||
trie.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void ByteTrieTest::checkNextWithState(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
ByteTrie trie(trieBytes.data());
|
||||
ByteTrie::State noState, state;
|
||||
void BytesTrieTest::checkNextWithState(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
BytesTrie trie(trieBytes.data());
|
||||
BytesTrie::State noState, state;
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
if((i&1)==0) {
|
||||
// This should have no effect.
|
||||
|
@ -729,36 +729,36 @@ void ByteTrieTest::checkNextWithState(const StringPiece &trieBytes,
|
|||
int32_t stringLength=strlen(expectedString);
|
||||
int32_t partialLength=stringLength/3;
|
||||
for(int32_t j=0; j<partialLength; ++j) {
|
||||
if(!UDICTTRIE_RESULT_MATCHES(trie.next(expectedString[j]))) {
|
||||
errln("trie.next()=UDICTTRIE_NO_MATCH for a prefix of %s", data[i].s);
|
||||
if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
|
||||
errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
|
||||
return;
|
||||
}
|
||||
}
|
||||
trie.saveState(state);
|
||||
UDictTrieResult resultAtState=trie.current();
|
||||
UDictTrieResult result;
|
||||
UStringTrieResult resultAtState=trie.current();
|
||||
UStringTrieResult result;
|
||||
int32_t valueAtState=-99;
|
||||
if(UDICTTRIE_RESULT_HAS_VALUE(resultAtState)) {
|
||||
if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
|
||||
valueAtState=trie.getValue();
|
||||
}
|
||||
result=trie.next(0); // mismatch
|
||||
if(result!=UDICTTRIE_NO_MATCH || result!=trie.current()) {
|
||||
if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
|
||||
errln("trie.next(0) matched after part of %s", data[i].s);
|
||||
}
|
||||
if( resultAtState!=trie.resetToState(state).current() ||
|
||||
(UDICTTRIE_RESULT_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
|
||||
(USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
|
||||
) {
|
||||
errln("trie.next(part of %s) changes current()/getValue() after "
|
||||
"saveState/next(0)/resetToState",
|
||||
data[i].s);
|
||||
} else if(!UDICTTRIE_RESULT_HAS_VALUE(
|
||||
} else if(!USTRINGTRIE_HAS_VALUE(
|
||||
result=trie.next(expectedString+partialLength,
|
||||
stringLength-partialLength)) ||
|
||||
result!=trie.current()) {
|
||||
errln("trie.next(rest of %s) does not seem to contain %s after "
|
||||
"saveState/next(0)/resetToState",
|
||||
data[i].s);
|
||||
} else if(!UDICTTRIE_RESULT_HAS_VALUE(
|
||||
} else if(!USTRINGTRIE_HAS_VALUE(
|
||||
result=trie.resetToState(state).
|
||||
next(expectedString+partialLength,
|
||||
stringLength-partialLength)) ||
|
||||
|
@ -777,36 +777,36 @@ void ByteTrieTest::checkNextWithState(const StringPiece &trieBytes,
|
|||
|
||||
// next(string) is also tested in other functions,
|
||||
// but here we try to go partway through the string, and then beyond it.
|
||||
void ByteTrieTest::checkNextString(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
ByteTrie trie(trieBytes.data());
|
||||
void BytesTrieTest::checkNextString(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
BytesTrie trie(trieBytes.data());
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
const char *expectedString=data[i].s;
|
||||
int32_t stringLength=strlen(expectedString);
|
||||
if(!trie.next(expectedString, stringLength/2)) {
|
||||
errln("trie.next(up to middle of string)=UDICTTRIE_NO_MATCH for %s", data[i].s);
|
||||
errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
|
||||
continue;
|
||||
}
|
||||
// Test that we stop properly at the end of the string.
|
||||
if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
|
||||
errln("trie.next(string+NUL)!=UDICTTRIE_NO_MATCH for %s", data[i].s);
|
||||
errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
|
||||
}
|
||||
trie.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void ByteTrieTest::checkIterator(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
void BytesTrieTest::checkIterator(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
||||
ByteTrieIterator iter(trieBytes.data(), 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trieBytes) constructor")) {
|
||||
BytesTrieIterator iter(trieBytes.data(), 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trieBytes) constructor")) {
|
||||
return;
|
||||
}
|
||||
checkIterator(iter, data, dataLength);
|
||||
}
|
||||
|
||||
void ByteTrieTest::checkIterator(ByteTrieIterator &iter,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
void BytesTrieTest::checkIterator(BytesTrieIterator &iter,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
if(!iter.hasNext()) {
|
|
@ -223,8 +223,8 @@
|
|||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="bytetrietest.cpp" />
|
||||
<ClCompile Include="uchartrietest.cpp" />
|
||||
<ClCompile Include="bytestrietest.cpp" />
|
||||
<ClCompile Include="ucharstrietest.cpp" />
|
||||
<ClCompile Include="itrbbi.cpp" />
|
||||
<ClCompile Include="rbbiapts.cpp" />
|
||||
<ClCompile Include="rbbitst.cpp" />
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -29,9 +29,9 @@
|
|||
#include "aliastst.h"
|
||||
#include "usettest.h"
|
||||
|
||||
extern IntlTest *createByteTrieTest();
|
||||
extern IntlTest *createBytesTrieTest();
|
||||
static IntlTest *createLocalPointerTest();
|
||||
extern IntlTest *createUCharTrieTest();
|
||||
extern IntlTest *createUCharsTrieTest();
|
||||
|
||||
#define CASE(id, test) case id: \
|
||||
name = #test; \
|
||||
|
@ -73,16 +73,16 @@ void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &
|
|||
case 17:
|
||||
name = "ByteTrieTest";
|
||||
if (exec) {
|
||||
logln("TestSuite ByteTrieTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createByteTrieTest());
|
||||
logln("TestSuite BytesTrieTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createBytesTrieTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 18:
|
||||
name = "UCharTrieTest";
|
||||
if (exec) {
|
||||
logln("TestSuite UCharTrieTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createUCharTrieTest());
|
||||
logln("TestSuite UCharsTrieTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createUCharsTrieTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: uchartrietest.cpp
|
||||
* file name: ucharstrietest.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
|
@ -16,9 +16,9 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "uchartrie.h"
|
||||
#include "uchartriebuilder.h"
|
||||
#include "uchartrieiterator.h"
|
||||
#include "ucharstrie.h"
|
||||
#include "ucharstriebuilder.h"
|
||||
#include "ucharstrieiterator.h"
|
||||
#include "intltest.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
@ -28,10 +28,10 @@ struct StringAndValue {
|
|||
int32_t value;
|
||||
};
|
||||
|
||||
class UCharTrieTest : public IntlTest {
|
||||
class UCharsTrieTest : public IntlTest {
|
||||
public:
|
||||
UCharTrieTest() {}
|
||||
virtual ~UCharTrieTest();
|
||||
UCharsTrieTest() {}
|
||||
virtual ~UCharsTrieTest();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
|
||||
void TestBuilder();
|
||||
|
@ -47,10 +47,10 @@ public:
|
|||
void TestNextForCodePoint();
|
||||
void TestFirstForCodePoint();
|
||||
|
||||
UBool buildLargeTrie(UCharTrieBuilder &builder, UnicodeString &result, int32_t numUniqueFirst);
|
||||
UBool buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result, int32_t numUniqueFirst);
|
||||
void TestLargeTrie();
|
||||
|
||||
UBool buildMonthsTrie(UCharTrieBuilder &builder, UDictTrieBuildOption buildOption,
|
||||
UBool buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption,
|
||||
UnicodeString &result);
|
||||
void TestHasUniqueValue();
|
||||
void TestGetNextUChars();
|
||||
|
@ -61,27 +61,27 @@ public:
|
|||
void TestTruncatingIteratorFromLinearMatchLong();
|
||||
|
||||
void checkData(const StringAndValue data[], int32_t dataLength);
|
||||
void checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption);
|
||||
void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
|
||||
UBool buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
UCharTrieBuilder &builder, UDictTrieBuildOption buildOption, UnicodeString &result);
|
||||
UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result);
|
||||
void checkFirst(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNext(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextWithState(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextString(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(UCharTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(UCharsTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
};
|
||||
|
||||
extern IntlTest *createUCharTrieTest() {
|
||||
return new UCharTrieTest();
|
||||
extern IntlTest *createUCharsTrieTest() {
|
||||
return new UCharsTrieTest();
|
||||
}
|
||||
|
||||
UCharTrieTest::~UCharTrieTest() {
|
||||
UCharsTrieTest::~UCharsTrieTest() {
|
||||
}
|
||||
|
||||
void UCharTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
|
||||
void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
|
||||
if(exec) {
|
||||
logln("TestSuite UCharTrieTest: ");
|
||||
logln("TestSuite UCharsTrieTest: ");
|
||||
}
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(TestBuilder);
|
||||
|
@ -107,37 +107,37 @@ void UCharTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
|
|||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestBuilder() {
|
||||
void UCharsTrieTest::TestBuilder() {
|
||||
IcuTestErrorCode errorCode(*this, "TestBuilder()");
|
||||
UCharTrieBuilder builder;
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
builder.build(UDICTTRIE_BUILD_FAST, trieUChars, errorCode);
|
||||
builder.build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
|
||||
if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
|
||||
errln("UCharTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
|
||||
errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
|
||||
return;
|
||||
}
|
||||
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(UDICTTRIE_BUILD_FAST, trieUChars, errorCode);
|
||||
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
|
||||
if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
errln("UCharTrieBuilder.build() did not detect duplicates");
|
||||
errln("UCharsTrieBuilder.build() did not detect duplicates");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestEmpty() {
|
||||
void UCharsTrieTest::TestEmpty() {
|
||||
static const StringAndValue data[]={
|
||||
{ "", 0 }
|
||||
};
|
||||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::Test_a() {
|
||||
void UCharsTrieTest::Test_a() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", 1 }
|
||||
};
|
||||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::Test_a_ab() {
|
||||
void UCharsTrieTest::Test_a_ab() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", 1 },
|
||||
{ "ab", 100 }
|
||||
|
@ -145,7 +145,7 @@ void UCharTrieTest::Test_a_ab() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestShortestBranch() {
|
||||
void UCharsTrieTest::TestShortestBranch() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", 1000 },
|
||||
{ "b", 2000 }
|
||||
|
@ -153,7 +153,7 @@ void UCharTrieTest::TestShortestBranch() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestBranches() {
|
||||
void UCharsTrieTest::TestBranches() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", 0x10 },
|
||||
{ "cc", 0x40 },
|
||||
|
@ -176,7 +176,7 @@ void UCharTrieTest::TestBranches() {
|
|||
}
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestLongSequence() {
|
||||
void UCharsTrieTest::TestLongSequence() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", -1 },
|
||||
// sequence of linear-match nodes
|
||||
|
@ -192,7 +192,7 @@ void UCharTrieTest::TestLongSequence() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestLongBranch() {
|
||||
void UCharsTrieTest::TestLongBranch() {
|
||||
// Split-branch and interesting compact-integer values.
|
||||
static const StringAndValue data[]={
|
||||
{ "a", -2 },
|
||||
|
@ -220,7 +220,7 @@ void UCharTrieTest::TestLongBranch() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestValuesForState() {
|
||||
void UCharsTrieTest::TestValuesForState() {
|
||||
// Check that saveState() and resetToState() interact properly
|
||||
// with next() and current().
|
||||
static const StringAndValue data[]={
|
||||
|
@ -234,7 +234,7 @@ void UCharTrieTest::TestValuesForState() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestCompact() {
|
||||
void UCharsTrieTest::TestCompact() {
|
||||
// Duplicate trailing strings and values provide opportunities for compacting.
|
||||
static const StringAndValue data[]={
|
||||
{ "+", 0 },
|
||||
|
@ -261,7 +261,7 @@ void UCharTrieTest::TestCompact() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestFirstForCodePoint() {
|
||||
void UCharsTrieTest::TestFirstForCodePoint() {
|
||||
static const StringAndValue data[]={
|
||||
{ "a", 1 },
|
||||
{ "a\\uD800", 2 },
|
||||
|
@ -276,49 +276,49 @@ void UCharTrieTest::TestFirstForCodePoint() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestNextForCodePoint() {
|
||||
void UCharsTrieTest::TestNextForCodePoint() {
|
||||
static const StringAndValue data[]={
|
||||
{ "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
|
||||
{ "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
|
||||
{ "\\u4dff\\U000103ff", 99999 }
|
||||
};
|
||||
UCharTrieBuilder builder;
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
|
||||
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
UDictTrieResult result;
|
||||
if( (result=trie.nextForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10000))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x9999))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x20000))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0xdfff))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10ffff))!=UDICTTRIE_HAS_FINAL_VALUE || result!=trie.current() ||
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
UStringTrieResult result;
|
||||
if( (result=trie.nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
|
||||
trie.getValue()!=2000000000
|
||||
) {
|
||||
errln("UCharTrie.nextForCodePoint() fails for %s", data[0].s);
|
||||
errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
|
||||
}
|
||||
if( (result=trie.firstForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10000))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x9999))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x20002))!=UDICTTRIE_HAS_FINAL_VALUE || result!=trie.current() ||
|
||||
if( (result=trie.firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
|
||||
trie.getValue()!=44444
|
||||
) {
|
||||
errln("UCharTrie.nextForCodePoint() fails for %s", data[1].s);
|
||||
errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
|
||||
}
|
||||
if( (result=trie.reset().nextForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10000))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x9999))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x20222))!=UDICTTRIE_NO_MATCH || result!=trie.current() // no match for trail surrogate
|
||||
if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie.current() // no match for trail surrogate
|
||||
) {
|
||||
errln("UCharTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
|
||||
errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
|
||||
}
|
||||
if( (result=trie.reset().nextForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x103ff))!=UDICTTRIE_HAS_FINAL_VALUE || result!=trie.current() ||
|
||||
if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
|
||||
trie.getValue()!=99999
|
||||
) {
|
||||
errln("UCharTrie.nextForCodePoint() fails for %s", data[2].s);
|
||||
errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -356,8 +356,8 @@ private:
|
|||
|
||||
} // end namespace
|
||||
|
||||
UBool UCharTrieTest::buildLargeTrie(UCharTrieBuilder &builder, UnicodeString &result,
|
||||
int32_t numUniqueFirst) {
|
||||
UBool UCharsTrieTest::buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result,
|
||||
int32_t numUniqueFirst) {
|
||||
IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
|
||||
Generator gen;
|
||||
builder.clear();
|
||||
|
@ -366,33 +366,33 @@ UBool UCharTrieTest::buildLargeTrie(UCharTrieBuilder &builder, UnicodeString &re
|
|||
gen.next();
|
||||
}
|
||||
infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
|
||||
builder.build(UDICTTRIE_BUILD_FAST, result, errorCode);
|
||||
builder.build(USTRINGTRIE_BUILD_FAST, result, errorCode);
|
||||
logln("serialized trie size: %ld UChars\n", (long)result.length());
|
||||
return errorCode.isSuccess();
|
||||
}
|
||||
|
||||
// Exercise a large branch node.
|
||||
void UCharTrieTest::TestLargeTrie() {
|
||||
UCharTrieBuilder builder;
|
||||
void UCharsTrieTest::TestLargeTrie() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildLargeTrie(builder, trieUChars, 1111)) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
Generator gen;
|
||||
while(gen.countUniqueFirstChars()<1111) {
|
||||
UnicodeString x(gen.getString());
|
||||
int32_t value=gen.getValue();
|
||||
if(!x.isEmpty()) {
|
||||
if(trie.first(x[0])==UDICTTRIE_NO_MATCH) {
|
||||
errln("next(first char U+%04X)=UDICTTRIE_NO_MATCH for string %ld\n",
|
||||
if(trie.first(x[0])==USTRINGTRIE_NO_MATCH) {
|
||||
errln("next(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
|
||||
x[0], (long)gen.getIndex());
|
||||
break;
|
||||
}
|
||||
x.remove(0, 1);
|
||||
}
|
||||
UDictTrieResult result=trie.next(x.getBuffer(), x.length());
|
||||
if(!UDICTTRIE_RESULT_HAS_VALUE(result) || result!=trie.current() || value!=trie.getValue()) {
|
||||
UStringTrieResult result=trie.next(x.getBuffer(), x.length());
|
||||
if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie.current() || value!=trie.getValue()) {
|
||||
errln("next(%d chars U+%04X U+%04X)!=hasValue or "
|
||||
"next()!=current() or getValue() wrong "
|
||||
"for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
|
||||
|
@ -413,8 +413,8 @@ enum {
|
|||
u_y=0x79
|
||||
};
|
||||
|
||||
UBool UCharTrieTest::buildMonthsTrie(UCharTrieBuilder &builder, UDictTrieBuildOption buildOption,
|
||||
UnicodeString &result) {
|
||||
UBool UCharsTrieTest::buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption,
|
||||
UnicodeString &result) {
|
||||
// All types of nodes leading to the same value,
|
||||
// for code coverage of recursive functions.
|
||||
// In particular, we need a lot of branches on some single level
|
||||
|
@ -454,13 +454,13 @@ UBool UCharTrieTest::buildMonthsTrie(UCharTrieBuilder &builder, UDictTrieBuildOp
|
|||
return buildTrie(data, LENGTHOF(data), builder, buildOption, result);
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestHasUniqueValue() {
|
||||
UCharTrieBuilder builder;
|
||||
void UCharsTrieTest::TestHasUniqueValue() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
|
||||
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
int32_t uniqueValue;
|
||||
if(trie.hasUniqueValue(uniqueValue)) {
|
||||
errln("unique value at root");
|
||||
|
@ -477,7 +477,7 @@ void UCharTrieTest::TestHasUniqueValue() {
|
|||
if(trie.hasUniqueValue(uniqueValue)) {
|
||||
errln("unique value after \"ju\"");
|
||||
}
|
||||
if(trie.next(u_n)!=UDICTTRIE_HAS_VALUE || 6!=trie.getValue()) {
|
||||
if(trie.next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) {
|
||||
errln("not normal value 6 after \"jun\"");
|
||||
}
|
||||
// hasUniqueValue() after getValue()
|
||||
|
@ -501,13 +501,13 @@ private:
|
|||
UnicodeString &str;
|
||||
};
|
||||
|
||||
void UCharTrieTest::TestGetNextUChars() {
|
||||
UCharTrieBuilder builder;
|
||||
void UCharsTrieTest::TestGetNextUChars() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL, trieUChars)) {
|
||||
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
UnicodeString buffer;
|
||||
UnicodeStringAppendable app(buffer);
|
||||
int32_t count=trie.getNextUChars(app);
|
||||
|
@ -523,7 +523,7 @@ void UCharTrieTest::TestGetNextUChars() {
|
|||
errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
|
||||
}
|
||||
// getNextUChars() after getValue()
|
||||
trie.getValue(); // next() had returned UDICTTRIE_HAS_VALUE.
|
||||
trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
|
||||
count=trie.getNextUChars(app.reset());
|
||||
if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
|
||||
errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
|
||||
|
@ -548,20 +548,20 @@ void UCharTrieTest::TestGetNextUChars() {
|
|||
}
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestIteratorFromBranch() {
|
||||
UCharTrieBuilder builder;
|
||||
void UCharsTrieTest::TestIteratorFromBranch() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
|
||||
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
// Go to a branch node.
|
||||
trie.next(u_j);
|
||||
trie.next(u_a);
|
||||
trie.next(u_n);
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
|
||||
UCharTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) {
|
||||
UCharsTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
|
||||
|
@ -599,13 +599,13 @@ void UCharTrieTest::TestIteratorFromBranch() {
|
|||
checkIterator(iter.reset(), data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestIteratorFromLinearMatch() {
|
||||
UCharTrieBuilder builder;
|
||||
void UCharsTrieTest::TestIteratorFromLinearMatch() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL, trieUChars)) {
|
||||
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
// Go into a linear-match node.
|
||||
trie.next(u_j);
|
||||
trie.next(u_a);
|
||||
|
@ -613,8 +613,8 @@ void UCharTrieTest::TestIteratorFromLinearMatch() {
|
|||
trie.next(u_u);
|
||||
trie.next(u_a);
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
|
||||
UCharTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) {
|
||||
UCharsTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
|
||||
|
@ -629,15 +629,15 @@ void UCharTrieTest::TestIteratorFromLinearMatch() {
|
|||
checkIterator(iter.reset(), data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestTruncatingIteratorFromRoot() {
|
||||
UCharTrieBuilder builder;
|
||||
void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
|
||||
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
|
||||
UCharTrieIterator iter(trieUChars.getBuffer(), 4, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) {
|
||||
UCharsTrieIterator iter(trieUChars.getBuffer(), 4, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
|
||||
|
@ -676,25 +676,25 @@ void UCharTrieTest::TestTruncatingIteratorFromRoot() {
|
|||
checkIterator(iter.reset(), data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
||||
void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
||||
static const StringAndValue data[]={
|
||||
{ "abcdef", 10 },
|
||||
{ "abcdepq", 200 },
|
||||
{ "abcdeyz", 3000 }
|
||||
};
|
||||
UCharTrieBuilder builder;
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
|
||||
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
// Go into a linear-match node.
|
||||
trie.next(u_a);
|
||||
trie.next(u_b);
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
|
||||
// Truncate within the linear-match node.
|
||||
UCharTrieIterator iter(trie, 2, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) {
|
||||
UCharsTrieIterator iter(trie, 2, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
static const StringAndValue expected[]={
|
||||
|
@ -706,26 +706,26 @@ void UCharTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
|||
checkIterator(iter.reset(), expected, LENGTHOF(expected));
|
||||
}
|
||||
|
||||
void UCharTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
||||
void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
||||
static const StringAndValue data[]={
|
||||
{ "abcdef", 10 },
|
||||
{ "abcdepq", 200 },
|
||||
{ "abcdeyz", 3000 }
|
||||
};
|
||||
UCharTrieBuilder builder;
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
|
||||
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
// Go into a linear-match node.
|
||||
trie.next(u_a);
|
||||
trie.next(u_b);
|
||||
trie.next(u_c);
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
|
||||
// Truncate after the linear-match node.
|
||||
UCharTrieIterator iter(trie, 3, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) {
|
||||
UCharsTrieIterator iter(trie, 3, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
static const StringAndValue expected[]={
|
||||
|
@ -739,15 +739,15 @@ void UCharTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
|||
checkIterator(iter.reset(), expected, LENGTHOF(expected));
|
||||
}
|
||||
|
||||
void UCharTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
|
||||
void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
|
||||
logln("checkData(dataLength=%d, fast)", (int)dataLength);
|
||||
checkData(data, dataLength, UDICTTRIE_BUILD_FAST);
|
||||
checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
|
||||
logln("checkData(dataLength=%d, small)", (int)dataLength);
|
||||
checkData(data, dataLength, UDICTTRIE_BUILD_SMALL);
|
||||
checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
|
||||
}
|
||||
|
||||
void UCharTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption) {
|
||||
UCharTrieBuilder builder;
|
||||
void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildTrie(data, dataLength, builder, buildOption, trieUChars)) {
|
||||
return; // buildTrie() reported an error
|
||||
|
@ -759,8 +759,8 @@ void UCharTrieTest::checkData(const StringAndValue data[], int32_t dataLength, U
|
|||
checkIterator(trieUChars, data, dataLength);
|
||||
}
|
||||
|
||||
UBool UCharTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
UCharTrieBuilder &builder, UDictTrieBuildOption buildOption, UnicodeString &result) {
|
||||
UBool UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result) {
|
||||
IcuTestErrorCode errorCode(*this, "buildTrie()");
|
||||
// Add the items to the trie builder in an interesting (not trivial, not random) order.
|
||||
int32_t index, step;
|
||||
|
@ -793,9 +793,9 @@ UBool UCharTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
|
|||
return errorCode.isSuccess();
|
||||
}
|
||||
|
||||
void UCharTrieTest::checkFirst(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
void UCharsTrieTest::checkFirst(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
if(*data[i].s==0) {
|
||||
continue; // skip empty string
|
||||
|
@ -803,12 +803,12 @@ void UCharTrieTest::checkFirst(const UnicodeString &trieUChars,
|
|||
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
|
||||
UChar32 c=expectedString[0];
|
||||
UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
|
||||
UDictTrieResult firstResult=trie.first(c);
|
||||
int32_t firstValue=UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1;
|
||||
UDictTrieResult nextResult=trie.next(nextCp);
|
||||
UStringTrieResult firstResult=trie.first(c);
|
||||
int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
|
||||
UStringTrieResult nextResult=trie.next(nextCp);
|
||||
if(firstResult!=trie.reset().next(c) ||
|
||||
firstResult!=trie.current() ||
|
||||
firstValue!=(UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
|
||||
firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
|
||||
nextResult!=trie.next(nextCp)
|
||||
) {
|
||||
errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
|
||||
|
@ -818,11 +818,11 @@ void UCharTrieTest::checkFirst(const UnicodeString &trieUChars,
|
|||
int32_t cLength=U16_LENGTH(c);
|
||||
nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
|
||||
firstResult=trie.firstForCodePoint(c);
|
||||
firstValue=UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1;
|
||||
firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
|
||||
nextResult=trie.nextForCodePoint(nextCp);
|
||||
if(firstResult!=trie.reset().nextForCodePoint(c) ||
|
||||
firstResult!=trie.current() ||
|
||||
firstValue!=(UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
|
||||
firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
|
||||
nextResult!=trie.nextForCodePoint(nextCp)
|
||||
) {
|
||||
errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
|
||||
|
@ -831,15 +831,15 @@ void UCharTrieTest::checkFirst(const UnicodeString &trieUChars,
|
|||
}
|
||||
}
|
||||
|
||||
void UCharTrieTest::checkNext(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
UCharTrie::State state;
|
||||
void UCharsTrieTest::checkNext(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie::State state;
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
|
||||
int32_t stringLength= (i&1) ? -1 : expectedString.length();
|
||||
UDictTrieResult result;
|
||||
if( !UDICTTRIE_RESULT_HAS_VALUE(
|
||||
UStringTrieResult result;
|
||||
if( !USTRINGTRIE_HAS_VALUE(
|
||||
result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
|
||||
result!=trie.current()
|
||||
) {
|
||||
|
@ -856,20 +856,20 @@ void UCharTrieTest::checkNext(const UnicodeString &trieUChars,
|
|||
stringLength=expectedString.length();
|
||||
result=trie.current();
|
||||
for(int32_t j=0; j<stringLength; ++j) {
|
||||
if(!UDICTTRIE_RESULT_HAS_NEXT(result)) {
|
||||
if(!USTRINGTRIE_HAS_NEXT(result)) {
|
||||
errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
|
||||
break;
|
||||
}
|
||||
if(result==UDICTTRIE_HAS_VALUE) {
|
||||
if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
|
||||
trie.getValue();
|
||||
if(trie.current()!=UDICTTRIE_HAS_VALUE) {
|
||||
errln("trie.getValue().current()!=UDICTTRIE_HAS_VALUE before end of %s (at index %d)", data[i].s, j);
|
||||
if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
|
||||
errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
result=trie.next(expectedString[j]);
|
||||
if(!UDICTTRIE_RESULT_MATCHES(result)) {
|
||||
errln("trie.next()=UDICTTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
|
||||
if(!USTRINGTRIE_MATCHES(result)) {
|
||||
errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
|
||||
break;
|
||||
}
|
||||
if(result!=trie.current()) {
|
||||
|
@ -877,7 +877,7 @@ void UCharTrieTest::checkNext(const UnicodeString &trieUChars,
|
|||
break;
|
||||
}
|
||||
}
|
||||
if(!UDICTTRIE_RESULT_HAS_VALUE(result)) {
|
||||
if(!USTRINGTRIE_HAS_VALUE(result)) {
|
||||
errln("trie.next()!=hasValue at the end of %s", data[i].s);
|
||||
continue;
|
||||
}
|
||||
|
@ -898,18 +898,18 @@ void UCharTrieTest::checkNext(const UnicodeString &trieUChars,
|
|||
break;
|
||||
}
|
||||
}
|
||||
if((result==UDICTTRIE_HAS_VALUE)!=nextContinues) {
|
||||
errln("(trie.current()==UDICTTRIE_HAS_VALUE) contradicts "
|
||||
"(trie.next(some UChar)!=UDICTTRIE_NO_MATCH) after end of %s", data[i].s);
|
||||
if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
|
||||
errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
|
||||
"(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
|
||||
}
|
||||
trie.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void UCharTrieTest::checkNextWithState(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
UCharTrie::State noState, state;
|
||||
void UCharsTrieTest::checkNextWithState(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie::State noState, state;
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
if((i&1)==0) {
|
||||
// This should have no effect.
|
||||
|
@ -919,36 +919,36 @@ void UCharTrieTest::checkNextWithState(const UnicodeString &trieUChars,
|
|||
int32_t stringLength=expectedString.length();
|
||||
int32_t partialLength=stringLength/3;
|
||||
for(int32_t j=0; j<partialLength; ++j) {
|
||||
if(!UDICTTRIE_RESULT_MATCHES(trie.next(expectedString[j]))) {
|
||||
errln("trie.next()=UDICTTRIE_NO_MATCH for a prefix of %s", data[i].s);
|
||||
if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
|
||||
errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
|
||||
return;
|
||||
}
|
||||
}
|
||||
trie.saveState(state);
|
||||
UDictTrieResult resultAtState=trie.current();
|
||||
UDictTrieResult result;
|
||||
UStringTrieResult resultAtState=trie.current();
|
||||
UStringTrieResult result;
|
||||
int32_t valueAtState=-99;
|
||||
if(UDICTTRIE_RESULT_HAS_VALUE(resultAtState)) {
|
||||
if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
|
||||
valueAtState=trie.getValue();
|
||||
}
|
||||
result=trie.next(0); // mismatch
|
||||
if(result!=UDICTTRIE_NO_MATCH || result!=trie.current()) {
|
||||
if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
|
||||
errln("trie.next(0) matched after part of %s", data[i].s);
|
||||
}
|
||||
if( resultAtState!=trie.resetToState(state).current() ||
|
||||
(UDICTTRIE_RESULT_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
|
||||
(USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
|
||||
) {
|
||||
errln("trie.next(part of %s) changes current()/getValue() after "
|
||||
"saveState/next(0)/resetToState",
|
||||
data[i].s);
|
||||
} else if(!UDICTTRIE_RESULT_HAS_VALUE(
|
||||
} else if(!USTRINGTRIE_HAS_VALUE(
|
||||
result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
|
||||
stringLength-partialLength)) ||
|
||||
result!=trie.current()) {
|
||||
errln("trie.next(rest of %s) does not seem to contain %s after "
|
||||
"saveState/next(0)/resetToState",
|
||||
data[i].s);
|
||||
} else if(!UDICTTRIE_RESULT_HAS_VALUE(
|
||||
} else if(!USTRINGTRIE_HAS_VALUE(
|
||||
result=trie.resetToState(state).
|
||||
next(expectedString.getTerminatedBuffer()+partialLength,
|
||||
stringLength-partialLength)) ||
|
||||
|
@ -967,37 +967,37 @@ void UCharTrieTest::checkNextWithState(const UnicodeString &trieUChars,
|
|||
|
||||
// next(string) is also tested in other functions,
|
||||
// but here we try to go partway through the string, and then beyond it.
|
||||
void UCharTrieTest::checkNextString(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharTrie trie(trieUChars.getBuffer());
|
||||
void UCharsTrieTest::checkNextString(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
|
||||
int32_t stringLength=expectedString.length();
|
||||
if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
|
||||
errln("trie.next(up to middle of string)=UDICTTRIE_NO_MATCH for %s", data[i].s);
|
||||
errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
|
||||
continue;
|
||||
}
|
||||
// Test that we stop properly at the end of the string.
|
||||
if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
|
||||
stringLength+1-stringLength/2)) {
|
||||
errln("trie.next(string+NUL)!=UDICTTRIE_NO_MATCH for %s", data[i].s);
|
||||
errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
|
||||
}
|
||||
trie.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void UCharTrieTest::checkIterator(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
void UCharsTrieTest::checkIterator(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
||||
UCharTrieIterator iter(trieUChars.getBuffer(), 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trieUChars) constructor")) {
|
||||
UCharsTrieIterator iter(trieUChars.getBuffer(), 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trieUChars) constructor")) {
|
||||
return;
|
||||
}
|
||||
checkIterator(iter, data, dataLength);
|
||||
}
|
||||
|
||||
void UCharTrieTest::checkIterator(UCharTrieIterator &iter,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
void UCharsTrieTest::checkIterator(UCharsTrieIterator &iter,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
if(!iter.hasNext()) {
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: dicttrieperf.cpp
|
||||
|
@ -26,15 +26,15 @@
|
|||
#include <stdlib.h>
|
||||
#include "unicode/uperf.h"
|
||||
#include "unicode/utext.h"
|
||||
#include "bytetrie.h"
|
||||
#include "bytetriebuilder.h"
|
||||
#include "bytestrie.h"
|
||||
#include "bytestriebuilder.h"
|
||||
#include "charstr.h"
|
||||
#include "package.h"
|
||||
#include "toolutil.h"
|
||||
#include "triedict.h"
|
||||
#include "ucbuf.h" // struct ULine
|
||||
#include "uchartrie.h"
|
||||
#include "uchartriebuilder.h"
|
||||
#include "ucharstrie.h"
|
||||
#include "ucharstriebuilder.h"
|
||||
#include "uoptions.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
|
@ -259,18 +259,18 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
static int32_t byteTrieLookup(const char *s, const char *nameTrieBytes) {
|
||||
ByteTrie trie(nameTrieBytes);
|
||||
if(UDICTTRIE_RESULT_HAS_VALUE(trie.next(s, -1))) {
|
||||
static int32_t bytesTrieLookup(const char *s, const char *nameTrieBytes) {
|
||||
BytesTrie trie(nameTrieBytes);
|
||||
if(USTRINGTRIE_HAS_VALUE(trie.next(s, -1))) {
|
||||
return trie.getValue();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
class ByteTriePackageLookup : public PackageLookup {
|
||||
class BytesTriePackageLookup : public PackageLookup {
|
||||
public:
|
||||
ByteTriePackageLookup(const DictionaryTriePerfTest &perf)
|
||||
BytesTriePackageLookup(const DictionaryTriePerfTest &perf)
|
||||
: PackageLookup(perf) {
|
||||
IcuToolErrorCode errorCode("BinarySearchPackageLookup()");
|
||||
int32_t count=pkg.getItemCount();
|
||||
|
@ -292,20 +292,20 @@ public:
|
|||
// NUL-terminate the name for call() to find the next one.
|
||||
itemNames.append(0, errorCode);
|
||||
}
|
||||
int32_t length=builder.build(UDICTTRIE_BUILD_SMALL, errorCode).length();
|
||||
printf("size of ByteTrie: %6ld\n", (long)length);
|
||||
int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, errorCode).length();
|
||||
printf("size of BytesTrie: %6ld\n", (long)length);
|
||||
// count+1: +1 for the last-item limit offset which we should have always had
|
||||
printf("size of dataOffsets:%6ld\n", (long)((count+1)*4));
|
||||
printf("total index size: %6ld\n", (long)(length+(count+1)*4));
|
||||
}
|
||||
virtual ~ByteTriePackageLookup() {}
|
||||
virtual ~BytesTriePackageLookup() {}
|
||||
|
||||
virtual void call(UErrorCode *pErrorCode) {
|
||||
int32_t count=pkg.getItemCount();
|
||||
const char *nameTrieBytes=builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data();
|
||||
const char *nameTrieBytes=builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data();
|
||||
const char *name=itemNames.data();
|
||||
for(int32_t i=0; i<count; ++i) {
|
||||
if(byteTrieLookup(name, nameTrieBytes)<0) {
|
||||
if(bytesTrieLookup(name, nameTrieBytes)<0) {
|
||||
fprintf(stderr, "item not found: %s\n", name);
|
||||
}
|
||||
name=strchr(name, 0)+1;
|
||||
|
@ -313,7 +313,7 @@ public:
|
|||
}
|
||||
|
||||
protected:
|
||||
ByteTrieBuilder builder;
|
||||
BytesTrieBuilder builder;
|
||||
CharString itemNames;
|
||||
};
|
||||
|
||||
|
@ -337,9 +337,9 @@ class CompactTrieDictLookup : public DictLookup {
|
|||
public:
|
||||
CompactTrieDictLookup(const DictionaryTriePerfTest &perfTest)
|
||||
: DictLookup(perfTest), ctd(NULL) {
|
||||
IcuToolErrorCode errorCode("UCharTrieDictLookup()");
|
||||
IcuToolErrorCode errorCode("UCharsTrieDictLookup()");
|
||||
// U+0E1C is the median code unit, from
|
||||
// the UCharTrie root node (split-branch node) for thaidict.txt.
|
||||
// the UCharsTrie root node (split-branch node) for thaidict.txt.
|
||||
MutableTrieDictionary builder(0xe1c, errorCode);
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
int32_t numLines=perf.getNumLines();
|
||||
|
@ -386,13 +386,13 @@ protected:
|
|||
// Closely imitate CompactTrieDictionary::matches().
|
||||
// Note: CompactTrieDictionary::matches() is part of its trie implementation,
|
||||
// and while it loops over the text, it knows the current state.
|
||||
// By contrast, this implementation uses UCharTrie API functions that have to
|
||||
// By contrast, this implementation uses UCharsTrie API functions that have to
|
||||
// check the trie state each time and load/store state in the object.
|
||||
// (Whether it hasNext() and whether it is in the middle of a linear-match node.)
|
||||
static int32_t
|
||||
ucharTrieMatches(UCharTrie &trie,
|
||||
UText *text, int32_t textLimit,
|
||||
int32_t *lengths, int &count, int limit ) {
|
||||
ucharsTrieMatches(UCharsTrie &trie,
|
||||
UText *text, int32_t textLimit,
|
||||
int32_t *lengths, int &count, int limit ) {
|
||||
UChar32 c=utext_next32(text);
|
||||
// Notes:
|
||||
// a) CompactTrieDictionary::matches() does not check for U_SENTINEL.
|
||||
|
@ -402,19 +402,19 @@ ucharTrieMatches(UCharTrie &trie,
|
|||
}
|
||||
// Should be firstForCodePoint() but CompactTrieDictionary
|
||||
// handles only code units.
|
||||
UDictTrieResult result=trie.first(c);
|
||||
UStringTrieResult result=trie.first(c);
|
||||
int32_t numChars=1;
|
||||
count=0;
|
||||
for(;;) {
|
||||
if(UDICTTRIE_RESULT_HAS_VALUE(result)) {
|
||||
if(USTRINGTRIE_HAS_VALUE(result)) {
|
||||
if(count<limit) {
|
||||
// lengths[count++]=(int32_t)utext_getNativeIndex(text);
|
||||
lengths[count++]=numChars; // CompactTrieDictionary just counts chars too.
|
||||
}
|
||||
if(result==UDICTTRIE_HAS_FINAL_VALUE) {
|
||||
if(result==USTRINGTRIE_FINAL_VALUE) {
|
||||
break;
|
||||
}
|
||||
} else if(result==UDICTTRIE_NO_MATCH) {
|
||||
} else if(result==USTRINGTRIE_NO_MATCH) {
|
||||
break;
|
||||
}
|
||||
if(numChars>=textLimit) {
|
||||
|
@ -447,11 +447,11 @@ ucharTrieMatches(UCharTrie &trie,
|
|||
return numChars;
|
||||
}
|
||||
|
||||
class UCharTrieDictLookup : public DictLookup {
|
||||
class UCharsTrieDictLookup : public DictLookup {
|
||||
public:
|
||||
UCharTrieDictLookup(const DictionaryTriePerfTest &perfTest)
|
||||
UCharsTrieDictLookup(const DictionaryTriePerfTest &perfTest)
|
||||
: DictLookup(perfTest) {
|
||||
IcuToolErrorCode errorCode("UCharTrieDictLookup()");
|
||||
IcuToolErrorCode errorCode("UCharsTrieDictLookup()");
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
int32_t numLines=perf.getNumLines();
|
||||
for(int32_t i=0; i<numLines; ++i) {
|
||||
|
@ -462,24 +462,24 @@ public:
|
|||
builder.add(UnicodeString(FALSE, lines[i].name, lines[i].len), 0, errorCode);
|
||||
}
|
||||
UnicodeString trieUChars;
|
||||
int32_t length=builder.build(UDICTTRIE_BUILD_SMALL, trieUChars, errorCode).length();
|
||||
printf("size of UCharTrie: %6ld bytes\n", (long)length*2);
|
||||
int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, trieUChars, errorCode).length();
|
||||
printf("size of UCharsTrie: %6ld bytes\n", (long)length*2);
|
||||
}
|
||||
|
||||
virtual ~UCharTrieDictLookup() {}
|
||||
virtual ~UCharsTrieDictLookup() {}
|
||||
|
||||
protected:
|
||||
UCharTrieBuilder builder;
|
||||
UCharsTrieBuilder builder;
|
||||
};
|
||||
|
||||
class UCharTrieDictMatches : public UCharTrieDictLookup {
|
||||
class UCharsTrieDictMatches : public UCharsTrieDictLookup {
|
||||
public:
|
||||
UCharTrieDictMatches(const DictionaryTriePerfTest &perfTest)
|
||||
: UCharTrieDictLookup(perfTest) {}
|
||||
UCharsTrieDictMatches(const DictionaryTriePerfTest &perfTest)
|
||||
: UCharsTrieDictLookup(perfTest) {}
|
||||
|
||||
virtual void call(UErrorCode *pErrorCode) {
|
||||
UnicodeString uchars;
|
||||
UCharTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
|
||||
UCharsTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
|
||||
UText text=UTEXT_INITIALIZER;
|
||||
int32_t lengths[20];
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
|
@ -491,8 +491,8 @@ public:
|
|||
}
|
||||
utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
|
||||
int32_t count=0;
|
||||
ucharTrieMatches(trie, &text, lines[i].len,
|
||||
lengths, count, LENGTHOF(lengths));
|
||||
ucharsTrieMatches(trie, &text, lines[i].len,
|
||||
lengths, count, LENGTHOF(lengths));
|
||||
if(count==0 || lengths[count-1]!=lines[i].len) {
|
||||
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
|
||||
}
|
||||
|
@ -500,14 +500,14 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class UCharTrieDictContains : public UCharTrieDictLookup {
|
||||
class UCharsTrieDictContains : public UCharsTrieDictLookup {
|
||||
public:
|
||||
UCharTrieDictContains(const DictionaryTriePerfTest &perfTest)
|
||||
: UCharTrieDictLookup(perfTest) {}
|
||||
UCharsTrieDictContains(const DictionaryTriePerfTest &perfTest)
|
||||
: UCharsTrieDictLookup(perfTest) {}
|
||||
|
||||
virtual void call(UErrorCode *pErrorCode) {
|
||||
UnicodeString uchars;
|
||||
UCharTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
|
||||
UCharsTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
int32_t numLines=perf.getNumLines();
|
||||
for(int32_t i=0; i<numLines; ++i) {
|
||||
|
@ -515,7 +515,7 @@ public:
|
|||
if(lines[i].name[0]<0x41) {
|
||||
continue;
|
||||
}
|
||||
if(!UDICTTRIE_RESULT_HAS_VALUE(trie.reset().next(lines[i].name, lines[i].len))) {
|
||||
if(!USTRINGTRIE_HAS_VALUE(trie.reset().next(lines[i].name, lines[i].len))) {
|
||||
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
|
||||
}
|
||||
}
|
||||
|
@ -547,11 +547,11 @@ static UBool thaiWordToBytes(const UChar *s, int32_t length,
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
class ByteTrieDictLookup : public DictLookup {
|
||||
class BytesTrieDictLookup : public DictLookup {
|
||||
public:
|
||||
ByteTrieDictLookup(const DictionaryTriePerfTest &perfTest)
|
||||
BytesTrieDictLookup(const DictionaryTriePerfTest &perfTest)
|
||||
: DictLookup(perfTest), noDict(FALSE) {
|
||||
IcuToolErrorCode errorCode("ByteTrieDictLookup()");
|
||||
IcuToolErrorCode errorCode("BytesTrieDictLookup()");
|
||||
CharString str;
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
int32_t numLines=perf.getNumLines();
|
||||
|
@ -568,39 +568,39 @@ public:
|
|||
builder.add(str.toStringPiece(), 0, errorCode);
|
||||
}
|
||||
if(!noDict) {
|
||||
int32_t length=builder.build(UDICTTRIE_BUILD_SMALL, errorCode).length();
|
||||
printf("size of ByteTrie: %6ld bytes\n", (long)length);
|
||||
int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, errorCode).length();
|
||||
printf("size of BytesTrie: %6ld bytes\n", (long)length);
|
||||
}
|
||||
}
|
||||
|
||||
virtual ~ByteTrieDictLookup() {}
|
||||
virtual ~BytesTrieDictLookup() {}
|
||||
|
||||
protected:
|
||||
ByteTrieBuilder builder;
|
||||
BytesTrieBuilder builder;
|
||||
UBool noDict;
|
||||
};
|
||||
|
||||
static int32_t
|
||||
byteTrieMatches(ByteTrie &trie,
|
||||
UText *text, int32_t textLimit,
|
||||
int32_t *lengths, int &count, int limit ) {
|
||||
bytesTrieMatches(BytesTrie &trie,
|
||||
UText *text, int32_t textLimit,
|
||||
int32_t *lengths, int &count, int limit ) {
|
||||
UChar32 c=utext_next32(text);
|
||||
if(c<0) {
|
||||
return 0;
|
||||
}
|
||||
UDictTrieResult result=trie.first(thaiCharToByte(c));
|
||||
UStringTrieResult result=trie.first(thaiCharToByte(c));
|
||||
int32_t numChars=1;
|
||||
count=0;
|
||||
for(;;) {
|
||||
if(UDICTTRIE_RESULT_HAS_VALUE(result)) {
|
||||
if(USTRINGTRIE_HAS_VALUE(result)) {
|
||||
if(count<limit) {
|
||||
// lengths[count++]=(int32_t)utext_getNativeIndex(text);
|
||||
lengths[count++]=numChars; // CompactTrieDictionary just counts chars too.
|
||||
}
|
||||
if(result==UDICTTRIE_HAS_FINAL_VALUE) {
|
||||
if(result==USTRINGTRIE_FINAL_VALUE) {
|
||||
break;
|
||||
}
|
||||
} else if(result==UDICTTRIE_NO_MATCH) {
|
||||
} else if(result==USTRINGTRIE_NO_MATCH) {
|
||||
break;
|
||||
}
|
||||
if(numChars>=textLimit) {
|
||||
|
@ -616,16 +616,16 @@ byteTrieMatches(ByteTrie &trie,
|
|||
return numChars;
|
||||
}
|
||||
|
||||
class ByteTrieDictMatches : public ByteTrieDictLookup {
|
||||
class BytesTrieDictMatches : public BytesTrieDictLookup {
|
||||
public:
|
||||
ByteTrieDictMatches(const DictionaryTriePerfTest &perfTest)
|
||||
: ByteTrieDictLookup(perfTest) {}
|
||||
BytesTrieDictMatches(const DictionaryTriePerfTest &perfTest)
|
||||
: BytesTrieDictLookup(perfTest) {}
|
||||
|
||||
virtual void call(UErrorCode *pErrorCode) {
|
||||
if(noDict) {
|
||||
return;
|
||||
}
|
||||
ByteTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data());
|
||||
BytesTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data());
|
||||
UText text=UTEXT_INITIALIZER;
|
||||
int32_t lengths[20];
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
|
@ -637,8 +637,8 @@ public:
|
|||
}
|
||||
utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
|
||||
int32_t count=0;
|
||||
byteTrieMatches(trie, &text, lines[i].len,
|
||||
lengths, count, LENGTHOF(lengths));
|
||||
bytesTrieMatches(trie, &text, lines[i].len,
|
||||
lengths, count, LENGTHOF(lengths));
|
||||
if(count==0 || lengths[count-1]!=lines[i].len) {
|
||||
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
|
||||
}
|
||||
|
@ -646,16 +646,16 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class ByteTrieDictContains : public ByteTrieDictLookup {
|
||||
class BytesTrieDictContains : public BytesTrieDictLookup {
|
||||
public:
|
||||
ByteTrieDictContains(const DictionaryTriePerfTest &perfTest)
|
||||
: ByteTrieDictLookup(perfTest) {}
|
||||
BytesTrieDictContains(const DictionaryTriePerfTest &perfTest)
|
||||
: BytesTrieDictLookup(perfTest) {}
|
||||
|
||||
virtual void call(UErrorCode *pErrorCode) {
|
||||
if(noDict) {
|
||||
return;
|
||||
}
|
||||
ByteTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data());
|
||||
BytesTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data());
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
int32_t numLines=perf.getNumLines();
|
||||
for(int32_t i=0; i<numLines; ++i) {
|
||||
|
@ -664,16 +664,16 @@ public:
|
|||
if(line[0]<0x41) {
|
||||
continue;
|
||||
}
|
||||
UDictTrieResult result=trie.first(thaiCharToByte(line[0]));
|
||||
UStringTrieResult result=trie.first(thaiCharToByte(line[0]));
|
||||
int32_t lineLength=lines[i].len;
|
||||
for(int32_t j=1; j<lineLength; ++j) {
|
||||
if(!UDICTTRIE_RESULT_HAS_NEXT(result)) {
|
||||
if(!USTRINGTRIE_HAS_NEXT(result)) {
|
||||
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
|
||||
break;
|
||||
}
|
||||
result=trie.next(thaiCharToByte(line[j]));
|
||||
}
|
||||
if(!UDICTTRIE_RESULT_HAS_VALUE(result)) {
|
||||
if(!USTRINGTRIE_HAS_VALUE(result)) {
|
||||
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
|
||||
}
|
||||
}
|
||||
|
@ -691,27 +691,27 @@ UPerfFunction *DictionaryTriePerfTest::runIndexedTest(int32_t index, UBool exec,
|
|||
}
|
||||
break;
|
||||
case 1:
|
||||
name="uchartriematches";
|
||||
name="ucharstriematches";
|
||||
if(exec) {
|
||||
return new UCharTrieDictMatches(*this);
|
||||
return new UCharsTrieDictMatches(*this);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
name="uchartriecontains";
|
||||
name="ucharstriecontains";
|
||||
if(exec) {
|
||||
return new UCharTrieDictContains(*this);
|
||||
return new UCharsTrieDictContains(*this);
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
name="bytetriematches";
|
||||
name="bytestriematches";
|
||||
if(exec) {
|
||||
return new ByteTrieDictMatches(*this);
|
||||
return new BytesTrieDictMatches(*this);
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
name="bytetriecontains";
|
||||
name="bytestriecontains";
|
||||
if(exec) {
|
||||
return new ByteTrieDictContains(*this);
|
||||
return new BytesTrieDictContains(*this);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -720,8 +720,8 @@ UPerfFunction *DictionaryTriePerfTest::runIndexedTest(int32_t index, UBool exec,
|
|||
}
|
||||
} else {
|
||||
if(index==0 && exec) {
|
||||
puts("Running ByteTrie perf tests on the .dat package file from the --sourcedir.\n"
|
||||
"For UCharTrie perf tests on a dictionary text file, specify the -f or --file-name.\n");
|
||||
puts("Running BytesTrie perf tests on the .dat package file from the --sourcedir.\n"
|
||||
"For UCharsTrie perf tests on a dictionary text file, specify the -f or --file-name.\n");
|
||||
}
|
||||
switch(index) {
|
||||
case 0:
|
||||
|
@ -737,9 +737,9 @@ UPerfFunction *DictionaryTriePerfTest::runIndexedTest(int32_t index, UBool exec,
|
|||
}
|
||||
break;
|
||||
case 2:
|
||||
name="bytetrie";
|
||||
name="bytestrie";
|
||||
if(exec) {
|
||||
return new ByteTriePackageLookup(*this);
|
||||
return new BytesTriePackageLookup(*this);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#******************************************************************************
|
||||
#
|
||||
# Copyright (C) 1999-2010, International Business Machines
|
||||
# Copyright (C) 1999-2011, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
#******************************************************************************
|
||||
|
@ -52,8 +52,8 @@ LDFLAGS += $(LDFLAGSICUTOOLUTIL)
|
|||
LIBS = $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS)
|
||||
|
||||
OBJECTS = filestrm.o package.o pkgitems.o swapimpl.o toolutil.o unewdata.o \
|
||||
dicttriebuilder.o bytetriebuilder.o bytetrieiterator.o \
|
||||
uchartrie.o uchartriebuilder.o uchartrieiterator.o \
|
||||
stringtriebuilder.o bytestriebuilder.o bytestrieiterator.o \
|
||||
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
|
||||
denseranges.o \
|
||||
ucm.o ucmstate.o uoptions.o uparse.o \
|
||||
ucbuf.o xmlparser.o writesrc.o \
|
||||
|
|
|
@ -3,21 +3,19 @@
|
|||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytetriebuilder.cpp
|
||||
* file name: bytestriebuilder.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010sep25
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Builder class for ByteTrie dictionary trie.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "bytetrie.h"
|
||||
#include "bytetriebuilder.h"
|
||||
#include "bytestrie.h"
|
||||
#include "bytestriebuilder.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "uarrsort.h"
|
||||
|
@ -26,11 +24,11 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
/*
|
||||
* Note: This builder implementation stores (bytes, value) pairs with full copies
|
||||
* of the byte sequences, until the ByteTrie is built.
|
||||
* of the byte sequences, until the BytesTrie is built.
|
||||
* It might(!) take less memory if we collected the data in a temporary, dynamic trie.
|
||||
*/
|
||||
|
||||
class ByteTrieElement : public UMemory {
|
||||
class BytesTrieElement : public UMemory {
|
||||
public:
|
||||
// Use compiler's default constructor, initializes nothing.
|
||||
|
||||
|
@ -62,7 +60,7 @@ public:
|
|||
|
||||
int32_t getValue() const { return value; }
|
||||
|
||||
int32_t compareStringTo(const ByteTrieElement &o, const CharString &strings) const;
|
||||
int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;
|
||||
|
||||
private:
|
||||
const char *data(const CharString &strings) const {
|
||||
|
@ -85,8 +83,8 @@ private:
|
|||
};
|
||||
|
||||
void
|
||||
ByteTrieElement::setTo(const StringPiece &s, int32_t val,
|
||||
CharString &strings, UErrorCode &errorCode) {
|
||||
BytesTrieElement::setTo(const StringPiece &s, int32_t val,
|
||||
CharString &strings, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
@ -108,7 +106,7 @@ ByteTrieElement::setTo(const StringPiece &s, int32_t val,
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieElement::compareStringTo(const ByteTrieElement &other, const CharString &strings) const {
|
||||
BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
|
||||
// TODO: add StringPiece::compare(), see ticket #8187
|
||||
StringPiece thisString=getString(strings);
|
||||
StringPiece otherString=other.getString(strings);
|
||||
|
@ -123,13 +121,13 @@ ByteTrieElement::compareStringTo(const ByteTrieElement &other, const CharString
|
|||
return diff!=0 ? diff : lengthDiff;
|
||||
}
|
||||
|
||||
ByteTrieBuilder::~ByteTrieBuilder() {
|
||||
BytesTrieBuilder::~BytesTrieBuilder() {
|
||||
delete[] elements;
|
||||
uprv_free(bytes);
|
||||
}
|
||||
|
||||
ByteTrieBuilder &
|
||||
ByteTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) {
|
||||
BytesTrieBuilder &
|
||||
BytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return *this;
|
||||
}
|
||||
|
@ -146,12 +144,12 @@ ByteTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode)
|
|||
} else {
|
||||
newCapacity=4*elementsCapacity;
|
||||
}
|
||||
ByteTrieElement *newElements=new ByteTrieElement[newCapacity];
|
||||
BytesTrieElement *newElements=new BytesTrieElement[newCapacity];
|
||||
if(newElements==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
if(elementsLength>0) {
|
||||
uprv_memcpy(newElements, elements, elementsLength*sizeof(ByteTrieElement));
|
||||
uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement));
|
||||
}
|
||||
delete[] elements;
|
||||
elements=newElements;
|
||||
|
@ -166,15 +164,15 @@ U_CDECL_BEGIN
|
|||
static int32_t U_CALLCONV
|
||||
compareElementStrings(const void *context, const void *left, const void *right) {
|
||||
const CharString *strings=reinterpret_cast<const CharString *>(context);
|
||||
const ByteTrieElement *leftElement=reinterpret_cast<const ByteTrieElement *>(left);
|
||||
const ByteTrieElement *rightElement=reinterpret_cast<const ByteTrieElement *>(right);
|
||||
const BytesTrieElement *leftElement=reinterpret_cast<const BytesTrieElement *>(left);
|
||||
const BytesTrieElement *rightElement=reinterpret_cast<const BytesTrieElement *>(right);
|
||||
return leftElement->compareStringTo(*rightElement, *strings);
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
StringPiece
|
||||
ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode) {
|
||||
BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
|
||||
StringPiece result;
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return result;
|
||||
|
@ -188,7 +186,7 @@ ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode)
|
|||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return result;
|
||||
}
|
||||
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(ByteTrieElement),
|
||||
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
|
||||
compareElementStrings, &strings,
|
||||
FALSE, // need not be a stable sort
|
||||
&errorCode);
|
||||
|
@ -214,7 +212,7 @@ ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode)
|
|||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return result;
|
||||
}
|
||||
DictTrieBuilder::build(buildOption, elementsLength, errorCode);
|
||||
StringTrieBuilder::build(buildOption, elementsLength, errorCode);
|
||||
if(bytes==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
|
@ -224,24 +222,24 @@ ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode)
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::getElementStringLength(int32_t i) const {
|
||||
BytesTrieBuilder::getElementStringLength(int32_t i) const {
|
||||
return elements[i].getStringLength(strings);
|
||||
}
|
||||
|
||||
UChar
|
||||
ByteTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
|
||||
BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
|
||||
return (uint8_t)elements[i].charAt(byteIndex, strings);
|
||||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::getElementValue(int32_t i) const {
|
||||
BytesTrieBuilder::getElementValue(int32_t i) const {
|
||||
return elements[i].getValue();
|
||||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
|
||||
const ByteTrieElement &firstElement=elements[first];
|
||||
const ByteTrieElement &lastElement=elements[last];
|
||||
BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
|
||||
const BytesTrieElement &firstElement=elements[first];
|
||||
const BytesTrieElement &lastElement=elements[last];
|
||||
int32_t minStringLength=firstElement.getStringLength(strings);
|
||||
while(++byteIndex<minStringLength &&
|
||||
firstElement.charAt(byteIndex, strings)==
|
||||
|
@ -250,7 +248,7 @@ ByteTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byte
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
|
||||
BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
|
||||
int32_t length=0; // Number of different units at unitIndex.
|
||||
int32_t i=start;
|
||||
do {
|
||||
|
@ -264,7 +262,7 @@ ByteTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteInd
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
|
||||
BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
|
||||
do {
|
||||
char byte=elements[i++].charAt(byteIndex, strings);
|
||||
while(byte==elements[i].charAt(byteIndex, strings)) {
|
||||
|
@ -275,7 +273,7 @@ ByteTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t c
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
|
||||
BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
|
||||
char b=(char)byte;
|
||||
while(b==elements[i].charAt(byteIndex, strings)) {
|
||||
++i;
|
||||
|
@ -283,13 +281,13 @@ ByteTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar
|
|||
return i;
|
||||
}
|
||||
|
||||
ByteTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
|
||||
BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
|
||||
: LinearMatchNode(len, nextNode), s(bytes) {
|
||||
hash=hash*37+uhash_hashCharsN(bytes, len);
|
||||
}
|
||||
|
||||
UBool
|
||||
ByteTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
|
||||
BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
|
||||
if(this==&other) {
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -301,16 +299,16 @@ ByteTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
|
|||
}
|
||||
|
||||
void
|
||||
ByteTrieBuilder::BTLinearMatchNode::write(DictTrieBuilder &builder) {
|
||||
ByteTrieBuilder &b=(ByteTrieBuilder &)builder;
|
||||
BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
|
||||
BytesTrieBuilder &b=(BytesTrieBuilder &)builder;
|
||||
next->write(builder);
|
||||
b.write(s, length);
|
||||
offset=b.write(b.getMinLinearMatch()+length-1);
|
||||
}
|
||||
|
||||
DictTrieBuilder::Node *
|
||||
ByteTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
|
||||
Node *nextNode) const {
|
||||
StringTrieBuilder::Node *
|
||||
BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
|
||||
Node *nextNode) const {
|
||||
return new BTLinearMatchNode(
|
||||
elements[i].getString(strings).data()+byteIndex,
|
||||
length,
|
||||
|
@ -318,7 +316,7 @@ ByteTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t len
|
|||
}
|
||||
|
||||
UBool
|
||||
ByteTrieBuilder::ensureCapacity(int32_t length) {
|
||||
BytesTrieBuilder::ensureCapacity(int32_t length) {
|
||||
if(bytes==NULL) {
|
||||
return FALSE; // previous memory allocation had failed
|
||||
}
|
||||
|
@ -344,7 +342,7 @@ ByteTrieBuilder::ensureCapacity(int32_t length) {
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::write(int32_t byte) {
|
||||
BytesTrieBuilder::write(int32_t byte) {
|
||||
int32_t newLength=bytesLength+1;
|
||||
if(ensureCapacity(newLength)) {
|
||||
bytesLength=newLength;
|
||||
|
@ -354,7 +352,7 @@ ByteTrieBuilder::write(int32_t byte) {
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::write(const char *b, int32_t length) {
|
||||
BytesTrieBuilder::write(const char *b, int32_t length) {
|
||||
int32_t newLength=bytesLength+length;
|
||||
if(ensureCapacity(newLength)) {
|
||||
bytesLength=newLength;
|
||||
|
@ -364,31 +362,31 @@ ByteTrieBuilder::write(const char *b, int32_t length) {
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
|
||||
BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
|
||||
return write(elements[i].getString(strings).data()+byteIndex, length);
|
||||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
|
||||
BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
|
||||
char intBytes[5];
|
||||
int32_t length=1;
|
||||
if(i<0 || i>0xffffff) {
|
||||
intBytes[0]=(char)ByteTrie::kFiveByteValueLead;
|
||||
intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
|
||||
intBytes[1]=(char)(i>>24);
|
||||
intBytes[2]=(char)(i>>16);
|
||||
intBytes[3]=(char)(i>>8);
|
||||
intBytes[4]=(char)i;
|
||||
length=5;
|
||||
} else if(i<=ByteTrie::kMaxOneByteValue) {
|
||||
intBytes[0]=(char)(ByteTrie::kMinOneByteValueLead+i);
|
||||
} else if(i<=BytesTrie::kMaxOneByteValue) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
|
||||
} else {
|
||||
if(i<=ByteTrie::kMaxTwoByteValue) {
|
||||
intBytes[0]=(char)(ByteTrie::kMinTwoByteValueLead+(i>>8));
|
||||
if(i<=BytesTrie::kMaxTwoByteValue) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
|
||||
} else {
|
||||
if(i<=ByteTrie::kMaxThreeByteValue) {
|
||||
intBytes[0]=(char)(ByteTrie::kMinThreeByteValueLead+(i>>16));
|
||||
if(i<=BytesTrie::kMaxThreeByteValue) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
|
||||
} else {
|
||||
intBytes[0]=(char)ByteTrie::kFourByteValueLead;
|
||||
intBytes[0]=(char)BytesTrie::kFourByteValueLead;
|
||||
intBytes[1]=(char)(i>>16);
|
||||
length=2;
|
||||
}
|
||||
|
@ -401,7 +399,7 @@ ByteTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
|
||||
BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
|
||||
int32_t offset=write(node);
|
||||
if(hasValue) {
|
||||
offset=writeValueAndFinal(value, FALSE);
|
||||
|
@ -410,26 +408,26 @@ ByteTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node)
|
|||
}
|
||||
|
||||
int32_t
|
||||
ByteTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
|
||||
BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
|
||||
int32_t i=bytesLength-jumpTarget;
|
||||
char intBytes[5];
|
||||
int32_t length;
|
||||
U_ASSERT(i>=0);
|
||||
if(i<=ByteTrie::kMaxOneByteDelta) {
|
||||
if(i<=BytesTrie::kMaxOneByteDelta) {
|
||||
length=0;
|
||||
} else if(i<=ByteTrie::kMaxTwoByteDelta) {
|
||||
intBytes[0]=(char)(ByteTrie::kMinTwoByteDeltaLead+(i>>8));
|
||||
} else if(i<=BytesTrie::kMaxTwoByteDelta) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
|
||||
length=1;
|
||||
} else {
|
||||
if(i<=ByteTrie::kMaxThreeByteDelta) {
|
||||
intBytes[0]=(char)(ByteTrie::kMinThreeByteDeltaLead+(i>>16));
|
||||
if(i<=BytesTrie::kMaxThreeByteDelta) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
|
||||
length=2;
|
||||
} else {
|
||||
if(i<=0xffffff) {
|
||||
intBytes[0]=(char)ByteTrie::kFourByteDeltaLead;
|
||||
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
|
||||
length=3;
|
||||
} else {
|
||||
intBytes[0]=(char)ByteTrie::kFiveByteDeltaLead;
|
||||
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
|
||||
intBytes[1]=(char)(i>>24);
|
||||
length=4;
|
||||
}
|
|
@ -3,42 +3,43 @@
|
|||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytetriebuilder.h
|
||||
* file name: bytestriebuilder.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010sep25
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Builder class for ByteTrie dictionary trie.
|
||||
*/
|
||||
|
||||
#ifndef __BYTETRIEBUILDER_H__
|
||||
#define __BYTETRIEBUILDER_H__
|
||||
#ifndef __BYTESTRIEBUILDER_H__
|
||||
#define __BYTESTRIEBUILDER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "bytetrie.h"
|
||||
#include "bytestrie.h"
|
||||
#include "charstr.h"
|
||||
#include "dicttriebuilder.h"
|
||||
#include "stringtriebuilder.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ByteTrieElement;
|
||||
class BytesTrieElement;
|
||||
|
||||
class U_TOOLUTIL_API ByteTrieBuilder : public DictTrieBuilder {
|
||||
/**
|
||||
* Builder class for BytesTrie.
|
||||
*/
|
||||
class U_TOOLUTIL_API BytesTrieBuilder : public StringTrieBuilder {
|
||||
public:
|
||||
ByteTrieBuilder()
|
||||
BytesTrieBuilder()
|
||||
: elements(NULL), elementsCapacity(0), elementsLength(0),
|
||||
bytes(NULL), bytesCapacity(0), bytesLength(0) {}
|
||||
virtual ~ByteTrieBuilder();
|
||||
virtual ~BytesTrieBuilder();
|
||||
|
||||
ByteTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode);
|
||||
BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode);
|
||||
|
||||
StringPiece build(UDictTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
StringPiece build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
ByteTrieBuilder &clear() {
|
||||
BytesTrieBuilder &clear() {
|
||||
strings.clear();
|
||||
elementsLength=0;
|
||||
bytesLength=0;
|
||||
|
@ -58,15 +59,15 @@ private:
|
|||
|
||||
virtual UBool matchNodesCanHaveValues() const { return FALSE; }
|
||||
|
||||
virtual int32_t getMaxBranchLinearSubNodeLength() const { return ByteTrie::kMaxBranchLinearSubNodeLength; }
|
||||
virtual int32_t getMinLinearMatch() const { return ByteTrie::kMinLinearMatch; }
|
||||
virtual int32_t getMaxLinearMatchLength() const { return ByteTrie::kMaxLinearMatchLength; }
|
||||
virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; }
|
||||
virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; }
|
||||
virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; }
|
||||
|
||||
class BTLinearMatchNode : public LinearMatchNode {
|
||||
public:
|
||||
BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
|
||||
virtual UBool operator==(const Node &other) const;
|
||||
virtual void write(DictTrieBuilder &builder);
|
||||
virtual void write(StringTrieBuilder &builder);
|
||||
private:
|
||||
const char *s;
|
||||
};
|
||||
|
@ -83,7 +84,7 @@ private:
|
|||
virtual int32_t writeDeltaTo(int32_t jumpTarget);
|
||||
|
||||
CharString strings;
|
||||
ByteTrieElement *elements;
|
||||
BytesTrieElement *elements;
|
||||
int32_t elementsCapacity;
|
||||
int32_t elementsLength;
|
||||
|
||||
|
@ -96,4 +97,4 @@ private:
|
|||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __BYTETRIEBUILDER_H__
|
||||
#endif // __BYTESTRIEBUILDER_H__
|
|
@ -1,9 +1,9 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytetrieiterator.cpp
|
||||
* file name: bytestrieiterator.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
|
@ -14,22 +14,22 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "bytetrie.h"
|
||||
#include "bytetrieiterator.h"
|
||||
#include "bytestrie.h"
|
||||
#include "bytestrieiterator.h"
|
||||
#include "charstr.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
ByteTrieIterator::ByteTrieIterator(const void *trieBytes, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
BytesTrieIterator::BytesTrieIterator(const void *trieBytes, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
: bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
|
||||
pos_(bytes_), initialPos_(bytes_),
|
||||
remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
|
||||
maxLength_(maxStringLength), value_(0), stack_(errorCode) {}
|
||||
|
||||
ByteTrieIterator::ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
BytesTrieIterator::BytesTrieIterator(const BytesTrie &trie, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
: bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
|
||||
remainingMatchLength_(trie.remainingMatchLength_),
|
||||
initialRemainingMatchLength_(trie.remainingMatchLength_),
|
||||
|
@ -47,7 +47,7 @@ ByteTrieIterator::ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength
|
|||
}
|
||||
}
|
||||
|
||||
ByteTrieIterator &ByteTrieIterator::reset() {
|
||||
BytesTrieIterator &BytesTrieIterator::reset() {
|
||||
pos_=initialPos_;
|
||||
remainingMatchLength_=initialRemainingMatchLength_;
|
||||
int32_t length=remainingMatchLength_+1; // Remaining match length.
|
||||
|
@ -62,7 +62,7 @@ ByteTrieIterator &ByteTrieIterator::reset() {
|
|||
}
|
||||
|
||||
UBool
|
||||
ByteTrieIterator::next(UErrorCode &errorCode) {
|
||||
BytesTrieIterator::next(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -95,14 +95,14 @@ ByteTrieIterator::next(UErrorCode &errorCode) {
|
|||
}
|
||||
for(;;) {
|
||||
int32_t node=*pos++;
|
||||
if(node>=ByteTrie::kMinValueLead) {
|
||||
if(node>=BytesTrie::kMinValueLead) {
|
||||
// Deliver value for the byte sequence so far.
|
||||
UBool isFinal=(UBool)(node&ByteTrie::kValueIsFinal);
|
||||
value_=ByteTrie::readValue(pos, node>>1);
|
||||
UBool isFinal=(UBool)(node&BytesTrie::kValueIsFinal);
|
||||
value_=BytesTrie::readValue(pos, node>>1);
|
||||
if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
|
||||
pos_=NULL;
|
||||
} else {
|
||||
pos_=ByteTrie::skipValue(pos, node);
|
||||
pos_=BytesTrie::skipValue(pos, node);
|
||||
}
|
||||
sp_.set(str_.data(), str_.length());
|
||||
return TRUE;
|
||||
|
@ -110,7 +110,7 @@ ByteTrieIterator::next(UErrorCode &errorCode) {
|
|||
if(maxLength_>0 && str_.length()==maxLength_) {
|
||||
return truncateAndStop();
|
||||
}
|
||||
if(node<ByteTrie::kMinLinearMatch) {
|
||||
if(node<BytesTrie::kMinLinearMatch) {
|
||||
if(node==0) {
|
||||
node=*pos++;
|
||||
}
|
||||
|
@ -120,7 +120,7 @@ ByteTrieIterator::next(UErrorCode &errorCode) {
|
|||
}
|
||||
} else {
|
||||
// Linear-match node, append length bytes to str_.
|
||||
int32_t length=node-ByteTrie::kMinLinearMatch+1;
|
||||
int32_t length=node-BytesTrie::kMinLinearMatch+1;
|
||||
if(maxLength_>0 && str_.length()+length>maxLength_) {
|
||||
str_.append(reinterpret_cast<const char *>(pos),
|
||||
maxLength_-str_.length(), errorCode);
|
||||
|
@ -134,23 +134,23 @@ ByteTrieIterator::next(UErrorCode &errorCode) {
|
|||
|
||||
// Branch node, needs to take the first outbound edge and push state for the rest.
|
||||
const uint8_t *
|
||||
ByteTrieIterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
|
||||
while(length>ByteTrie::kMaxBranchLinearSubNodeLength) {
|
||||
BytesTrieIterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
|
||||
while(length>BytesTrie::kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison byte
|
||||
// Push state for the greater-or-equal edge.
|
||||
stack_.addElement((int32_t)(ByteTrie::skipDelta(pos)-bytes_), errorCode);
|
||||
stack_.addElement((int32_t)(BytesTrie::skipDelta(pos)-bytes_), errorCode);
|
||||
stack_.addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
|
||||
// Follow the less-than edge.
|
||||
length>>=1;
|
||||
pos=ByteTrie::jumpByDelta(pos);
|
||||
pos=BytesTrie::jumpByDelta(pos);
|
||||
}
|
||||
// List of key-value pairs where values are either final values or jump deltas.
|
||||
// Read the first (key, value) pair.
|
||||
uint8_t trieByte=*pos++;
|
||||
int32_t node=*pos++;
|
||||
UBool isFinal=(UBool)(node&ByteTrie::kValueIsFinal);
|
||||
int32_t value=ByteTrie::readValue(pos, node>>1);
|
||||
pos=ByteTrie::skipValue(pos, node);
|
||||
UBool isFinal=(UBool)(node&BytesTrie::kValueIsFinal);
|
||||
int32_t value=BytesTrie::readValue(pos, node>>1);
|
||||
pos=BytesTrie::skipValue(pos, node);
|
||||
stack_.addElement((int32_t)(pos-bytes_), errorCode);
|
||||
stack_.addElement(((length-1)<<16)|str_.length(), errorCode);
|
||||
str_.append((char)trieByte, errorCode);
|
|
@ -1,9 +1,9 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytetrieiterator.h
|
||||
* file name: bytestrieiterator.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
|
@ -12,32 +12,32 @@
|
|||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __BYTETRIEITERATOR_H__
|
||||
#define __BYTETRIEITERATOR_H__
|
||||
#ifndef __BYTESTRIEITERATOR_H__
|
||||
#define __BYTESTRIEITERATOR_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: ByteTrie iterator for all of its (byte sequence, value) pairs.
|
||||
* \brief C++ API: BytesTrie iterator for all of its (byte sequence, value) pairs.
|
||||
*/
|
||||
|
||||
// Needed if and when we change the .dat package index to a ByteTrie,
|
||||
// Needed if and when we change the .dat package index to a BytesTrie,
|
||||
// so that icupkg can work with an input package.
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "bytetrie.h"
|
||||
#include "bytestrie.h"
|
||||
#include "charstr.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Iterator for all of the (byte sequence, value) pairs in a ByteTrie.
|
||||
* Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
|
||||
*/
|
||||
class U_TOOLUTIL_API ByteTrieIterator : public UMemory {
|
||||
class U_TOOLUTIL_API BytesTrieIterator : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Iterates from the root of a byte-serialized ByteTrie.
|
||||
* Iterates from the root of a byte-serialized BytesTrie.
|
||||
* @param trieBytes The trie bytes.
|
||||
* @param maxStringLength If 0, the iterator returns full strings/byte sequences.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
|
@ -46,10 +46,10 @@ public:
|
|||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
ByteTrieIterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
BytesTrieIterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Iterates from the current state of the specified ByteTrie.
|
||||
* Iterates from the current state of the specified BytesTrie.
|
||||
* @param trie The trie whose state will be copied for iteration.
|
||||
* @param maxStringLength If 0, the iterator returns full strings/byte sequences.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
|
@ -58,12 +58,12 @@ public:
|
|||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
BytesTrieIterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Resets this iterator to its initial state.
|
||||
*/
|
||||
ByteTrieIterator &reset();
|
||||
BytesTrieIterator &reset();
|
||||
|
||||
/**
|
||||
* Finds the next (byte sequence, value) pair if there is one.
|
||||
|
@ -113,7 +113,7 @@ private:
|
|||
|
||||
// The stack stores pairs of integers for backtracking to another
|
||||
// outbound edge of a branch node.
|
||||
// The first integer is an offset from ByteTrie.bytes.
|
||||
// The first integer is an offset from BytesTrie.bytes.
|
||||
// The second integer has the str.length() from before the node in bits 15..0,
|
||||
// and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
|
||||
// (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
|
||||
|
@ -123,4 +123,4 @@ private:
|
|||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __BYTETRIEITERATOR_H__
|
||||
#endif // __BYTESTRIEITERATOR_H__
|
|
@ -3,51 +3,49 @@
|
|||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: dicttriebuilder.cpp
|
||||
* file name: stringtriebuilder.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010dec24
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Base class for dictionary-trie builder classes.
|
||||
*/
|
||||
|
||||
#include <typeinfo> // for 'typeid' to work
|
||||
#include "unicode/utypes.h"
|
||||
#include "dicttriebuilder.h"
|
||||
#include "stringtriebuilder.h"
|
||||
#include "uassert.h"
|
||||
#include "uhash.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
hashDictTrieNode(const UHashTok key) {
|
||||
return U_NAMESPACE_QUALIFIER DictTrieBuilder::hashNode(key.pointer);
|
||||
hashStringTrieNode(const UHashTok key) {
|
||||
return U_NAMESPACE_QUALIFIER StringTrieBuilder::hashNode(key.pointer);
|
||||
}
|
||||
|
||||
static UBool U_CALLCONV
|
||||
equalDictTrieNodes(const UHashTok key1, const UHashTok key2) {
|
||||
return U_NAMESPACE_QUALIFIER DictTrieBuilder::equalNodes(key1.pointer, key2.pointer);
|
||||
equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
|
||||
return U_NAMESPACE_QUALIFIER StringTrieBuilder::equalNodes(key1.pointer, key2.pointer);
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
DictTrieBuilder::DictTrieBuilder() : nodes(NULL) {}
|
||||
StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {}
|
||||
|
||||
DictTrieBuilder::~DictTrieBuilder() {
|
||||
StringTrieBuilder::~StringTrieBuilder() {
|
||||
deleteCompactBuilder();
|
||||
}
|
||||
|
||||
void
|
||||
DictTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
|
||||
StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
nodes=uhash_openSize(hashDictTrieNode, equalDictTrieNodes, NULL,
|
||||
nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL,
|
||||
sizeGuess, &errorCode);
|
||||
if(U_SUCCESS(errorCode) && nodes==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -58,17 +56,17 @@ DictTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode)
|
|||
}
|
||||
|
||||
void
|
||||
DictTrieBuilder::deleteCompactBuilder() {
|
||||
StringTrieBuilder::deleteCompactBuilder() {
|
||||
uhash_close(nodes);
|
||||
nodes=NULL;
|
||||
}
|
||||
|
||||
void
|
||||
DictTrieBuilder::build(UDictTrieBuildOption buildOption, int32_t elementsLength,
|
||||
StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
|
||||
UErrorCode &errorCode) {
|
||||
if(buildOption==UDICTTRIE_BUILD_FAST) {
|
||||
if(buildOption==USTRINGTRIE_BUILD_FAST) {
|
||||
writeNode(0, elementsLength, 0);
|
||||
} else /* UDICTTRIE_BUILD_SMALL */ {
|
||||
} else /* USTRINGTRIE_BUILD_SMALL */ {
|
||||
createCompactBuilder(2*elementsLength, errorCode);
|
||||
Node *root=makeNode(0, elementsLength, 0, errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
|
@ -83,7 +81,7 @@ DictTrieBuilder::build(UDictTrieBuildOption buildOption, int32_t elementsLength,
|
|||
// and all strings of the [start..limit[ elements must be sorted and
|
||||
// have a common prefix of length unitIndex.
|
||||
int32_t
|
||||
DictTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
|
||||
StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
|
||||
UBool hasValue=FALSE;
|
||||
int32_t value=0;
|
||||
int32_t type;
|
||||
|
@ -131,7 +129,7 @@ DictTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
|
|||
// start<limit && all strings longer than unitIndex &&
|
||||
// length different units at unitIndex
|
||||
int32_t
|
||||
DictTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
|
||||
StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
|
||||
UChar middleUnits[kMaxSplitBranchLevels];
|
||||
int32_t lessThan[kMaxSplitBranchLevels];
|
||||
int32_t ltLength=0;
|
||||
|
@ -203,8 +201,8 @@ DictTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIn
|
|||
// Requires start<limit,
|
||||
// and all strings of the [start..limit[ elements must be sorted and
|
||||
// have a common prefix of length unitIndex.
|
||||
DictTrieBuilder::Node *
|
||||
DictTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
|
||||
StringTrieBuilder::Node *
|
||||
StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -255,8 +253,8 @@ DictTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErro
|
|||
|
||||
// start<limit && all strings longer than unitIndex &&
|
||||
// length different units at unitIndex
|
||||
DictTrieBuilder::Node *
|
||||
DictTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
|
||||
StringTrieBuilder::Node *
|
||||
StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
|
||||
int32_t length, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
|
@ -314,8 +312,8 @@ DictTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitInd
|
|||
return node;
|
||||
}
|
||||
|
||||
DictTrieBuilder::Node *
|
||||
DictTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
|
||||
StringTrieBuilder::Node *
|
||||
StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
delete newNode;
|
||||
return NULL;
|
||||
|
@ -343,8 +341,8 @@ DictTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
|
|||
return newNode;
|
||||
}
|
||||
|
||||
DictTrieBuilder::Node *
|
||||
DictTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
|
||||
StringTrieBuilder::Node *
|
||||
StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -373,34 +371,34 @@ DictTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
|
|||
}
|
||||
|
||||
UBool
|
||||
DictTrieBuilder::hashNode(const void *node) {
|
||||
StringTrieBuilder::hashNode(const void *node) {
|
||||
return ((const Node *)node)->hashCode();
|
||||
}
|
||||
|
||||
UBool
|
||||
DictTrieBuilder::equalNodes(const void *left, const void *right) {
|
||||
StringTrieBuilder::equalNodes(const void *left, const void *right) {
|
||||
return *(const Node *)left==*(const Node *)right;
|
||||
}
|
||||
|
||||
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(DictTrieBuilder)
|
||||
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(StringTrieBuilder)
|
||||
|
||||
UBool
|
||||
DictTrieBuilder::Node::operator==(const Node &other) const {
|
||||
StringTrieBuilder::Node::operator==(const Node &other) const {
|
||||
return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
|
||||
}
|
||||
|
||||
int32_t
|
||||
DictTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
if(offset==0) {
|
||||
offset=edgeNumber;
|
||||
}
|
||||
return edgeNumber;
|
||||
}
|
||||
|
||||
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(DictTrieBuilder::Node)
|
||||
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(StringTrieBuilder::Node)
|
||||
|
||||
UBool
|
||||
DictTrieBuilder::FinalValueNode::operator==(const Node &other) const {
|
||||
StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
|
||||
if(this==&other) {
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -412,12 +410,12 @@ DictTrieBuilder::FinalValueNode::operator==(const Node &other) const {
|
|||
}
|
||||
|
||||
void
|
||||
DictTrieBuilder::FinalValueNode::write(DictTrieBuilder &builder) {
|
||||
StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
|
||||
offset=builder.writeValueAndFinal(value, TRUE);
|
||||
}
|
||||
|
||||
UBool
|
||||
DictTrieBuilder::ValueNode::operator==(const Node &other) const {
|
||||
StringTrieBuilder::ValueNode::operator==(const Node &other) const {
|
||||
if(this==&other) {
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -429,7 +427,7 @@ DictTrieBuilder::ValueNode::operator==(const Node &other) const {
|
|||
}
|
||||
|
||||
UBool
|
||||
DictTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
|
||||
StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
|
||||
if(this==&other) {
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -441,7 +439,7 @@ DictTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
|
|||
}
|
||||
|
||||
int32_t
|
||||
DictTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
if(offset==0) {
|
||||
offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
|
||||
}
|
||||
|
@ -449,13 +447,13 @@ DictTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber)
|
|||
}
|
||||
|
||||
void
|
||||
DictTrieBuilder::IntermediateValueNode::write(DictTrieBuilder &builder) {
|
||||
StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
|
||||
next->write(builder);
|
||||
offset=builder.writeValueAndFinal(value, FALSE);
|
||||
}
|
||||
|
||||
UBool
|
||||
DictTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
|
||||
StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
|
||||
if(this==&other) {
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -467,7 +465,7 @@ DictTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
|
|||
}
|
||||
|
||||
int32_t
|
||||
DictTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
if(offset==0) {
|
||||
offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
|
||||
}
|
||||
|
@ -475,7 +473,7 @@ DictTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
|
|||
}
|
||||
|
||||
UBool
|
||||
DictTrieBuilder::ListBranchNode::operator==(const Node &other) const {
|
||||
StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
|
||||
if(this==&other) {
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -492,7 +490,7 @@ DictTrieBuilder::ListBranchNode::operator==(const Node &other) const {
|
|||
}
|
||||
|
||||
int32_t
|
||||
DictTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
if(offset==0) {
|
||||
firstEdgeNumber=edgeNumber;
|
||||
int32_t step=0;
|
||||
|
@ -511,7 +509,7 @@ DictTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
|
|||
}
|
||||
|
||||
void
|
||||
DictTrieBuilder::ListBranchNode::write(DictTrieBuilder &builder) {
|
||||
StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
|
||||
// Write the sub-nodes in reverse order: The jump lengths are deltas from
|
||||
// after their own positions, so if we wrote the minUnit sub-node first,
|
||||
// then its jump delta would be larger.
|
||||
|
@ -554,7 +552,7 @@ DictTrieBuilder::ListBranchNode::write(DictTrieBuilder &builder) {
|
|||
}
|
||||
|
||||
UBool
|
||||
DictTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
|
||||
StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
|
||||
if(this==&other) {
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -566,7 +564,7 @@ DictTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
|
|||
}
|
||||
|
||||
int32_t
|
||||
DictTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
if(offset==0) {
|
||||
firstEdgeNumber=edgeNumber;
|
||||
edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber);
|
||||
|
@ -576,7 +574,7 @@ DictTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
|
|||
}
|
||||
|
||||
void
|
||||
DictTrieBuilder::SplitBranchNode::write(DictTrieBuilder &builder) {
|
||||
StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
|
||||
// Encode the less-than branch first.
|
||||
lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
|
||||
// Encode the greater-or-equal branch last because we do not jump for it at all.
|
||||
|
@ -588,7 +586,7 @@ DictTrieBuilder::SplitBranchNode::write(DictTrieBuilder &builder) {
|
|||
}
|
||||
|
||||
UBool
|
||||
DictTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
|
||||
StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
|
||||
if(this==&other) {
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -600,7 +598,7 @@ DictTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
|
|||
}
|
||||
|
||||
int32_t
|
||||
DictTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
|
||||
if(offset==0) {
|
||||
offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
|
||||
}
|
||||
|
@ -608,7 +606,7 @@ DictTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
|
|||
}
|
||||
|
||||
void
|
||||
DictTrieBuilder::BranchHeadNode::write(DictTrieBuilder &builder) {
|
||||
StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
|
||||
next->write(builder);
|
||||
if(length<=builder.getMinLinearMatch()) {
|
||||
offset=builder.writeValueAndType(hasValue, value, length-1);
|
|
@ -3,32 +3,33 @@
|
|||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: dicttriebuilder.h
|
||||
* file name: stringtriebuilder.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010dec24
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Base class for dictionary-trie builder classes.
|
||||
*/
|
||||
|
||||
#ifndef __DICTTRIEBUILDER_H__
|
||||
#define __DICTTRIEBUILDER_H__
|
||||
#ifndef __STRINGTRIEBUILDER_H__
|
||||
#define __STRINGTRIEBUILDER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "uhash.h"
|
||||
|
||||
enum UDictTrieBuildOption {
|
||||
UDICTTRIE_BUILD_FAST,
|
||||
UDICTTRIE_BUILD_SMALL
|
||||
enum UStringTrieBuildOption {
|
||||
USTRINGTRIE_BUILD_FAST,
|
||||
USTRINGTRIE_BUILD_SMALL
|
||||
};
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class U_TOOLUTIL_API DictTrieBuilder : public UObject {
|
||||
/**
|
||||
* Base class for string trie builder classes.
|
||||
*/
|
||||
class U_TOOLUTIL_API StringTrieBuilder : public UObject {
|
||||
public:
|
||||
/** @internal */
|
||||
static UBool hashNode(const void *node);
|
||||
|
@ -36,13 +37,13 @@ public:
|
|||
static UBool equalNodes(const void *left, const void *right);
|
||||
|
||||
protected:
|
||||
DictTrieBuilder();
|
||||
virtual ~DictTrieBuilder();
|
||||
StringTrieBuilder();
|
||||
virtual ~StringTrieBuilder();
|
||||
|
||||
void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
|
||||
void deleteCompactBuilder();
|
||||
|
||||
void build(UDictTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
|
||||
void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
|
||||
|
||||
int32_t writeNode(int32_t start, int32_t limit, int32_t byteIndex);
|
||||
int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t byteIndex, int32_t length);
|
||||
|
@ -157,10 +158,10 @@ protected:
|
|||
*/
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
||||
// write() must set the offset to a positive value.
|
||||
virtual void write(DictTrieBuilder &builder) = 0;
|
||||
virtual void write(StringTrieBuilder &builder) = 0;
|
||||
// See markRightEdgesFirst.
|
||||
inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
|
||||
DictTrieBuilder &builder) {
|
||||
StringTrieBuilder &builder) {
|
||||
// Note: Edge numbers are negative, lastRight<=firstRight.
|
||||
// If offset>0 then this node and its sub-nodes have been written already
|
||||
// and we need not write them again.
|
||||
|
@ -189,7 +190,7 @@ protected:
|
|||
public:
|
||||
FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
|
||||
virtual UBool operator==(const Node &other) const;
|
||||
virtual void write(DictTrieBuilder &builder);
|
||||
virtual void write(StringTrieBuilder &builder);
|
||||
protected:
|
||||
int32_t value;
|
||||
};
|
||||
|
@ -214,7 +215,7 @@ protected:
|
|||
: ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
|
||||
virtual UBool operator==(const Node &other) const;
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
||||
virtual void write(DictTrieBuilder &builder);
|
||||
virtual void write(StringTrieBuilder &builder);
|
||||
protected:
|
||||
Node *next;
|
||||
};
|
||||
|
@ -243,7 +244,7 @@ protected:
|
|||
ListBranchNode() : BranchNode(0x444444), length(0) {}
|
||||
virtual UBool operator==(const Node &other) const;
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
||||
virtual void write(DictTrieBuilder &builder);
|
||||
virtual void write(StringTrieBuilder &builder);
|
||||
// Adds a unit with a final value.
|
||||
void add(int32_t c, int32_t value) {
|
||||
units[length]=(UChar)c;
|
||||
|
@ -275,7 +276,7 @@ protected:
|
|||
unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
|
||||
virtual UBool operator==(const Node &other) const;
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
||||
virtual void write(DictTrieBuilder &builder);
|
||||
virtual void write(StringTrieBuilder &builder);
|
||||
protected:
|
||||
UChar unit;
|
||||
Node *lessThan;
|
||||
|
@ -290,7 +291,7 @@ protected:
|
|||
length(len), next(subNode) {}
|
||||
virtual UBool operator==(const Node &other) const;
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
||||
virtual void write(DictTrieBuilder &builder);
|
||||
virtual void write(StringTrieBuilder &builder);
|
||||
protected:
|
||||
int32_t length;
|
||||
Node *next; // A branch sub-node.
|
||||
|
@ -312,4 +313,4 @@ private:
|
|||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __DICTTRIEBUILDER_H__
|
||||
#endif // __STRINGTRIEBUILDER_H__
|
|
@ -246,10 +246,9 @@
|
|||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="bytetriebuilder.cpp" />
|
||||
<ClCompile Include="bytetrieiterator.cpp" />
|
||||
<ClCompile Include="bytestriebuilder.cpp" />
|
||||
<ClCompile Include="bytestrieiterator.cpp" />
|
||||
<ClCompile Include="denseranges.cpp" />
|
||||
<ClCompile Include="dicttriebuilder.cpp" />
|
||||
<ClCompile Include="filestrm.c" />
|
||||
<ClCompile Include="filetools.cpp" />
|
||||
<ClCompile Include="flagparser.c" />
|
||||
|
@ -268,6 +267,7 @@
|
|||
</ClCompile>
|
||||
<ClCompile Include="pkg_icu.cpp" />
|
||||
<ClCompile Include="pkgitems.cpp" />
|
||||
<ClCompile Include="stringtriebuilder.cpp" />
|
||||
<ClCompile Include="swapimpl.cpp" />
|
||||
<ClCompile Include="toolutil.cpp">
|
||||
<DisableLanguageExtensions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</DisableLanguageExtensions>
|
||||
|
@ -276,9 +276,9 @@
|
|||
<DisableLanguageExtensions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</DisableLanguageExtensions>
|
||||
</ClCompile>
|
||||
<ClCompile Include="ucbuf.c" />
|
||||
<ClCompile Include="uchartrie.cpp" />
|
||||
<ClCompile Include="uchartriebuilder.cpp" />
|
||||
<ClCompile Include="uchartrieiterator.cpp" />
|
||||
<ClCompile Include="ucharstrie.cpp" />
|
||||
<ClCompile Include="ucharstriebuilder.cpp" />
|
||||
<ClCompile Include="ucharstrieiterator.cpp" />
|
||||
<ClCompile Include="ucm.c" />
|
||||
<ClCompile Include="ucmstate.c" />
|
||||
<ClCompile Include="unewdata.c" />
|
||||
|
@ -296,10 +296,9 @@
|
|||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="bytetriebuilder.h" />
|
||||
<ClInclude Include="bytetrieiterator.h" />
|
||||
<ClInclude Include="bytestriebuilder.h" />
|
||||
<ClInclude Include="bytestrieiterator.h" />
|
||||
<ClInclude Include="denseranges.h" />
|
||||
<ClInclude Include="dicttriebuilder.h" />
|
||||
<ClInclude Include="filestrm.h" />
|
||||
<ClInclude Include="filetools.h" />
|
||||
<ClInclude Include="flagparser.h" />
|
||||
|
@ -309,12 +308,13 @@
|
|||
<ClInclude Include="pkg_icu.h" />
|
||||
<ClInclude Include="pkg_imp.h" />
|
||||
<ClInclude Include="platform_xopen_source_extended.h" />
|
||||
<ClInclude Include="stringtriebuilder.h" />
|
||||
<ClInclude Include="swapimpl.h" />
|
||||
<ClInclude Include="toolutil.h" />
|
||||
<ClInclude Include="ucbuf.h" />
|
||||
<ClInclude Include="uchartrie.h" />
|
||||
<ClInclude Include="uchartriebuilder.h" />
|
||||
<ClInclude Include="uchartrieiterator.h" />
|
||||
<ClInclude Include="ucharstrie.h" />
|
||||
<ClInclude Include="ucharstriebuilder.h" />
|
||||
<ClInclude Include="ucharstrieiterator.h" />
|
||||
<ClInclude Include="ucm.h" />
|
||||
<ClInclude Include="unewdata.h" />
|
||||
<ClInclude Include="uoptions.h" />
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: uchartrie.h
|
||||
* file name: ucharstrie.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
|
@ -15,7 +15,7 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "uassert.h"
|
||||
#include "uchartrie.h"
|
||||
#include "ucharstrie.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -48,20 +48,20 @@ Appendable::append(const UChar *s, int32_t length) {
|
|||
|
||||
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Appendable)
|
||||
|
||||
UDictTrieResult
|
||||
UCharTrie::current() const {
|
||||
UStringTrieResult
|
||||
UCharsTrie::current() const {
|
||||
const UChar *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
} else {
|
||||
int32_t node;
|
||||
return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
|
||||
valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
UDictTrieResult
|
||||
UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
|
||||
UStringTrieResult
|
||||
UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
|
||||
// Branch according to the current unit.
|
||||
if(length==0) {
|
||||
length=*pos++;
|
||||
|
@ -83,11 +83,11 @@ UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
|
|||
// and divides length by 2.
|
||||
do {
|
||||
if(uchar==*pos++) {
|
||||
UDictTrieResult result;
|
||||
UStringTrieResult result;
|
||||
int32_t node=*pos;
|
||||
if(node&kValueIsFinal) {
|
||||
// Leave the final value for getValue() to read.
|
||||
result=UDICTTRIE_HAS_FINAL_VALUE;
|
||||
result=USTRINGTRIE_FINAL_VALUE;
|
||||
} else {
|
||||
// Use the non-final value as the jump delta.
|
||||
++pos;
|
||||
|
@ -104,7 +104,7 @@ UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
|
|||
// end readValue()
|
||||
pos+=delta;
|
||||
node=*pos;
|
||||
result= node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
}
|
||||
pos_=pos;
|
||||
return result;
|
||||
|
@ -115,15 +115,15 @@ UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
|
|||
if(uchar==*pos++) {
|
||||
pos_=pos;
|
||||
int32_t node=*pos;
|
||||
return node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
} else {
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
}
|
||||
|
||||
UDictTrieResult
|
||||
UCharTrie::nextImpl(const UChar *pos, int32_t uchar) {
|
||||
UStringTrieResult
|
||||
UCharsTrie::nextImpl(const UChar *pos, int32_t uchar) {
|
||||
int32_t node=*pos++;
|
||||
for(;;) {
|
||||
if(node<kMinLinearMatch) {
|
||||
|
@ -135,7 +135,7 @@ UCharTrie::nextImpl(const UChar *pos, int32_t uchar) {
|
|||
remainingMatchLength_=--length;
|
||||
pos_=pos;
|
||||
return (length<0 && (node=*pos)>=kMinValueLead) ?
|
||||
valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
} else {
|
||||
// No match.
|
||||
break;
|
||||
|
@ -150,14 +150,14 @@ UCharTrie::nextImpl(const UChar *pos, int32_t uchar) {
|
|||
}
|
||||
}
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
|
||||
UDictTrieResult
|
||||
UCharTrie::next(int32_t uchar) {
|
||||
UStringTrieResult
|
||||
UCharsTrie::next(int32_t uchar) {
|
||||
const UChar *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
|
||||
if(length>=0) {
|
||||
|
@ -167,24 +167,24 @@ UCharTrie::next(int32_t uchar) {
|
|||
pos_=pos;
|
||||
int32_t node;
|
||||
return (length<0 && (node=*pos)>=kMinValueLead) ?
|
||||
valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
} else {
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
}
|
||||
return nextImpl(pos, uchar);
|
||||
}
|
||||
|
||||
UDictTrieResult
|
||||
UCharTrie::next(const UChar *s, int32_t sLength) {
|
||||
UStringTrieResult
|
||||
UCharsTrie::next(const UChar *s, int32_t sLength) {
|
||||
if(sLength<0 ? *s==0 : sLength==0) {
|
||||
// Empty input.
|
||||
return current();
|
||||
}
|
||||
const UChar *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
|
||||
for(;;) {
|
||||
|
@ -198,7 +198,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
|
|||
pos_=pos;
|
||||
int32_t node;
|
||||
return (length<0 && (node=*pos)>=kMinValueLead) ?
|
||||
valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
}
|
||||
if(length<0) {
|
||||
remainingMatchLength_=length;
|
||||
|
@ -206,7 +206,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
|
|||
}
|
||||
if(uchar!=*pos) {
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
++pos;
|
||||
--length;
|
||||
|
@ -218,7 +218,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
|
|||
pos_=pos;
|
||||
int32_t node;
|
||||
return (length<0 && (node=*pos)>=kMinValueLead) ?
|
||||
valueResult(node) : UDICTTRIE_NO_VALUE;
|
||||
valueResult(node) : USTRINGTRIE_NO_VALUE;
|
||||
}
|
||||
uchar=*s++;
|
||||
--sLength;
|
||||
|
@ -228,7 +228,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
|
|||
}
|
||||
if(uchar!=*pos) {
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
++pos;
|
||||
--length;
|
||||
|
@ -237,9 +237,9 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
|
|||
int32_t node=*pos++;
|
||||
for(;;) {
|
||||
if(node<kMinLinearMatch) {
|
||||
UDictTrieResult result=branchNext(pos, node, uchar);
|
||||
if(result==UDICTTRIE_NO_MATCH) {
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
UStringTrieResult result=branchNext(pos, node, uchar);
|
||||
if(result==USTRINGTRIE_NO_MATCH) {
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
// Fetch the next input unit, if there is one.
|
||||
if(sLength<0) {
|
||||
|
@ -253,10 +253,10 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
|
|||
uchar=*s++;
|
||||
--sLength;
|
||||
}
|
||||
if(result==UDICTTRIE_HAS_FINAL_VALUE) {
|
||||
if(result==USTRINGTRIE_FINAL_VALUE) {
|
||||
// No further matching units.
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
|
||||
node=*pos++;
|
||||
|
@ -265,7 +265,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
|
|||
length=node-kMinLinearMatch; // Actual match length minus 1.
|
||||
if(uchar!=*pos) {
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
++pos;
|
||||
--length;
|
||||
|
@ -273,7 +273,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
|
|||
} else if(node&kValueIsFinal) {
|
||||
// No further matching units.
|
||||
stop();
|
||||
return UDICTTRIE_NO_MATCH;
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
} else {
|
||||
// Skip intermediate value.
|
||||
pos=skipNodeValue(pos, node);
|
||||
|
@ -284,8 +284,8 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
|
|||
}
|
||||
|
||||
const UChar *
|
||||
UCharTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
|
||||
UBool haveUniqueValue, int32_t &uniqueValue) {
|
||||
UCharsTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
|
||||
UBool haveUniqueValue, int32_t &uniqueValue) {
|
||||
while(length>kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison unit
|
||||
if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
|
||||
|
@ -322,7 +322,7 @@ UCharTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
|
|||
}
|
||||
|
||||
UBool
|
||||
UCharTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
|
||||
UCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
|
||||
int32_t node=*pos++;
|
||||
for(;;) {
|
||||
if(node<kMinLinearMatch) {
|
||||
|
@ -365,7 +365,7 @@ UCharTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uni
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrie::getNextUChars(Appendable &out) const {
|
||||
UCharsTrie::getNextUChars(Appendable &out) const {
|
||||
const UChar *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
return 0;
|
||||
|
@ -397,7 +397,7 @@ UCharTrie::getNextUChars(Appendable &out) const {
|
|||
}
|
||||
|
||||
void
|
||||
UCharTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
|
||||
UCharsTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
|
||||
while(length>kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison unit
|
||||
getNextBranchUChars(jumpByDelta(pos), length>>1, out);
|
|
@ -1,9 +1,9 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: uchartrie.h
|
||||
* file name: ucharstrie.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
|
@ -12,24 +12,24 @@
|
|||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UCHARTRIE_H__
|
||||
#define __UCHARTRIE_H__
|
||||
#ifndef __UCHARSTRIE_H__
|
||||
#define __UCHARSTRIE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Dictionary trie for mapping Unicode strings (or 16-bit-unit sequences)
|
||||
* \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences)
|
||||
* to integer values.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "uassert.h"
|
||||
#include "udicttrie.h"
|
||||
#include "ustringtrie.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UCharTrieBuilder;
|
||||
class UCharTrieIterator;
|
||||
class UCharsTrieBuilder;
|
||||
class UCharsTrieIterator;
|
||||
|
||||
/**
|
||||
* Base class for objects to which Unicode characters and strings can be appended.
|
||||
|
@ -67,34 +67,34 @@ private:
|
|||
};
|
||||
|
||||
/**
|
||||
* Light-weight, non-const reader class for a UCharTrie.
|
||||
* Light-weight, non-const reader class for a UCharsTrie.
|
||||
* Traverses a UChar-serialized data structure with minimal state,
|
||||
* for mapping strings (16-bit-unit sequences) to non-negative integer values.
|
||||
*/
|
||||
class U_TOOLUTIL_API UCharTrie : public UMemory {
|
||||
class U_TOOLUTIL_API UCharsTrie : public UMemory {
|
||||
public:
|
||||
UCharTrie(const UChar *trieUChars)
|
||||
UCharsTrie(const UChar *trieUChars)
|
||||
: uchars_(trieUChars),
|
||||
pos_(uchars_), remainingMatchLength_(-1) {}
|
||||
|
||||
/**
|
||||
* Resets this trie to its initial state.
|
||||
*/
|
||||
UCharTrie &reset() {
|
||||
UCharsTrie &reset() {
|
||||
pos_=uchars_;
|
||||
remainingMatchLength_=-1;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* UCharTrie state object, for saving a trie's current state
|
||||
* UCharsTrie state object, for saving a trie's current state
|
||||
* and resetting the trie back to this state later.
|
||||
*/
|
||||
class State : public UMemory {
|
||||
public:
|
||||
State() { uchars=NULL; }
|
||||
private:
|
||||
friend class UCharTrie;
|
||||
friend class UCharsTrie;
|
||||
|
||||
const UChar *uchars;
|
||||
const UChar *pos;
|
||||
|
@ -105,7 +105,7 @@ public:
|
|||
* Saves the state of this trie.
|
||||
* @see resetToState
|
||||
*/
|
||||
const UCharTrie &saveState(State &state) const {
|
||||
const UCharsTrie &saveState(State &state) const {
|
||||
state.uchars=uchars_;
|
||||
state.pos=pos_;
|
||||
state.remainingMatchLength=remainingMatchLength_;
|
||||
|
@ -119,7 +119,7 @@ public:
|
|||
* @see saveState
|
||||
* @see reset
|
||||
*/
|
||||
UCharTrie &resetToState(const State &state) {
|
||||
UCharsTrie &resetToState(const State &state) {
|
||||
if(uchars_==state.uchars && uchars_!=NULL) {
|
||||
pos_=state.pos;
|
||||
remainingMatchLength_=state.remainingMatchLength;
|
||||
|
@ -132,14 +132,14 @@ public:
|
|||
* and whether another input UChar can continue a matching string.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
UDictTrieResult current() const;
|
||||
UStringTrieResult current() const;
|
||||
|
||||
/**
|
||||
* Traverses the trie from the initial state for this input UChar.
|
||||
* Equivalent to reset().next(uchar).
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
inline UDictTrieResult first(int32_t uchar) {
|
||||
inline UStringTrieResult first(int32_t uchar) {
|
||||
remainingMatchLength_=-1;
|
||||
return nextImpl(uchars_, uchar);
|
||||
}
|
||||
|
@ -150,31 +150,31 @@ public:
|
|||
* Equivalent to reset().nextForCodePoint(cp).
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
inline UDictTrieResult firstForCodePoint(UChar32 cp) {
|
||||
inline UStringTrieResult firstForCodePoint(UChar32 cp) {
|
||||
return cp<=0xffff ?
|
||||
first(cp) :
|
||||
(first(U16_LEAD(cp))!=UDICTTRIE_NO_MATCH ?
|
||||
(USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
|
||||
next(U16_TRAIL(cp)) :
|
||||
UDICTTRIE_NO_MATCH);
|
||||
USTRINGTRIE_NO_MATCH);
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this input UChar.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
UDictTrieResult next(int32_t uchar);
|
||||
UStringTrieResult next(int32_t uchar);
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for the
|
||||
* one or two UTF-16 code units for this input code point.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
inline UDictTrieResult nextForCodePoint(UChar32 cp) {
|
||||
inline UStringTrieResult nextForCodePoint(UChar32 cp) {
|
||||
return cp<=0xffff ?
|
||||
next(cp) :
|
||||
(next(U16_LEAD(cp))!=UDICTTRIE_NO_MATCH ?
|
||||
(USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
|
||||
next(U16_TRAIL(cp)) :
|
||||
UDICTTRIE_NO_MATCH);
|
||||
USTRINGTRIE_NO_MATCH);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -183,19 +183,20 @@ public:
|
|||
* \code
|
||||
* Result result=current();
|
||||
* for(each c in s)
|
||||
* if((result=next(c))==UDICTTRIE_NO_MATCH) return UDICTTRIE_NO_MATCH;
|
||||
* if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
|
||||
* result=next(c);
|
||||
* return result;
|
||||
* \endcode
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
UDictTrieResult next(const UChar *s, int32_t length);
|
||||
UStringTrieResult next(const UChar *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Returns a matching string's value if called immediately after
|
||||
* current()/first()/next() returned UDICTTRIE_HAS_VALUE or UDICTTRIE_HAS_FINAL_VALUE.
|
||||
* current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
|
||||
* getValue() can be called multiple times.
|
||||
*
|
||||
* Do not call getValue() after UDICTTRIE_NO_MATCH or UDICTTRIE_NO_VALUE!
|
||||
* Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
|
||||
*/
|
||||
inline int32_t getValue() const {
|
||||
const UChar *pos=pos_;
|
||||
|
@ -221,7 +222,7 @@ public:
|
|||
|
||||
/**
|
||||
* Finds each UChar which continues the string from the current state.
|
||||
* That is, each UChar c for which it would be next(c)!=UDICTTRIE_NO_MATCH now.
|
||||
* That is, each UChar c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now.
|
||||
* @param out Each next UChar is appended to this object.
|
||||
* (Only uses the out.append(c) method.)
|
||||
* @return the number of UChars which continue the string from here
|
||||
|
@ -229,8 +230,8 @@ public:
|
|||
int32_t getNextUChars(Appendable &out) const;
|
||||
|
||||
private:
|
||||
friend class UCharTrieBuilder;
|
||||
friend class UCharTrieIterator;
|
||||
friend class UCharsTrieBuilder;
|
||||
friend class UCharsTrieIterator;
|
||||
|
||||
inline void stop() {
|
||||
pos_=NULL;
|
||||
|
@ -313,15 +314,15 @@ private:
|
|||
return pos;
|
||||
}
|
||||
|
||||
static inline UDictTrieResult valueResult(int32_t node) {
|
||||
return (UDictTrieResult)(UDICTTRIE_HAS_VALUE-(node>>15));
|
||||
static inline UStringTrieResult valueResult(int32_t node) {
|
||||
return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15));
|
||||
}
|
||||
|
||||
// Handles a branch node for both next(uchar) and next(string).
|
||||
UDictTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar);
|
||||
UStringTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar);
|
||||
|
||||
// Requires remainingMatchLength_<0.
|
||||
UDictTrieResult nextImpl(const UChar *pos, int32_t uchar);
|
||||
UStringTrieResult nextImpl(const UChar *pos, int32_t uchar);
|
||||
|
||||
// Helper functions for hasUniqueValue().
|
||||
// Recursively finds a unique value (or whether there is not a unique one)
|
||||
|
@ -336,7 +337,7 @@ private:
|
|||
// getNextUChars() when pos is on a branch node.
|
||||
static void getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out);
|
||||
|
||||
// UCharTrie data structure
|
||||
// UCharsTrie data structure
|
||||
//
|
||||
// The trie consists of a series of UChar-serialized nodes for incremental
|
||||
// Unicode string/UChar sequence matching. (UChar=16-bit unsigned integer)
|
||||
|
@ -417,7 +418,7 @@ private:
|
|||
|
||||
static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff
|
||||
|
||||
// Fixed value referencing the UCharTrie words.
|
||||
// Fixed value referencing the UCharsTrie words.
|
||||
const UChar *uchars_;
|
||||
|
||||
// Iterator variables.
|
||||
|
@ -430,4 +431,4 @@ private:
|
|||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __UCHARTRIE_H__
|
||||
#endif // __UCHARSTRIE_H__
|
|
@ -3,15 +3,13 @@
|
|||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: uchartriebuilder.h
|
||||
* file name: ucharstriebuilder.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010nov14
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Builder class for UCharTrie dictionary trie.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
@ -19,18 +17,18 @@
|
|||
#include "unicode/ustring.h"
|
||||
#include "cmemory.h"
|
||||
#include "uarrsort.h"
|
||||
#include "uchartrie.h"
|
||||
#include "uchartriebuilder.h"
|
||||
#include "ucharstrie.h"
|
||||
#include "ucharstriebuilder.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* Note: This builder implementation stores (string, value) pairs with full copies
|
||||
* of the 16-bit-unit sequences, until the UCharTrie is built.
|
||||
* of the 16-bit-unit sequences, until the UCharsTrie is built.
|
||||
* It might(!) take less memory if we collected the data in a temporary, dynamic trie.
|
||||
*/
|
||||
|
||||
class UCharTrieElement : public UMemory {
|
||||
class UCharsTrieElement : public UMemory {
|
||||
public:
|
||||
// Use compiler's default constructor, initializes nothing.
|
||||
|
||||
|
@ -50,7 +48,7 @@ public:
|
|||
|
||||
int32_t getValue() const { return value; }
|
||||
|
||||
int32_t compareStringTo(const UCharTrieElement &o, const UnicodeString &strings) const;
|
||||
int32_t compareStringTo(const UCharsTrieElement &o, const UnicodeString &strings) const;
|
||||
|
||||
private:
|
||||
// The first strings unit contains the string length.
|
||||
|
@ -60,8 +58,8 @@ private:
|
|||
};
|
||||
|
||||
void
|
||||
UCharTrieElement::setTo(const UnicodeString &s, int32_t val,
|
||||
UnicodeString &strings, UErrorCode &errorCode) {
|
||||
UCharsTrieElement::setTo(const UnicodeString &s, int32_t val,
|
||||
UnicodeString &strings, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
@ -78,17 +76,17 @@ UCharTrieElement::setTo(const UnicodeString &s, int32_t val,
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieElement::compareStringTo(const UCharTrieElement &other, const UnicodeString &strings) const {
|
||||
UCharsTrieElement::compareStringTo(const UCharsTrieElement &other, const UnicodeString &strings) const {
|
||||
return getString(strings).compare(other.getString(strings));
|
||||
}
|
||||
|
||||
UCharTrieBuilder::~UCharTrieBuilder() {
|
||||
UCharsTrieBuilder::~UCharsTrieBuilder() {
|
||||
delete[] elements;
|
||||
uprv_free(uchars);
|
||||
}
|
||||
|
||||
UCharTrieBuilder &
|
||||
UCharTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) {
|
||||
UCharsTrieBuilder &
|
||||
UCharsTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return *this;
|
||||
}
|
||||
|
@ -105,12 +103,12 @@ UCharTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCo
|
|||
} else {
|
||||
newCapacity=4*elementsCapacity;
|
||||
}
|
||||
UCharTrieElement *newElements=new UCharTrieElement[newCapacity];
|
||||
UCharsTrieElement *newElements=new UCharsTrieElement[newCapacity];
|
||||
if(newElements==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
if(elementsLength>0) {
|
||||
uprv_memcpy(newElements, elements, elementsLength*sizeof(UCharTrieElement));
|
||||
uprv_memcpy(newElements, elements, elementsLength*sizeof(UCharsTrieElement));
|
||||
}
|
||||
delete[] elements;
|
||||
elements=newElements;
|
||||
|
@ -128,15 +126,15 @@ U_CDECL_BEGIN
|
|||
static int32_t U_CALLCONV
|
||||
compareElementStrings(const void *context, const void *left, const void *right) {
|
||||
const UnicodeString *strings=reinterpret_cast<const UnicodeString *>(context);
|
||||
const UCharTrieElement *leftElement=reinterpret_cast<const UCharTrieElement *>(left);
|
||||
const UCharTrieElement *rightElement=reinterpret_cast<const UCharTrieElement *>(right);
|
||||
const UCharsTrieElement *leftElement=reinterpret_cast<const UCharsTrieElement *>(left);
|
||||
const UCharsTrieElement *rightElement=reinterpret_cast<const UCharsTrieElement *>(right);
|
||||
return leftElement->compareStringTo(*rightElement, *strings);
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
UnicodeString &
|
||||
UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode) {
|
||||
UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return result;
|
||||
}
|
||||
|
@ -153,7 +151,7 @@ UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result,
|
|||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return result;
|
||||
}
|
||||
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharTrieElement),
|
||||
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement),
|
||||
compareElementStrings, &strings,
|
||||
FALSE, // need not be a stable sort
|
||||
&errorCode);
|
||||
|
@ -179,7 +177,7 @@ UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result,
|
|||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return result;
|
||||
}
|
||||
DictTrieBuilder::build(buildOption, elementsLength, errorCode);
|
||||
StringTrieBuilder::build(buildOption, elementsLength, errorCode);
|
||||
if(uchars==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
|
@ -189,24 +187,24 @@ UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result,
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::getElementStringLength(int32_t i) const {
|
||||
UCharsTrieBuilder::getElementStringLength(int32_t i) const {
|
||||
return elements[i].getStringLength(strings);
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const {
|
||||
UCharsTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const {
|
||||
return elements[i].charAt(unitIndex, strings);
|
||||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::getElementValue(int32_t i) const {
|
||||
UCharsTrieBuilder::getElementValue(int32_t i) const {
|
||||
return elements[i].getValue();
|
||||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const {
|
||||
const UCharTrieElement &firstElement=elements[first];
|
||||
const UCharTrieElement &lastElement=elements[last];
|
||||
UCharsTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const {
|
||||
const UCharsTrieElement &firstElement=elements[first];
|
||||
const UCharsTrieElement &lastElement=elements[last];
|
||||
int32_t minStringLength=firstElement.getStringLength(strings);
|
||||
while(++unitIndex<minStringLength &&
|
||||
firstElement.charAt(unitIndex, strings)==
|
||||
|
@ -215,7 +213,7 @@ UCharTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t uni
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const {
|
||||
UCharsTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const {
|
||||
int32_t length=0; // Number of different units at unitIndex.
|
||||
int32_t i=start;
|
||||
do {
|
||||
|
@ -229,7 +227,7 @@ UCharTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t unitIn
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const {
|
||||
UCharsTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const {
|
||||
do {
|
||||
UChar unit=elements[i++].charAt(unitIndex, strings);
|
||||
while(unit==elements[i].charAt(unitIndex, strings)) {
|
||||
|
@ -240,20 +238,20 @@ UCharTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const {
|
||||
UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const {
|
||||
while(unit==elements[i].charAt(unitIndex, strings)) {
|
||||
++i;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
UCharTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
|
||||
UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
|
||||
: LinearMatchNode(len, nextNode), s(units) {
|
||||
hash=hash*37+uhash_hashUCharsN(units, len);
|
||||
}
|
||||
|
||||
UBool
|
||||
UCharTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
|
||||
UCharsTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
|
||||
if(this==&other) {
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -265,16 +263,16 @@ UCharTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
|
|||
}
|
||||
|
||||
void
|
||||
UCharTrieBuilder::UCTLinearMatchNode::write(DictTrieBuilder &builder) {
|
||||
UCharTrieBuilder &b=(UCharTrieBuilder &)builder;
|
||||
UCharsTrieBuilder::UCTLinearMatchNode::write(StringTrieBuilder &builder) {
|
||||
UCharsTrieBuilder &b=(UCharsTrieBuilder &)builder;
|
||||
next->write(builder);
|
||||
b.write(s, length);
|
||||
offset=b.writeValueAndType(hasValue, value, b.getMinLinearMatch()+length-1);
|
||||
}
|
||||
|
||||
DictTrieBuilder::Node *
|
||||
UCharTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
|
||||
Node *nextNode) const {
|
||||
StringTrieBuilder::Node *
|
||||
UCharsTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
|
||||
Node *nextNode) const {
|
||||
return new UCTLinearMatchNode(
|
||||
elements[i].getString(strings).getBuffer()+unitIndex,
|
||||
length,
|
||||
|
@ -282,7 +280,7 @@ UCharTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t le
|
|||
}
|
||||
|
||||
UBool
|
||||
UCharTrieBuilder::ensureCapacity(int32_t length) {
|
||||
UCharsTrieBuilder::ensureCapacity(int32_t length) {
|
||||
if(uchars==NULL) {
|
||||
return FALSE; // previous memory allocation had failed
|
||||
}
|
||||
|
@ -308,7 +306,7 @@ UCharTrieBuilder::ensureCapacity(int32_t length) {
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::write(int32_t unit) {
|
||||
UCharsTrieBuilder::write(int32_t unit) {
|
||||
int32_t newLength=ucharsLength+1;
|
||||
if(ensureCapacity(newLength)) {
|
||||
ucharsLength=newLength;
|
||||
|
@ -318,7 +316,7 @@ UCharTrieBuilder::write(int32_t unit) {
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::write(const UChar *s, int32_t length) {
|
||||
UCharsTrieBuilder::write(const UChar *s, int32_t length) {
|
||||
int32_t newLength=ucharsLength+length;
|
||||
if(ensureCapacity(newLength)) {
|
||||
ucharsLength=newLength;
|
||||
|
@ -328,24 +326,24 @@ UCharTrieBuilder::write(const UChar *s, int32_t length) {
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) {
|
||||
UCharsTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) {
|
||||
return write(elements[i].getString(strings).getBuffer()+unitIndex, length);
|
||||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
|
||||
UCharsTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
|
||||
UChar intUnits[3];
|
||||
int32_t length;
|
||||
if(i<0 || i>UCharTrie::kMaxTwoUnitValue) {
|
||||
intUnits[0]=(UChar)(UCharTrie::kThreeUnitValueLead);
|
||||
if(i<0 || i>UCharsTrie::kMaxTwoUnitValue) {
|
||||
intUnits[0]=(UChar)(UCharsTrie::kThreeUnitValueLead);
|
||||
intUnits[1]=(UChar)(i>>16);
|
||||
intUnits[2]=(UChar)i;
|
||||
length=3;
|
||||
} else if(i<=UCharTrie::kMaxOneUnitValue) {
|
||||
} else if(i<=UCharsTrie::kMaxOneUnitValue) {
|
||||
intUnits[0]=(UChar)(i);
|
||||
length=1;
|
||||
} else {
|
||||
intUnits[0]=(UChar)(UCharTrie::kMinTwoUnitValueLead+(i>>16));
|
||||
intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitValueLead+(i>>16));
|
||||
intUnits[1]=(UChar)i;
|
||||
length=2;
|
||||
}
|
||||
|
@ -354,22 +352,22 @@ UCharTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
|
||||
UCharsTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
|
||||
if(!hasValue) {
|
||||
return write(node);
|
||||
}
|
||||
UChar intUnits[3];
|
||||
int32_t length;
|
||||
if(value<0 || value>UCharTrie::kMaxTwoUnitNodeValue) {
|
||||
intUnits[0]=(UChar)(UCharTrie::kThreeUnitNodeValueLead);
|
||||
if(value<0 || value>UCharsTrie::kMaxTwoUnitNodeValue) {
|
||||
intUnits[0]=(UChar)(UCharsTrie::kThreeUnitNodeValueLead);
|
||||
intUnits[1]=(UChar)(value>>16);
|
||||
intUnits[2]=(UChar)value;
|
||||
length=3;
|
||||
} else if(value<=UCharTrie::kMaxOneUnitNodeValue) {
|
||||
} else if(value<=UCharsTrie::kMaxOneUnitNodeValue) {
|
||||
intUnits[0]=(UChar)((value+1)<<6);
|
||||
length=1;
|
||||
} else {
|
||||
intUnits[0]=(UChar)(UCharTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0));
|
||||
intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0));
|
||||
intUnits[1]=(UChar)value;
|
||||
length=2;
|
||||
}
|
||||
|
@ -378,18 +376,18 @@ UCharTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node)
|
|||
}
|
||||
|
||||
int32_t
|
||||
UCharTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
|
||||
UCharsTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
|
||||
int32_t i=ucharsLength-jumpTarget;
|
||||
UChar intUnits[3];
|
||||
int32_t length;
|
||||
U_ASSERT(i>=0);
|
||||
if(i<=UCharTrie::kMaxOneUnitDelta) {
|
||||
if(i<=UCharsTrie::kMaxOneUnitDelta) {
|
||||
length=0;
|
||||
} else if(i<=UCharTrie::kMaxTwoUnitDelta) {
|
||||
intUnits[0]=(UChar)(UCharTrie::kMinTwoUnitDeltaLead+(i>>16));
|
||||
} else if(i<=UCharsTrie::kMaxTwoUnitDelta) {
|
||||
intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitDeltaLead+(i>>16));
|
||||
length=1;
|
||||
} else {
|
||||
intUnits[0]=(UChar)(UCharTrie::kThreeUnitDeltaLead);
|
||||
intUnits[0]=(UChar)(UCharsTrie::kThreeUnitDeltaLead);
|
||||
intUnits[1]=(UChar)(i>>16);
|
||||
length=2;
|
||||
}
|
|
@ -3,41 +3,42 @@
|
|||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: uchartriebuilder.h
|
||||
* file name: ucharstriebuilder.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010nov14
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Builder class for UCharTrie dictionary trie.
|
||||
*/
|
||||
|
||||
#ifndef __UCHARTRIEBUILDER_H__
|
||||
#define __UCHARTRIEBUILDER_H__
|
||||
#ifndef __UCHARSTRIEBUILDER_H__
|
||||
#define __UCHARSTRIEBUILDER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "dicttriebuilder.h"
|
||||
#include "uchartrie.h"
|
||||
#include "stringtriebuilder.h"
|
||||
#include "ucharstrie.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UCharTrieElement;
|
||||
class UCharsTrieElement;
|
||||
|
||||
class U_TOOLUTIL_API UCharTrieBuilder : public DictTrieBuilder {
|
||||
/**
|
||||
* Builder class for UCharsTrie.
|
||||
*/
|
||||
class U_TOOLUTIL_API UCharsTrieBuilder : public StringTrieBuilder {
|
||||
public:
|
||||
UCharTrieBuilder()
|
||||
UCharsTrieBuilder()
|
||||
: elements(NULL), elementsCapacity(0), elementsLength(0),
|
||||
uchars(NULL), ucharsCapacity(0), ucharsLength(0) {}
|
||||
virtual ~UCharTrieBuilder();
|
||||
virtual ~UCharsTrieBuilder();
|
||||
|
||||
UCharTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);
|
||||
UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);
|
||||
|
||||
UnicodeString &build(UDictTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode);
|
||||
UnicodeString &build(UStringTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode);
|
||||
|
||||
UCharTrieBuilder &clear() {
|
||||
UCharsTrieBuilder &clear() {
|
||||
strings.remove();
|
||||
elementsLength=0;
|
||||
ucharsLength=0;
|
||||
|
@ -57,15 +58,15 @@ private:
|
|||
|
||||
virtual UBool matchNodesCanHaveValues() const { return TRUE; }
|
||||
|
||||
virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharTrie::kMaxBranchLinearSubNodeLength; }
|
||||
virtual int32_t getMinLinearMatch() const { return UCharTrie::kMinLinearMatch; }
|
||||
virtual int32_t getMaxLinearMatchLength() const { return UCharTrie::kMaxLinearMatchLength; }
|
||||
virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharsTrie::kMaxBranchLinearSubNodeLength; }
|
||||
virtual int32_t getMinLinearMatch() const { return UCharsTrie::kMinLinearMatch; }
|
||||
virtual int32_t getMaxLinearMatchLength() const { return UCharsTrie::kMaxLinearMatchLength; }
|
||||
|
||||
class UCTLinearMatchNode : public LinearMatchNode {
|
||||
public:
|
||||
UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode);
|
||||
virtual UBool operator==(const Node &other) const;
|
||||
virtual void write(DictTrieBuilder &builder);
|
||||
virtual void write(StringTrieBuilder &builder);
|
||||
private:
|
||||
const UChar *s;
|
||||
};
|
||||
|
@ -82,7 +83,7 @@ private:
|
|||
virtual int32_t writeDeltaTo(int32_t jumpTarget);
|
||||
|
||||
UnicodeString strings;
|
||||
UCharTrieElement *elements;
|
||||
UCharsTrieElement *elements;
|
||||
int32_t elementsCapacity;
|
||||
int32_t elementsLength;
|
||||
|
||||
|
@ -95,4 +96,4 @@ private:
|
|||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __UCHARTRIEBUILDER_H__
|
||||
#endif // __UCHARSTRIEBUILDER_H__
|
|
@ -1,9 +1,9 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: uchartrieiterator.h
|
||||
* file name: ucharstrieiterator.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
|
@ -14,22 +14,22 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "uchartrie.h"
|
||||
#include "uchartrieiterator.h"
|
||||
#include "ucharstrie.h"
|
||||
#include "ucharstrieiterator.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
UCharTrieIterator::UCharTrieIterator(const UChar *trieUChars, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
UCharsTrieIterator::UCharsTrieIterator(const UChar *trieUChars, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
: uchars_(trieUChars),
|
||||
pos_(uchars_), initialPos_(uchars_),
|
||||
remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
|
||||
skipValue_(FALSE),
|
||||
maxLength_(maxStringLength), value_(0), stack_(errorCode) {}
|
||||
|
||||
UCharTrieIterator::UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
UCharsTrieIterator::UCharsTrieIterator(const UCharsTrie &trie, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
: uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
|
||||
remainingMatchLength_(trie.remainingMatchLength_),
|
||||
initialRemainingMatchLength_(trie.remainingMatchLength_),
|
||||
|
@ -48,7 +48,7 @@ UCharTrieIterator::UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLen
|
|||
}
|
||||
}
|
||||
|
||||
UCharTrieIterator &UCharTrieIterator::reset() {
|
||||
UCharsTrieIterator &UCharsTrieIterator::reset() {
|
||||
pos_=initialPos_;
|
||||
remainingMatchLength_=initialRemainingMatchLength_;
|
||||
skipValue_=FALSE;
|
||||
|
@ -64,7 +64,7 @@ UCharTrieIterator &UCharTrieIterator::reset() {
|
|||
}
|
||||
|
||||
UBool
|
||||
UCharTrieIterator::next(UErrorCode &errorCode) {
|
||||
UCharsTrieIterator::next(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -97,18 +97,18 @@ UCharTrieIterator::next(UErrorCode &errorCode) {
|
|||
}
|
||||
for(;;) {
|
||||
int32_t node=*pos++;
|
||||
if(node>=UCharTrie::kMinValueLead) {
|
||||
if(node>=UCharsTrie::kMinValueLead) {
|
||||
if(skipValue_) {
|
||||
pos=UCharTrie::skipNodeValue(pos, node);
|
||||
node&=UCharTrie::kNodeTypeMask;
|
||||
pos=UCharsTrie::skipNodeValue(pos, node);
|
||||
node&=UCharsTrie::kNodeTypeMask;
|
||||
skipValue_=FALSE;
|
||||
} else {
|
||||
// Deliver value for the string so far.
|
||||
UBool isFinal=(UBool)(node>>15);
|
||||
if(isFinal) {
|
||||
value_=UCharTrie::readValue(pos, node&0x7fff);
|
||||
value_=UCharsTrie::readValue(pos, node&0x7fff);
|
||||
} else {
|
||||
value_=UCharTrie::readNodeValue(pos, node);
|
||||
value_=UCharsTrie::readNodeValue(pos, node);
|
||||
}
|
||||
if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
|
||||
pos_=NULL;
|
||||
|
@ -126,7 +126,7 @@ UCharTrieIterator::next(UErrorCode &errorCode) {
|
|||
if(maxLength_>0 && str_.length()==maxLength_) {
|
||||
return truncateAndStop();
|
||||
}
|
||||
if(node<UCharTrie::kMinLinearMatch) {
|
||||
if(node<UCharsTrie::kMinLinearMatch) {
|
||||
if(node==0) {
|
||||
node=*pos++;
|
||||
}
|
||||
|
@ -136,7 +136,7 @@ UCharTrieIterator::next(UErrorCode &errorCode) {
|
|||
}
|
||||
} else {
|
||||
// Linear-match node, append length units to str_.
|
||||
int32_t length=node-UCharTrie::kMinLinearMatch+1;
|
||||
int32_t length=node-UCharsTrie::kMinLinearMatch+1;
|
||||
if(maxLength_>0 && str_.length()+length>maxLength_) {
|
||||
str_.append(pos, maxLength_-str_.length());
|
||||
return truncateAndStop();
|
||||
|
@ -149,23 +149,23 @@ UCharTrieIterator::next(UErrorCode &errorCode) {
|
|||
|
||||
// Branch node, needs to take the first outbound edge and push state for the rest.
|
||||
const UChar *
|
||||
UCharTrieIterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
|
||||
while(length>UCharTrie::kMaxBranchLinearSubNodeLength) {
|
||||
UCharsTrieIterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
|
||||
while(length>UCharsTrie::kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison unit
|
||||
// Push state for the greater-or-equal edge.
|
||||
stack_.addElement((int32_t)(UCharTrie::skipDelta(pos)-uchars_), errorCode);
|
||||
stack_.addElement((int32_t)(UCharsTrie::skipDelta(pos)-uchars_), errorCode);
|
||||
stack_.addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
|
||||
// Follow the less-than edge.
|
||||
length>>=1;
|
||||
pos=UCharTrie::jumpByDelta(pos);
|
||||
pos=UCharsTrie::jumpByDelta(pos);
|
||||
}
|
||||
// List of key-value pairs where values are either final values or jump deltas.
|
||||
// Read the first (key, value) pair.
|
||||
UChar trieUnit=*pos++;
|
||||
int32_t node=*pos++;
|
||||
UBool isFinal=(UBool)(node>>15);
|
||||
int32_t value=UCharTrie::readValue(pos, node&=0x7fff);
|
||||
pos=UCharTrie::skipValue(pos, node);
|
||||
int32_t value=UCharsTrie::readValue(pos, node&=0x7fff);
|
||||
pos=UCharsTrie::skipValue(pos, node);
|
||||
stack_.addElement((int32_t)(pos-uchars_), errorCode);
|
||||
stack_.addElement(((length-1)<<16)|str_.length(), errorCode);
|
||||
str_.append(trieUnit);
|
|
@ -1,9 +1,9 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: uchartrieiterator.h
|
||||
* file name: ucharstrieiterator.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
|
@ -12,28 +12,28 @@
|
|||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UCHARTRIEITERATOR_H__
|
||||
#define __UCHARTRIEITERATOR_H__
|
||||
#ifndef __UCHARSTRIEITERATOR_H__
|
||||
#define __UCHARSTRIEITERATOR_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: UCharTrie iterator for all of its (string, value) pairs.
|
||||
* \brief C++ API: UCharsTrie iterator for all of its (string, value) pairs.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "uchartrie.h"
|
||||
#include "ucharstrie.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Iterator for all of the (string, value) pairs in a UCharTrie.
|
||||
* Iterator for all of the (string, value) pairs in a UCharsTrie.
|
||||
*/
|
||||
class U_TOOLUTIL_API UCharTrieIterator : public UMemory {
|
||||
class U_TOOLUTIL_API UCharsTrieIterator : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Iterates from the root of a UChar-serialized UCharTrie.
|
||||
* Iterates from the root of a UChar-serialized UCharsTrie.
|
||||
* @param trieUChars The trie UChars.
|
||||
* @param maxStringLength If 0, the iterator returns full strings.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
|
@ -42,10 +42,10 @@ public:
|
|||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
UCharTrieIterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
UCharsTrieIterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Iterates from the current state of the specified UCharTrie.
|
||||
* Iterates from the current state of the specified UCharsTrie.
|
||||
* @param trie The trie whose state will be copied for iteration.
|
||||
* @param maxStringLength If 0, the iterator returns full strings.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
|
@ -54,12 +54,12 @@ public:
|
|||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
UCharsTrieIterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Resets this iterator to its initial state.
|
||||
*/
|
||||
UCharTrieIterator &reset();
|
||||
UCharsTrieIterator &reset();
|
||||
|
||||
/**
|
||||
* Finds the next (string, value) pair if there is one.
|
||||
|
@ -118,4 +118,4 @@ private:
|
|||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __UCHARTRIEITERATOR_H__
|
||||
#endif // __UCHARSTRIEITERATOR_H__
|
Loading…
Add table
Reference in a new issue