ICU-8167 rename ByteTrie to BytesTrie, DictTrie to StringTrie, etc.

X-SVN-Rev: 29265
This commit is contained in:
Markus Scherer 2011-01-05 21:05:47 +00:00
parent a0575a248c
commit 62306a038c
28 changed files with 908 additions and 909 deletions

View file

@ -1,6 +1,6 @@
#******************************************************************************
#
# Copyright (C) 1999-2010, International Business Machines
# Copyright (C) 1999-2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
@ -85,7 +85,7 @@ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \
bytestream.o stringpiece.o bytetrie.o \
bytestream.o stringpiece.o bytestrie.o \
ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \

View file

@ -1,9 +1,9 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytetrie.cpp
* file name: bytestrie.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -16,13 +16,13 @@
#include "unicode/bytestream.h"
#include "unicode/uobject.h"
#include "uassert.h"
#include "bytetrie.h"
#include "bytestrie.h"
U_NAMESPACE_BEGIN
// lead byte already shifted right by 1.
int32_t
ByteTrie::readValue(const uint8_t *pos, int32_t leadByte) {
BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
int32_t value;
if(leadByte<kMinTwoByteValueLead) {
value=leadByte-kMinOneByteValueLead;
@ -39,7 +39,7 @@ ByteTrie::readValue(const uint8_t *pos, int32_t leadByte) {
}
const uint8_t *
ByteTrie::jumpByDelta(const uint8_t *pos) {
BytesTrie::jumpByDelta(const uint8_t *pos) {
int32_t delta=*pos++;
if(delta<kMinTwoByteDeltaLead) {
// nothing to do
@ -58,20 +58,20 @@ ByteTrie::jumpByDelta(const uint8_t *pos) {
return pos+delta;
}
UDictTrieResult
ByteTrie::current() const {
UStringTrieResult
BytesTrie::current() const {
const uint8_t *pos=pos_;
if(pos==NULL) {
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
} else {
int32_t node;
return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : UDICTTRIE_NO_VALUE;
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
}
UDictTrieResult
ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
UStringTrieResult
BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
// Branch according to the current byte.
if(length==0) {
length=*pos++;
@ -93,12 +93,12 @@ ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
// and divides length by 2.
do {
if(inByte==*pos++) {
UDictTrieResult result;
UStringTrieResult result;
int32_t node=*pos;
U_ASSERT(node>=kMinValueLead);
if(node&kValueIsFinal) {
// Leave the final value for getValue() to read.
result=UDICTTRIE_HAS_FINAL_VALUE;
result=USTRINGTRIE_FINAL_VALUE;
} else {
// Use the non-final value as the jump delta.
++pos;
@ -122,7 +122,7 @@ ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
// end readValue()
pos+=delta;
node=*pos;
result= node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
}
pos_=pos;
return result;
@ -133,15 +133,15 @@ ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
if(inByte==*pos++) {
pos_=pos;
int32_t node=*pos;
return node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
}
UDictTrieResult
ByteTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
UStringTrieResult
BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
for(;;) {
int32_t node=*pos++;
if(node<kMinLinearMatch) {
@ -153,7 +153,7 @@ ByteTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
remainingMatchLength_=--length;
pos_=pos;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : UDICTTRIE_NO_VALUE;
valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
// No match.
break;
@ -169,14 +169,14 @@ ByteTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
}
}
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
UDictTrieResult
ByteTrie::next(int32_t inByte) {
UStringTrieResult
BytesTrie::next(int32_t inByte) {
const uint8_t *pos=pos_;
if(pos==NULL) {
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
if(length>=0) {
@ -186,24 +186,24 @@ ByteTrie::next(int32_t inByte) {
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : UDICTTRIE_NO_VALUE;
valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
}
return nextImpl(pos, inByte);
}
UDictTrieResult
ByteTrie::next(const char *s, int32_t sLength) {
UStringTrieResult
BytesTrie::next(const char *s, int32_t sLength) {
if(sLength<0 ? *s==0 : sLength==0) {
// Empty input.
return current();
}
const uint8_t *pos=pos_;
if(pos==NULL) {
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
for(;;) {
@ -217,7 +217,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : UDICTTRIE_NO_VALUE;
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
if(length<0) {
remainingMatchLength_=length;
@ -225,7 +225,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
}
if(inByte!=*pos) {
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
@ -237,7 +237,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : UDICTTRIE_NO_VALUE;
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
inByte=*s++;
--sLength;
@ -247,7 +247,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
}
if(inByte!=*pos) {
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
@ -256,9 +256,9 @@ ByteTrie::next(const char *s, int32_t sLength) {
for(;;) {
int32_t node=*pos++;
if(node<kMinLinearMatch) {
UDictTrieResult result=branchNext(pos, node, inByte);
if(result==UDICTTRIE_NO_MATCH) {
return UDICTTRIE_NO_MATCH;
UStringTrieResult result=branchNext(pos, node, inByte);
if(result==USTRINGTRIE_NO_MATCH) {
return USTRINGTRIE_NO_MATCH;
}
// Fetch the next input byte, if there is one.
if(sLength<0) {
@ -272,10 +272,10 @@ ByteTrie::next(const char *s, int32_t sLength) {
inByte=*s++;
--sLength;
}
if(result==UDICTTRIE_HAS_FINAL_VALUE) {
if(result==USTRINGTRIE_FINAL_VALUE) {
// No further matching bytes.
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
} else if(node<kMinValueLead) {
@ -283,7 +283,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
length=node-kMinLinearMatch; // Actual match length minus 1.
if(inByte!=*pos) {
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
@ -291,7 +291,7 @@ ByteTrie::next(const char *s, int32_t sLength) {
} else if(node&kValueIsFinal) {
// No further matching bytes.
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
} else {
// Skip intermediate value.
pos=skipValue(pos, node);
@ -303,8 +303,8 @@ ByteTrie::next(const char *s, int32_t sLength) {
}
const uint8_t *
ByteTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
UBool haveUniqueValue, int32_t &uniqueValue) {
BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
UBool haveUniqueValue, int32_t &uniqueValue) {
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison byte
if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
@ -340,7 +340,7 @@ ByteTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
}
UBool
ByteTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
for(;;) {
int32_t node=*pos++;
if(node<kMinLinearMatch) {
@ -375,7 +375,7 @@ ByteTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &un
}
int32_t
ByteTrie::getNextBytes(ByteSink &out) const {
BytesTrie::getNextBytes(ByteSink &out) const {
const uint8_t *pos=pos_;
if(pos==NULL) {
return 0;
@ -408,7 +408,7 @@ ByteTrie::getNextBytes(ByteSink &out) const {
}
void
ByteTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison byte
getNextBranchBytes(jumpByDelta(pos), length>>1, out);
@ -423,7 +423,7 @@ ByteTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out)
}
void
ByteTrie::append(ByteSink &out, int c) {
BytesTrie::append(ByteSink &out, int c) {
char ch=(char)c;
out.Append(&ch, 1);
}

View file

@ -1,9 +1,9 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytetrie.h
* file name: bytestrie.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -12,55 +12,54 @@
* created by: Markus W. Scherer
*/
#ifndef __BYTETRIE_H__
#define __BYTETRIE_H__
#ifndef __BYTESTRIE_H__
#define __BYTESTRIE_H__
/**
* \file
* \brief C++ API: Dictionary trie for mapping arbitrary byte sequences
* to integer values.
* \brief C++ API: Trie for mapping byte sequences to integer values.
*/
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "uassert.h"
#include "udicttrie.h"
#include "ustringtrie.h"
U_NAMESPACE_BEGIN
class ByteSink;
class ByteTrieBuilder;
class ByteTrieIterator;
class BytesTrieBuilder;
class BytesTrieIterator;
/**
* Light-weight, non-const reader class for a ByteTrie.
* Light-weight, non-const reader class for a BytesTrie.
* Traverses a byte-serialized data structure with minimal state,
* for mapping byte sequences to non-negative integer values.
*/
class U_COMMON_API ByteTrie : public UMemory {
class U_COMMON_API BytesTrie : public UMemory {
public:
ByteTrie(const void *trieBytes)
BytesTrie(const void *trieBytes)
: bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
pos_(bytes_), remainingMatchLength_(-1) {}
/**
* Resets this trie to its initial state.
*/
ByteTrie &reset() {
BytesTrie &reset() {
pos_=bytes_;
remainingMatchLength_=-1;
return *this;
}
/**
* ByteTrie state object, for saving a trie's current state
* BytesTrie state object, for saving a trie's current state
* and resetting the trie back to this state later.
*/
class State : public UMemory {
public:
State() { bytes=NULL; }
private:
friend class ByteTrie;
friend class BytesTrie;
const uint8_t *bytes;
const uint8_t *pos;
@ -71,7 +70,7 @@ public:
* Saves the state of this trie.
* @see resetToState
*/
const ByteTrie &saveState(State &state) const {
const BytesTrie &saveState(State &state) const {
state.bytes=bytes_;
state.pos=pos_;
state.remainingMatchLength=remainingMatchLength_;
@ -85,7 +84,7 @@ public:
* @see saveState
* @see reset
*/
ByteTrie &resetToState(const State &state) {
BytesTrie &resetToState(const State &state) {
if(bytes_==state.bytes && bytes_!=NULL) {
pos_=state.pos;
remainingMatchLength_=state.remainingMatchLength;
@ -98,14 +97,14 @@ public:
* and whether another input byte can continue a matching byte sequence.
* @return The match/value Result.
*/
UDictTrieResult current() const;
UStringTrieResult current() const;
/**
* Traverses the trie from the initial state for this input byte.
* Equivalent to reset().next(inByte).
* @return The match/value Result.
*/
inline UDictTrieResult first(int32_t inByte) {
inline UStringTrieResult first(int32_t inByte) {
remainingMatchLength_=-1;
return nextImpl(bytes_, inByte);
}
@ -114,7 +113,7 @@ public:
* Traverses the trie from the current state for this input byte.
* @return The match/value Result.
*/
UDictTrieResult next(int32_t inByte);
UStringTrieResult next(int32_t inByte);
/**
* Traverses the trie from the current state for this byte sequence.
@ -122,19 +121,20 @@ public:
* \code
* Result result=current();
* for(each c in s)
* if((result=next(c))==UDICTTRIE_NO_MATCH) return UDICTTRIE_NO_MATCH;
* if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
* result=next(c);
* return result;
* \endcode
* @return The match/value Result.
*/
UDictTrieResult next(const char *s, int32_t length);
UStringTrieResult next(const char *s, int32_t length);
/**
* Returns a matching byte sequence's value if called immediately after
* current()/first()/next() returned UDICTTRIE_HAS_VALUE or UDICTTRIE_HAS_FINAL_VALUE.
* current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
* getValue() can be called multiple times.
*
* Do not call getValue() after UDICTTRIE_NO_MATCH or UDICTTRIE_NO_VALUE!
* Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
*/
inline int32_t getValue() const {
const uint8_t *pos=pos_;
@ -159,7 +159,7 @@ public:
/**
* Finds each byte which continues the byte sequence from the current state.
* That is, each byte b for which it would be next(b)!=UDICTTRIE_NO_MATCH now.
* That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now.
* @param out Each next byte is appended to this object.
* (Only uses the out.Append(s, length) method.)
* @return the number of bytes which continue the byte sequence from here
@ -167,8 +167,8 @@ public:
int32_t getNextBytes(ByteSink &out) const;
private:
friend class ByteTrieBuilder;
friend class ByteTrieIterator;
friend class BytesTrieBuilder;
friend class BytesTrieIterator;
inline void stop() {
pos_=NULL;
@ -212,15 +212,15 @@ private:
return pos;
}
static inline UDictTrieResult valueResult(int32_t node) {
return (UDictTrieResult)(UDICTTRIE_HAS_VALUE-(node&kValueIsFinal));
static inline UStringTrieResult valueResult(int32_t node) {
return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node&kValueIsFinal));
}
// Handles a branch node for both next(byte) and next(string).
UDictTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);
UStringTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);
// Requires remainingMatchLength_<0.
UDictTrieResult nextImpl(const uint8_t *pos, int32_t inByte);
UStringTrieResult nextImpl(const uint8_t *pos, int32_t inByte);
// Helper functions for hasUniqueValue().
// Recursively finds a unique value (or whether there is not a unique one)
@ -236,7 +236,7 @@ private:
static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out);
static void append(ByteSink &out, int c);
// ByteTrie data structure
// BytesTrie data structure
//
// The trie consists of a series of byte-serialized nodes for incremental
// string/byte sequence matching. The root node is at the beginning of the trie data.
@ -315,7 +315,7 @@ private:
static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff
static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff
// Fixed value referencing the ByteTrie bytes.
// Fixed value referencing the BytesTrie bytes.
const uint8_t *bytes_;
// Iterator variables.
@ -328,4 +328,4 @@ private:
U_NAMESPACE_END
#endif // __BYTETRIE_H__
#endif // __BYTESTRIE_H__

View file

@ -400,7 +400,7 @@
<ClCompile Include="servslkf.cpp" />
<ClCompile Include="usprep.cpp" />
<ClCompile Include="bytestream.cpp" />
<ClCompile Include="bytetrie.cpp" />
<ClCompile Include="bytestrie.cpp" />
<ClCompile Include="chariter.cpp" />
<ClCompile Include="charstr.cpp" />
<ClCompile Include="cstring.c" />
@ -557,6 +557,7 @@
<ClInclude Include="uhash.h" />
<ClInclude Include="ulist.h" />
<ClInclude Include="ustrenum.h" />
<ClInclude Include="ustringtrie.h" />
<ClInclude Include="utrie.h" />
<ClInclude Include="utrie2.h" />
<ClInclude Include="utrie2_impl.h" />
@ -1366,7 +1367,7 @@
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="bytetrie.h" />
<ClInclude Include="bytestrie.h" />
<CustomBuild Include="unicode\chariter.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2002-2010, International Business Machines
* Copyright (c) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
@ -165,7 +165,7 @@ int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t
if(valueMapIndex==0) {
return 0; // The property does not have named values.
}
++valueMapIndex; // Skip the ByteTrie offset.
++valueMapIndex; // Skip the BytesTrie offset.
int32_t numRanges=valueMaps[valueMapIndex++];
if(numRanges<0x10) {
// Ranges of values.
@ -214,11 +214,11 @@ const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
return nameGroup;
}
UBool PropNameData::containsName(ByteTrie &trie, const char *name) {
UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
if(name==NULL) {
return FALSE;
}
UDictTrieResult result=UDICTTRIE_NO_VALUE;
UStringTrieResult result=USTRINGTRIE_NO_VALUE;
char c;
while((c=*name++)!=0) {
c=uprv_invCharToLowercaseAscii(c);
@ -226,12 +226,12 @@ UBool PropNameData::containsName(ByteTrie &trie, const char *name) {
if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
continue;
}
if(!UDICTTRIE_RESULT_HAS_NEXT(result)) {
if(!USTRINGTRIE_HAS_NEXT(result)) {
return FALSE;
}
result=trie.next((uint8_t)c);
}
return UDICTTRIE_RESULT_HAS_VALUE(result);
return USTRINGTRIE_HAS_VALUE(result);
}
const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
@ -254,8 +254,8 @@ const char *PropNameData::getPropertyValueName(int32_t property, int32_t value,
return getName(nameGroups+nameGroupOffset, nameChoice);
}
int32_t PropNameData::getPropertyOrValueEnum(int32_t byteTrieOffset, const char *alias) {
ByteTrie trie(byteTries+byteTrieOffset);
int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
BytesTrie trie(bytesTries+bytesTrieOffset);
if(containsName(trie, alias)) {
return trie.getValue();
} else {
@ -277,7 +277,7 @@ int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias)
return UCHAR_INVALID_CODE; // The property does not have named values.
}
// valueMapIndex is the start of the property's valueMap,
// where the first word is the ByteTrie offset.
// where the first word is the BytesTrie offset.
return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
}

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2002-2010, International Business Machines
* Copyright (c) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
@ -14,7 +14,7 @@
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "bytetrie.h"
#include "bytestrie.h"
#include "udataswp.h"
#include "uprops.h"
@ -106,13 +106,13 @@ private:
static int32_t findProperty(int32_t property);
static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value);
static const char *getName(const char *nameGroup, int32_t nameIndex);
static UBool containsName(ByteTrie &trie, const char *name);
static UBool containsName(BytesTrie &trie, const char *name);
static int32_t getPropertyOrValueEnum(int32_t byteTrieOffset, const char *alias);
static int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias);
static const int32_t indexes[];
static const int32_t valueMaps[];
static const uint8_t byteTries[];
static const uint8_t bytesTries[];
static const char nameGroups[];
};
@ -164,7 +164,7 @@ private:
* If the valueMapIndex is 0, then the property does not have named values.
*
* For each property's value map:
* int32_t byteTrieOffset; -- Offset into byteTries[] for name->value mapping.
* int32_t bytesTrieOffset; -- Offset into bytesTries[] for name->value mapping.
* int32_t numRanges;
* If numRanges is in the range 1..15, then that many ranges of values follow.
* Per range:
@ -181,12 +181,12 @@ private:
*
* For both properties and property values, ranges are sorted by their start/limit values.
*
* uint8_t byteTries[];
* uint8_t bytesTries[];
*
* This is a sequence of ByteTrie structures, byte-serialized tries for
* This is a sequence of BytesTrie structures, byte-serialized tries for
* mapping from names/aliases to values.
* The first one maps from property names/aliases to UProperty enum constants.
* The following ones are indexed by property value map byteTrieOffsets
* The following ones are indexed by property value map bytesTrieOffsets
* for mapping each property's names/aliases to their property values.
*
* char nameGroups[];

View file

@ -1,10 +1,10 @@
/*
* Copyright (C) 1999-2010, International Business Machines
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: propname_data.h
*
* machine-generated on: 2010-12-31
* machine-generated on: 2011-01-05
*/
#ifndef INCLUDED_FROM_PROPNAME_CPP
@ -80,7 +80,7 @@ const int32_t PropNameData::valueMaps[989]={
0x2eb1,0x2f20,0x2ec6,0x2e97,0x2f0a,0x2f72,0x2f4a,0x2f5e,0x2f82,0x2f93,0x2ef2,0x2edc,0x2f35
};
const uint8_t PropNameData::byteTries[10229]={
const uint8_t PropNameData::bytesTries[10229]={
0,0x15,0x6d,0xc3,0x16,0x73,0xc1,0xea,0x76,0x5f,0x76,0x68,0x77,0x90,0x78,1,
0x64,0x50,0x69,0x10,0x64,1,0x63,0x30,0x73,0x62,0x13,0x74,0x61,0x72,0x74,0x63,
0x60,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x61,0x13,0x69,0x67,0x69,0x74,0x81,

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: ustringtrie.h
@ -12,8 +12,8 @@
* created by: Markus W. Scherer
*/
#ifndef __UDICTTRIE_H__
#define __UDICTTRIE_H__
#ifndef __USTRINGTRIE_H__
#define __USTRINGTRIE_H__
/**
* \file
@ -23,61 +23,61 @@
#include "unicode/utypes.h"
/**
* Return values for ByteTrie::next(), UCharTrie::next() and similar methods.
* @see UDICTTRIE_RESULT_MATCHES
* @see UDICTTRIE_RESULT_HAS_VALUE
* @see UDICTTRIE_RESULT_HAS_NEXT
* Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
* @see USTRINGTRIE_MATCHES
* @see USTRINGTRIE_HAS_VALUE
* @see USTRINGTRIE_HAS_NEXT
*/
enum UDictTrieResult {
enum UStringTrieResult {
/**
* The input unit(s) did not continue a matching string.
*/
UDICTTRIE_NO_MATCH,
USTRINGTRIE_NO_MATCH,
/**
* The input unit(s) continued a matching string
* but there is no value for the string so far.
* (It is a prefix of a longer string.)
*/
UDICTTRIE_NO_VALUE,
USTRINGTRIE_NO_VALUE,
/**
* The input unit(s) continued a matching string
* and there is a value for the string so far.
* This value will be returned by getValue().
* No further input byte/unit can continue a matching string.
*/
UDICTTRIE_HAS_FINAL_VALUE,
USTRINGTRIE_FINAL_VALUE,
/**
* The input unit(s) continued a matching string
* and there is a value for the string so far.
* This value will be returned by getValue().
* Another input byte/unit can continue a matching string.
*/
UDICTTRIE_HAS_VALUE
USTRINGTRIE_INTERMEDIATE_VALUE
};
/**
* Same as (result!=UDICTTRIE_NO_MATCH).
* @param result A result from ByteTrie::first(), UCharTrie::next() etc.
* Same as (result!=USTRINGTRIE_NO_MATCH).
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
* @return true if the input bytes/units so far are part of a matching string/byte sequence.
*/
#define UDICTTRIE_RESULT_MATCHES(result) ((result)!=UDICTTRIE_NO_MATCH)
#define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH)
/**
* Equivalent to (result==UDICTTRIE_HAS_VALUE || result==UDICTTRIE_HAS_FINAL_VALUE) but
* Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but
* this macro evaluates result exactly once.
* @param result A result from ByteTrie::first(), UCharTrie::next() etc.
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
* @return true if there is a value for the input bytes/units so far.
* @see ByteTrie::getValue
* @see UCharTrie::getValue
* @see BytesTrie::getValue
* @see UCharsTrie::getValue
*/
#define UDICTTRIE_RESULT_HAS_VALUE(result) ((result)>=UDICTTRIE_HAS_FINAL_VALUE)
#define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE)
/**
* Equivalent to (result==UDICTTRIE_NO_VALUE || result==UDICTTRIE_HAS_VALUE) but
* Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but
* this macro evaluates result exactly once.
* @param result A result from ByteTrie::first(), UCharTrie::next() etc.
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
* @return true if another input byte/unit can continue a matching string.
*/
#define UDICTTRIE_RESULT_HAS_NEXT(result) ((result)&1)
#define USTRINGTRIE_HAS_NEXT(result) ((result)&1)
#endif /* __UDICTTRIE_H__ */
#endif /* __USTRINGTRIE_H__ */

View file

@ -1,6 +1,6 @@
#******************************************************************************
#
# Copyright (C) 1999-2010, International Business Machines
# Copyright (C) 1999-2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
@ -50,7 +50,7 @@ sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \
tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o \
tsmthred.o tsnmfmt.o tsputil.o tstnrapi.o tstnorm.o tzbdtest.o \
tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o strcase.o transtst.o strtest.o thcoll.o \
bytetrietest.o uchartrietest.o \
bytestrietest.o ucharstrietest.o \
itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o \
testutil.o transrt.o trnserr.o normconf.o sfwdchit.o \
jamotest.o srchtest.o reptest.o regextst.o \

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytestrietest.cpp
@ -16,9 +16,9 @@
#include "unicode/utypes.h"
#include "unicode/stringpiece.h"
#include "bytetrie.h"
#include "bytetriebuilder.h"
#include "bytetrieiterator.h"
#include "bytestrie.h"
#include "bytestriebuilder.h"
#include "bytestrieiterator.h"
#include "intltest.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
@ -28,10 +28,10 @@ struct StringAndValue {
int32_t value;
};
class ByteTrieTest : public IntlTest {
class BytesTrieTest : public IntlTest {
public:
ByteTrieTest() {}
virtual ~ByteTrieTest();
BytesTrieTest() {}
virtual ~BytesTrieTest();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
void TestBuilder();
@ -45,7 +45,7 @@ public:
void TestValuesForState();
void TestCompact();
StringPiece buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBuildOption buildOption);
StringPiece buildMonthsTrie(BytesTrieBuilder &builder, UStringTrieBuildOption buildOption);
void TestHasUniqueValue();
void TestGetNextBytes();
void TestIteratorFromBranch();
@ -55,27 +55,27 @@ public:
void TestTruncatingIteratorFromLinearMatchLong();
void checkData(const StringAndValue data[], int32_t dataLength);
void checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption);
void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
StringPiece buildTrie(const StringAndValue data[], int32_t dataLength,
ByteTrieBuilder &builder, UDictTrieBuildOption buildOption);
BytesTrieBuilder &builder, UStringTrieBuildOption buildOption);
void checkFirst(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
void checkNext(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
void checkNextWithState(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
void checkNextString(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
void checkIterator(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
void checkIterator(ByteTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
void checkIterator(BytesTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
};
extern IntlTest *createByteTrieTest() {
return new ByteTrieTest();
extern IntlTest *createBytesTrieTest() {
return new BytesTrieTest();
}
ByteTrieTest::~ByteTrieTest() {
BytesTrieTest::~BytesTrieTest() {
}
void ByteTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if(exec) {
logln("TestSuite ByteTrieTest: ");
logln("TestSuite BytesTrieTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(TestBuilder);
@ -98,36 +98,36 @@ void ByteTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
TESTCASE_AUTO_END;
}
void ByteTrieTest::TestBuilder() {
void BytesTrieTest::TestBuilder() {
IcuTestErrorCode errorCode(*this, "TestBuilder()");
ByteTrieBuilder builder;
builder.build(UDICTTRIE_BUILD_FAST, errorCode);
BytesTrieBuilder builder;
builder.build(USTRINGTRIE_BUILD_FAST, errorCode);
if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
errln("ByteTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
return;
}
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(UDICTTRIE_BUILD_FAST, errorCode);
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
errln("ByteTrieBuilder.build() did not detect duplicates");
errln("BytesTrieBuilder.build() did not detect duplicates");
return;
}
}
void ByteTrieTest::TestEmpty() {
void BytesTrieTest::TestEmpty() {
static const StringAndValue data[]={
{ "", 0 }
};
checkData(data, LENGTHOF(data));
}
void ByteTrieTest::Test_a() {
void BytesTrieTest::Test_a() {
static const StringAndValue data[]={
{ "a", 1 }
};
checkData(data, LENGTHOF(data));
}
void ByteTrieTest::Test_a_ab() {
void BytesTrieTest::Test_a_ab() {
static const StringAndValue data[]={
{ "a", 1 },
{ "ab", 100 }
@ -135,7 +135,7 @@ void ByteTrieTest::Test_a_ab() {
checkData(data, LENGTHOF(data));
}
void ByteTrieTest::TestShortestBranch() {
void BytesTrieTest::TestShortestBranch() {
static const StringAndValue data[]={
{ "a", 1000 },
{ "b", 2000 }
@ -143,7 +143,7 @@ void ByteTrieTest::TestShortestBranch() {
checkData(data, LENGTHOF(data));
}
void ByteTrieTest::TestBranches() {
void BytesTrieTest::TestBranches() {
static const StringAndValue data[]={
{ "a", 0x10 },
{ "cc", 0x40 },
@ -166,7 +166,7 @@ void ByteTrieTest::TestBranches() {
}
}
void ByteTrieTest::TestLongSequence() {
void BytesTrieTest::TestLongSequence() {
static const StringAndValue data[]={
{ "a", -1 },
// sequence of linear-match nodes
@ -182,7 +182,7 @@ void ByteTrieTest::TestLongSequence() {
checkData(data, LENGTHOF(data));
}
void ByteTrieTest::TestLongBranch() {
void BytesTrieTest::TestLongBranch() {
// Split-branch and interesting compact-integer values.
static const StringAndValue data[]={
{ "a", -2 },
@ -210,7 +210,7 @@ void ByteTrieTest::TestLongBranch() {
checkData(data, LENGTHOF(data));
}
void ByteTrieTest::TestValuesForState() {
void BytesTrieTest::TestValuesForState() {
// Check that saveState() and resetToState() interact properly
// with next() and current().
static const StringAndValue data[]={
@ -224,7 +224,7 @@ void ByteTrieTest::TestValuesForState() {
checkData(data, LENGTHOF(data));
}
void ByteTrieTest::TestCompact() {
void BytesTrieTest::TestCompact() {
// Duplicate trailing strings and values provide opportunities for compacting.
static const StringAndValue data[]={
{ "+", 0 },
@ -251,7 +251,7 @@ void ByteTrieTest::TestCompact() {
checkData(data, LENGTHOF(data));
}
StringPiece ByteTrieTest::buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBuildOption buildOption) {
StringPiece BytesTrieTest::buildMonthsTrie(BytesTrieBuilder &builder, UStringTrieBuildOption buildOption) {
// All types of nodes leading to the same value,
// for code coverage of recursive functions.
// In particular, we need a lot of branches on some single level
@ -291,13 +291,13 @@ StringPiece ByteTrieTest::buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBui
return buildTrie(data, LENGTHOF(data), builder, buildOption);
}
void ByteTrieTest::TestHasUniqueValue() {
ByteTrieBuilder builder;
StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST);
void BytesTrieTest::TestHasUniqueValue() {
BytesTrieBuilder builder;
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST);
if(sp.empty()) {
return; // buildTrie() reported an error
}
ByteTrie trie(sp.data());
BytesTrie trie(sp.data());
int32_t uniqueValue;
if(trie.hasUniqueValue(uniqueValue)) {
errln("unique value at root");
@ -314,7 +314,7 @@ void ByteTrieTest::TestHasUniqueValue() {
if(trie.hasUniqueValue(uniqueValue)) {
errln("unique value after \"ju\"");
}
if(trie.next('n')!=UDICTTRIE_HAS_VALUE || 6!=trie.getValue()) {
if(trie.next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) {
errln("not normal value 6 after \"jun\"");
}
// hasUniqueValue() after getValue()
@ -329,13 +329,13 @@ void ByteTrieTest::TestHasUniqueValue() {
}
}
void ByteTrieTest::TestGetNextBytes() {
ByteTrieBuilder builder;
StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL);
void BytesTrieTest::TestGetNextBytes() {
BytesTrieBuilder builder;
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL);
if(sp.empty()) {
return; // buildTrie() reported an error
}
ByteTrie trie(sp.data());
BytesTrie trie(sp.data());
char buffer[40];
CheckedArrayByteSink sink(buffer, LENGTHOF(buffer));
int32_t count=trie.getNextBytes(sink);
@ -352,7 +352,7 @@ void ByteTrieTest::TestGetNextBytes() {
errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
}
// getNextBytes() after getValue()
trie.getValue(); // next() had returned UDICTTRIE_HAS_VALUE.
trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
memset(buffer, 0, sizeof(buffer));
count=trie.getNextBytes(sink.Reset());
if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
@ -380,20 +380,20 @@ void ByteTrieTest::TestGetNextBytes() {
}
}
void ByteTrieTest::TestIteratorFromBranch() {
ByteTrieBuilder builder;
StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST);
void BytesTrieTest::TestIteratorFromBranch() {
BytesTrieBuilder builder;
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST);
if(sp.empty()) {
return; // buildTrie() reported an error
}
ByteTrie trie(sp.data());
BytesTrie trie(sp.data());
// Go to a branch node.
trie.next('j');
trie.next('a');
trie.next('n');
IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
ByteTrieIterator iter(trie, 0, errorCode);
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
BytesTrieIterator iter(trie, 0, errorCode);
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
return;
}
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
@ -431,13 +431,13 @@ void ByteTrieTest::TestIteratorFromBranch() {
checkIterator(iter.reset(), data, LENGTHOF(data));
}
void ByteTrieTest::TestIteratorFromLinearMatch() {
ByteTrieBuilder builder;
StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL);
void BytesTrieTest::TestIteratorFromLinearMatch() {
BytesTrieBuilder builder;
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL);
if(sp.empty()) {
return; // buildTrie() reported an error
}
ByteTrie trie(sp.data());
BytesTrie trie(sp.data());
// Go into a linear-match node.
trie.next('j');
trie.next('a');
@ -445,8 +445,8 @@ void ByteTrieTest::TestIteratorFromLinearMatch() {
trie.next('u');
trie.next('a');
IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
ByteTrieIterator iter(trie, 0, errorCode);
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
BytesTrieIterator iter(trie, 0, errorCode);
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
return;
}
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
@ -461,15 +461,15 @@ void ByteTrieTest::TestIteratorFromLinearMatch() {
checkIterator(iter.reset(), data, LENGTHOF(data));
}
void ByteTrieTest::TestTruncatingIteratorFromRoot() {
ByteTrieBuilder builder;
StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST);
void BytesTrieTest::TestTruncatingIteratorFromRoot() {
BytesTrieBuilder builder;
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST);
if(sp.empty()) {
return; // buildTrie() reported an error
}
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
ByteTrieIterator iter(sp.data(), 4, errorCode);
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
BytesTrieIterator iter(sp.data(), 4, errorCode);
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
return;
}
// Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
@ -508,25 +508,25 @@ void ByteTrieTest::TestTruncatingIteratorFromRoot() {
checkIterator(iter.reset(), data, LENGTHOF(data));
}
void ByteTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
static const StringAndValue data[]={
{ "abcdef", 10 },
{ "abcdepq", 200 },
{ "abcdeyz", 3000 }
};
ByteTrieBuilder builder;
StringPiece sp=buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST);
BytesTrieBuilder builder;
StringPiece sp=buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST);
if(sp.empty()) {
return; // buildTrie() reported an error
}
ByteTrie trie(sp.data());
BytesTrie trie(sp.data());
// Go into a linear-match node.
trie.next('a');
trie.next('b');
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
// Truncate within the linear-match node.
ByteTrieIterator iter(trie, 2, errorCode);
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
BytesTrieIterator iter(trie, 2, errorCode);
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
return;
}
static const StringAndValue expected[]={
@ -538,26 +538,26 @@ void ByteTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
checkIterator(iter.reset(), expected, LENGTHOF(expected));
}
void ByteTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
static const StringAndValue data[]={
{ "abcdef", 10 },
{ "abcdepq", 200 },
{ "abcdeyz", 3000 }
};
ByteTrieBuilder builder;
StringPiece sp=buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST);
BytesTrieBuilder builder;
StringPiece sp=buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST);
if(sp.empty()) {
return; // buildTrie() reported an error
}
ByteTrie trie(sp.data());
BytesTrie trie(sp.data());
// Go into a linear-match node.
trie.next('a');
trie.next('b');
trie.next('c');
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
// Truncate after the linear-match node.
ByteTrieIterator iter(trie, 3, errorCode);
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
BytesTrieIterator iter(trie, 3, errorCode);
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
return;
}
static const StringAndValue expected[]={
@ -571,15 +571,15 @@ void ByteTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
checkIterator(iter.reset(), expected, LENGTHOF(expected));
}
// Convenience overload: runs the three-argument checkData() on the same data twice,
// once with the "fast" build option and once with the "small" one, logging which
// pass is starting before each call.
// (Rename diff: each UDICTTRIE_* line is followed by its USTRINGTRIE_* replacement.)
void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
logln("checkData(dataLength=%d, fast)", (int)dataLength);
checkData(data, dataLength, UDICTTRIE_BUILD_FAST);
checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
logln("checkData(dataLength=%d, small)", (int)dataLength);
checkData(data, dataLength, UDICTTRIE_BUILD_SMALL);
checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
}
void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption) {
ByteTrieBuilder builder;
void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
BytesTrieBuilder builder;
StringPiece sp=buildTrie(data, dataLength, builder, buildOption);
if(sp.empty()) {
return; // buildTrie() reported an error
@ -591,8 +591,8 @@ void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UD
checkIterator(sp, data, dataLength);
}
StringPiece ByteTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
ByteTrieBuilder &builder, UDictTrieBuildOption buildOption) {
StringPiece BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
BytesTrieBuilder &builder, UStringTrieBuildOption buildOption) {
IcuTestErrorCode errorCode(*this, "buildTrie()");
// Add the items to the trie builder in an interesting (not trivial, not random) order.
int32_t index, step;
@ -624,20 +624,20 @@ StringPiece ByteTrieTest::buildTrie(const StringAndValue data[], int32_t dataLen
return sp;
}
void ByteTrieTest::checkFirst(const StringPiece &trieBytes,
const StringAndValue data[], int32_t dataLength) {
ByteTrie trie(trieBytes.data());
void BytesTrieTest::checkFirst(const StringPiece &trieBytes,
const StringAndValue data[], int32_t dataLength) {
BytesTrie trie(trieBytes.data());
for(int32_t i=0; i<dataLength; ++i) {
int c=(uint8_t)*data[i].s;
if(c==0) {
continue; // skip empty string
}
UDictTrieResult firstResult=trie.first(c);
int32_t firstValue=UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1;
UDictTrieResult nextResult=trie.next((uint8_t)data[i].s[1]);
UStringTrieResult firstResult=trie.first(c);
int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
UStringTrieResult nextResult=trie.next((uint8_t)data[i].s[1]);
if(firstResult!=trie.reset().next(c) ||
firstResult!=trie.current() ||
firstValue!=(UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
nextResult!=trie.next((uint8_t)data[i].s[1])
) {
errln("trie.first(%c)!=trie.reset().next(same) for %s",
@ -646,14 +646,14 @@ void ByteTrieTest::checkFirst(const StringPiece &trieBytes,
}
}
void ByteTrieTest::checkNext(const StringPiece &trieBytes,
const StringAndValue data[], int32_t dataLength) {
ByteTrie trie(trieBytes.data());
ByteTrie::State state;
void BytesTrieTest::checkNext(const StringPiece &trieBytes,
const StringAndValue data[], int32_t dataLength) {
BytesTrie trie(trieBytes.data());
BytesTrie::State state;
for(int32_t i=0; i<dataLength; ++i) {
int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
UDictTrieResult result;
if( !UDICTTRIE_RESULT_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
UStringTrieResult result;
if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
result!=trie.current()
) {
errln("trie does not seem to contain %s", data[i].s);
@ -669,20 +669,20 @@ void ByteTrieTest::checkNext(const StringPiece &trieBytes,
stringLength=strlen(data[i].s);
result=trie.current();
for(int32_t j=0; j<stringLength; ++j) {
if(!UDICTTRIE_RESULT_HAS_NEXT(result)) {
if(!USTRINGTRIE_HAS_NEXT(result)) {
errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
break;
}
if(result==UDICTTRIE_HAS_VALUE) {
if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
trie.getValue();
if(trie.current()!=UDICTTRIE_HAS_VALUE) {
errln("trie.getValue().current()!=UDICTTRIE_HAS_VALUE before end of %s (at index %d)", data[i].s, j);
if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
break;
}
}
result=trie.next(data[i].s[j]);
if(!UDICTTRIE_RESULT_MATCHES(result)) {
errln("trie.next()=UDICTTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
if(!USTRINGTRIE_MATCHES(result)) {
errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
break;
}
if(result!=trie.current()) {
@ -690,7 +690,7 @@ void ByteTrieTest::checkNext(const StringPiece &trieBytes,
break;
}
}
if(!UDICTTRIE_RESULT_HAS_VALUE(result)) {
if(!USTRINGTRIE_HAS_VALUE(result)) {
errln("trie.next()!=hasValue at the end of %s", data[i].s);
continue;
}
@ -708,18 +708,18 @@ void ByteTrieTest::checkNext(const StringPiece &trieBytes,
break;
}
}
if((result==UDICTTRIE_HAS_VALUE)!=nextContinues) {
errln("(trie.current()==UDICTTRIE_HAS_VALUE) contradicts "
"(trie.next(some UChar)!=UDICTTRIE_NO_MATCH) after end of %s", data[i].s);
if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
"(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
}
trie.reset();
}
}
void ByteTrieTest::checkNextWithState(const StringPiece &trieBytes,
const StringAndValue data[], int32_t dataLength) {
ByteTrie trie(trieBytes.data());
ByteTrie::State noState, state;
void BytesTrieTest::checkNextWithState(const StringPiece &trieBytes,
const StringAndValue data[], int32_t dataLength) {
BytesTrie trie(trieBytes.data());
BytesTrie::State noState, state;
for(int32_t i=0; i<dataLength; ++i) {
if((i&1)==0) {
// This should have no effect.
@ -729,36 +729,36 @@ void ByteTrieTest::checkNextWithState(const StringPiece &trieBytes,
int32_t stringLength=strlen(expectedString);
int32_t partialLength=stringLength/3;
for(int32_t j=0; j<partialLength; ++j) {
if(!UDICTTRIE_RESULT_MATCHES(trie.next(expectedString[j]))) {
errln("trie.next()=UDICTTRIE_NO_MATCH for a prefix of %s", data[i].s);
if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
return;
}
}
trie.saveState(state);
UDictTrieResult resultAtState=trie.current();
UDictTrieResult result;
UStringTrieResult resultAtState=trie.current();
UStringTrieResult result;
int32_t valueAtState=-99;
if(UDICTTRIE_RESULT_HAS_VALUE(resultAtState)) {
if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
valueAtState=trie.getValue();
}
result=trie.next(0); // mismatch
if(result!=UDICTTRIE_NO_MATCH || result!=trie.current()) {
if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
errln("trie.next(0) matched after part of %s", data[i].s);
}
if( resultAtState!=trie.resetToState(state).current() ||
(UDICTTRIE_RESULT_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
(USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
) {
errln("trie.next(part of %s) changes current()/getValue() after "
"saveState/next(0)/resetToState",
data[i].s);
} else if(!UDICTTRIE_RESULT_HAS_VALUE(
} else if(!USTRINGTRIE_HAS_VALUE(
result=trie.next(expectedString+partialLength,
stringLength-partialLength)) ||
result!=trie.current()) {
errln("trie.next(rest of %s) does not seem to contain %s after "
"saveState/next(0)/resetToState",
data[i].s);
} else if(!UDICTTRIE_RESULT_HAS_VALUE(
} else if(!USTRINGTRIE_HAS_VALUE(
result=trie.resetToState(state).
next(expectedString+partialLength,
stringLength-partialLength)) ||
@ -777,36 +777,36 @@ void ByteTrieTest::checkNextWithState(const StringPiece &trieBytes,
// next(string) is also tested in other functions,
// but here we try to go partway through the string, and then beyond it.
//
// For every entry in data[]: feed the first half of the string to next(), then feed
// the remainder plus one extra byte (the terminating NUL). The first call must match;
// the second must not, because it runs past the end of the stored string.
// The trie is reset after each entry so the next one starts from the root.
// (Rename diff: each ByteTrie/UDICTTRIE line is followed by its BytesTrie/USTRINGTRIE replacement.)
void ByteTrieTest::checkNextString(const StringPiece &trieBytes,
const StringAndValue data[], int32_t dataLength) {
ByteTrie trie(trieBytes.data());
void BytesTrieTest::checkNextString(const StringPiece &trieBytes,
const StringAndValue data[], int32_t dataLength) {
BytesTrie trie(trieBytes.data());
for(int32_t i=0; i<dataLength; ++i) {
const char *expectedString=data[i].s;
int32_t stringLength=strlen(expectedString);
// A zero (false) result is treated as "no match"; the message names it as ..._NO_MATCH.
if(!trie.next(expectedString, stringLength/2)) {
errln("trie.next(up to middle of string)=UDICTTRIE_NO_MATCH for %s", data[i].s);
errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
continue;
}
// Test that we stop properly at the end of the string.
// Length is (rest of string)+1, so the NUL terminator is included in the input.
if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
errln("trie.next(string+NUL)!=UDICTTRIE_NO_MATCH for %s", data[i].s);
errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
}
trie.reset();
}
}
// Convenience overload: constructs an iterator over the serialized trie bytes
// (second constructor argument 0) and delegates to the iterator-taking checkIterator().
// Returns early, after logging, if constructing the iterator reports a failure.
// (Rename diff: each ByteTrie line is followed by its BytesTrie replacement.)
void ByteTrieTest::checkIterator(const StringPiece &trieBytes,
const StringAndValue data[], int32_t dataLength) {
void BytesTrieTest::checkIterator(const StringPiece &trieBytes,
const StringAndValue data[], int32_t dataLength) {
IcuTestErrorCode errorCode(*this, "checkIterator()");
// NOTE(review): 0 is presumably "no maximum string length" (other tests pass 2, 3, 4 to truncate) — confirm.
ByteTrieIterator iter(trieBytes.data(), 0, errorCode);
if(errorCode.logIfFailureAndReset("ByteTrieIterator(trieBytes) constructor")) {
BytesTrieIterator iter(trieBytes.data(), 0, errorCode);
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trieBytes) constructor")) {
return;
}
checkIterator(iter, data, dataLength);
}
void ByteTrieTest::checkIterator(ByteTrieIterator &iter,
const StringAndValue data[], int32_t dataLength) {
void BytesTrieTest::checkIterator(BytesTrieIterator &iter,
const StringAndValue data[], int32_t dataLength) {
IcuTestErrorCode errorCode(*this, "checkIterator()");
for(int32_t i=0; i<dataLength; ++i) {
if(!iter.hasNext()) {

View file

@ -223,8 +223,8 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="bytetrietest.cpp" />
<ClCompile Include="uchartrietest.cpp" />
<ClCompile Include="bytestrietest.cpp" />
<ClCompile Include="ucharstrietest.cpp" />
<ClCompile Include="itrbbi.cpp" />
<ClCompile Include="rbbiapts.cpp" />
<ClCompile Include="rbbitst.cpp" />

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -29,9 +29,9 @@
#include "aliastst.h"
#include "usettest.h"
extern IntlTest *createByteTrieTest();
extern IntlTest *createBytesTrieTest();
static IntlTest *createLocalPointerTest();
extern IntlTest *createUCharTrieTest();
extern IntlTest *createUCharsTrieTest();
#define CASE(id, test) case id: \
name = #test; \
@ -73,16 +73,16 @@ void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &
case 17:
name = "ByteTrieTest";
if (exec) {
logln("TestSuite ByteTrieTest---"); logln();
LocalPointer<IntlTest> test(createByteTrieTest());
logln("TestSuite BytesTrieTest---"); logln();
LocalPointer<IntlTest> test(createBytesTrieTest());
callTest(*test, par);
}
break;
case 18:
name = "UCharTrieTest";
if (exec) {
logln("TestSuite UCharTrieTest---"); logln();
LocalPointer<IntlTest> test(createUCharTrieTest());
logln("TestSuite UCharsTrieTest---"); logln();
LocalPointer<IntlTest> test(createUCharsTrieTest());
callTest(*test, par);
}
break;

View file

@ -1,9 +1,9 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uchartrietest.cpp
* file name: ucharstrietest.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -16,9 +16,9 @@
#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "uchartrie.h"
#include "uchartriebuilder.h"
#include "uchartrieiterator.h"
#include "ucharstrie.h"
#include "ucharstriebuilder.h"
#include "ucharstrieiterator.h"
#include "intltest.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
@ -28,10 +28,10 @@ struct StringAndValue {
int32_t value;
};
class UCharTrieTest : public IntlTest {
class UCharsTrieTest : public IntlTest {
public:
UCharTrieTest() {}
virtual ~UCharTrieTest();
UCharsTrieTest() {}
virtual ~UCharsTrieTest();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
void TestBuilder();
@ -47,10 +47,10 @@ public:
void TestNextForCodePoint();
void TestFirstForCodePoint();
UBool buildLargeTrie(UCharTrieBuilder &builder, UnicodeString &result, int32_t numUniqueFirst);
UBool buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result, int32_t numUniqueFirst);
void TestLargeTrie();
UBool buildMonthsTrie(UCharTrieBuilder &builder, UDictTrieBuildOption buildOption,
UBool buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption,
UnicodeString &result);
void TestHasUniqueValue();
void TestGetNextUChars();
@ -61,27 +61,27 @@ public:
void TestTruncatingIteratorFromLinearMatchLong();
void checkData(const StringAndValue data[], int32_t dataLength);
void checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption);
void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
UBool buildTrie(const StringAndValue data[], int32_t dataLength,
UCharTrieBuilder &builder, UDictTrieBuildOption buildOption, UnicodeString &result);
UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result);
void checkFirst(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
void checkNext(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
void checkNextWithState(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
void checkNextString(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
void checkIterator(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
void checkIterator(UCharTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
void checkIterator(UCharsTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
};
// Factory for the UChars-trie test suite: returns a newly allocated test object
// as an IntlTest*. The caller is responsible for deleting it.
// (Rename diff: each pre-rename UCharTrie line is followed by its UCharsTrie replacement.)
extern IntlTest *createUCharTrieTest() {
return new UCharTrieTest();
extern IntlTest *createUCharsTrieTest() {
return new UCharsTrieTest();
}
// Out-of-line destructor with an empty body.
// (Rename diff: old UCharTrieTest spelling first, new UCharsTrieTest spelling second.)
UCharTrieTest::~UCharTrieTest() {
UCharsTrieTest::~UCharsTrieTest() {
}
void UCharTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if(exec) {
logln("TestSuite UCharTrieTest: ");
logln("TestSuite UCharsTrieTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(TestBuilder);
@ -107,37 +107,37 @@ void UCharTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
TESTCASE_AUTO_END;
}
// Checks the builder's error reporting on two invalid uses:
//   1. build() on a builder with nothing added must set U_INDEX_OUTOFBOUNDS_ERROR;
//   2. adding the same key ("=") twice and then building must set U_ILLEGAL_ARGUMENT_ERROR.
// Each failure is reported via errln() and ends the test early.
// (Rename diff: every UCharTrie/UDICTTRIE line is followed by its UCharsTrie/USTRINGTRIE replacement.)
void UCharTrieTest::TestBuilder() {
void UCharsTrieTest::TestBuilder() {
IcuTestErrorCode errorCode(*this, "TestBuilder()");
UCharTrieBuilder builder;
UCharsTrieBuilder builder;
// Receives the serialized trie from build(); its contents are not inspected here.
UnicodeString trieUChars;
// Case 1: build with no entries.
builder.build(UDICTTRIE_BUILD_FAST, trieUChars, errorCode);
builder.build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
// reset() hands back the pending status for comparison
// (presumably clearing it as well, so the next case starts clean — confirm against ErrorCode).
if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
errln("UCharTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
return;
}
// Case 2: the same key added twice with different values.
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(UDICTTRIE_BUILD_FAST, trieUChars, errorCode);
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
errln("UCharTrieBuilder.build() did not detect duplicates");
errln("UCharsTrieBuilder.build() did not detect duplicates");
return;
}
}
// Runs checkData() on a data set whose only entry is the empty string mapped to 0.
// (Rename diff: old UCharTrieTest signature first, new UCharsTrieTest signature second.)
void UCharTrieTest::TestEmpty() {
void UCharsTrieTest::TestEmpty() {
static const StringAndValue data[]={
{ "", 0 }
};
checkData(data, LENGTHOF(data));
}
// Runs checkData() on a data set with the single one-character string "a" mapped to 1.
// (Rename diff: old UCharTrieTest signature first, new UCharsTrieTest signature second.)
void UCharTrieTest::Test_a() {
void UCharsTrieTest::Test_a() {
static const StringAndValue data[]={
{ "a", 1 }
};
checkData(data, LENGTHOF(data));
}
void UCharTrieTest::Test_a_ab() {
void UCharsTrieTest::Test_a_ab() {
static const StringAndValue data[]={
{ "a", 1 },
{ "ab", 100 }
@ -145,7 +145,7 @@ void UCharTrieTest::Test_a_ab() {
checkData(data, LENGTHOF(data));
}
void UCharTrieTest::TestShortestBranch() {
void UCharsTrieTest::TestShortestBranch() {
static const StringAndValue data[]={
{ "a", 1000 },
{ "b", 2000 }
@ -153,7 +153,7 @@ void UCharTrieTest::TestShortestBranch() {
checkData(data, LENGTHOF(data));
}
void UCharTrieTest::TestBranches() {
void UCharsTrieTest::TestBranches() {
static const StringAndValue data[]={
{ "a", 0x10 },
{ "cc", 0x40 },
@ -176,7 +176,7 @@ void UCharTrieTest::TestBranches() {
}
}
void UCharTrieTest::TestLongSequence() {
void UCharsTrieTest::TestLongSequence() {
static const StringAndValue data[]={
{ "a", -1 },
// sequence of linear-match nodes
@ -192,7 +192,7 @@ void UCharTrieTest::TestLongSequence() {
checkData(data, LENGTHOF(data));
}
void UCharTrieTest::TestLongBranch() {
void UCharsTrieTest::TestLongBranch() {
// Split-branch and interesting compact-integer values.
static const StringAndValue data[]={
{ "a", -2 },
@ -220,7 +220,7 @@ void UCharTrieTest::TestLongBranch() {
checkData(data, LENGTHOF(data));
}
void UCharTrieTest::TestValuesForState() {
void UCharsTrieTest::TestValuesForState() {
// Check that saveState() and resetToState() interact properly
// with next() and current().
static const StringAndValue data[]={
@ -234,7 +234,7 @@ void UCharTrieTest::TestValuesForState() {
checkData(data, LENGTHOF(data));
}
void UCharTrieTest::TestCompact() {
void UCharsTrieTest::TestCompact() {
// Duplicate trailing strings and values provide opportunities for compacting.
static const StringAndValue data[]={
{ "+", 0 },
@ -261,7 +261,7 @@ void UCharTrieTest::TestCompact() {
checkData(data, LENGTHOF(data));
}
void UCharTrieTest::TestFirstForCodePoint() {
void UCharsTrieTest::TestFirstForCodePoint() {
static const StringAndValue data[]={
{ "a", 1 },
{ "a\\uD800", 2 },
@ -276,49 +276,49 @@ void UCharTrieTest::TestFirstForCodePoint() {
checkData(data, LENGTHOF(data));
}
// Exercises code-point-wise traversal (nextForCodePoint / firstForCodePoint) on a trie
// built from three strings that mix BMP and supplementary code points.
// Four walks are performed; after every step the returned result must equal trie.current():
//   1. data[0] in full  -> no value until the last code point, then a final value 2000000000;
//   2. data[1] in full  -> starts with firstForCodePoint(), ends with final value 44444;
//   3. a near-miss path -> the last code point must yield "no match";
//   4. data[2] in full  -> final value 99999.
// (Rename diff: every UCharTrie/UDICTTRIE line is followed by its UCharsTrie/USTRINGTRIE
// replacement; note the result constants were also renamed, e.g. *_HAS_FINAL_VALUE -> *_FINAL_VALUE.)
void UCharTrieTest::TestNextForCodePoint() {
void UCharsTrieTest::TestNextForCodePoint() {
// The strings are written with literal backslash escapes;
// presumably buildTrie() unescapes them before adding — confirm in buildTrie().
static const StringAndValue data[]={
{ "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
{ "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
{ "\\u4dff\\U000103ff", 99999 }
};
UCharTrieBuilder builder;
UCharsTrieBuilder builder;
UnicodeString trieUChars;
if(!buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
return; // buildTrie() reported an error
}
UCharTrie trie(trieUChars.getBuffer());
UDictTrieResult result;
// Walk 1: all six code points of data[0], from a freshly constructed trie.
if( (result=trie.nextForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x10000))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x9999))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x20000))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0xdfff))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x10ffff))!=UDICTTRIE_HAS_FINAL_VALUE || result!=trie.current() ||
UCharsTrie trie(trieUChars.getBuffer());
UStringTrieResult result;
if( (result=trie.nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
trie.getValue()!=2000000000
) {
errln("UCharTrie.nextForCodePoint() fails for %s", data[0].s);
errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
}
// Walk 2: data[1]. No reset() is called here; the walk begins with firstForCodePoint(),
// which presumably restarts from the root by itself — confirm against the trie API.
if( (result=trie.firstForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x10000))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x9999))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x20002))!=UDICTTRIE_HAS_FINAL_VALUE || result!=trie.current() ||
if( (result=trie.firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
trie.getValue()!=44444
) {
errln("UCharTrie.nextForCodePoint() fails for %s", data[1].s);
errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
}
// Walk 3: U+20222 has the same lead surrogate (D840) as the stored U+20000 and U+20002
// but a different trail surrogate, so the mismatch happens on the second code unit.
if( (result=trie.reset().nextForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x10000))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x9999))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x20222))!=UDICTTRIE_NO_MATCH || result!=trie.current() // no match for trail surrogate
if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie.current() // no match for trail surrogate
) {
errln("UCharTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
}
// Walk 4: data[2]. U+103FF shares its lead surrogate (D800) with U+10000 from the other
// two strings, so this path diverges from them only at the trail surrogate.
if( (result=trie.reset().nextForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x103ff))!=UDICTTRIE_HAS_FINAL_VALUE || result!=trie.current() ||
if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
(result=trie.nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
trie.getValue()!=99999
) {
errln("UCharTrie.nextForCodePoint() fails for %s", data[2].s);
errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
}
}
@ -356,8 +356,8 @@ private:
} // end namespace
UBool UCharTrieTest::buildLargeTrie(UCharTrieBuilder &builder, UnicodeString &result,
int32_t numUniqueFirst) {
UBool UCharsTrieTest::buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result,
int32_t numUniqueFirst) {
IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
Generator gen;
builder.clear();
@ -366,33 +366,33 @@ UBool UCharTrieTest::buildLargeTrie(UCharTrieBuilder &builder, UnicodeString &re
gen.next();
}
infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
builder.build(UDICTTRIE_BUILD_FAST, result, errorCode);
builder.build(USTRINGTRIE_BUILD_FAST, result, errorCode);
logln("serialized trie size: %ld UChars\n", (long)result.length());
return errorCode.isSuccess();
}
// Exercise a large branch node.
void UCharTrieTest::TestLargeTrie() {
UCharTrieBuilder builder;
void UCharsTrieTest::TestLargeTrie() {
UCharsTrieBuilder builder;
UnicodeString trieUChars;
if(!buildLargeTrie(builder, trieUChars, 1111)) {
return; // buildTrie() reported an error
}
UCharTrie trie(trieUChars.getBuffer());
UCharsTrie trie(trieUChars.getBuffer());
Generator gen;
while(gen.countUniqueFirstChars()<1111) {
UnicodeString x(gen.getString());
int32_t value=gen.getValue();
if(!x.isEmpty()) {
if(trie.first(x[0])==UDICTTRIE_NO_MATCH) {
errln("next(first char U+%04X)=UDICTTRIE_NO_MATCH for string %ld\n",
if(trie.first(x[0])==USTRINGTRIE_NO_MATCH) {
errln("next(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
x[0], (long)gen.getIndex());
break;
}
x.remove(0, 1);
}
UDictTrieResult result=trie.next(x.getBuffer(), x.length());
if(!UDICTTRIE_RESULT_HAS_VALUE(result) || result!=trie.current() || value!=trie.getValue()) {
UStringTrieResult result=trie.next(x.getBuffer(), x.length());
if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie.current() || value!=trie.getValue()) {
errln("next(%d chars U+%04X U+%04X)!=hasValue or "
"next()!=current() or getValue() wrong "
"for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
@ -413,8 +413,8 @@ enum {
u_y=0x79
};
UBool UCharTrieTest::buildMonthsTrie(UCharTrieBuilder &builder, UDictTrieBuildOption buildOption,
UnicodeString &result) {
UBool UCharsTrieTest::buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption,
UnicodeString &result) {
// All types of nodes leading to the same value,
// for code coverage of recursive functions.
// In particular, we need a lot of branches on some single level
@ -454,13 +454,13 @@ UBool UCharTrieTest::buildMonthsTrie(UCharTrieBuilder &builder, UDictTrieBuildOp
return buildTrie(data, LENGTHOF(data), builder, buildOption, result);
}
void UCharTrieTest::TestHasUniqueValue() {
UCharTrieBuilder builder;
void UCharsTrieTest::TestHasUniqueValue() {
UCharsTrieBuilder builder;
UnicodeString trieUChars;
if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
return; // buildTrie() reported an error
}
UCharTrie trie(trieUChars.getBuffer());
UCharsTrie trie(trieUChars.getBuffer());
int32_t uniqueValue;
if(trie.hasUniqueValue(uniqueValue)) {
errln("unique value at root");
@ -477,7 +477,7 @@ void UCharTrieTest::TestHasUniqueValue() {
if(trie.hasUniqueValue(uniqueValue)) {
errln("unique value after \"ju\"");
}
if(trie.next(u_n)!=UDICTTRIE_HAS_VALUE || 6!=trie.getValue()) {
if(trie.next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) {
errln("not normal value 6 after \"jun\"");
}
// hasUniqueValue() after getValue()
@ -501,13 +501,13 @@ private:
UnicodeString &str;
};
void UCharTrieTest::TestGetNextUChars() {
UCharTrieBuilder builder;
void UCharsTrieTest::TestGetNextUChars() {
UCharsTrieBuilder builder;
UnicodeString trieUChars;
if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL, trieUChars)) {
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) {
return; // buildTrie() reported an error
}
UCharTrie trie(trieUChars.getBuffer());
UCharsTrie trie(trieUChars.getBuffer());
UnicodeString buffer;
UnicodeStringAppendable app(buffer);
int32_t count=trie.getNextUChars(app);
@ -523,7 +523,7 @@ void UCharTrieTest::TestGetNextUChars() {
errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
}
// getNextUChars() after getValue()
trie.getValue(); // next() had returned UDICTTRIE_HAS_VALUE.
trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
count=trie.getNextUChars(app.reset());
if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
@ -548,20 +548,20 @@ void UCharTrieTest::TestGetNextUChars() {
}
}
void UCharTrieTest::TestIteratorFromBranch() {
UCharTrieBuilder builder;
void UCharsTrieTest::TestIteratorFromBranch() {
UCharsTrieBuilder builder;
UnicodeString trieUChars;
if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
return; // buildTrie() reported an error
}
UCharTrie trie(trieUChars.getBuffer());
UCharsTrie trie(trieUChars.getBuffer());
// Go to a branch node.
trie.next(u_j);
trie.next(u_a);
trie.next(u_n);
IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
UCharTrieIterator iter(trie, 0, errorCode);
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) {
UCharsTrieIterator iter(trie, 0, errorCode);
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
return;
}
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
@ -599,13 +599,13 @@ void UCharTrieTest::TestIteratorFromBranch() {
checkIterator(iter.reset(), data, LENGTHOF(data));
}
void UCharTrieTest::TestIteratorFromLinearMatch() {
UCharTrieBuilder builder;
void UCharsTrieTest::TestIteratorFromLinearMatch() {
UCharsTrieBuilder builder;
UnicodeString trieUChars;
if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL, trieUChars)) {
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) {
return; // buildTrie() reported an error
}
UCharTrie trie(trieUChars.getBuffer());
UCharsTrie trie(trieUChars.getBuffer());
// Go into a linear-match node.
trie.next(u_j);
trie.next(u_a);
@ -613,8 +613,8 @@ void UCharTrieTest::TestIteratorFromLinearMatch() {
trie.next(u_u);
trie.next(u_a);
IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
UCharTrieIterator iter(trie, 0, errorCode);
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) {
UCharsTrieIterator iter(trie, 0, errorCode);
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
return;
}
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
@ -629,15 +629,15 @@ void UCharTrieTest::TestIteratorFromLinearMatch() {
checkIterator(iter.reset(), data, LENGTHOF(data));
}
void UCharTrieTest::TestTruncatingIteratorFromRoot() {
UCharTrieBuilder builder;
void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
UCharsTrieBuilder builder;
UnicodeString trieUChars;
if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
return; // buildTrie() reported an error
}
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
UCharTrieIterator iter(trieUChars.getBuffer(), 4, errorCode);
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) {
UCharsTrieIterator iter(trieUChars.getBuffer(), 4, errorCode);
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
return;
}
// Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
@ -676,25 +676,25 @@ void UCharTrieTest::TestTruncatingIteratorFromRoot() {
checkIterator(iter.reset(), data, LENGTHOF(data));
}
void UCharTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
static const StringAndValue data[]={
{ "abcdef", 10 },
{ "abcdepq", 200 },
{ "abcdeyz", 3000 }
};
UCharTrieBuilder builder;
UCharsTrieBuilder builder;
UnicodeString trieUChars;
if(!buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
return; // buildTrie() reported an error
}
UCharTrie trie(trieUChars.getBuffer());
UCharsTrie trie(trieUChars.getBuffer());
// Go into a linear-match node.
trie.next(u_a);
trie.next(u_b);
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
// Truncate within the linear-match node.
UCharTrieIterator iter(trie, 2, errorCode);
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) {
UCharsTrieIterator iter(trie, 2, errorCode);
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
return;
}
static const StringAndValue expected[]={
@ -706,26 +706,26 @@ void UCharTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
checkIterator(iter.reset(), expected, LENGTHOF(expected));
}
void UCharTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
static const StringAndValue data[]={
{ "abcdef", 10 },
{ "abcdepq", 200 },
{ "abcdeyz", 3000 }
};
UCharTrieBuilder builder;
UCharsTrieBuilder builder;
UnicodeString trieUChars;
if(!buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST, trieUChars)) {
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
return; // buildTrie() reported an error
}
UCharTrie trie(trieUChars.getBuffer());
UCharsTrie trie(trieUChars.getBuffer());
// Go into a linear-match node.
trie.next(u_a);
trie.next(u_b);
trie.next(u_c);
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
// Truncate after the linear-match node.
UCharTrieIterator iter(trie, 3, errorCode);
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) {
UCharsTrieIterator iter(trie, 3, errorCode);
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
return;
}
static const StringAndValue expected[]={
@ -739,15 +739,15 @@ void UCharTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
checkIterator(iter.reset(), expected, LENGTHOF(expected));
}
// Convenience overload: runs the three-argument checkData() on the same
// data twice, first with the FAST build option and then with the SMALL
// build option, logging which variant is about to run before each call.
void UCharTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
logln("checkData(dataLength=%d, fast)", (int)dataLength);
checkData(data, dataLength, UDICTTRIE_BUILD_FAST);
checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
logln("checkData(dataLength=%d, small)", (int)dataLength);
checkData(data, dataLength, UDICTTRIE_BUILD_SMALL);
checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
}
void UCharTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption) {
UCharTrieBuilder builder;
void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
UCharsTrieBuilder builder;
UnicodeString trieUChars;
if(!buildTrie(data, dataLength, builder, buildOption, trieUChars)) {
return; // buildTrie() reported an error
@ -759,8 +759,8 @@ void UCharTrieTest::checkData(const StringAndValue data[], int32_t dataLength, U
checkIterator(trieUChars, data, dataLength);
}
UBool UCharTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
UCharTrieBuilder &builder, UDictTrieBuildOption buildOption, UnicodeString &result) {
UBool UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result) {
IcuTestErrorCode errorCode(*this, "buildTrie()");
// Add the items to the trie builder in an interesting (not trivial, not random) order.
int32_t index, step;
@ -793,9 +793,9 @@ UBool UCharTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
return errorCode.isSuccess();
}
void UCharTrieTest::checkFirst(const UnicodeString &trieUChars,
const StringAndValue data[], int32_t dataLength) {
UCharTrie trie(trieUChars.getBuffer());
void UCharsTrieTest::checkFirst(const UnicodeString &trieUChars,
const StringAndValue data[], int32_t dataLength) {
UCharsTrie trie(trieUChars.getBuffer());
for(int32_t i=0; i<dataLength; ++i) {
if(*data[i].s==0) {
continue; // skip empty string
@ -803,12 +803,12 @@ void UCharTrieTest::checkFirst(const UnicodeString &trieUChars,
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
UChar32 c=expectedString[0];
UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
UDictTrieResult firstResult=trie.first(c);
int32_t firstValue=UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1;
UDictTrieResult nextResult=trie.next(nextCp);
UStringTrieResult firstResult=trie.first(c);
int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
UStringTrieResult nextResult=trie.next(nextCp);
if(firstResult!=trie.reset().next(c) ||
firstResult!=trie.current() ||
firstValue!=(UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
nextResult!=trie.next(nextCp)
) {
errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
@ -818,11 +818,11 @@ void UCharTrieTest::checkFirst(const UnicodeString &trieUChars,
int32_t cLength=U16_LENGTH(c);
nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
firstResult=trie.firstForCodePoint(c);
firstValue=UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1;
firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
nextResult=trie.nextForCodePoint(nextCp);
if(firstResult!=trie.reset().nextForCodePoint(c) ||
firstResult!=trie.current() ||
firstValue!=(UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
nextResult!=trie.nextForCodePoint(nextCp)
) {
errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
@ -831,15 +831,15 @@ void UCharTrieTest::checkFirst(const UnicodeString &trieUChars,
}
}
void UCharTrieTest::checkNext(const UnicodeString &trieUChars,
const StringAndValue data[], int32_t dataLength) {
UCharTrie trie(trieUChars.getBuffer());
UCharTrie::State state;
void UCharsTrieTest::checkNext(const UnicodeString &trieUChars,
const StringAndValue data[], int32_t dataLength) {
UCharsTrie trie(trieUChars.getBuffer());
UCharsTrie::State state;
for(int32_t i=0; i<dataLength; ++i) {
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
int32_t stringLength= (i&1) ? -1 : expectedString.length();
UDictTrieResult result;
if( !UDICTTRIE_RESULT_HAS_VALUE(
UStringTrieResult result;
if( !USTRINGTRIE_HAS_VALUE(
result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
result!=trie.current()
) {
@ -856,20 +856,20 @@ void UCharTrieTest::checkNext(const UnicodeString &trieUChars,
stringLength=expectedString.length();
result=trie.current();
for(int32_t j=0; j<stringLength; ++j) {
if(!UDICTTRIE_RESULT_HAS_NEXT(result)) {
if(!USTRINGTRIE_HAS_NEXT(result)) {
errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
break;
}
if(result==UDICTTRIE_HAS_VALUE) {
if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
trie.getValue();
if(trie.current()!=UDICTTRIE_HAS_VALUE) {
errln("trie.getValue().current()!=UDICTTRIE_HAS_VALUE before end of %s (at index %d)", data[i].s, j);
if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
break;
}
}
result=trie.next(expectedString[j]);
if(!UDICTTRIE_RESULT_MATCHES(result)) {
errln("trie.next()=UDICTTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
if(!USTRINGTRIE_MATCHES(result)) {
errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
break;
}
if(result!=trie.current()) {
@ -877,7 +877,7 @@ void UCharTrieTest::checkNext(const UnicodeString &trieUChars,
break;
}
}
if(!UDICTTRIE_RESULT_HAS_VALUE(result)) {
if(!USTRINGTRIE_HAS_VALUE(result)) {
errln("trie.next()!=hasValue at the end of %s", data[i].s);
continue;
}
@ -898,18 +898,18 @@ void UCharTrieTest::checkNext(const UnicodeString &trieUChars,
break;
}
}
if((result==UDICTTRIE_HAS_VALUE)!=nextContinues) {
errln("(trie.current()==UDICTTRIE_HAS_VALUE) contradicts "
"(trie.next(some UChar)!=UDICTTRIE_NO_MATCH) after end of %s", data[i].s);
if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
"(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
}
trie.reset();
}
}
void UCharTrieTest::checkNextWithState(const UnicodeString &trieUChars,
const StringAndValue data[], int32_t dataLength) {
UCharTrie trie(trieUChars.getBuffer());
UCharTrie::State noState, state;
void UCharsTrieTest::checkNextWithState(const UnicodeString &trieUChars,
const StringAndValue data[], int32_t dataLength) {
UCharsTrie trie(trieUChars.getBuffer());
UCharsTrie::State noState, state;
for(int32_t i=0; i<dataLength; ++i) {
if((i&1)==0) {
// This should have no effect.
@ -919,36 +919,36 @@ void UCharTrieTest::checkNextWithState(const UnicodeString &trieUChars,
int32_t stringLength=expectedString.length();
int32_t partialLength=stringLength/3;
for(int32_t j=0; j<partialLength; ++j) {
if(!UDICTTRIE_RESULT_MATCHES(trie.next(expectedString[j]))) {
errln("trie.next()=UDICTTRIE_NO_MATCH for a prefix of %s", data[i].s);
if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
return;
}
}
trie.saveState(state);
UDictTrieResult resultAtState=trie.current();
UDictTrieResult result;
UStringTrieResult resultAtState=trie.current();
UStringTrieResult result;
int32_t valueAtState=-99;
if(UDICTTRIE_RESULT_HAS_VALUE(resultAtState)) {
if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
valueAtState=trie.getValue();
}
result=trie.next(0); // mismatch
if(result!=UDICTTRIE_NO_MATCH || result!=trie.current()) {
if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
errln("trie.next(0) matched after part of %s", data[i].s);
}
if( resultAtState!=trie.resetToState(state).current() ||
(UDICTTRIE_RESULT_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
(USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
) {
errln("trie.next(part of %s) changes current()/getValue() after "
"saveState/next(0)/resetToState",
data[i].s);
} else if(!UDICTTRIE_RESULT_HAS_VALUE(
} else if(!USTRINGTRIE_HAS_VALUE(
result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
stringLength-partialLength)) ||
result!=trie.current()) {
errln("trie.next(rest of %s) does not seem to contain %s after "
"saveState/next(0)/resetToState",
data[i].s);
} else if(!UDICTTRIE_RESULT_HAS_VALUE(
} else if(!USTRINGTRIE_HAS_VALUE(
result=trie.resetToState(state).
next(expectedString.getTerminatedBuffer()+partialLength,
stringLength-partialLength)) ||
@ -967,37 +967,37 @@ void UCharTrieTest::checkNextWithState(const UnicodeString &trieUChars,
// next(string) is also tested in other functions,
// but here we try to go partway through the string, and then beyond it.
//
// For each data item the unescaped string is fed to the trie in two calls:
// first the leading stringLength/2 UChars, then the remaining UChars plus
// one extra unit taken from the terminated buffer (the "string+NUL" case in
// the error message). The first call must report a match and the second
// must not; either violation is reported via errln().
void UCharTrieTest::checkNextString(const UnicodeString &trieUChars,
const StringAndValue data[], int32_t dataLength) {
void UCharsTrieTest::checkNextString(const UnicodeString &trieUChars,
const StringAndValue data[], int32_t dataLength) {
UCharsTrie trie(trieUChars.getBuffer());
for(int32_t i=0; i<dataLength; ++i) {
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
int32_t stringLength=expectedString.length();
// The result of next() is tested as a boolean here.
// NOTE(review): this presumably relies on the NO_MATCH result being 0 -- confirm.
if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
errln("trie.next(up to middle of string)=UDICTTRIE_NO_MATCH for %s", data[i].s);
errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
// NOTE(review): this continue skips the trie.reset() at the bottom of the
// loop, so the next item starts from the trie's current state -- confirm
// that this is intended.
continue;
}
// Test that we stop properly at the end of the string.
// Length argument: the rest of the string (stringLength-stringLength/2) plus 1.
if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
stringLength+1-stringLength/2)) {
errln("trie.next(string+NUL)!=UDICTTRIE_NO_MATCH for %s", data[i].s);
errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
}
// Reset the trie before checking the next item.
trie.reset();
}
}
// Builds a trie iterator directly over the serialized trie in trieUChars and
// hands it to the iterator-taking checkIterator() overload together with the
// expected data. Returns early, after logging, if the iterator constructor
// reported a failure through errorCode.
void UCharTrieTest::checkIterator(const UnicodeString &trieUChars,
const StringAndValue data[], int32_t dataLength) {
void UCharsTrieTest::checkIterator(const UnicodeString &trieUChars,
const StringAndValue data[], int32_t dataLength) {
IcuTestErrorCode errorCode(*this, "checkIterator()");
// Second constructor argument is 0 here.
// NOTE(review): presumably 0 means "no maximum string length", since the
// truncating-iterator tests in this file pass nonzero values -- confirm
// against the iterator constructor.
UCharTrieIterator iter(trieUChars.getBuffer(), 0, errorCode);
if(errorCode.logIfFailureAndReset("UCharTrieIterator(trieUChars) constructor")) {
UCharsTrieIterator iter(trieUChars.getBuffer(), 0, errorCode);
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trieUChars) constructor")) {
return;
}
checkIterator(iter, data, dataLength);
}
void UCharTrieTest::checkIterator(UCharTrieIterator &iter,
const StringAndValue data[], int32_t dataLength) {
void UCharsTrieTest::checkIterator(UCharsTrieIterator &iter,
const StringAndValue data[], int32_t dataLength) {
IcuTestErrorCode errorCode(*this, "checkIterator()");
for(int32_t i=0; i<dataLength; ++i) {
if(!iter.hasNext()) {

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2002-2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: dicttrieperf.cpp
@ -26,15 +26,15 @@
#include <stdlib.h>
#include "unicode/uperf.h"
#include "unicode/utext.h"
#include "bytetrie.h"
#include "bytetriebuilder.h"
#include "bytestrie.h"
#include "bytestriebuilder.h"
#include "charstr.h"
#include "package.h"
#include "toolutil.h"
#include "triedict.h"
#include "ucbuf.h" // struct ULine
#include "uchartrie.h"
#include "uchartriebuilder.h"
#include "ucharstrie.h"
#include "ucharstriebuilder.h"
#include "uoptions.h"
#include "uvectr32.h"
@ -259,18 +259,18 @@ public:
}
};
// Looks up the string s in the serialized trie at nameTrieBytes.
// Constructs a fresh trie object over the bytes on every call, feeds it s in
// a single next() call, and returns the trie's value if the result has a
// value; otherwise returns -1.
// NOTE(review): the length argument -1 presumably means "s is NUL-terminated"
// -- confirm against the trie's next(const char *, int32_t) documentation.
static int32_t byteTrieLookup(const char *s, const char *nameTrieBytes) {
ByteTrie trie(nameTrieBytes);
if(UDICTTRIE_RESULT_HAS_VALUE(trie.next(s, -1))) {
static int32_t bytesTrieLookup(const char *s, const char *nameTrieBytes) {
BytesTrie trie(nameTrieBytes);
if(USTRINGTRIE_HAS_VALUE(trie.next(s, -1))) {
return trie.getValue();
} else {
// No value for s: -1 is the "not found" result (the caller in this file tests for <0).
return -1;
}
}
class ByteTriePackageLookup : public PackageLookup {
class BytesTriePackageLookup : public PackageLookup {
public:
ByteTriePackageLookup(const DictionaryTriePerfTest &perf)
BytesTriePackageLookup(const DictionaryTriePerfTest &perf)
: PackageLookup(perf) {
IcuToolErrorCode errorCode("BinarySearchPackageLookup()");
int32_t count=pkg.getItemCount();
@ -292,20 +292,20 @@ public:
// NUL-terminate the name for call() to find the next one.
itemNames.append(0, errorCode);
}
int32_t length=builder.build(UDICTTRIE_BUILD_SMALL, errorCode).length();
printf("size of ByteTrie: %6ld\n", (long)length);
int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, errorCode).length();
printf("size of BytesTrie: %6ld\n", (long)length);
// count+1: +1 for the last-item limit offset which we should have always had
printf("size of dataOffsets:%6ld\n", (long)((count+1)*4));
printf("total index size: %6ld\n", (long)(length+(count+1)*4));
}
virtual ~ByteTriePackageLookup() {}
virtual ~BytesTriePackageLookup() {}
virtual void call(UErrorCode *pErrorCode) {
int32_t count=pkg.getItemCount();
const char *nameTrieBytes=builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data();
const char *nameTrieBytes=builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data();
const char *name=itemNames.data();
for(int32_t i=0; i<count; ++i) {
if(byteTrieLookup(name, nameTrieBytes)<0) {
if(bytesTrieLookup(name, nameTrieBytes)<0) {
fprintf(stderr, "item not found: %s\n", name);
}
name=strchr(name, 0)+1;
@ -313,7 +313,7 @@ public:
}
protected:
ByteTrieBuilder builder;
BytesTrieBuilder builder;
CharString itemNames;
};
@ -337,9 +337,9 @@ class CompactTrieDictLookup : public DictLookup {
public:
CompactTrieDictLookup(const DictionaryTriePerfTest &perfTest)
: DictLookup(perfTest), ctd(NULL) {
IcuToolErrorCode errorCode("UCharTrieDictLookup()");
IcuToolErrorCode errorCode("UCharsTrieDictLookup()");
// U+0E1C is the median code unit, from
// the UCharTrie root node (split-branch node) for thaidict.txt.
// the UCharsTrie root node (split-branch node) for thaidict.txt.
MutableTrieDictionary builder(0xe1c, errorCode);
const ULine *lines=perf.getCachedLines();
int32_t numLines=perf.getNumLines();
@ -386,13 +386,13 @@ protected:
// Closely imitate CompactTrieDictionary::matches().
// Note: CompactTrieDictionary::matches() is part of its trie implementation,
// and while it loops over the text, it knows the current state.
// By contrast, this implementation uses UCharTrie API functions that have to
// By contrast, this implementation uses UCharsTrie API functions that have to
// check the trie state each time and load/store state in the object.
// (Whether it hasNext() and whether it is in the middle of a linear-match node.)
static int32_t
ucharTrieMatches(UCharTrie &trie,
UText *text, int32_t textLimit,
int32_t *lengths, int &count, int limit ) {
ucharsTrieMatches(UCharsTrie &trie,
UText *text, int32_t textLimit,
int32_t *lengths, int &count, int limit ) {
UChar32 c=utext_next32(text);
// Notes:
// a) CompactTrieDictionary::matches() does not check for U_SENTINEL.
@ -402,19 +402,19 @@ ucharTrieMatches(UCharTrie &trie,
}
// Should be firstForCodePoint() but CompactTrieDictionary
// handles only code units.
UDictTrieResult result=trie.first(c);
UStringTrieResult result=trie.first(c);
int32_t numChars=1;
count=0;
for(;;) {
if(UDICTTRIE_RESULT_HAS_VALUE(result)) {
if(USTRINGTRIE_HAS_VALUE(result)) {
if(count<limit) {
// lengths[count++]=(int32_t)utext_getNativeIndex(text);
lengths[count++]=numChars; // CompactTrieDictionary just counts chars too.
}
if(result==UDICTTRIE_HAS_FINAL_VALUE) {
if(result==USTRINGTRIE_FINAL_VALUE) {
break;
}
} else if(result==UDICTTRIE_NO_MATCH) {
} else if(result==USTRINGTRIE_NO_MATCH) {
break;
}
if(numChars>=textLimit) {
@ -447,11 +447,11 @@ ucharTrieMatches(UCharTrie &trie,
return numChars;
}
class UCharTrieDictLookup : public DictLookup {
class UCharsTrieDictLookup : public DictLookup {
public:
UCharTrieDictLookup(const DictionaryTriePerfTest &perfTest)
UCharsTrieDictLookup(const DictionaryTriePerfTest &perfTest)
: DictLookup(perfTest) {
IcuToolErrorCode errorCode("UCharTrieDictLookup()");
IcuToolErrorCode errorCode("UCharsTrieDictLookup()");
const ULine *lines=perf.getCachedLines();
int32_t numLines=perf.getNumLines();
for(int32_t i=0; i<numLines; ++i) {
@ -462,24 +462,24 @@ public:
builder.add(UnicodeString(FALSE, lines[i].name, lines[i].len), 0, errorCode);
}
UnicodeString trieUChars;
int32_t length=builder.build(UDICTTRIE_BUILD_SMALL, trieUChars, errorCode).length();
printf("size of UCharTrie: %6ld bytes\n", (long)length*2);
int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, trieUChars, errorCode).length();
printf("size of UCharsTrie: %6ld bytes\n", (long)length*2);
}
virtual ~UCharTrieDictLookup() {}
virtual ~UCharsTrieDictLookup() {}
protected:
UCharTrieBuilder builder;
UCharsTrieBuilder builder;
};
class UCharTrieDictMatches : public UCharTrieDictLookup {
class UCharsTrieDictMatches : public UCharsTrieDictLookup {
public:
UCharTrieDictMatches(const DictionaryTriePerfTest &perfTest)
: UCharTrieDictLookup(perfTest) {}
UCharsTrieDictMatches(const DictionaryTriePerfTest &perfTest)
: UCharsTrieDictLookup(perfTest) {}
virtual void call(UErrorCode *pErrorCode) {
UnicodeString uchars;
UCharTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
UCharsTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
UText text=UTEXT_INITIALIZER;
int32_t lengths[20];
const ULine *lines=perf.getCachedLines();
@ -491,8 +491,8 @@ public:
}
utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
int32_t count=0;
ucharTrieMatches(trie, &text, lines[i].len,
lengths, count, LENGTHOF(lengths));
ucharsTrieMatches(trie, &text, lines[i].len,
lengths, count, LENGTHOF(lengths));
if(count==0 || lengths[count-1]!=lines[i].len) {
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
}
@ -500,14 +500,14 @@ public:
}
};
class UCharTrieDictContains : public UCharTrieDictLookup {
class UCharsTrieDictContains : public UCharsTrieDictLookup {
public:
UCharTrieDictContains(const DictionaryTriePerfTest &perfTest)
: UCharTrieDictLookup(perfTest) {}
UCharsTrieDictContains(const DictionaryTriePerfTest &perfTest)
: UCharsTrieDictLookup(perfTest) {}
virtual void call(UErrorCode *pErrorCode) {
UnicodeString uchars;
UCharTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
UCharsTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
const ULine *lines=perf.getCachedLines();
int32_t numLines=perf.getNumLines();
for(int32_t i=0; i<numLines; ++i) {
@ -515,7 +515,7 @@ public:
if(lines[i].name[0]<0x41) {
continue;
}
if(!UDICTTRIE_RESULT_HAS_VALUE(trie.reset().next(lines[i].name, lines[i].len))) {
if(!USTRINGTRIE_HAS_VALUE(trie.reset().next(lines[i].name, lines[i].len))) {
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
}
}
@ -547,11 +547,11 @@ static UBool thaiWordToBytes(const UChar *s, int32_t length,
return TRUE;
}
class ByteTrieDictLookup : public DictLookup {
class BytesTrieDictLookup : public DictLookup {
public:
ByteTrieDictLookup(const DictionaryTriePerfTest &perfTest)
BytesTrieDictLookup(const DictionaryTriePerfTest &perfTest)
: DictLookup(perfTest), noDict(FALSE) {
IcuToolErrorCode errorCode("ByteTrieDictLookup()");
IcuToolErrorCode errorCode("BytesTrieDictLookup()");
CharString str;
const ULine *lines=perf.getCachedLines();
int32_t numLines=perf.getNumLines();
@ -568,39 +568,39 @@ public:
builder.add(str.toStringPiece(), 0, errorCode);
}
if(!noDict) {
int32_t length=builder.build(UDICTTRIE_BUILD_SMALL, errorCode).length();
printf("size of ByteTrie: %6ld bytes\n", (long)length);
int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, errorCode).length();
printf("size of BytesTrie: %6ld bytes\n", (long)length);
}
}
virtual ~ByteTrieDictLookup() {}
virtual ~BytesTrieDictLookup() {}
protected:
ByteTrieBuilder builder;
BytesTrieBuilder builder;
UBool noDict;
};
static int32_t
byteTrieMatches(ByteTrie &trie,
UText *text, int32_t textLimit,
int32_t *lengths, int &count, int limit ) {
bytesTrieMatches(BytesTrie &trie,
UText *text, int32_t textLimit,
int32_t *lengths, int &count, int limit ) {
UChar32 c=utext_next32(text);
if(c<0) {
return 0;
}
UDictTrieResult result=trie.first(thaiCharToByte(c));
UStringTrieResult result=trie.first(thaiCharToByte(c));
int32_t numChars=1;
count=0;
for(;;) {
if(UDICTTRIE_RESULT_HAS_VALUE(result)) {
if(USTRINGTRIE_HAS_VALUE(result)) {
if(count<limit) {
// lengths[count++]=(int32_t)utext_getNativeIndex(text);
lengths[count++]=numChars; // CompactTrieDictionary just counts chars too.
}
if(result==UDICTTRIE_HAS_FINAL_VALUE) {
if(result==USTRINGTRIE_FINAL_VALUE) {
break;
}
} else if(result==UDICTTRIE_NO_MATCH) {
} else if(result==USTRINGTRIE_NO_MATCH) {
break;
}
if(numChars>=textLimit) {
@ -616,16 +616,16 @@ byteTrieMatches(ByteTrie &trie,
return numChars;
}
class ByteTrieDictMatches : public ByteTrieDictLookup {
class BytesTrieDictMatches : public BytesTrieDictLookup {
public:
ByteTrieDictMatches(const DictionaryTriePerfTest &perfTest)
: ByteTrieDictLookup(perfTest) {}
BytesTrieDictMatches(const DictionaryTriePerfTest &perfTest)
: BytesTrieDictLookup(perfTest) {}
virtual void call(UErrorCode *pErrorCode) {
if(noDict) {
return;
}
ByteTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data());
BytesTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data());
UText text=UTEXT_INITIALIZER;
int32_t lengths[20];
const ULine *lines=perf.getCachedLines();
@ -637,8 +637,8 @@ public:
}
utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
int32_t count=0;
byteTrieMatches(trie, &text, lines[i].len,
lengths, count, LENGTHOF(lengths));
bytesTrieMatches(trie, &text, lines[i].len,
lengths, count, LENGTHOF(lengths));
if(count==0 || lengths[count-1]!=lines[i].len) {
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
}
@ -646,16 +646,16 @@ public:
}
};
class ByteTrieDictContains : public ByteTrieDictLookup {
class BytesTrieDictContains : public BytesTrieDictLookup {
public:
ByteTrieDictContains(const DictionaryTriePerfTest &perfTest)
: ByteTrieDictLookup(perfTest) {}
BytesTrieDictContains(const DictionaryTriePerfTest &perfTest)
: BytesTrieDictLookup(perfTest) {}
virtual void call(UErrorCode *pErrorCode) {
if(noDict) {
return;
}
ByteTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data());
BytesTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data());
const ULine *lines=perf.getCachedLines();
int32_t numLines=perf.getNumLines();
for(int32_t i=0; i<numLines; ++i) {
@ -664,16 +664,16 @@ public:
if(line[0]<0x41) {
continue;
}
UDictTrieResult result=trie.first(thaiCharToByte(line[0]));
UStringTrieResult result=trie.first(thaiCharToByte(line[0]));
int32_t lineLength=lines[i].len;
for(int32_t j=1; j<lineLength; ++j) {
if(!UDICTTRIE_RESULT_HAS_NEXT(result)) {
if(!USTRINGTRIE_HAS_NEXT(result)) {
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
break;
}
result=trie.next(thaiCharToByte(line[j]));
}
if(!UDICTTRIE_RESULT_HAS_VALUE(result)) {
if(!USTRINGTRIE_HAS_VALUE(result)) {
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
}
}
@ -691,27 +691,27 @@ UPerfFunction *DictionaryTriePerfTest::runIndexedTest(int32_t index, UBool exec,
}
break;
case 1:
name="uchartriematches";
name="ucharstriematches";
if(exec) {
return new UCharTrieDictMatches(*this);
return new UCharsTrieDictMatches(*this);
}
break;
case 2:
name="uchartriecontains";
name="ucharstriecontains";
if(exec) {
return new UCharTrieDictContains(*this);
return new UCharsTrieDictContains(*this);
}
break;
case 3:
name="bytetriematches";
name="bytestriematches";
if(exec) {
return new ByteTrieDictMatches(*this);
return new BytesTrieDictMatches(*this);
}
break;
case 4:
name="bytetriecontains";
name="bytestriecontains";
if(exec) {
return new ByteTrieDictContains(*this);
return new BytesTrieDictContains(*this);
}
break;
default:
@ -720,8 +720,8 @@ UPerfFunction *DictionaryTriePerfTest::runIndexedTest(int32_t index, UBool exec,
}
} else {
if(index==0 && exec) {
puts("Running ByteTrie perf tests on the .dat package file from the --sourcedir.\n"
"For UCharTrie perf tests on a dictionary text file, specify the -f or --file-name.\n");
puts("Running BytesTrie perf tests on the .dat package file from the --sourcedir.\n"
"For UCharsTrie perf tests on a dictionary text file, specify the -f or --file-name.\n");
}
switch(index) {
case 0:
@ -737,9 +737,9 @@ UPerfFunction *DictionaryTriePerfTest::runIndexedTest(int32_t index, UBool exec,
}
break;
case 2:
name="bytetrie";
name="bytestrie";
if(exec) {
return new ByteTriePackageLookup(*this);
return new BytesTriePackageLookup(*this);
}
break;
default:

View file

@ -1,6 +1,6 @@
#******************************************************************************
#
# Copyright (C) 1999-2010, International Business Machines
# Copyright (C) 1999-2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
@ -52,8 +52,8 @@ LDFLAGS += $(LDFLAGSICUTOOLUTIL)
LIBS = $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS)
OBJECTS = filestrm.o package.o pkgitems.o swapimpl.o toolutil.o unewdata.o \
dicttriebuilder.o bytetriebuilder.o bytetrieiterator.o \
uchartrie.o uchartriebuilder.o uchartrieiterator.o \
stringtriebuilder.o bytestriebuilder.o bytestrieiterator.o \
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
denseranges.o \
ucm.o ucmstate.o uoptions.o uparse.o \
ucbuf.o xmlparser.o writesrc.o \

View file

@ -3,21 +3,19 @@
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytetriebuilder.cpp
* file name: bytestriebuilder.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010sep25
* created by: Markus W. Scherer
*
* Builder class for ByteTrie dictionary trie.
*/
#include "unicode/utypes.h"
#include "unicode/stringpiece.h"
#include "bytetrie.h"
#include "bytetriebuilder.h"
#include "bytestrie.h"
#include "bytestriebuilder.h"
#include "charstr.h"
#include "cmemory.h"
#include "uarrsort.h"
@ -26,11 +24,11 @@ U_NAMESPACE_BEGIN
/*
* Note: This builder implementation stores (bytes, value) pairs with full copies
* of the byte sequences, until the ByteTrie is built.
* of the byte sequences, until the BytesTrie is built.
* It might(!) take less memory if we collected the data in a temporary, dynamic trie.
*/
class ByteTrieElement : public UMemory {
class BytesTrieElement : public UMemory {
public:
// Use compiler's default constructor, initializes nothing.
@ -62,7 +60,7 @@ public:
int32_t getValue() const { return value; }
int32_t compareStringTo(const ByteTrieElement &o, const CharString &strings) const;
int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;
private:
const char *data(const CharString &strings) const {
@ -85,8 +83,8 @@ private:
};
void
ByteTrieElement::setTo(const StringPiece &s, int32_t val,
CharString &strings, UErrorCode &errorCode) {
BytesTrieElement::setTo(const StringPiece &s, int32_t val,
CharString &strings, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
@ -108,7 +106,7 @@ ByteTrieElement::setTo(const StringPiece &s, int32_t val,
}
int32_t
ByteTrieElement::compareStringTo(const ByteTrieElement &other, const CharString &strings) const {
BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
// TODO: add StringPiece::compare(), see ticket #8187
StringPiece thisString=getString(strings);
StringPiece otherString=other.getString(strings);
@ -123,13 +121,13 @@ ByteTrieElement::compareStringTo(const ByteTrieElement &other, const CharString
return diff!=0 ? diff : lengthDiff;
}
// Destructor: releases the element array with delete[] and the
// output byte buffer with uprv_free().
ByteTrieBuilder::~ByteTrieBuilder() {
BytesTrieBuilder::~BytesTrieBuilder() {
delete[] elements;
uprv_free(bytes);
}
ByteTrieBuilder &
ByteTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) {
BytesTrieBuilder &
BytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return *this;
}
@ -146,12 +144,12 @@ ByteTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode)
} else {
newCapacity=4*elementsCapacity;
}
ByteTrieElement *newElements=new ByteTrieElement[newCapacity];
BytesTrieElement *newElements=new BytesTrieElement[newCapacity];
if(newElements==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
if(elementsLength>0) {
uprv_memcpy(newElements, elements, elementsLength*sizeof(ByteTrieElement));
uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement));
}
delete[] elements;
elements=newElements;
@ -166,15 +164,15 @@ U_CDECL_BEGIN
// Comparison callback handed to uprv_sortArray() by build().
// context: points to the CharString that is passed on to compareStringTo().
// left, right: point to the two array elements being compared.
// Returns whatever compareStringTo() returns for (left, right).
static int32_t U_CALLCONV
compareElementStrings(const void *context, const void *left, const void *right) {
const CharString *strings=reinterpret_cast<const CharString *>(context);
const ByteTrieElement *leftElement=reinterpret_cast<const ByteTrieElement *>(left);
const ByteTrieElement *rightElement=reinterpret_cast<const ByteTrieElement *>(right);
const BytesTrieElement *leftElement=reinterpret_cast<const BytesTrieElement *>(left);
const BytesTrieElement *rightElement=reinterpret_cast<const BytesTrieElement *>(right);
return leftElement->compareStringTo(*rightElement, *strings);
}
U_CDECL_END
StringPiece
ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode) {
BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
StringPiece result;
if(U_FAILURE(errorCode)) {
return result;
@ -188,7 +186,7 @@ ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode)
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return result;
}
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(ByteTrieElement),
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
compareElementStrings, &strings,
FALSE, // need not be a stable sort
&errorCode);
@ -214,7 +212,7 @@ ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode)
errorCode=U_MEMORY_ALLOCATION_ERROR;
return result;
}
DictTrieBuilder::build(buildOption, elementsLength, errorCode);
StringTrieBuilder::build(buildOption, elementsLength, errorCode);
if(bytes==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
} else {
@ -224,24 +222,24 @@ ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode)
}
// Returns the string length of element i, as reported by the element's
// getStringLength() for the shared strings storage.
int32_t
ByteTrieBuilder::getElementStringLength(int32_t i) const {
BytesTrieBuilder::getElementStringLength(int32_t i) const {
return elements[i].getStringLength(strings);
}
// Returns the byte at byteIndex of element i's string.
// The char from charAt() is cast to uint8_t before being returned as UChar;
// presumably this keeps bytes >=0x80 from being sign-extended -- TODO confirm.
UChar
ByteTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
return (uint8_t)elements[i].charAt(byteIndex, strings);
}
// Returns the value stored with element i.
int32_t
ByteTrieBuilder::getElementValue(int32_t i) const {
BytesTrieBuilder::getElementValue(int32_t i) const {
return elements[i].getValue();
}
int32_t
ByteTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
const ByteTrieElement &firstElement=elements[first];
const ByteTrieElement &lastElement=elements[last];
BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
const BytesTrieElement &firstElement=elements[first];
const BytesTrieElement &lastElement=elements[last];
int32_t minStringLength=firstElement.getStringLength(strings);
while(++byteIndex<minStringLength &&
firstElement.charAt(byteIndex, strings)==
@ -250,7 +248,7 @@ ByteTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byte
}
int32_t
ByteTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
int32_t length=0; // Number of different units at unitIndex.
int32_t i=start;
do {
@ -264,7 +262,7 @@ ByteTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteInd
}
int32_t
ByteTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
do {
char byte=elements[i++].charAt(byteIndex, strings);
while(byte==elements[i].charAt(byteIndex, strings)) {
@ -275,7 +273,7 @@ ByteTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t c
}
int32_t
ByteTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
char b=(char)byte;
while(b==elements[i].charAt(byteIndex, strings)) {
++i;
@ -283,13 +281,13 @@ ByteTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar
return i;
}
// Constructs a linear-match node for len bytes starting at bytes, followed by nextNode.
// Only the pointer is stored in s (the bytes are not copied here), and the
// hash of the len bytes is mixed into the hash value set up by LinearMatchNode.
ByteTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(bytes) {
hash=hash*37+uhash_hashCharsN(bytes, len);
}
UBool
ByteTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
}
@ -301,16 +299,16 @@ ByteTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
}
// Writes this linear-match node through the given builder.
// The builder reference is cast down to the bytes-trie builder type without a check.
// Order of writes: the next node first, then the length matched bytes from s,
// then the lead byte getMinLinearMatch()+length-1. The value returned by the
// last write is recorded in offset.
// NOTE(review): the "following node first" order suggests the output is
// assembled back to front -- confirm against the builder's write() functions.
void
ByteTrieBuilder::BTLinearMatchNode::write(DictTrieBuilder &builder) {
ByteTrieBuilder &b=(ByteTrieBuilder &)builder;
BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
BytesTrieBuilder &b=(BytesTrieBuilder &)builder;
next->write(builder);
b.write(s, length);
offset=b.write(b.getMinLinearMatch()+length-1);
}
DictTrieBuilder::Node *
ByteTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
Node *nextNode) const {
StringTrieBuilder::Node *
BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
Node *nextNode) const {
return new BTLinearMatchNode(
elements[i].getString(strings).data()+byteIndex,
length,
@ -318,7 +316,7 @@ ByteTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t len
}
UBool
ByteTrieBuilder::ensureCapacity(int32_t length) {
BytesTrieBuilder::ensureCapacity(int32_t length) {
if(bytes==NULL) {
return FALSE; // previous memory allocation had failed
}
@ -344,7 +342,7 @@ ByteTrieBuilder::ensureCapacity(int32_t length) {
}
int32_t
ByteTrieBuilder::write(int32_t byte) {
BytesTrieBuilder::write(int32_t byte) {
int32_t newLength=bytesLength+1;
if(ensureCapacity(newLength)) {
bytesLength=newLength;
@ -354,7 +352,7 @@ ByteTrieBuilder::write(int32_t byte) {
}
int32_t
ByteTrieBuilder::write(const char *b, int32_t length) {
BytesTrieBuilder::write(const char *b, int32_t length) {
int32_t newLength=bytesLength+length;
if(ensureCapacity(newLength)) {
bytesLength=newLength;
@ -364,31 +362,31 @@ ByteTrieBuilder::write(const char *b, int32_t length) {
}
// Writes length bytes of element i's string, starting at byteIndex,
// and returns the result of the underlying write(const char *, int32_t).
int32_t
ByteTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
return write(elements[i].getString(strings).data()+byteIndex, length);
}
int32_t
ByteTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
char intBytes[5];
int32_t length=1;
if(i<0 || i>0xffffff) {
intBytes[0]=(char)ByteTrie::kFiveByteValueLead;
intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
intBytes[1]=(char)(i>>24);
intBytes[2]=(char)(i>>16);
intBytes[3]=(char)(i>>8);
intBytes[4]=(char)i;
length=5;
} else if(i<=ByteTrie::kMaxOneByteValue) {
intBytes[0]=(char)(ByteTrie::kMinOneByteValueLead+i);
} else if(i<=BytesTrie::kMaxOneByteValue) {
intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
} else {
if(i<=ByteTrie::kMaxTwoByteValue) {
intBytes[0]=(char)(ByteTrie::kMinTwoByteValueLead+(i>>8));
if(i<=BytesTrie::kMaxTwoByteValue) {
intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
} else {
if(i<=ByteTrie::kMaxThreeByteValue) {
intBytes[0]=(char)(ByteTrie::kMinThreeByteValueLead+(i>>16));
if(i<=BytesTrie::kMaxThreeByteValue) {
intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
} else {
intBytes[0]=(char)ByteTrie::kFourByteValueLead;
intBytes[0]=(char)BytesTrie::kFourByteValueLead;
intBytes[1]=(char)(i>>16);
length=2;
}
@ -401,7 +399,7 @@ ByteTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
}
int32_t
ByteTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
int32_t offset=write(node);
if(hasValue) {
offset=writeValueAndFinal(value, FALSE);
@ -410,26 +408,26 @@ ByteTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node)
}
int32_t
ByteTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
int32_t i=bytesLength-jumpTarget;
char intBytes[5];
int32_t length;
U_ASSERT(i>=0);
if(i<=ByteTrie::kMaxOneByteDelta) {
if(i<=BytesTrie::kMaxOneByteDelta) {
length=0;
} else if(i<=ByteTrie::kMaxTwoByteDelta) {
intBytes[0]=(char)(ByteTrie::kMinTwoByteDeltaLead+(i>>8));
} else if(i<=BytesTrie::kMaxTwoByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
length=1;
} else {
if(i<=ByteTrie::kMaxThreeByteDelta) {
intBytes[0]=(char)(ByteTrie::kMinThreeByteDeltaLead+(i>>16));
if(i<=BytesTrie::kMaxThreeByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
length=2;
} else {
if(i<=0xffffff) {
intBytes[0]=(char)ByteTrie::kFourByteDeltaLead;
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
length=3;
} else {
intBytes[0]=(char)ByteTrie::kFiveByteDeltaLead;
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
intBytes[1]=(char)(i>>24);
length=4;
}

View file

@ -3,42 +3,43 @@
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytetriebuilder.h
* file name: bytestriebuilder.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010sep25
* created by: Markus W. Scherer
*
* Builder class for ByteTrie dictionary trie.
*/
#ifndef __BYTETRIEBUILDER_H__
#define __BYTETRIEBUILDER_H__
#ifndef __BYTESTRIEBUILDER_H__
#define __BYTESTRIEBUILDER_H__
#include "unicode/utypes.h"
#include "unicode/stringpiece.h"
#include "bytetrie.h"
#include "bytestrie.h"
#include "charstr.h"
#include "dicttriebuilder.h"
#include "stringtriebuilder.h"
U_NAMESPACE_BEGIN
class ByteTrieElement;
class BytesTrieElement;
class U_TOOLUTIL_API ByteTrieBuilder : public DictTrieBuilder {
/**
* Builder class for BytesTrie.
*/
class U_TOOLUTIL_API BytesTrieBuilder : public StringTrieBuilder {
public:
ByteTrieBuilder()
BytesTrieBuilder()
: elements(NULL), elementsCapacity(0), elementsLength(0),
bytes(NULL), bytesCapacity(0), bytesLength(0) {}
virtual ~ByteTrieBuilder();
virtual ~BytesTrieBuilder();
ByteTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode);
BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode);
StringPiece build(UDictTrieBuildOption buildOption, UErrorCode &errorCode);
StringPiece build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
ByteTrieBuilder &clear() {
BytesTrieBuilder &clear() {
strings.clear();
elementsLength=0;
bytesLength=0;
@ -58,15 +59,15 @@ private:
virtual UBool matchNodesCanHaveValues() const { return FALSE; }
virtual int32_t getMaxBranchLinearSubNodeLength() const { return ByteTrie::kMaxBranchLinearSubNodeLength; }
virtual int32_t getMinLinearMatch() const { return ByteTrie::kMinLinearMatch; }
virtual int32_t getMaxLinearMatchLength() const { return ByteTrie::kMaxLinearMatchLength; }
virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; }
virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; }
virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; }
class BTLinearMatchNode : public LinearMatchNode {
public:
BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
virtual UBool operator==(const Node &other) const;
virtual void write(DictTrieBuilder &builder);
virtual void write(StringTrieBuilder &builder);
private:
const char *s;
};
@ -83,7 +84,7 @@ private:
virtual int32_t writeDeltaTo(int32_t jumpTarget);
CharString strings;
ByteTrieElement *elements;
BytesTrieElement *elements;
int32_t elementsCapacity;
int32_t elementsLength;
@ -96,4 +97,4 @@ private:
U_NAMESPACE_END
#endif // __BYTETRIEBUILDER_H__
#endif // __BYTESTRIEBUILDER_H__

View file

@ -1,9 +1,9 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytetrieiterator.cpp
* file name: bytestrieiterator.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -14,22 +14,22 @@
#include "unicode/utypes.h"
#include "unicode/stringpiece.h"
#include "bytetrie.h"
#include "bytetrieiterator.h"
#include "bytestrie.h"
#include "bytestrieiterator.h"
#include "charstr.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
// Constructs an iterator over serialized trie bytes.
// bytes_, pos_ and initialPos_ all start at trieBytes; both remaining-match
// lengths start at -1, value_ at 0, and maxLength_ takes maxStringLength.
// errorCode is handed to the stack_ constructor.
ByteTrieIterator::ByteTrieIterator(const void *trieBytes, int32_t maxStringLength,
UErrorCode &errorCode)
BytesTrieIterator::BytesTrieIterator(const void *trieBytes, int32_t maxStringLength,
UErrorCode &errorCode)
: bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
pos_(bytes_), initialPos_(bytes_),
remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
maxLength_(maxStringLength), value_(0), stack_(errorCode) {}
ByteTrieIterator::ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength,
UErrorCode &errorCode)
BytesTrieIterator::BytesTrieIterator(const BytesTrie &trie, int32_t maxStringLength,
UErrorCode &errorCode)
: bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
remainingMatchLength_(trie.remainingMatchLength_),
initialRemainingMatchLength_(trie.remainingMatchLength_),
@ -47,7 +47,7 @@ ByteTrieIterator::ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength
}
}
ByteTrieIterator &ByteTrieIterator::reset() {
BytesTrieIterator &BytesTrieIterator::reset() {
pos_=initialPos_;
remainingMatchLength_=initialRemainingMatchLength_;
int32_t length=remainingMatchLength_+1; // Remaining match length.
@ -62,7 +62,7 @@ ByteTrieIterator &ByteTrieIterator::reset() {
}
UBool
ByteTrieIterator::next(UErrorCode &errorCode) {
BytesTrieIterator::next(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return FALSE;
}
@ -95,14 +95,14 @@ ByteTrieIterator::next(UErrorCode &errorCode) {
}
for(;;) {
int32_t node=*pos++;
if(node>=ByteTrie::kMinValueLead) {
if(node>=BytesTrie::kMinValueLead) {
// Deliver value for the byte sequence so far.
UBool isFinal=(UBool)(node&ByteTrie::kValueIsFinal);
value_=ByteTrie::readValue(pos, node>>1);
UBool isFinal=(UBool)(node&BytesTrie::kValueIsFinal);
value_=BytesTrie::readValue(pos, node>>1);
if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
pos_=NULL;
} else {
pos_=ByteTrie::skipValue(pos, node);
pos_=BytesTrie::skipValue(pos, node);
}
sp_.set(str_.data(), str_.length());
return TRUE;
@ -110,7 +110,7 @@ ByteTrieIterator::next(UErrorCode &errorCode) {
if(maxLength_>0 && str_.length()==maxLength_) {
return truncateAndStop();
}
if(node<ByteTrie::kMinLinearMatch) {
if(node<BytesTrie::kMinLinearMatch) {
if(node==0) {
node=*pos++;
}
@ -120,7 +120,7 @@ ByteTrieIterator::next(UErrorCode &errorCode) {
}
} else {
// Linear-match node, append length bytes to str_.
int32_t length=node-ByteTrie::kMinLinearMatch+1;
int32_t length=node-BytesTrie::kMinLinearMatch+1;
if(maxLength_>0 && str_.length()+length>maxLength_) {
str_.append(reinterpret_cast<const char *>(pos),
maxLength_-str_.length(), errorCode);
@ -134,23 +134,23 @@ ByteTrieIterator::next(UErrorCode &errorCode) {
// Branch node, needs to take the first outbound edge and push state for the rest.
const uint8_t *
ByteTrieIterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
while(length>ByteTrie::kMaxBranchLinearSubNodeLength) {
BytesTrieIterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
while(length>BytesTrie::kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison byte
// Push state for the greater-or-equal edge.
stack_.addElement((int32_t)(ByteTrie::skipDelta(pos)-bytes_), errorCode);
stack_.addElement((int32_t)(BytesTrie::skipDelta(pos)-bytes_), errorCode);
stack_.addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
// Follow the less-than edge.
length>>=1;
pos=ByteTrie::jumpByDelta(pos);
pos=BytesTrie::jumpByDelta(pos);
}
// List of key-value pairs where values are either final values or jump deltas.
// Read the first (key, value) pair.
uint8_t trieByte=*pos++;
int32_t node=*pos++;
UBool isFinal=(UBool)(node&ByteTrie::kValueIsFinal);
int32_t value=ByteTrie::readValue(pos, node>>1);
pos=ByteTrie::skipValue(pos, node);
UBool isFinal=(UBool)(node&BytesTrie::kValueIsFinal);
int32_t value=BytesTrie::readValue(pos, node>>1);
pos=BytesTrie::skipValue(pos, node);
stack_.addElement((int32_t)(pos-bytes_), errorCode);
stack_.addElement(((length-1)<<16)|str_.length(), errorCode);
str_.append((char)trieByte, errorCode);

View file

@ -1,9 +1,9 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytetrieiterator.h
* file name: bytestrieiterator.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -12,32 +12,32 @@
* created by: Markus W. Scherer
*/
#ifndef __BYTETRIEITERATOR_H__
#define __BYTETRIEITERATOR_H__
#ifndef __BYTESTRIEITERATOR_H__
#define __BYTESTRIEITERATOR_H__
/**
* \file
* \brief C++ API: ByteTrie iterator for all of its (byte sequence, value) pairs.
* \brief C++ API: BytesTrie iterator for all of its (byte sequence, value) pairs.
*/
// Needed if and when we change the .dat package index to a ByteTrie,
// Needed if and when we change the .dat package index to a BytesTrie,
// so that icupkg can work with an input package.
#include "unicode/utypes.h"
#include "unicode/stringpiece.h"
#include "bytetrie.h"
#include "bytestrie.h"
#include "charstr.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
/**
* Iterator for all of the (byte sequence, value) pairs in a ByteTrie.
* Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
*/
class U_TOOLUTIL_API ByteTrieIterator : public UMemory {
class U_TOOLUTIL_API BytesTrieIterator : public UMemory {
public:
/**
* Iterates from the root of a byte-serialized ByteTrie.
* Iterates from the root of a byte-serialized BytesTrie.
* @param trieBytes The trie bytes.
* @param maxStringLength If 0, the iterator returns full strings/byte sequences.
* Otherwise, the iterator returns strings with this maximum length.
@ -46,10 +46,10 @@ public:
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
*/
ByteTrieIterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
BytesTrieIterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
/**
* Iterates from the current state of the specified ByteTrie.
* Iterates from the current state of the specified BytesTrie.
* @param trie The trie whose state will be copied for iteration.
* @param maxStringLength If 0, the iterator returns full strings/byte sequences.
* Otherwise, the iterator returns strings with this maximum length.
@ -58,12 +58,12 @@ public:
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
*/
ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
BytesTrieIterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
/**
* Resets this iterator to its initial state.
*/
ByteTrieIterator &reset();
BytesTrieIterator &reset();
/**
* Finds the next (byte sequence, value) pair if there is one.
@ -113,7 +113,7 @@ private:
// The stack stores pairs of integers for backtracking to another
// outbound edge of a branch node.
// The first integer is an offset from ByteTrie.bytes.
// The first integer is an offset from BytesTrie.bytes.
// The second integer has the str.length() from before the node in bits 15..0,
// and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
// (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
@ -123,4 +123,4 @@ private:
U_NAMESPACE_END
#endif // __BYTETRIEITERATOR_H__
#endif // __BYTESTRIEITERATOR_H__

View file

@ -3,51 +3,49 @@
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: dicttriebuilder.cpp
* file name: stringtriebuilder.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010dec24
* created by: Markus W. Scherer
*
* Base class for dictionary-trie builder classes.
*/
#include <typeinfo> // for 'typeid' to work
#include "unicode/utypes.h"
#include "dicttriebuilder.h"
#include "stringtriebuilder.h"
#include "uassert.h"
#include "uhash.h"
U_CDECL_BEGIN
// Hash callback given to uhash_openSize() in createCompactBuilder().
// Forwards the key's pointer to the builder's static hashNode().
static int32_t U_CALLCONV
hashDictTrieNode(const UHashTok key) {
return U_NAMESPACE_QUALIFIER DictTrieBuilder::hashNode(key.pointer);
hashStringTrieNode(const UHashTok key) {
return U_NAMESPACE_QUALIFIER StringTrieBuilder::hashNode(key.pointer);
}
// Key-comparison callback given to uhash_openSize() in createCompactBuilder().
// Forwards both keys' pointers to the builder's static equalNodes().
static UBool U_CALLCONV
equalDictTrieNodes(const UHashTok key1, const UHashTok key2) {
return U_NAMESPACE_QUALIFIER DictTrieBuilder::equalNodes(key1.pointer, key2.pointer);
equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
return U_NAMESPACE_QUALIFIER StringTrieBuilder::equalNodes(key1.pointer, key2.pointer);
}
U_CDECL_END
U_NAMESPACE_BEGIN
// Constructor: the nodes hash table starts out NULL; createCompactBuilder() opens it.
DictTrieBuilder::DictTrieBuilder() : nodes(NULL) {}
StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {}
// Destructor: closes the nodes hash table via deleteCompactBuilder().
DictTrieBuilder::~DictTrieBuilder() {
StringTrieBuilder::~StringTrieBuilder() {
deleteCompactBuilder();
}
void
DictTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
nodes=uhash_openSize(hashDictTrieNode, equalDictTrieNodes, NULL,
nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL,
sizeGuess, &errorCode);
if(U_SUCCESS(errorCode) && nodes==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
@ -58,17 +56,17 @@ DictTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode)
}
// Closes the nodes hash table with uhash_close() and resets the pointer to NULL.
// Also called from the destructor.
void
DictTrieBuilder::deleteCompactBuilder() {
StringTrieBuilder::deleteCompactBuilder() {
uhash_close(nodes);
nodes=NULL;
}
void
DictTrieBuilder::build(UDictTrieBuildOption buildOption, int32_t elementsLength,
StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
UErrorCode &errorCode) {
if(buildOption==UDICTTRIE_BUILD_FAST) {
if(buildOption==USTRINGTRIE_BUILD_FAST) {
writeNode(0, elementsLength, 0);
} else /* UDICTTRIE_BUILD_SMALL */ {
} else /* USTRINGTRIE_BUILD_SMALL */ {
createCompactBuilder(2*elementsLength, errorCode);
Node *root=makeNode(0, elementsLength, 0, errorCode);
if(U_SUCCESS(errorCode)) {
@ -83,7 +81,7 @@ DictTrieBuilder::build(UDictTrieBuildOption buildOption, int32_t elementsLength,
// and all strings of the [start..limit[ elements must be sorted and
// have a common prefix of length unitIndex.
int32_t
DictTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
UBool hasValue=FALSE;
int32_t value=0;
int32_t type;
@ -131,7 +129,7 @@ DictTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
// start<limit && all strings longer than unitIndex &&
// length different units at unitIndex
int32_t
DictTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
UChar middleUnits[kMaxSplitBranchLevels];
int32_t lessThan[kMaxSplitBranchLevels];
int32_t ltLength=0;
@ -203,8 +201,8 @@ DictTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIn
// Requires start<limit,
// and all strings of the [start..limit[ elements must be sorted and
// have a common prefix of length unitIndex.
DictTrieBuilder::Node *
DictTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
StringTrieBuilder::Node *
StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return NULL;
}
@ -255,8 +253,8 @@ DictTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErro
// start<limit && all strings longer than unitIndex &&
// length different units at unitIndex
DictTrieBuilder::Node *
DictTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
StringTrieBuilder::Node *
StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
int32_t length, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return NULL;
@ -314,8 +312,8 @@ DictTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitInd
return node;
}
DictTrieBuilder::Node *
DictTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
StringTrieBuilder::Node *
StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
delete newNode;
return NULL;
@ -343,8 +341,8 @@ DictTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
return newNode;
}
DictTrieBuilder::Node *
DictTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
StringTrieBuilder::Node *
StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return NULL;
}
@ -373,34 +371,34 @@ DictTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
}
// Hash function over an opaque Node pointer: casts the argument to
// const Node * and returns that node's hashCode().
// NOTE(review): the declared return type is UBool although the value returned
// is a hash code; presumably this should be a wider integer type (e.g. int32_t)
// so that the hash is not narrowed -- confirm against the declaration in the
// header and against the hash table that uses this callback.
UBool
DictTrieBuilder::hashNode(const void *node) {
StringTrieBuilder::hashNode(const void *node) {
return ((const Node *)node)->hashCode();
}
// Equality function over two opaque Node pointers: dereferences both as
// const Node and compares them with Node::operator==.
// Both pointers are dereferenced unconditionally, so neither may be NULL.
UBool
DictTrieBuilder::equalNodes(const void *left, const void *right) {
StringTrieBuilder::equalNodes(const void *left, const void *right) {
return *(const Node *)left==*(const Node *)right;
}
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(DictTrieBuilder)
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(StringTrieBuilder)
// Base-class node comparison.
// Returns TRUE if other is this very object, or if both objects have the same
// dynamic type (typeid) and the same stored hash value.
// Equal hash values alone do not prove equal contents; this base version
// compares nothing beyond type and hash.
UBool
DictTrieBuilder::Node::operator==(const Node &other) const {
StringTrieBuilder::Node::operator==(const Node &other) const {
return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
}
// Base-class implementation of the edge-marking pass.
// If this node has not been given an offset yet (offset==0), records
// edgeNumber as its offset; an already-set offset is left untouched.
// Always returns edgeNumber unchanged.
int32_t
DictTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
if(offset==0) {
offset=edgeNumber;
}
return edgeNumber;
}
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(DictTrieBuilder::Node)
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(StringTrieBuilder::Node)
UBool
DictTrieBuilder::FinalValueNode::operator==(const Node &other) const {
StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
}
@ -412,12 +410,12 @@ DictTrieBuilder::FinalValueNode::operator==(const Node &other) const {
}
// Serializes this node by asking the builder to write its value with the
// second writeValueAndFinal() argument set to TRUE, and stores the builder's
// return value as this node's offset.
void
DictTrieBuilder::FinalValueNode::write(DictTrieBuilder &builder) {
StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
offset=builder.writeValueAndFinal(value, TRUE);
}
UBool
DictTrieBuilder::ValueNode::operator==(const Node &other) const {
StringTrieBuilder::ValueNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
}
@ -429,7 +427,7 @@ DictTrieBuilder::ValueNode::operator==(const Node &other) const {
}
UBool
DictTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
}
@ -441,7 +439,7 @@ DictTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
}
int32_t
DictTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
if(offset==0) {
offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
}
@ -449,13 +447,13 @@ DictTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber)
}
// Serializes this node: the following node (next) is written first, then this
// node's value is written with the second writeValueAndFinal() argument set to
// FALSE. The builder's return value becomes this node's offset.
void
DictTrieBuilder::IntermediateValueNode::write(DictTrieBuilder &builder) {
StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
// The sub-node must be written before this node's own value.
next->write(builder);
offset=builder.writeValueAndFinal(value, FALSE);
}
UBool
DictTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
}
@ -467,7 +465,7 @@ DictTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
}
int32_t
DictTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
if(offset==0) {
offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
}
@ -475,7 +473,7 @@ DictTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
}
UBool
DictTrieBuilder::ListBranchNode::operator==(const Node &other) const {
StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
}
@ -492,7 +490,7 @@ DictTrieBuilder::ListBranchNode::operator==(const Node &other) const {
}
int32_t
DictTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
if(offset==0) {
firstEdgeNumber=edgeNumber;
int32_t step=0;
@ -511,7 +509,7 @@ DictTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
}
void
DictTrieBuilder::ListBranchNode::write(DictTrieBuilder &builder) {
StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
// Write the sub-nodes in reverse order: The jump lengths are deltas from
// after their own positions, so if we wrote the minUnit sub-node first,
// then its jump delta would be larger.
@ -554,7 +552,7 @@ DictTrieBuilder::ListBranchNode::write(DictTrieBuilder &builder) {
}
UBool
DictTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
}
@ -566,7 +564,7 @@ DictTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
}
int32_t
DictTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
if(offset==0) {
firstEdgeNumber=edgeNumber;
edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber);
@ -576,7 +574,7 @@ DictTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
}
void
DictTrieBuilder::SplitBranchNode::write(DictTrieBuilder &builder) {
StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
// Encode the less-than branch first.
lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
// Encode the greater-or-equal branch last because we do not jump for it at all.
@ -588,7 +586,7 @@ DictTrieBuilder::SplitBranchNode::write(DictTrieBuilder &builder) {
}
UBool
DictTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
}
@ -600,7 +598,7 @@ DictTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
}
int32_t
DictTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
if(offset==0) {
offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
}
@ -608,7 +606,7 @@ DictTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
}
void
DictTrieBuilder::BranchHeadNode::write(DictTrieBuilder &builder) {
StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
next->write(builder);
if(length<=builder.getMinLinearMatch()) {
offset=builder.writeValueAndType(hasValue, value, length-1);

View file

@ -3,32 +3,33 @@
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: dicttriebuilder.h
* file name: stringtriebuilder.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010dec24
* created by: Markus W. Scherer
*
* Base class for dictionary-trie builder classes.
*/
#ifndef __DICTTRIEBUILDER_H__
#define __DICTTRIEBUILDER_H__
#ifndef __STRINGTRIEBUILDER_H__
#define __STRINGTRIEBUILDER_H__
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "uhash.h"
enum UDictTrieBuildOption {
UDICTTRIE_BUILD_FAST,
UDICTTRIE_BUILD_SMALL
enum UStringTrieBuildOption {
USTRINGTRIE_BUILD_FAST,
USTRINGTRIE_BUILD_SMALL
};
U_NAMESPACE_BEGIN
class U_TOOLUTIL_API DictTrieBuilder : public UObject {
/**
* Base class for string trie builder classes.
*/
class U_TOOLUTIL_API StringTrieBuilder : public UObject {
public:
/** @internal */
static UBool hashNode(const void *node);
@ -36,13 +37,13 @@ public:
static UBool equalNodes(const void *left, const void *right);
protected:
DictTrieBuilder();
virtual ~DictTrieBuilder();
StringTrieBuilder();
virtual ~StringTrieBuilder();
void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
void deleteCompactBuilder();
void build(UDictTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
int32_t writeNode(int32_t start, int32_t limit, int32_t byteIndex);
int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t byteIndex, int32_t length);
@ -157,10 +158,10 @@ protected:
*/
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
// write() must set the offset to a positive value.
virtual void write(DictTrieBuilder &builder) = 0;
virtual void write(StringTrieBuilder &builder) = 0;
// See markRightEdgesFirst.
inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
DictTrieBuilder &builder) {
StringTrieBuilder &builder) {
// Note: Edge numbers are negative, lastRight<=firstRight.
// If offset>0 then this node and its sub-nodes have been written already
// and we need not write them again.
@ -189,7 +190,7 @@ protected:
public:
FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
virtual UBool operator==(const Node &other) const;
virtual void write(DictTrieBuilder &builder);
virtual void write(StringTrieBuilder &builder);
protected:
int32_t value;
};
@ -214,7 +215,7 @@ protected:
: ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
virtual void write(DictTrieBuilder &builder);
virtual void write(StringTrieBuilder &builder);
protected:
Node *next;
};
@ -243,7 +244,7 @@ protected:
ListBranchNode() : BranchNode(0x444444), length(0) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
virtual void write(DictTrieBuilder &builder);
virtual void write(StringTrieBuilder &builder);
// Adds a unit with a final value.
void add(int32_t c, int32_t value) {
units[length]=(UChar)c;
@ -275,7 +276,7 @@ protected:
unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
virtual void write(DictTrieBuilder &builder);
virtual void write(StringTrieBuilder &builder);
protected:
UChar unit;
Node *lessThan;
@ -290,7 +291,7 @@ protected:
length(len), next(subNode) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
virtual void write(DictTrieBuilder &builder);
virtual void write(StringTrieBuilder &builder);
protected:
int32_t length;
Node *next; // A branch sub-node.
@ -312,4 +313,4 @@ private:
U_NAMESPACE_END
#endif // __DICTTRIEBUILDER_H__
#endif // __STRINGTRIEBUILDER_H__

View file

@ -246,10 +246,9 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="bytetriebuilder.cpp" />
<ClCompile Include="bytetrieiterator.cpp" />
<ClCompile Include="bytestriebuilder.cpp" />
<ClCompile Include="bytestrieiterator.cpp" />
<ClCompile Include="denseranges.cpp" />
<ClCompile Include="dicttriebuilder.cpp" />
<ClCompile Include="filestrm.c" />
<ClCompile Include="filetools.cpp" />
<ClCompile Include="flagparser.c" />
@ -268,6 +267,7 @@
</ClCompile>
<ClCompile Include="pkg_icu.cpp" />
<ClCompile Include="pkgitems.cpp" />
<ClCompile Include="stringtriebuilder.cpp" />
<ClCompile Include="swapimpl.cpp" />
<ClCompile Include="toolutil.cpp">
<DisableLanguageExtensions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</DisableLanguageExtensions>
@ -276,9 +276,9 @@
<DisableLanguageExtensions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</DisableLanguageExtensions>
</ClCompile>
<ClCompile Include="ucbuf.c" />
<ClCompile Include="uchartrie.cpp" />
<ClCompile Include="uchartriebuilder.cpp" />
<ClCompile Include="uchartrieiterator.cpp" />
<ClCompile Include="ucharstrie.cpp" />
<ClCompile Include="ucharstriebuilder.cpp" />
<ClCompile Include="ucharstrieiterator.cpp" />
<ClCompile Include="ucm.c" />
<ClCompile Include="ucmstate.c" />
<ClCompile Include="unewdata.c" />
@ -296,10 +296,9 @@
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="bytetriebuilder.h" />
<ClInclude Include="bytetrieiterator.h" />
<ClInclude Include="bytestriebuilder.h" />
<ClInclude Include="bytestrieiterator.h" />
<ClInclude Include="denseranges.h" />
<ClInclude Include="dicttriebuilder.h" />
<ClInclude Include="filestrm.h" />
<ClInclude Include="filetools.h" />
<ClInclude Include="flagparser.h" />
@ -309,12 +308,13 @@
<ClInclude Include="pkg_icu.h" />
<ClInclude Include="pkg_imp.h" />
<ClInclude Include="platform_xopen_source_extended.h" />
<ClInclude Include="stringtriebuilder.h" />
<ClInclude Include="swapimpl.h" />
<ClInclude Include="toolutil.h" />
<ClInclude Include="ucbuf.h" />
<ClInclude Include="uchartrie.h" />
<ClInclude Include="uchartriebuilder.h" />
<ClInclude Include="uchartrieiterator.h" />
<ClInclude Include="ucharstrie.h" />
<ClInclude Include="ucharstriebuilder.h" />
<ClInclude Include="ucharstrieiterator.h" />
<ClInclude Include="ucm.h" />
<ClInclude Include="unewdata.h" />
<ClInclude Include="uoptions.h" />

View file

@ -1,9 +1,9 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uchartrie.h
* file name: ucharstrie.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -15,7 +15,7 @@
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "uassert.h"
#include "uchartrie.h"
#include "ucharstrie.h"
U_NAMESPACE_BEGIN
@ -48,20 +48,20 @@ Appendable::append(const UChar *s, int32_t length) {
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Appendable)
// Reports the result for the current trie position without consuming input.
// Returns the no-match result if there is no current position (pos_ is NULL).
// Otherwise a value result is returned only when remainingMatchLength_<0 and
// the unit at the current position is a value lead (>=kMinValueLead);
// in every other case the no-value result is returned.
UDictTrieResult
UCharTrie::current() const {
UStringTrieResult
UCharsTrie::current() const {
const UChar *pos=pos_;
if(pos==NULL) {
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
} else {
int32_t node;
// node is only assigned (and *pos only read) when remainingMatchLength_<0.
return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : UDICTTRIE_NO_VALUE;
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
}
UDictTrieResult
UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
UStringTrieResult
UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
// Branch according to the current unit.
if(length==0) {
length=*pos++;
@ -83,11 +83,11 @@ UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
// and divides length by 2.
do {
if(uchar==*pos++) {
UDictTrieResult result;
UStringTrieResult result;
int32_t node=*pos;
if(node&kValueIsFinal) {
// Leave the final value for getValue() to read.
result=UDICTTRIE_HAS_FINAL_VALUE;
result=USTRINGTRIE_FINAL_VALUE;
} else {
// Use the non-final value as the jump delta.
++pos;
@ -104,7 +104,7 @@ UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
// end readValue()
pos+=delta;
node=*pos;
result= node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
}
pos_=pos;
return result;
@ -115,15 +115,15 @@ UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
if(uchar==*pos++) {
pos_=pos;
int32_t node=*pos;
return node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
}
UDictTrieResult
UCharTrie::nextImpl(const UChar *pos, int32_t uchar) {
UStringTrieResult
UCharsTrie::nextImpl(const UChar *pos, int32_t uchar) {
int32_t node=*pos++;
for(;;) {
if(node<kMinLinearMatch) {
@ -135,7 +135,7 @@ UCharTrie::nextImpl(const UChar *pos, int32_t uchar) {
remainingMatchLength_=--length;
pos_=pos;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : UDICTTRIE_NO_VALUE;
valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
// No match.
break;
@ -150,14 +150,14 @@ UCharTrie::nextImpl(const UChar *pos, int32_t uchar) {
}
}
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
UDictTrieResult
UCharTrie::next(int32_t uchar) {
UStringTrieResult
UCharsTrie::next(int32_t uchar) {
const UChar *pos=pos_;
if(pos==NULL) {
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
if(length>=0) {
@ -167,24 +167,24 @@ UCharTrie::next(int32_t uchar) {
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : UDICTTRIE_NO_VALUE;
valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
}
return nextImpl(pos, uchar);
}
UDictTrieResult
UCharTrie::next(const UChar *s, int32_t sLength) {
UStringTrieResult
UCharsTrie::next(const UChar *s, int32_t sLength) {
if(sLength<0 ? *s==0 : sLength==0) {
// Empty input.
return current();
}
const UChar *pos=pos_;
if(pos==NULL) {
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
for(;;) {
@ -198,7 +198,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : UDICTTRIE_NO_VALUE;
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
if(length<0) {
remainingMatchLength_=length;
@ -206,7 +206,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
}
if(uchar!=*pos) {
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
@ -218,7 +218,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : UDICTTRIE_NO_VALUE;
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
uchar=*s++;
--sLength;
@ -228,7 +228,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
}
if(uchar!=*pos) {
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
@ -237,9 +237,9 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
int32_t node=*pos++;
for(;;) {
if(node<kMinLinearMatch) {
UDictTrieResult result=branchNext(pos, node, uchar);
if(result==UDICTTRIE_NO_MATCH) {
return UDICTTRIE_NO_MATCH;
UStringTrieResult result=branchNext(pos, node, uchar);
if(result==USTRINGTRIE_NO_MATCH) {
return USTRINGTRIE_NO_MATCH;
}
// Fetch the next input unit, if there is one.
if(sLength<0) {
@ -253,10 +253,10 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
uchar=*s++;
--sLength;
}
if(result==UDICTTRIE_HAS_FINAL_VALUE) {
if(result==USTRINGTRIE_FINAL_VALUE) {
// No further matching units.
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
node=*pos++;
@ -265,7 +265,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
length=node-kMinLinearMatch; // Actual match length minus 1.
if(uchar!=*pos) {
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
@ -273,7 +273,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
} else if(node&kValueIsFinal) {
// No further matching units.
stop();
return UDICTTRIE_NO_MATCH;
return USTRINGTRIE_NO_MATCH;
} else {
// Skip intermediate value.
pos=skipNodeValue(pos, node);
@ -284,8 +284,8 @@ UCharTrie::next(const UChar *s, int32_t sLength) {
}
const UChar *
UCharTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
UBool haveUniqueValue, int32_t &uniqueValue) {
UCharsTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
UBool haveUniqueValue, int32_t &uniqueValue) {
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison unit
if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
@ -322,7 +322,7 @@ UCharTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
}
UBool
UCharTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
UCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
int32_t node=*pos++;
for(;;) {
if(node<kMinLinearMatch) {
@ -365,7 +365,7 @@ UCharTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uni
}
int32_t
UCharTrie::getNextUChars(Appendable &out) const {
UCharsTrie::getNextUChars(Appendable &out) const {
const UChar *pos=pos_;
if(pos==NULL) {
return 0;
@ -397,7 +397,7 @@ UCharTrie::getNextUChars(Appendable &out) const {
}
void
UCharTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
UCharsTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison unit
getNextBranchUChars(jumpByDelta(pos), length>>1, out);

View file

@ -1,9 +1,9 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uchartrie.h
* file name: ucharstrie.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -12,24 +12,24 @@
* created by: Markus W. Scherer
*/
#ifndef __UCHARTRIE_H__
#define __UCHARTRIE_H__
#ifndef __UCHARSTRIE_H__
#define __UCHARSTRIE_H__
/**
* \file
* \brief C++ API: Dictionary trie for mapping Unicode strings (or 16-bit-unit sequences)
* \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences)
* to integer values.
*/
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "uassert.h"
#include "udicttrie.h"
#include "ustringtrie.h"
U_NAMESPACE_BEGIN
class UCharTrieBuilder;
class UCharTrieIterator;
class UCharsTrieBuilder;
class UCharsTrieIterator;
/**
* Base class for objects to which Unicode characters and strings can be appended.
@ -67,34 +67,34 @@ private:
};
/**
* Light-weight, non-const reader class for a UCharTrie.
* Light-weight, non-const reader class for a UCharsTrie.
* Traverses a UChar-serialized data structure with minimal state,
* for mapping strings (16-bit-unit sequences) to non-negative integer values.
*/
class U_TOOLUTIL_API UCharTrie : public UMemory {
class U_TOOLUTIL_API UCharsTrie : public UMemory {
public:
UCharTrie(const UChar *trieUChars)
UCharsTrie(const UChar *trieUChars)
: uchars_(trieUChars),
pos_(uchars_), remainingMatchLength_(-1) {}
/**
* Resets this trie to its initial state:
* the position is moved back to the start of the serialized units (uchars_)
* and remainingMatchLength_ is set to -1, the same values the constructor sets.
* @return *this
*/
UCharTrie &reset() {
UCharsTrie &reset() {
pos_=uchars_;
remainingMatchLength_=-1;
return *this;
}
/**
* UCharTrie state object, for saving a trie's current state
* UCharsTrie state object, for saving a trie's current state
* and resetting the trie back to this state later.
*/
class State : public UMemory {
public:
State() { uchars=NULL; }
private:
friend class UCharTrie;
friend class UCharsTrie;
const UChar *uchars;
const UChar *pos;
@ -105,7 +105,7 @@ public:
* Saves the state of this trie.
* @see resetToState
*/
const UCharTrie &saveState(State &state) const {
const UCharsTrie &saveState(State &state) const {
state.uchars=uchars_;
state.pos=pos_;
state.remainingMatchLength=remainingMatchLength_;
@ -119,7 +119,7 @@ public:
* @see saveState
* @see reset
*/
UCharTrie &resetToState(const State &state) {
UCharsTrie &resetToState(const State &state) {
if(uchars_==state.uchars && uchars_!=NULL) {
pos_=state.pos;
remainingMatchLength_=state.remainingMatchLength;
@ -132,14 +132,14 @@ public:
* and whether another input UChar can continue a matching string.
* @return The match/value Result.
*/
UDictTrieResult current() const;
UStringTrieResult current() const;
/**
* Traverses the trie from the initial state for this input UChar.
* Equivalent to reset().next(uchar).
* Does not read pos_: sets remainingMatchLength_ to -1 and starts
* nextImpl() at the beginning of the serialized units (uchars_).
* @param uchar The input unit.
* @return The match/value Result.
*/
inline UDictTrieResult first(int32_t uchar) {
inline UStringTrieResult first(int32_t uchar) {
remainingMatchLength_=-1;
return nextImpl(uchars_, uchar);
}
@ -150,31 +150,31 @@ public:
* Equivalent to reset().nextForCodePoint(cp).
* @return The match/value Result.
*/
inline UDictTrieResult firstForCodePoint(UChar32 cp) {
inline UStringTrieResult firstForCodePoint(UChar32 cp) {
// A code point up to U+FFFF is a single unit and goes straight to first().
// A larger code point is split into U16_LEAD(cp) and U16_TRAIL(cp):
// the trail unit is only passed to next() if the result for the lead unit
// passes the continuation check; otherwise the no-match result is returned.
return cp<=0xffff ?
first(cp) :
(first(U16_LEAD(cp))!=UDICTTRIE_NO_MATCH ?
(USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
next(U16_TRAIL(cp)) :
UDICTTRIE_NO_MATCH);
USTRINGTRIE_NO_MATCH);
}
/**
* Traverses the trie from the current state for this input UChar.
* @return The match/value Result.
*/
UDictTrieResult next(int32_t uchar);
UStringTrieResult next(int32_t uchar);
/**
* Traverses the trie from the current state for the
* one or two UTF-16 code units for this input code point.
* A code point up to U+FFFF is passed to next() as a single unit;
* a larger one is split with U16_LEAD()/U16_TRAIL(), and the trail unit
* is only passed to next() if the result for the lead unit passes the
* continuation check; otherwise the no-match result is returned.
* @param cp The input code point.
* @return The match/value Result.
*/
inline UDictTrieResult nextForCodePoint(UChar32 cp) {
inline UStringTrieResult nextForCodePoint(UChar32 cp) {
return cp<=0xffff ?
next(cp) :
(next(U16_LEAD(cp))!=UDICTTRIE_NO_MATCH ?
(USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
next(U16_TRAIL(cp)) :
UDICTTRIE_NO_MATCH);
USTRINGTRIE_NO_MATCH);
}
/**
@ -183,19 +183,20 @@ public:
* \code
* Result result=current();
* for(each c in s)
* if((result=next(c))==UDICTTRIE_NO_MATCH) return UDICTTRIE_NO_MATCH;
* if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
* result=next(c);
* return result;
* \endcode
* @return The match/value Result.
*/
UDictTrieResult next(const UChar *s, int32_t length);
UStringTrieResult next(const UChar *s, int32_t length);
/**
* Returns a matching string's value if called immediately after
* current()/first()/next() returned UDICTTRIE_HAS_VALUE or UDICTTRIE_HAS_FINAL_VALUE.
* current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
* getValue() can be called multiple times.
*
* Do not call getValue() after UDICTTRIE_NO_MATCH or UDICTTRIE_NO_VALUE!
* Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
*/
inline int32_t getValue() const {
const UChar *pos=pos_;
@ -221,7 +222,7 @@ public:
/**
* Finds each UChar which continues the string from the current state.
* That is, each UChar c for which it would be next(c)!=UDICTTRIE_NO_MATCH now.
* That is, each UChar c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now.
* @param out Each next UChar is appended to this object.
* (Only uses the out.append(c) method.)
* @return the number of UChars which continue the string from here
@ -229,8 +230,8 @@ public:
int32_t getNextUChars(Appendable &out) const;
private:
friend class UCharTrieBuilder;
friend class UCharTrieIterator;
friend class UCharsTrieBuilder;
friend class UCharsTrieIterator;
inline void stop() {
pos_=NULL;
@ -313,15 +314,15 @@ private:
return pos;
}
// Maps a value-lead unit to a result code by subtracting (node>>15) from the
// "has value, not final" result constant.
// NOTE(review): presumably node is a 16-bit unit so node>>15 is 0 or 1,
// bit 15 is the final-value flag, and the "final value" enum constant is
// exactly one less than the non-final one -- confirm against the result enum.
static inline UDictTrieResult valueResult(int32_t node) {
return (UDictTrieResult)(UDICTTRIE_HAS_VALUE-(node>>15));
static inline UStringTrieResult valueResult(int32_t node) {
return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15));
}
// Handles a branch node for both next(uchar) and next(string).
UDictTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar);
UStringTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar);
// Requires remainingMatchLength_<0.
UDictTrieResult nextImpl(const UChar *pos, int32_t uchar);
UStringTrieResult nextImpl(const UChar *pos, int32_t uchar);
// Helper functions for hasUniqueValue().
// Recursively finds a unique value (or whether there is not a unique one)
@ -336,7 +337,7 @@ private:
// getNextUChars() when pos is on a branch node.
static void getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out);
// UCharTrie data structure
// UCharsTrie data structure
//
// The trie consists of a series of UChar-serialized nodes for incremental
// Unicode string/UChar sequence matching. (UChar=16-bit unsigned integer)
@ -417,7 +418,7 @@ private:
static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff
// Fixed value referencing the UCharTrie words.
// Fixed value referencing the UCharsTrie words.
const UChar *uchars_;
// Iterator variables.
@ -430,4 +431,4 @@ private:
U_NAMESPACE_END
#endif // __UCHARTRIE_H__
#endif // __UCHARSTRIE_H__

View file

@ -3,15 +3,13 @@
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uchartriebuilder.h
* file name: ucharstriebuilder.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010nov14
* created by: Markus W. Scherer
*
* Builder class for UCharTrie dictionary trie.
*/
#include "unicode/utypes.h"
@ -19,18 +17,18 @@
#include "unicode/ustring.h"
#include "cmemory.h"
#include "uarrsort.h"
#include "uchartrie.h"
#include "uchartriebuilder.h"
#include "ucharstrie.h"
#include "ucharstriebuilder.h"
U_NAMESPACE_BEGIN
/*
* Note: This builder implementation stores (string, value) pairs with full copies
* of the 16-bit-unit sequences, until the UCharTrie is built.
* of the 16-bit-unit sequences, until the UCharsTrie is built.
* It might(!) take less memory if we collected the data in a temporary, dynamic trie.
*/
class UCharTrieElement : public UMemory {
class UCharsTrieElement : public UMemory {
public:
// Use compiler's default constructor, initializes nothing.
@ -50,7 +48,7 @@ public:
int32_t getValue() const { return value; }
int32_t compareStringTo(const UCharTrieElement &o, const UnicodeString &strings) const;
int32_t compareStringTo(const UCharsTrieElement &o, const UnicodeString &strings) const;
private:
// The first strings unit contains the string length.
@ -60,8 +58,8 @@ private:
};
void
UCharTrieElement::setTo(const UnicodeString &s, int32_t val,
UnicodeString &strings, UErrorCode &errorCode) {
UCharsTrieElement::setTo(const UnicodeString &s, int32_t val,
UnicodeString &strings, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
@ -78,17 +76,17 @@ UCharTrieElement::setTo(const UnicodeString &s, int32_t val,
}
// Compares this element's string with the other element's string.
// Both strings are looked up in the same shared storage (strings) via
// getString(); the result is whatever UnicodeString::compare() returns.
int32_t
UCharTrieElement::compareStringTo(const UCharTrieElement &other, const UnicodeString &strings) const {
UCharsTrieElement::compareStringTo(const UCharsTrieElement &other, const UnicodeString &strings) const {
return getString(strings).compare(other.getString(strings));
}
// Releases the builder's two owned buffers.
// elements is an array allocated with new[] in add(), hence delete[].
// uchars is released with uprv_free(); presumably it is allocated with the
// matching uprv_malloc() elsewhere in this file -- TODO confirm.
UCharTrieBuilder::~UCharTrieBuilder() {
UCharsTrieBuilder::~UCharsTrieBuilder() {
delete[] elements;
uprv_free(uchars);
}
UCharTrieBuilder &
UCharTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) {
UCharsTrieBuilder &
UCharsTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return *this;
}
@ -105,12 +103,12 @@ UCharTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCo
} else {
newCapacity=4*elementsCapacity;
}
UCharTrieElement *newElements=new UCharTrieElement[newCapacity];
UCharsTrieElement *newElements=new UCharsTrieElement[newCapacity];
if(newElements==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
if(elementsLength>0) {
uprv_memcpy(newElements, elements, elementsLength*sizeof(UCharTrieElement));
uprv_memcpy(newElements, elements, elementsLength*sizeof(UCharsTrieElement));
}
delete[] elements;
elements=newElements;
@ -128,15 +126,15 @@ U_CDECL_BEGIN
// Comparison callback that build() passes to uprv_sortArray().
// context points to the UnicodeString that stores all element strings;
// left and right point to the two elements being compared.
// Returns the result of comparing the left element's string to the right one's.
static int32_t U_CALLCONV
compareElementStrings(const void *context, const void *left, const void *right) {
const UnicodeString *strings=reinterpret_cast<const UnicodeString *>(context);
const UCharTrieElement *leftElement=reinterpret_cast<const UCharTrieElement *>(left);
const UCharTrieElement *rightElement=reinterpret_cast<const UCharTrieElement *>(right);
const UCharsTrieElement *leftElement=reinterpret_cast<const UCharsTrieElement *>(left);
const UCharsTrieElement *rightElement=reinterpret_cast<const UCharsTrieElement *>(right);
return leftElement->compareStringTo(*rightElement, *strings);
}
U_CDECL_END
UnicodeString &
UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode) {
UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return result;
}
@ -153,7 +151,7 @@ UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result,
errorCode=U_MEMORY_ALLOCATION_ERROR;
return result;
}
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharTrieElement),
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement),
compareElementStrings, &strings,
FALSE, // need not be a stable sort
&errorCode);
@ -179,7 +177,7 @@ UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result,
errorCode=U_MEMORY_ALLOCATION_ERROR;
return result;
}
DictTrieBuilder::build(buildOption, elementsLength, errorCode);
StringTrieBuilder::build(buildOption, elementsLength, errorCode);
if(uchars==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
} else {
@ -189,24 +187,24 @@ UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result,
}
int32_t
UCharTrieBuilder::getElementStringLength(int32_t i) const {
UCharsTrieBuilder::getElementStringLength(int32_t i) const {
return elements[i].getStringLength(strings);
}
UChar
UCharTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const {
UCharsTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const {
return elements[i].charAt(unitIndex, strings);
}
int32_t
UCharTrieBuilder::getElementValue(int32_t i) const {
UCharsTrieBuilder::getElementValue(int32_t i) const {
return elements[i].getValue();
}
int32_t
UCharTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const {
const UCharTrieElement &firstElement=elements[first];
const UCharTrieElement &lastElement=elements[last];
UCharsTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const {
const UCharsTrieElement &firstElement=elements[first];
const UCharsTrieElement &lastElement=elements[last];
int32_t minStringLength=firstElement.getStringLength(strings);
while(++unitIndex<minStringLength &&
firstElement.charAt(unitIndex, strings)==
@ -215,7 +213,7 @@ UCharTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t uni
}
int32_t
UCharTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const {
UCharsTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const {
int32_t length=0; // Number of different units at unitIndex.
int32_t i=start;
do {
@ -229,7 +227,7 @@ UCharTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t unitIn
}
int32_t
UCharTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const {
UCharsTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const {
do {
UChar unit=elements[i++].charAt(unitIndex, strings);
while(unit==elements[i].charAt(unitIndex, strings)) {
@ -240,20 +238,20 @@ UCharTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t
}
int32_t
UCharTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const {
UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const {
while(unit==elements[i].charAt(unitIndex, strings)) {
++i;
}
return i;
}
UCharTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(units) {
hash=hash*37+uhash_hashUCharsN(units, len);
}
UBool
UCharTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
UCharsTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
}
@ -265,16 +263,16 @@ UCharTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
}
void
UCharTrieBuilder::UCTLinearMatchNode::write(DictTrieBuilder &builder) {
UCharTrieBuilder &b=(UCharTrieBuilder &)builder;
UCharsTrieBuilder::UCTLinearMatchNode::write(StringTrieBuilder &builder) {
UCharsTrieBuilder &b=(UCharsTrieBuilder &)builder;
next->write(builder);
b.write(s, length);
offset=b.writeValueAndType(hasValue, value, b.getMinLinearMatch()+length-1);
}
DictTrieBuilder::Node *
UCharTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
Node *nextNode) const {
StringTrieBuilder::Node *
UCharsTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
Node *nextNode) const {
return new UCTLinearMatchNode(
elements[i].getString(strings).getBuffer()+unitIndex,
length,
@ -282,7 +280,7 @@ UCharTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t le
}
UBool
UCharTrieBuilder::ensureCapacity(int32_t length) {
UCharsTrieBuilder::ensureCapacity(int32_t length) {
if(uchars==NULL) {
return FALSE; // previous memory allocation had failed
}
@ -308,7 +306,7 @@ UCharTrieBuilder::ensureCapacity(int32_t length) {
}
int32_t
UCharTrieBuilder::write(int32_t unit) {
UCharsTrieBuilder::write(int32_t unit) {
int32_t newLength=ucharsLength+1;
if(ensureCapacity(newLength)) {
ucharsLength=newLength;
@ -318,7 +316,7 @@ UCharTrieBuilder::write(int32_t unit) {
}
int32_t
UCharTrieBuilder::write(const UChar *s, int32_t length) {
UCharsTrieBuilder::write(const UChar *s, int32_t length) {
int32_t newLength=ucharsLength+length;
if(ensureCapacity(newLength)) {
ucharsLength=newLength;
@ -328,24 +326,24 @@ UCharTrieBuilder::write(const UChar *s, int32_t length) {
}
int32_t
UCharTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) {
UCharsTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) {
return write(elements[i].getString(strings).getBuffer()+unitIndex, length);
}
int32_t
UCharTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
UCharsTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
UChar intUnits[3];
int32_t length;
if(i<0 || i>UCharTrie::kMaxTwoUnitValue) {
intUnits[0]=(UChar)(UCharTrie::kThreeUnitValueLead);
if(i<0 || i>UCharsTrie::kMaxTwoUnitValue) {
intUnits[0]=(UChar)(UCharsTrie::kThreeUnitValueLead);
intUnits[1]=(UChar)(i>>16);
intUnits[2]=(UChar)i;
length=3;
} else if(i<=UCharTrie::kMaxOneUnitValue) {
} else if(i<=UCharsTrie::kMaxOneUnitValue) {
intUnits[0]=(UChar)(i);
length=1;
} else {
intUnits[0]=(UChar)(UCharTrie::kMinTwoUnitValueLead+(i>>16));
intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitValueLead+(i>>16));
intUnits[1]=(UChar)i;
length=2;
}
@ -354,22 +352,22 @@ UCharTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
}
int32_t
UCharTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
UCharsTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
if(!hasValue) {
return write(node);
}
UChar intUnits[3];
int32_t length;
if(value<0 || value>UCharTrie::kMaxTwoUnitNodeValue) {
intUnits[0]=(UChar)(UCharTrie::kThreeUnitNodeValueLead);
if(value<0 || value>UCharsTrie::kMaxTwoUnitNodeValue) {
intUnits[0]=(UChar)(UCharsTrie::kThreeUnitNodeValueLead);
intUnits[1]=(UChar)(value>>16);
intUnits[2]=(UChar)value;
length=3;
} else if(value<=UCharTrie::kMaxOneUnitNodeValue) {
} else if(value<=UCharsTrie::kMaxOneUnitNodeValue) {
intUnits[0]=(UChar)((value+1)<<6);
length=1;
} else {
intUnits[0]=(UChar)(UCharTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0));
intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0));
intUnits[1]=(UChar)value;
length=2;
}
@ -378,18 +376,18 @@ UCharTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node)
}
int32_t
UCharTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
UCharsTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
int32_t i=ucharsLength-jumpTarget;
UChar intUnits[3];
int32_t length;
U_ASSERT(i>=0);
if(i<=UCharTrie::kMaxOneUnitDelta) {
if(i<=UCharsTrie::kMaxOneUnitDelta) {
length=0;
} else if(i<=UCharTrie::kMaxTwoUnitDelta) {
intUnits[0]=(UChar)(UCharTrie::kMinTwoUnitDeltaLead+(i>>16));
} else if(i<=UCharsTrie::kMaxTwoUnitDelta) {
intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitDeltaLead+(i>>16));
length=1;
} else {
intUnits[0]=(UChar)(UCharTrie::kThreeUnitDeltaLead);
intUnits[0]=(UChar)(UCharsTrie::kThreeUnitDeltaLead);
intUnits[1]=(UChar)(i>>16);
length=2;
}

View file

@ -3,41 +3,42 @@
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uchartriebuilder.h
* file name: ucharstriebuilder.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010nov14
* created by: Markus W. Scherer
*
* Builder class for UCharTrie dictionary trie.
*/
#ifndef __UCHARTRIEBUILDER_H__
#define __UCHARTRIEBUILDER_H__
#ifndef __UCHARSTRIEBUILDER_H__
#define __UCHARSTRIEBUILDER_H__
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "dicttriebuilder.h"
#include "uchartrie.h"
#include "stringtriebuilder.h"
#include "ucharstrie.h"
U_NAMESPACE_BEGIN
class UCharTrieElement;
class UCharsTrieElement;
class U_TOOLUTIL_API UCharTrieBuilder : public DictTrieBuilder {
/**
* Builder class for UCharsTrie.
*/
class U_TOOLUTIL_API UCharsTrieBuilder : public StringTrieBuilder {
public:
UCharTrieBuilder()
UCharsTrieBuilder()
: elements(NULL), elementsCapacity(0), elementsLength(0),
uchars(NULL), ucharsCapacity(0), ucharsLength(0) {}
virtual ~UCharTrieBuilder();
virtual ~UCharsTrieBuilder();
UCharTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);
UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);
UnicodeString &build(UDictTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode);
UnicodeString &build(UStringTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode);
UCharTrieBuilder &clear() {
UCharsTrieBuilder &clear() {
strings.remove();
elementsLength=0;
ucharsLength=0;
@ -57,15 +58,15 @@ private:
virtual UBool matchNodesCanHaveValues() const { return TRUE; }
virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharTrie::kMaxBranchLinearSubNodeLength; }
virtual int32_t getMinLinearMatch() const { return UCharTrie::kMinLinearMatch; }
virtual int32_t getMaxLinearMatchLength() const { return UCharTrie::kMaxLinearMatchLength; }
virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharsTrie::kMaxBranchLinearSubNodeLength; }
virtual int32_t getMinLinearMatch() const { return UCharsTrie::kMinLinearMatch; }
virtual int32_t getMaxLinearMatchLength() const { return UCharsTrie::kMaxLinearMatchLength; }
class UCTLinearMatchNode : public LinearMatchNode {
public:
UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode);
virtual UBool operator==(const Node &other) const;
virtual void write(DictTrieBuilder &builder);
virtual void write(StringTrieBuilder &builder);
private:
const UChar *s;
};
@ -82,7 +83,7 @@ private:
virtual int32_t writeDeltaTo(int32_t jumpTarget);
UnicodeString strings;
UCharTrieElement *elements;
UCharsTrieElement *elements;
int32_t elementsCapacity;
int32_t elementsLength;
@ -95,4 +96,4 @@ private:
U_NAMESPACE_END
#endif // __UCHARTRIEBUILDER_H__
#endif // __UCHARSTRIEBUILDER_H__

View file

@ -1,9 +1,9 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uchartrieiterator.h
* file name: ucharstrieiterator.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -14,22 +14,22 @@
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "uchartrie.h"
#include "uchartrieiterator.h"
#include "ucharstrie.h"
#include "ucharstrieiterator.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
UCharTrieIterator::UCharTrieIterator(const UChar *trieUChars, int32_t maxStringLength,
UErrorCode &errorCode)
UCharsTrieIterator::UCharsTrieIterator(const UChar *trieUChars, int32_t maxStringLength,
UErrorCode &errorCode)
: uchars_(trieUChars),
pos_(uchars_), initialPos_(uchars_),
remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
skipValue_(FALSE),
maxLength_(maxStringLength), value_(0), stack_(errorCode) {}
UCharTrieIterator::UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLength,
UErrorCode &errorCode)
UCharsTrieIterator::UCharsTrieIterator(const UCharsTrie &trie, int32_t maxStringLength,
UErrorCode &errorCode)
: uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
remainingMatchLength_(trie.remainingMatchLength_),
initialRemainingMatchLength_(trie.remainingMatchLength_),
@ -48,7 +48,7 @@ UCharTrieIterator::UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLen
}
}
UCharTrieIterator &UCharTrieIterator::reset() {
UCharsTrieIterator &UCharsTrieIterator::reset() {
pos_=initialPos_;
remainingMatchLength_=initialRemainingMatchLength_;
skipValue_=FALSE;
@ -64,7 +64,7 @@ UCharTrieIterator &UCharTrieIterator::reset() {
}
UBool
UCharTrieIterator::next(UErrorCode &errorCode) {
UCharsTrieIterator::next(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return FALSE;
}
@ -97,18 +97,18 @@ UCharTrieIterator::next(UErrorCode &errorCode) {
}
for(;;) {
int32_t node=*pos++;
if(node>=UCharTrie::kMinValueLead) {
if(node>=UCharsTrie::kMinValueLead) {
if(skipValue_) {
pos=UCharTrie::skipNodeValue(pos, node);
node&=UCharTrie::kNodeTypeMask;
pos=UCharsTrie::skipNodeValue(pos, node);
node&=UCharsTrie::kNodeTypeMask;
skipValue_=FALSE;
} else {
// Deliver value for the string so far.
UBool isFinal=(UBool)(node>>15);
if(isFinal) {
value_=UCharTrie::readValue(pos, node&0x7fff);
value_=UCharsTrie::readValue(pos, node&0x7fff);
} else {
value_=UCharTrie::readNodeValue(pos, node);
value_=UCharsTrie::readNodeValue(pos, node);
}
if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
pos_=NULL;
@ -126,7 +126,7 @@ UCharTrieIterator::next(UErrorCode &errorCode) {
if(maxLength_>0 && str_.length()==maxLength_) {
return truncateAndStop();
}
if(node<UCharTrie::kMinLinearMatch) {
if(node<UCharsTrie::kMinLinearMatch) {
if(node==0) {
node=*pos++;
}
@ -136,7 +136,7 @@ UCharTrieIterator::next(UErrorCode &errorCode) {
}
} else {
// Linear-match node, append length units to str_.
int32_t length=node-UCharTrie::kMinLinearMatch+1;
int32_t length=node-UCharsTrie::kMinLinearMatch+1;
if(maxLength_>0 && str_.length()+length>maxLength_) {
str_.append(pos, maxLength_-str_.length());
return truncateAndStop();
@ -149,23 +149,23 @@ UCharTrieIterator::next(UErrorCode &errorCode) {
// Branch node, needs to take the first outbound edge and push state for the rest.
const UChar *
UCharTrieIterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
while(length>UCharTrie::kMaxBranchLinearSubNodeLength) {
UCharsTrieIterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
while(length>UCharsTrie::kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison unit
// Push state for the greater-or-equal edge.
stack_.addElement((int32_t)(UCharTrie::skipDelta(pos)-uchars_), errorCode);
stack_.addElement((int32_t)(UCharsTrie::skipDelta(pos)-uchars_), errorCode);
stack_.addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
// Follow the less-than edge.
length>>=1;
pos=UCharTrie::jumpByDelta(pos);
pos=UCharsTrie::jumpByDelta(pos);
}
// List of key-value pairs where values are either final values or jump deltas.
// Read the first (key, value) pair.
UChar trieUnit=*pos++;
int32_t node=*pos++;
UBool isFinal=(UBool)(node>>15);
int32_t value=UCharTrie::readValue(pos, node&=0x7fff);
pos=UCharTrie::skipValue(pos, node);
int32_t value=UCharsTrie::readValue(pos, node&=0x7fff);
pos=UCharsTrie::skipValue(pos, node);
stack_.addElement((int32_t)(pos-uchars_), errorCode);
stack_.addElement(((length-1)<<16)|str_.length(), errorCode);
str_.append(trieUnit);

View file

@ -1,9 +1,9 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uchartrieiterator.h
* file name: ucharstrieiterator.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -12,28 +12,28 @@
* created by: Markus W. Scherer
*/
#ifndef __UCHARTRIEITERATOR_H__
#define __UCHARTRIEITERATOR_H__
#ifndef __UCHARSTRIEITERATOR_H__
#define __UCHARSTRIEITERATOR_H__
/**
* \file
* \brief C++ API: UCharTrie iterator for all of its (string, value) pairs.
* \brief C++ API: UCharsTrie iterator for all of its (string, value) pairs.
*/
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "uchartrie.h"
#include "ucharstrie.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
/**
* Iterator for all of the (string, value) pairs in a UCharTrie.
* Iterator for all of the (string, value) pairs in a UCharsTrie.
*/
class U_TOOLUTIL_API UCharTrieIterator : public UMemory {
class U_TOOLUTIL_API UCharsTrieIterator : public UMemory {
public:
/**
* Iterates from the root of a UChar-serialized UCharTrie.
* Iterates from the root of a UChar-serialized UCharsTrie.
* @param trieUChars The trie UChars.
* @param maxStringLength If 0, the iterator returns full strings.
* Otherwise, the iterator returns strings with this maximum length.
@ -42,10 +42,10 @@ public:
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
*/
UCharTrieIterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
UCharsTrieIterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
/**
* Iterates from the current state of the specified UCharTrie.
* Iterates from the current state of the specified UCharsTrie.
* @param trie The trie whose state will be copied for iteration.
* @param maxStringLength If 0, the iterator returns full strings.
* Otherwise, the iterator returns strings with this maximum length.
@ -54,12 +54,12 @@ public:
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
*/
UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
UCharsTrieIterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
/**
* Resets this iterator to its initial state.
*/
UCharTrieIterator &reset();
UCharsTrieIterator &reset();
/**
* Finds the next (string, value) pair if there is one.
@ -118,4 +118,4 @@ private:
U_NAMESPACE_END
#endif // __UCHARTRIEITERATOR_H__
#endif // __UCHARSTRIEITERATOR_H__