diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index d9493c5d207..b083b4f9e33 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -1,6 +1,6 @@ #****************************************************************************** # -# Copyright (C) 1999-2010, International Business Machines +# Copyright (C) 1999-2011, International Business Machines # Corporation and others. All Rights Reserved. # #****************************************************************************** @@ -85,7 +85,7 @@ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \ ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \ uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \ ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \ -bytestream.o stringpiece.o bytetrie.o \ +bytestream.o stringpiece.o bytestrie.o \ ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \ utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \ normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \ diff --git a/icu4c/source/common/bytetrie.cpp b/icu4c/source/common/bytestrie.cpp similarity index 83% rename from icu4c/source/common/bytetrie.cpp rename to icu4c/source/common/bytestrie.cpp index d913b3b4995..c9049146863 100644 --- a/icu4c/source/common/bytetrie.cpp +++ b/icu4c/source/common/bytestrie.cpp @@ -1,9 +1,9 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: bytetrie.cpp +* file name: bytestrie.cpp * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -16,13 +16,13 @@ #include "unicode/bytestream.h" #include "unicode/uobject.h" #include "uassert.h" -#include "bytetrie.h" +#include "bytestrie.h" U_NAMESPACE_BEGIN // lead byte already shifted right by 1. int32_t -ByteTrie::readValue(const uint8_t *pos, int32_t leadByte) { +BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) { int32_t value; if(leadByte=kMinValueLead) ? - valueResult(node) : UDICTTRIE_NO_VALUE; + valueResult(node) : USTRINGTRIE_NO_VALUE; } } -UDictTrieResult -ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) { +UStringTrieResult +BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) { // Branch according to the current byte. if(length==0) { length=*pos++; @@ -93,12 +93,12 @@ ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) { // and divides length by 2. do { if(inByte==*pos++) { - UDictTrieResult result; + UStringTrieResult result; int32_t node=*pos; U_ASSERT(node>=kMinValueLead); if(node&kValueIsFinal) { // Leave the final value for getValue() to read. - result=UDICTTRIE_HAS_FINAL_VALUE; + result=USTRINGTRIE_FINAL_VALUE; } else { // Use the non-final value as the jump delta. ++pos; @@ -122,7 +122,7 @@ ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) { // end readValue() pos+=delta; node=*pos; - result= node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE; + result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; } pos_=pos; return result; @@ -133,15 +133,15 @@ ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) { if(inByte==*pos++) { pos_=pos; int32_t node=*pos; - return node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE; + return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; } else { stop(); - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } } -UDictTrieResult -ByteTrie::nextImpl(const uint8_t *pos, int32_t inByte) { +UStringTrieResult +BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) { for(;;) { int32_t node=*pos++; if(node=kMinValueLead) ? - valueResult(node) : UDICTTRIE_NO_VALUE; + valueResult(node) : USTRINGTRIE_NO_VALUE; } else { // No match. break; @@ -169,14 +169,14 @@ ByteTrie::nextImpl(const uint8_t *pos, int32_t inByte) { } } stop(); - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } -UDictTrieResult -ByteTrie::next(int32_t inByte) { +UStringTrieResult +BytesTrie::next(int32_t inByte) { const uint8_t *pos=pos_; if(pos==NULL) { - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. if(length>=0) { @@ -186,24 +186,24 @@ ByteTrie::next(int32_t inByte) { pos_=pos; int32_t node; return (length<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : UDICTTRIE_NO_VALUE; + valueResult(node) : USTRINGTRIE_NO_VALUE; } else { stop(); - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } } return nextImpl(pos, inByte); } -UDictTrieResult -ByteTrie::next(const char *s, int32_t sLength) { +UStringTrieResult +BytesTrie::next(const char *s, int32_t sLength) { if(sLength<0 ? *s==0 : sLength==0) { // Empty input. return current(); } const uint8_t *pos=pos_; if(pos==NULL) { - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. for(;;) { @@ -217,7 +217,7 @@ ByteTrie::next(const char *s, int32_t sLength) { pos_=pos; int32_t node; return (length<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : UDICTTRIE_NO_VALUE; + valueResult(node) : USTRINGTRIE_NO_VALUE; } if(length<0) { remainingMatchLength_=length; @@ -225,7 +225,7 @@ ByteTrie::next(const char *s, int32_t sLength) { } if(inByte!=*pos) { stop(); - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } ++pos; --length; @@ -237,7 +237,7 @@ ByteTrie::next(const char *s, int32_t sLength) { pos_=pos; int32_t node; return (length<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : UDICTTRIE_NO_VALUE; + valueResult(node) : USTRINGTRIE_NO_VALUE; } inByte=*s++; --sLength; @@ -247,7 +247,7 @@ ByteTrie::next(const char *s, int32_t sLength) { } if(inByte!=*pos) { stop(); - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } ++pos; --length; @@ -256,9 +256,9 @@ ByteTrie::next(const char *s, int32_t sLength) { for(;;) { int32_t node=*pos++; if(nodekMaxBranchLinearSubNodeLength) { ++pos; // ignore the comparison byte if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { @@ -340,7 +340,7 @@ ByteTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length, } UBool -ByteTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) { +BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) { for(;;) { int32_t node=*pos++; if(nodekMaxBranchLinearSubNodeLength) { ++pos; // ignore the comparison byte getNextBranchBytes(jumpByDelta(pos), length>>1, out); @@ -423,7 +423,7 @@ ByteTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) } void -ByteTrie::append(ByteSink &out, int c) { +BytesTrie::append(ByteSink &out, int c) { char ch=(char)c; out.Append(&ch, 1); } diff --git a/icu4c/source/common/bytetrie.h b/icu4c/source/common/bytestrie.h similarity index 86% rename from icu4c/source/common/bytetrie.h rename to icu4c/source/common/bytestrie.h index 47b34c8f62b..706fbdfb950 100644 --- a/icu4c/source/common/bytetrie.h +++ b/icu4c/source/common/bytestrie.h @@ -1,9 +1,9 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: bytetrie.h +* file name: bytestrie.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -12,55 +12,54 @@ * created by: Markus W. Scherer */ -#ifndef __BYTETRIE_H__ -#define __BYTETRIE_H__ +#ifndef __BYTESTRIE_H__ +#define __BYTESTRIE_H__ /** * \file - * \brief C++ API: Dictionary trie for mapping arbitrary byte sequences - * to integer values. + * \brief C++ API: Trie for mapping byte sequences to integer values. */ #include "unicode/utypes.h" #include "unicode/uobject.h" #include "uassert.h" -#include "udicttrie.h" +#include "ustringtrie.h" U_NAMESPACE_BEGIN class ByteSink; -class ByteTrieBuilder; -class ByteTrieIterator; +class BytesTrieBuilder; +class BytesTrieIterator; /** - * Light-weight, non-const reader class for a ByteTrie. + * Light-weight, non-const reader class for a BytesTrie. * Traverses a byte-serialized data structure with minimal state, * for mapping byte sequences to non-negative integer values. */ -class U_COMMON_API ByteTrie : public UMemory { +class U_COMMON_API BytesTrie : public UMemory { public: - ByteTrie(const void *trieBytes) + BytesTrie(const void *trieBytes) : bytes_(reinterpret_cast(trieBytes)), pos_(bytes_), remainingMatchLength_(-1) {} /** * Resets this trie to its initial state. */ - ByteTrie &reset() { + BytesTrie &reset() { pos_=bytes_; remainingMatchLength_=-1; return *this; } /** - * ByteTrie state object, for saving a trie's current state + * BytesTrie state object, for saving a trie's current state * and resetting the trie back to this state later. */ class State : public UMemory { public: State() { bytes=NULL; } private: - friend class ByteTrie; + friend class BytesTrie; const uint8_t *bytes; const uint8_t *pos; @@ -71,7 +70,7 @@ public: * Saves the state of this trie. * @see resetToState */ - const ByteTrie &saveState(State &state) const { + const BytesTrie &saveState(State &state) const { state.bytes=bytes_; state.pos=pos_; state.remainingMatchLength=remainingMatchLength_; @@ -85,7 +84,7 @@ public: * @see saveState * @see reset */ - ByteTrie &resetToState(const State &state) { + BytesTrie &resetToState(const State &state) { if(bytes_==state.bytes && bytes_!=NULL) { pos_=state.pos; remainingMatchLength_=state.remainingMatchLength; @@ -98,14 +97,14 @@ public: * and whether another input byte can continue a matching byte sequence. * @return The match/value Result. */ - UDictTrieResult current() const; + UStringTrieResult current() const; /** * Traverses the trie from the initial state for this input byte. * Equivalent to reset().next(inByte). * @return The match/value Result. */ - inline UDictTrieResult first(int32_t inByte) { + inline UStringTrieResult first(int32_t inByte) { remainingMatchLength_=-1; return nextImpl(bytes_, inByte); } @@ -114,7 +113,7 @@ public: * Traverses the trie from the current state for this input byte. * @return The match/value Result. */ - UDictTrieResult next(int32_t inByte); + UStringTrieResult next(int32_t inByte); /** * Traverses the trie from the current state for this byte sequence. @@ -122,19 +121,20 @@ public: * \code * Result result=current(); * for(each c in s) - * if((result=next(c))==UDICTTRIE_NO_MATCH) return UDICTTRIE_NO_MATCH; + * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH; + * result=next(c); * return result; * \endcode * @return The match/value Result. */ - UDictTrieResult next(const char *s, int32_t length); + UStringTrieResult next(const char *s, int32_t length); /** * Returns a matching byte sequence's value if called immediately after - * current()/first()/next() returned UDICTTRIE_HAS_VALUE or UDICTTRIE_HAS_FINAL_VALUE. + * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE. * getValue() can be called multiple times. * - * Do not call getValue() after UDICTTRIE_NO_MATCH or UDICTTRIE_NO_VALUE! + * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE! */ inline int32_t getValue() const { const uint8_t *pos=pos_; @@ -159,7 +159,7 @@ public: /** * Finds each byte which continues the byte sequence from the current state. - * That is, each byte b for which it would be next(b)!=UDICTTRIE_NO_MATCH now. + * That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now. * @param out Each next byte is appended to this object. * (Only uses the out.Append(s, length) method.) * @return the number of bytes which continue the byte sequence from here @@ -167,8 +167,8 @@ public: int32_t getNextBytes(ByteSink &out) const; private: - friend class ByteTrieBuilder; - friend class ByteTrieIterator; + friend class BytesTrieBuilder; + friend class BytesTrieIterator; inline void stop() { pos_=NULL; @@ -212,15 +212,15 @@ private: return pos; } - static inline UDictTrieResult valueResult(int32_t node) { - return (UDictTrieResult)(UDICTTRIE_HAS_VALUE-(node&kValueIsFinal)); + static inline UStringTrieResult valueResult(int32_t node) { + return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node&kValueIsFinal)); } // Handles a branch node for both next(byte) and next(string). - UDictTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte); + UStringTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte); // Requires remainingLength_<0. - UDictTrieResult nextImpl(const uint8_t *pos, int32_t inByte); + UStringTrieResult nextImpl(const uint8_t *pos, int32_t inByte); // Helper functions for hasUniqueValue(). // Recursively finds a unique value (or whether there is not a unique one) @@ -236,7 +236,7 @@ private: static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out); static void append(ByteSink &out, int c); - // ByteTrie data structure + // BytesTrie data structure // // The trie consists of a series of byte-serialized nodes for incremental // string/byte sequence matching. The root node is at the beginning of the trie data. @@ -315,7 +315,7 @@ private: static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff - // Fixed value referencing the ByteTrie bytes. + // Fixed value referencing the BytesTrie bytes. const uint8_t *bytes_; // Iterator variables. @@ -328,4 +328,4 @@ private: U_NAMESPACE_END -#endif // __BYTETRIE_H__ +#endif // __BYTESTRIE_H__ diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj index 809097f2737..99a1eaa9478 100644 --- a/icu4c/source/common/common.vcxproj +++ b/icu4c/source/common/common.vcxproj @@ -400,7 +400,7 @@ - + @@ -557,6 +557,7 @@ + @@ -1366,7 +1367,7 @@ ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) - + copy "%(FullPath)" ..\..\include\unicode diff --git a/icu4c/source/common/propname.cpp b/icu4c/source/common/propname.cpp index ff3bb350a69..b34362ea815 100644 --- a/icu4c/source/common/propname.cpp +++ b/icu4c/source/common/propname.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2002-2010, International Business Machines +* Copyright (c) 2002-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -165,7 +165,7 @@ int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t if(valueMapIndex==0) { return 0; // The property does not have named values. } - ++valueMapIndex; // Skip the ByteTrie offset. + ++valueMapIndex; // Skip the BytesTrie offset. int32_t numRanges=valueMaps[valueMapIndex++]; if(numRanges<0x10) { // Ranges of values. @@ -214,11 +214,11 @@ const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) { return nameGroup; } -UBool PropNameData::containsName(ByteTrie &trie, const char *name) { +UBool PropNameData::containsName(BytesTrie &trie, const char *name) { if(name==NULL) { return FALSE; } - UDictTrieResult result=UDICTTRIE_NO_VALUE; + UStringTrieResult result=USTRINGTRIE_NO_VALUE; char c; while((c=*name++)!=0) { c=uprv_invCharToLowercaseAscii(c); @@ -226,12 +226,12 @@ UBool PropNameData::containsName(ByteTrie &trie, const char *name) { if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) { continue; } - if(!UDICTTRIE_RESULT_HAS_NEXT(result)) { + if(!USTRINGTRIE_HAS_NEXT(result)) { return FALSE; } result=trie.next((uint8_t)c); } - return UDICTTRIE_RESULT_HAS_VALUE(result); + return USTRINGTRIE_HAS_VALUE(result); } const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) { @@ -254,8 +254,8 @@ const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, return getName(nameGroups+nameGroupOffset, nameChoice); } -int32_t PropNameData::getPropertyOrValueEnum(int32_t byteTrieOffset, const char *alias) { - ByteTrie trie(byteTries+byteTrieOffset); +int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) { + BytesTrie trie(bytesTries+bytesTrieOffset); if(containsName(trie, alias)) { return trie.getValue(); } else { @@ -277,7 +277,7 @@ int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) return UCHAR_INVALID_CODE; // The property does not have named values. } // valueMapIndex is the start of the property's valueMap, - // where the first word is the ByteTrie offset. + // where the first word is the BytesTrie offset. return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); } diff --git a/icu4c/source/common/propname.h b/icu4c/source/common/propname.h index d4639143798..525064c84c9 100644 --- a/icu4c/source/common/propname.h +++ b/icu4c/source/common/propname.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2002-2010, International Business Machines +* Copyright (c) 2002-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -14,7 +14,7 @@ #include "unicode/utypes.h" #include "unicode/uchar.h" -#include "bytetrie.h" +#include "bytestrie.h" #include "udataswp.h" #include "uprops.h" @@ -106,13 +106,13 @@ private: static int32_t findProperty(int32_t property); static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value); static const char *getName(const char *nameGroup, int32_t nameIndex); - static UBool containsName(ByteTrie &trie, const char *name); + static UBool containsName(BytesTrie &trie, const char *name); - static int32_t getPropertyOrValueEnum(int32_t byteTrieOffset, const char *alias); + static int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias); static const int32_t indexes[]; static const int32_t valueMaps[]; - static const uint8_t byteTries[]; + static const uint8_t bytesTries[]; static const char nameGroups[]; }; @@ -164,7 +164,7 @@ private: * If the valueMapIndex is 0, then the property does not have named values. * * For each property's value map: - * int32_t byteTrieOffset; -- Offset into byteTries[] for name->value mapping. + * int32_t bytesTrieOffset; -- Offset into bytesTries[] for name->value mapping. * int32_t numRanges; * If numRanges is in the range 1..15, then that many ranges of values follow. * Per range: @@ -181,12 +181,12 @@ private: * * For both properties and property values, ranges are sorted by their start/limit values. * - * uint8_t byteTries[]; + * uint8_t bytesTries[]; * - * This is a sequence of ByteTrie structures, byte-serialized tries for + * This is a sequence of BytesTrie structures, byte-serialized tries for * mapping from names/aliases to values. * The first one maps from property names/aliases to UProperty enum constants. - * The following ones are indexed by property value map byteTrieOffsets + * The following ones are indexed by property value map bytesTrieOffsets * for mapping each property's names/aliases to their property values. * * char nameGroups[]; diff --git a/icu4c/source/common/propname_data.h b/icu4c/source/common/propname_data.h index f7d01064da0..6fccf8bf03d 100644 --- a/icu4c/source/common/propname_data.h +++ b/icu4c/source/common/propname_data.h @@ -1,10 +1,10 @@ /* - * Copyright (C) 1999-2010, International Business Machines + * Copyright (C) 1999-2011, International Business Machines * Corporation and others. All Rights Reserved. * * file name: propname_data.h * - * machine-generated on: 2010-12-31 + * machine-generated on: 2011-01-05 */ #ifndef INCLUDED_FROM_PROPNAME_CPP @@ -80,7 +80,7 @@ const int32_t PropNameData::valueMaps[989]={ 0x2eb1,0x2f20,0x2ec6,0x2e97,0x2f0a,0x2f72,0x2f4a,0x2f5e,0x2f82,0x2f93,0x2ef2,0x2edc,0x2f35 }; -const uint8_t PropNameData::byteTries[10229]={ +const uint8_t PropNameData::bytesTries[10229]={ 0,0x15,0x6d,0xc3,0x16,0x73,0xc1,0xea,0x76,0x5f,0x76,0x68,0x77,0x90,0x78,1, 0x64,0x50,0x69,0x10,0x64,1,0x63,0x30,0x73,0x62,0x13,0x74,0x61,0x72,0x74,0x63, 0x60,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x61,0x13,0x69,0x67,0x69,0x74,0x81, diff --git a/icu4c/source/common/udicttrie.h b/icu4c/source/common/ustringtrie.h similarity index 57% rename from icu4c/source/common/udicttrie.h rename to icu4c/source/common/ustringtrie.h index 2f6401ba33c..7cfe6ec5e7d 100644 --- a/icu4c/source/common/udicttrie.h +++ b/icu4c/source/common/ustringtrie.h @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: udicttrie.h @@ -12,8 +12,8 @@ * created by: Markus W. Scherer */ -#ifndef __UDICTTRIE_H__ -#define __UDICTTRIE_H__ +#ifndef __USTRINGTRIE_H__ +#define __USTRINGTRIE_H__ /** * \file @@ -23,61 +23,61 @@ #include "unicode/utypes.h" /** - * Return values for ByteTrie::next(), UCharTrie::next() and similar methods. - * @see UDICTTRIE_RESULT_MATCHES - * @see UDICTTRIE_RESULT_HAS_VALUE - * @see UDICTTRIE_RESULT_HAS_NEXT + * Return values for BytesTrie::next(), UCharsTrie::next() and similar methods. + * @see USTRINGTRIE_MATCHES + * @see USTRINGTRIE_HAS_VALUE + * @see USTRINGTRIE_HAS_NEXT */ -enum UDictTrieResult { +enum UStringTrieResult { /** * The input unit(s) did not continue a matching string. */ - UDICTTRIE_NO_MATCH, + USTRINGTRIE_NO_MATCH, /** * The input unit(s) continued a matching string * but there is no value for the string so far. * (It is a prefix of a longer string.) */ - UDICTTRIE_NO_VALUE, + USTRINGTRIE_NO_VALUE, /** * The input unit(s) continued a matching string * and there is a value for the string so far. * This value will be returned by getValue(). * No further input byte/unit can continue a matching string. */ - UDICTTRIE_HAS_FINAL_VALUE, + USTRINGTRIE_FINAL_VALUE, /** * The input unit(s) continued a matching string * and there is a value for the string so far. * This value will be returned by getValue(). * Another input byte/unit can continue a matching string. */ - UDICTTRIE_HAS_VALUE + USTRINGTRIE_INTERMEDIATE_VALUE }; /** - * Same as (result!=UDICTTRIE_NO_MATCH). - * @param result A result from ByteTrie::first(), UCharTrie::next() etc. + * Same as (result!=USTRINGTRIE_NO_MATCH). + * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. * @return true if the input bytes/units so far are part of a matching string/byte sequence. */ -#define UDICTTRIE_RESULT_MATCHES(result) ((result)!=UDICTTRIE_NO_MATCH) +#define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH) /** - * Equivalent to (result==UDICTTRIE_HAS_VALUE || result==UDICTTRIE_HAS_FINAL_VALUE) but + * Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but * this macro evaluates result exactly once. - * @param result A result from ByteTrie::first(), UCharTrie::next() etc. + * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. * @return true if there is a value for the input bytes/units so far. - * @see ByteTrie::getValue - * @see UCharTrie::getValue + * @see BytesTrie::getValue + * @see UCharsTrie::getValue */ -#define UDICTTRIE_RESULT_HAS_VALUE(result) ((result)>=UDICTTRIE_HAS_FINAL_VALUE) +#define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE) /** - * Equivalent to (result==UDICTTRIE_NO_VALUE || result==UDICTTRIE_HAS_VALUE) but + * Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but * this macro evaluates result exactly once. - * @param result A result from ByteTrie::first(), UCharTrie::next() etc. + * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. * @return true if another input byte/unit can continue a matching string. */ -#define UDICTTRIE_RESULT_HAS_NEXT(result) ((result)&1) +#define USTRINGTRIE_HAS_NEXT(result) ((result)&1) -#endif /* __UDICTTRIE_H__ */ +#endif /* __USTRINGTRIE_H__ */ diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index d114a180a84..4d59aae636e 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -1,6 +1,6 @@ #****************************************************************************** # -# Copyright (C) 1999-2010, International Business Machines +# Copyright (C) 1999-2011, International Business Machines # Corporation and others. All Rights Reserved. # #****************************************************************************** @@ -50,7 +50,7 @@ sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \ tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o \ tsmthred.o tsnmfmt.o tsputil.o tstnrapi.o tstnorm.o tzbdtest.o \ tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o strcase.o transtst.o strtest.o thcoll.o \ -bytetrietest.o uchartrietest.o \ +bytestrietest.o ucharstrietest.o \ itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o \ testutil.o transrt.o trnserr.o normconf.o sfwdchit.o \ jamotest.o srchtest.o reptest.o regextst.o \ diff --git a/icu4c/source/test/intltest/bytetrietest.cpp b/icu4c/source/test/intltest/bytestrietest.cpp similarity index 76% rename from icu4c/source/test/intltest/bytetrietest.cpp rename to icu4c/source/test/intltest/bytestrietest.cpp index fd37748e4fb..4166dd8821c 100644 --- a/icu4c/source/test/intltest/bytetrietest.cpp +++ b/icu4c/source/test/intltest/bytestrietest.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: bytetrietest.cpp @@ -16,9 +16,9 @@ #include "unicode/utypes.h" #include "unicode/stringpiece.h" -#include "bytetrie.h" -#include "bytetriebuilder.h" -#include "bytetrieiterator.h" +#include "bytestrie.h" +#include "bytestriebuilder.h" +#include "bytestrieiterator.h" #include "intltest.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) @@ -28,10 +28,10 @@ struct StringAndValue { int32_t value; }; -class ByteTrieTest : public IntlTest { +class BytesTrieTest : public IntlTest { public: - ByteTrieTest() {} - virtual ~ByteTrieTest(); + BytesTrieTest() {} + virtual ~BytesTrieTest(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); void TestBuilder(); @@ -45,7 +45,7 @@ public: void TestValuesForState(); void TestCompact(); - StringPiece buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBuildOption buildOption); + StringPiece buildMonthsTrie(BytesTrieBuilder &builder, UStringTrieBuildOption buildOption); void TestHasUniqueValue(); void TestGetNextBytes(); void TestIteratorFromBranch(); @@ -55,27 +55,27 @@ public: void TestTruncatingIteratorFromLinearMatchLong(); void checkData(const StringAndValue data[], int32_t dataLength); - void checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption); + void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); StringPiece buildTrie(const StringAndValue data[], int32_t dataLength, - ByteTrieBuilder &builder, UDictTrieBuildOption buildOption); + BytesTrieBuilder &builder, UStringTrieBuildOption buildOption); void checkFirst(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength); void checkNext(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength); void checkNextWithState(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength); void checkNextString(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength); void checkIterator(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength); - void checkIterator(ByteTrieIterator &iter, const StringAndValue data[], int32_t dataLength); + void checkIterator(BytesTrieIterator &iter, const StringAndValue data[], int32_t dataLength); }; -extern IntlTest *createByteTrieTest() { - return new ByteTrieTest(); +extern IntlTest *createBytesTrieTest() { + return new BytesTrieTest(); } -ByteTrieTest::~ByteTrieTest() { +BytesTrieTest::~BytesTrieTest() { } -void ByteTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { +void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { if(exec) { - logln("TestSuite ByteTrieTest: "); + logln("TestSuite BytesTrieTest: "); } TESTCASE_AUTO_BEGIN; TESTCASE_AUTO(TestBuilder); @@ -98,36 +98,36 @@ void ByteTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, TESTCASE_AUTO_END; } -void ByteTrieTest::TestBuilder() { +void BytesTrieTest::TestBuilder() { IcuTestErrorCode errorCode(*this, "TestBuilder()"); - ByteTrieBuilder builder; - builder.build(UDICTTRIE_BUILD_FAST, errorCode); + BytesTrieBuilder builder; + builder.build(USTRINGTRIE_BUILD_FAST, errorCode); if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { - errln("ByteTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); + errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); return; } - builder.add("=", 0, errorCode).add("=", 1, errorCode).build(UDICTTRIE_BUILD_FAST, errorCode); + builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { - errln("ByteTrieBuilder.build() did not detect duplicates"); + errln("BytesTrieBuilder.build() did not detect duplicates"); return; } } -void ByteTrieTest::TestEmpty() { +void BytesTrieTest::TestEmpty() { static const StringAndValue data[]={ { "", 0 } }; checkData(data, LENGTHOF(data)); } -void ByteTrieTest::Test_a() { +void BytesTrieTest::Test_a() { static const StringAndValue data[]={ { "a", 1 } }; checkData(data, LENGTHOF(data)); } -void ByteTrieTest::Test_a_ab() { +void BytesTrieTest::Test_a_ab() { static const StringAndValue data[]={ { "a", 1 }, { "ab", 100 } @@ -135,7 +135,7 @@ void ByteTrieTest::Test_a_ab() { checkData(data, LENGTHOF(data)); } -void ByteTrieTest::TestShortestBranch() { +void BytesTrieTest::TestShortestBranch() { static const StringAndValue data[]={ { "a", 1000 }, { "b", 2000 } @@ -143,7 +143,7 @@ void ByteTrieTest::TestShortestBranch() { checkData(data, LENGTHOF(data)); } -void ByteTrieTest::TestBranches() { +void BytesTrieTest::TestBranches() { static const StringAndValue data[]={ { "a", 0x10 }, { "cc", 0x40 }, @@ -166,7 +166,7 @@ void ByteTrieTest::TestBranches() { } } -void ByteTrieTest::TestLongSequence() { +void BytesTrieTest::TestLongSequence() { static const StringAndValue data[]={ { "a", -1 }, // sequence of linear-match nodes @@ -182,7 +182,7 @@ void ByteTrieTest::TestLongSequence() { checkData(data, LENGTHOF(data)); } -void ByteTrieTest::TestLongBranch() { +void BytesTrieTest::TestLongBranch() { // Split-branch and interesting compact-integer values. static const StringAndValue data[]={ { "a", -2 }, @@ -210,7 +210,7 @@ void ByteTrieTest::TestLongBranch() { checkData(data, LENGTHOF(data)); } -void ByteTrieTest::TestValuesForState() { +void BytesTrieTest::TestValuesForState() { // Check that saveState() and resetToState() interact properly // with next() and current(). static const StringAndValue data[]={ @@ -224,7 +224,7 @@ void ByteTrieTest::TestValuesForState() { checkData(data, LENGTHOF(data)); } -void ByteTrieTest::TestCompact() { +void BytesTrieTest::TestCompact() { // Duplicate trailing strings and values provide opportunities for compacting. static const StringAndValue data[]={ { "+", 0 }, @@ -251,7 +251,7 @@ void ByteTrieTest::TestCompact() { checkData(data, LENGTHOF(data)); } -StringPiece ByteTrieTest::buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBuildOption buildOption) { +StringPiece BytesTrieTest::buildMonthsTrie(BytesTrieBuilder &builder, UStringTrieBuildOption buildOption) { // All types of nodes leading to the same value, // for code coverage of recursive functions. // In particular, we need a lot of branches on some single level @@ -291,13 +291,13 @@ StringPiece ByteTrieTest::buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBui return buildTrie(data, LENGTHOF(data), builder, buildOption); } -void ByteTrieTest::TestHasUniqueValue() { - ByteTrieBuilder builder; - StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST); +void BytesTrieTest::TestHasUniqueValue() { + BytesTrieBuilder builder; + StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST); if(sp.empty()) { return; // buildTrie() reported an error } - ByteTrie trie(sp.data()); + BytesTrie trie(sp.data()); int32_t uniqueValue; if(trie.hasUniqueValue(uniqueValue)) { errln("unique value at root"); @@ -314,7 +314,7 @@ void ByteTrieTest::TestHasUniqueValue() { if(trie.hasUniqueValue(uniqueValue)) { errln("unique value after \"ju\""); } - if(trie.next('n')!=UDICTTRIE_HAS_VALUE || 6!=trie.getValue()) { + if(trie.next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) { errln("not normal value 6 after \"jun\""); } // hasUniqueValue() after getValue() @@ -329,13 +329,13 @@ void ByteTrieTest::TestHasUniqueValue() { } } -void ByteTrieTest::TestGetNextBytes() { - ByteTrieBuilder builder; - StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL); +void BytesTrieTest::TestGetNextBytes() { + BytesTrieBuilder builder; + StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL); if(sp.empty()) { return; // buildTrie() reported an error } - ByteTrie trie(sp.data()); + BytesTrie trie(sp.data()); char buffer[40]; CheckedArrayByteSink sink(buffer, LENGTHOF(buffer)); int32_t count=trie.getNextBytes(sink); @@ -352,7 +352,7 @@ void ByteTrieTest::TestGetNextBytes() { errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\""); } // getNextBytes() after getValue() - trie.getValue(); // next() had returned UDICTTRIE_HAS_VALUE. + trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. memset(buffer, 0, sizeof(buffer)); count=trie.getNextBytes(sink.Reset()); if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { @@ -380,20 +380,20 @@ void ByteTrieTest::TestGetNextBytes() { } } -void ByteTrieTest::TestIteratorFromBranch() { - ByteTrieBuilder builder; - StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST); +void BytesTrieTest::TestIteratorFromBranch() { + BytesTrieBuilder builder; + StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST); if(sp.empty()) { return; // buildTrie() reported an error } - ByteTrie trie(sp.data()); + BytesTrie trie(sp.data()); // Go to a branch node. trie.next('j'); trie.next('a'); trie.next('n'); IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); - ByteTrieIterator iter(trie, 0, errorCode); - if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) { + BytesTrieIterator iter(trie, 0, errorCode); + if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) { return; } // Expected data: Same as in buildMonthsTrie(), except only the suffixes @@ -431,13 +431,13 @@ void ByteTrieTest::TestIteratorFromBranch() { checkIterator(iter.reset(), data, LENGTHOF(data)); } -void ByteTrieTest::TestIteratorFromLinearMatch() { - ByteTrieBuilder builder; - StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL); +void BytesTrieTest::TestIteratorFromLinearMatch() { + BytesTrieBuilder builder; + StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL); if(sp.empty()) { return; // buildTrie() reported an error } - ByteTrie trie(sp.data()); + BytesTrie trie(sp.data()); // Go into a linear-match node. trie.next('j'); trie.next('a'); @@ -445,8 +445,8 @@ void ByteTrieTest::TestIteratorFromLinearMatch() { trie.next('u'); trie.next('a'); IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); - ByteTrieIterator iter(trie, 0, errorCode); - if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) { + BytesTrieIterator iter(trie, 0, errorCode); + if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) { return; } // Expected data: Same as in buildMonthsTrie(), except only the suffixes @@ -461,15 +461,15 @@ void ByteTrieTest::TestIteratorFromLinearMatch() { checkIterator(iter.reset(), data, LENGTHOF(data)); } -void ByteTrieTest::TestTruncatingIteratorFromRoot() { - ByteTrieBuilder builder; - StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST); +void BytesTrieTest::TestTruncatingIteratorFromRoot() { + BytesTrieBuilder builder; + StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST); if(sp.empty()) { return; // buildTrie() reported an error } IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); - ByteTrieIterator iter(sp.data(), 4, errorCode); - if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) { + BytesTrieIterator iter(sp.data(), 4, errorCode); + if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) { return; } // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters @@ -508,25 +508,25 @@ void ByteTrieTest::TestTruncatingIteratorFromRoot() { checkIterator(iter.reset(), data, LENGTHOF(data)); } -void ByteTrieTest::TestTruncatingIteratorFromLinearMatchShort() { +void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() { static const StringAndValue data[]={ { "abcdef", 10 }, { "abcdepq", 200 }, { "abcdeyz", 3000 } }; - ByteTrieBuilder builder; - StringPiece sp=buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST); + BytesTrieBuilder builder; + StringPiece sp=buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST); if(sp.empty()) { return; // buildTrie() reported an error } - ByteTrie trie(sp.data()); + BytesTrie trie(sp.data()); // Go into a linear-match node. trie.next('a'); trie.next('b'); IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); // Truncate within the linear-match node. - ByteTrieIterator iter(trie, 2, errorCode); - if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) { + BytesTrieIterator iter(trie, 2, errorCode); + if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) { return; } static const StringAndValue expected[]={ @@ -538,26 +538,26 @@ void ByteTrieTest::TestTruncatingIteratorFromLinearMatchShort() { checkIterator(iter.reset(), expected, LENGTHOF(expected)); } -void ByteTrieTest::TestTruncatingIteratorFromLinearMatchLong() { +void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() { static const StringAndValue data[]={ { "abcdef", 10 }, { "abcdepq", 200 }, { "abcdeyz", 3000 } }; - ByteTrieBuilder builder; - StringPiece sp=buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST); + BytesTrieBuilder builder; + StringPiece sp=buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST); if(sp.empty()) { return; // buildTrie() reported an error } - ByteTrie trie(sp.data()); + BytesTrie trie(sp.data()); // Go into a linear-match node. trie.next('a'); trie.next('b'); trie.next('c'); IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); // Truncate after the linear-match node. - ByteTrieIterator iter(trie, 3, errorCode); - if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) { + BytesTrieIterator iter(trie, 3, errorCode); + if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) { return; } static const StringAndValue expected[]={ @@ -571,15 +571,15 @@ void ByteTrieTest::TestTruncatingIteratorFromLinearMatchLong() { checkIterator(iter.reset(), expected, LENGTHOF(expected)); } -void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { +void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { logln("checkData(dataLength=%d, fast)", (int)dataLength); - checkData(data, dataLength, UDICTTRIE_BUILD_FAST); + checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); logln("checkData(dataLength=%d, small)", (int)dataLength); - checkData(data, dataLength, UDICTTRIE_BUILD_SMALL); + checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); } -void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption) { - ByteTrieBuilder builder; +void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { + BytesTrieBuilder builder; StringPiece sp=buildTrie(data, dataLength, builder, buildOption); if(sp.empty()) { return; // buildTrie() reported an error @@ -591,8 +591,8 @@ void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UD checkIterator(sp, data, dataLength); } -StringPiece ByteTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, - ByteTrieBuilder &builder, UDictTrieBuildOption buildOption) { +StringPiece BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, + BytesTrieBuilder &builder, UStringTrieBuildOption buildOption) { IcuTestErrorCode errorCode(*this, "buildTrie()"); // Add the items to the trie builder in an interesting (not trivial, not random) order. int32_t index, step; @@ -624,20 +624,20 @@ StringPiece ByteTrieTest::buildTrie(const StringAndValue data[], int32_t dataLen return sp; } -void ByteTrieTest::checkFirst(const StringPiece &trieBytes, - const StringAndValue data[], int32_t dataLength) { - ByteTrie trie(trieBytes.data()); +void BytesTrieTest::checkFirst(const StringPiece &trieBytes, + const StringAndValue data[], int32_t dataLength) { + BytesTrie trie(trieBytes.data()); for(int32_t i=0; i - - + + diff --git a/icu4c/source/test/intltest/itutil.cpp b/icu4c/source/test/intltest/itutil.cpp index 2734f972fe5..38e1a925f91 100644 --- a/icu4c/source/test/intltest/itutil.cpp +++ b/icu4c/source/test/intltest/itutil.cpp @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2010, International Business Machines Corporation and + * Copyright (c) 1997-2011, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ @@ -29,9 +29,9 @@ #include "aliastst.h" #include "usettest.h" -extern IntlTest *createByteTrieTest(); +extern IntlTest *createBytesTrieTest(); static IntlTest *createLocalPointerTest(); -extern IntlTest *createUCharTrieTest(); +extern IntlTest *createUCharsTrieTest(); #define CASE(id, test) case id: \ name = #test; \ @@ -73,16 +73,16 @@ void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* & case 17: name = "ByteTrieTest"; if (exec) { - logln("TestSuite ByteTrieTest---"); logln(); - LocalPointer test(createByteTrieTest()); + logln("TestSuite BytesTrieTest---"); logln(); + LocalPointer test(createBytesTrieTest()); callTest(*test, par); } break; case 18: name = "UCharTrieTest"; if (exec) { - logln("TestSuite UCharTrieTest---"); logln(); - LocalPointer test(createUCharTrieTest()); + logln("TestSuite UCharsTrieTest---"); logln(); + LocalPointer test(createUCharsTrieTest()); callTest(*test, par); } break; diff --git a/icu4c/source/test/intltest/uchartrietest.cpp b/icu4c/source/test/intltest/ucharstrietest.cpp similarity index 72% rename from icu4c/source/test/intltest/uchartrietest.cpp rename to icu4c/source/test/intltest/ucharstrietest.cpp index 4732835599e..b317958a048 100644 --- a/icu4c/source/test/intltest/uchartrietest.cpp +++ b/icu4c/source/test/intltest/ucharstrietest.cpp @@ -1,9 +1,9 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: uchartrietest.cpp +* file name: ucharstrietest.cpp * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -16,9 +16,9 @@ #include "unicode/utypes.h" #include "unicode/uniset.h" -#include "uchartrie.h" -#include "uchartriebuilder.h" -#include "uchartrieiterator.h" +#include "ucharstrie.h" +#include "ucharstriebuilder.h" +#include "ucharstrieiterator.h" #include "intltest.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) @@ -28,10 +28,10 @@ struct StringAndValue { int32_t value; }; -class UCharTrieTest : public IntlTest { +class UCharsTrieTest : public IntlTest { public: - UCharTrieTest() {} - virtual ~UCharTrieTest(); + UCharsTrieTest() {} + virtual ~UCharsTrieTest(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); void TestBuilder(); @@ -47,10 +47,10 @@ public: void TestNextForCodePoint(); void TestFirstForCodePoint(); - UBool buildLargeTrie(UCharTrieBuilder &builder, UnicodeString &result, int32_t numUniqueFirst); + UBool buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result, int32_t numUniqueFirst); void TestLargeTrie(); - UBool buildMonthsTrie(UCharTrieBuilder &builder, UDictTrieBuildOption buildOption, + UBool buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result); void TestHasUniqueValue(); void TestGetNextUChars(); @@ -61,27 +61,27 @@ public: void TestTruncatingIteratorFromLinearMatchLong(); void checkData(const StringAndValue data[], int32_t dataLength); - void checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption); + void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); UBool buildTrie(const StringAndValue data[], int32_t dataLength, - UCharTrieBuilder &builder, UDictTrieBuildOption buildOption, UnicodeString &result); + UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result); void checkFirst(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength); void checkNext(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength); void checkNextWithState(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength); void checkNextString(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength); void checkIterator(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength); - void checkIterator(UCharTrieIterator &iter, const StringAndValue data[], int32_t dataLength); + void checkIterator(UCharsTrieIterator &iter, const StringAndValue data[], int32_t dataLength); }; -extern IntlTest *createUCharTrieTest() { - return new UCharTrieTest(); +extern IntlTest *createUCharsTrieTest() { + return new UCharsTrieTest(); } -UCharTrieTest::~UCharTrieTest() { +UCharsTrieTest::~UCharsTrieTest() { } -void UCharTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { +void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { if(exec) { - logln("TestSuite UCharTrieTest: "); + logln("TestSuite UCharsTrieTest: "); } TESTCASE_AUTO_BEGIN; TESTCASE_AUTO(TestBuilder); @@ -107,37 +107,37 @@ void UCharTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, TESTCASE_AUTO_END; } -void UCharTrieTest::TestBuilder() { +void UCharsTrieTest::TestBuilder() { IcuTestErrorCode errorCode(*this, "TestBuilder()"); - UCharTrieBuilder builder; + UCharsTrieBuilder builder; UnicodeString trieUChars; - builder.build(UDICTTRIE_BUILD_FAST, trieUChars, errorCode); + builder.build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { - errln("UCharTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); + errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); return; } - builder.add("=", 0, errorCode).add("=", 1, errorCode).build(UDICTTRIE_BUILD_FAST, trieUChars, errorCode); + builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { - errln("UCharTrieBuilder.build() did not detect duplicates"); + errln("UCharsTrieBuilder.build() did not detect duplicates"); return; } } -void UCharTrieTest::TestEmpty() { +void UCharsTrieTest::TestEmpty() { static const StringAndValue data[]={ { "", 0 } }; checkData(data, LENGTHOF(data)); } -void UCharTrieTest::Test_a() { +void UCharsTrieTest::Test_a() { static const StringAndValue data[]={ { "a", 1 } }; checkData(data, LENGTHOF(data)); } -void UCharTrieTest::Test_a_ab() { +void UCharsTrieTest::Test_a_ab() { static const StringAndValue data[]={ { "a", 1 }, { "ab", 100 } @@ -145,7 +145,7 @@ void UCharTrieTest::Test_a_ab() { checkData(data, LENGTHOF(data)); } -void UCharTrieTest::TestShortestBranch() { +void UCharsTrieTest::TestShortestBranch() { static const StringAndValue data[]={ { "a", 1000 }, { "b", 2000 } @@ -153,7 +153,7 @@ void UCharTrieTest::TestShortestBranch() { checkData(data, LENGTHOF(data)); } -void UCharTrieTest::TestBranches() { +void UCharsTrieTest::TestBranches() { static const StringAndValue data[]={ { "a", 0x10 }, { "cc", 0x40 }, @@ -176,7 +176,7 @@ void UCharTrieTest::TestBranches() { } } -void UCharTrieTest::TestLongSequence() { +void UCharsTrieTest::TestLongSequence() { static const StringAndValue data[]={ { "a", -1 }, // sequence of linear-match nodes @@ -192,7 +192,7 @@ void UCharTrieTest::TestLongSequence() { checkData(data, LENGTHOF(data)); } -void UCharTrieTest::TestLongBranch() { +void UCharsTrieTest::TestLongBranch() { // Split-branch and interesting compact-integer values. static const StringAndValue data[]={ { "a", -2 }, @@ -220,7 +220,7 @@ void UCharTrieTest::TestLongBranch() { checkData(data, LENGTHOF(data)); } -void UCharTrieTest::TestValuesForState() { +void UCharsTrieTest::TestValuesForState() { // Check that saveState() and resetToState() interact properly // with next() and current(). static const StringAndValue data[]={ @@ -234,7 +234,7 @@ void UCharTrieTest::TestValuesForState() { checkData(data, LENGTHOF(data)); } -void UCharTrieTest::TestCompact() { +void UCharsTrieTest::TestCompact() { // Duplicate trailing strings and values provide opportunities for compacting. static const StringAndValue data[]={ { "+", 0 }, @@ -261,7 +261,7 @@ void UCharTrieTest::TestCompact() { checkData(data, LENGTHOF(data)); } -void UCharTrieTest::TestFirstForCodePoint() { +void UCharsTrieTest::TestFirstForCodePoint() { static const StringAndValue data[]={ { "a", 1 }, { "a\\uD800", 2 }, @@ -276,49 +276,49 @@ void UCharTrieTest::TestFirstForCodePoint() { checkData(data, LENGTHOF(data)); } -void UCharTrieTest::TestNextForCodePoint() { +void UCharsTrieTest::TestNextForCodePoint() { static const StringAndValue data[]={ { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }, { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, { "\\u4dff\\U000103ff", 99999 } }; - UCharTrieBuilder builder; + UCharsTrieBuilder builder; UnicodeString trieUChars; - if(!buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST, trieUChars)) { + if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { return; // buildTrie() reported an error } - UCharTrie trie(trieUChars.getBuffer()); - UDictTrieResult result; - if( (result=trie.nextForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x10000))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x9999))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x20000))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0xdfff))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x10ffff))!=UDICTTRIE_HAS_FINAL_VALUE || result!=trie.current() || + UCharsTrie trie(trieUChars.getBuffer()); + UStringTrieResult result; + if( (result=trie.nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() || trie.getValue()!=2000000000 ) { - errln("UCharTrie.nextForCodePoint() fails for %s", data[0].s); + errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); } - if( (result=trie.firstForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x10000))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x9999))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x20002))!=UDICTTRIE_HAS_FINAL_VALUE || result!=trie.current() || + if( (result=trie.firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() || trie.getValue()!=44444 ) { - errln("UCharTrie.nextForCodePoint() fails for %s", data[1].s); + errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); } - if( (result=trie.reset().nextForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x10000))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x9999))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x20222))!=UDICTTRIE_NO_MATCH || result!=trie.current() // no match for trail surrogate + if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie.current() // no match for trail surrogate ) { - errln("UCharTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); + errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); } - if( (result=trie.reset().nextForCodePoint(0x4dff))!=UDICTTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x103ff))!=UDICTTRIE_HAS_FINAL_VALUE || result!=trie.current() || + if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || + (result=trie.nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() || trie.getValue()!=99999 ) { - errln("UCharTrie.nextForCodePoint() fails for %s", data[2].s); + errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); } } @@ -356,8 +356,8 @@ private: } // end namespace -UBool UCharTrieTest::buildLargeTrie(UCharTrieBuilder &builder, UnicodeString &result, - int32_t numUniqueFirst) { +UBool UCharsTrieTest::buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result, + int32_t numUniqueFirst) { IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); Generator gen; builder.clear(); @@ -366,33 +366,33 @@ UBool UCharTrieTest::buildLargeTrie(UCharTrieBuilder &builder, UnicodeString &re gen.next(); } infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex()); - builder.build(UDICTTRIE_BUILD_FAST, result, errorCode); + builder.build(USTRINGTRIE_BUILD_FAST, result, errorCode); logln("serialized trie size: %ld UChars\n", (long)result.length()); return errorCode.isSuccess(); } // Exercise a large branch node. -void UCharTrieTest::TestLargeTrie() { - UCharTrieBuilder builder; +void UCharsTrieTest::TestLargeTrie() { + UCharsTrieBuilder builder; UnicodeString trieUChars; if(!buildLargeTrie(builder, trieUChars, 1111)) { return; // buildTrie() reported an error } - UCharTrie trie(trieUChars.getBuffer()); + UCharsTrie trie(trieUChars.getBuffer()); Generator gen; while(gen.countUniqueFirstChars()<1111) { UnicodeString x(gen.getString()); int32_t value=gen.getValue(); if(!x.isEmpty()) { - if(trie.first(x[0])==UDICTTRIE_NO_MATCH) { - errln("next(first char U+%04X)=UDICTTRIE_NO_MATCH for string %ld\n", + if(trie.first(x[0])==USTRINGTRIE_NO_MATCH) { + errln("next(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n", x[0], (long)gen.getIndex()); break; } x.remove(0, 1); } - UDictTrieResult result=trie.next(x.getBuffer(), x.length()); - if(!UDICTTRIE_RESULT_HAS_VALUE(result) || result!=trie.current() || value!=trie.getValue()) { + UStringTrieResult result=trie.next(x.getBuffer(), x.length()); + if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie.current() || value!=trie.getValue()) { errln("next(%d chars U+%04X U+%04X)!=hasValue or " "next()!=current() or getValue() wrong " "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex()); @@ -413,8 +413,8 @@ enum { u_y=0x79 }; -UBool UCharTrieTest::buildMonthsTrie(UCharTrieBuilder &builder, UDictTrieBuildOption buildOption, - UnicodeString &result) { +UBool UCharsTrieTest::buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, + UnicodeString &result) { // All types of nodes leading to the same value, // for code coverage of recursive functions. // In particular, we need a lot of branches on some single level @@ -454,13 +454,13 @@ UBool UCharTrieTest::buildMonthsTrie(UCharTrieBuilder &builder, UDictTrieBuildOp return buildTrie(data, LENGTHOF(data), builder, buildOption, result); } -void UCharTrieTest::TestHasUniqueValue() { - UCharTrieBuilder builder; +void UCharsTrieTest::TestHasUniqueValue() { + UCharsTrieBuilder builder; UnicodeString trieUChars; - if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST, trieUChars)) { + if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { return; // buildTrie() reported an error } - UCharTrie trie(trieUChars.getBuffer()); + UCharsTrie trie(trieUChars.getBuffer()); int32_t uniqueValue; if(trie.hasUniqueValue(uniqueValue)) { errln("unique value at root"); @@ -477,7 +477,7 @@ void UCharTrieTest::TestHasUniqueValue() { if(trie.hasUniqueValue(uniqueValue)) { errln("unique value after \"ju\""); } - if(trie.next(u_n)!=UDICTTRIE_HAS_VALUE || 6!=trie.getValue()) { + if(trie.next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) { errln("not normal value 6 after \"jun\""); } // hasUniqueValue() after getValue() @@ -501,13 +501,13 @@ private: UnicodeString &str; }; -void UCharTrieTest::TestGetNextUChars() { - UCharTrieBuilder builder; +void UCharsTrieTest::TestGetNextUChars() { + UCharsTrieBuilder builder; UnicodeString trieUChars; - if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL, trieUChars)) { + if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) { return; // buildTrie() reported an error } - UCharTrie trie(trieUChars.getBuffer()); + UCharsTrie trie(trieUChars.getBuffer()); UnicodeString buffer; UnicodeStringAppendable app(buffer); int32_t count=trie.getNextUChars(app); @@ -523,7 +523,7 @@ void UCharTrieTest::TestGetNextUChars() { errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); } // getNextUChars() after getValue() - trie.getValue(); // next() had returned UDICTTRIE_HAS_VALUE. + trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. count=trie.getNextUChars(app.reset()); if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); @@ -548,20 +548,20 @@ void UCharTrieTest::TestGetNextUChars() { } } -void UCharTrieTest::TestIteratorFromBranch() { - UCharTrieBuilder builder; +void UCharsTrieTest::TestIteratorFromBranch() { + UCharsTrieBuilder builder; UnicodeString trieUChars; - if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST, trieUChars)) { + if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { return; // buildTrie() reported an error } - UCharTrie trie(trieUChars.getBuffer()); + UCharsTrie trie(trieUChars.getBuffer()); // Go to a branch node. trie.next(u_j); trie.next(u_a); trie.next(u_n); IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); - UCharTrieIterator iter(trie, 0, errorCode); - if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) { + UCharsTrieIterator iter(trie, 0, errorCode); + if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) { return; } // Expected data: Same as in buildMonthsTrie(), except only the suffixes @@ -599,13 +599,13 @@ void UCharTrieTest::TestIteratorFromBranch() { checkIterator(iter.reset(), data, LENGTHOF(data)); } -void UCharTrieTest::TestIteratorFromLinearMatch() { - UCharTrieBuilder builder; +void UCharsTrieTest::TestIteratorFromLinearMatch() { + UCharsTrieBuilder builder; UnicodeString trieUChars; - if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL, trieUChars)) { + if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) { return; // buildTrie() reported an error } - UCharTrie trie(trieUChars.getBuffer()); + UCharsTrie trie(trieUChars.getBuffer()); // Go into a linear-match node. trie.next(u_j); trie.next(u_a); @@ -613,8 +613,8 @@ void UCharTrieTest::TestIteratorFromLinearMatch() { trie.next(u_u); trie.next(u_a); IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); - UCharTrieIterator iter(trie, 0, errorCode); - if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) { + UCharsTrieIterator iter(trie, 0, errorCode); + if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) { return; } // Expected data: Same as in buildMonthsTrie(), except only the suffixes @@ -629,15 +629,15 @@ void UCharTrieTest::TestIteratorFromLinearMatch() { checkIterator(iter.reset(), data, LENGTHOF(data)); } -void UCharTrieTest::TestTruncatingIteratorFromRoot() { - UCharTrieBuilder builder; +void UCharsTrieTest::TestTruncatingIteratorFromRoot() { + UCharsTrieBuilder builder; UnicodeString trieUChars; - if(!buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST, trieUChars)) { + if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { return; // buildTrie() reported an error } IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); - UCharTrieIterator iter(trieUChars.getBuffer(), 4, errorCode); - if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) { + UCharsTrieIterator iter(trieUChars.getBuffer(), 4, errorCode); + if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) { return; } // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters @@ -676,25 +676,25 @@ void UCharTrieTest::TestTruncatingIteratorFromRoot() { checkIterator(iter.reset(), data, LENGTHOF(data)); } -void UCharTrieTest::TestTruncatingIteratorFromLinearMatchShort() { +void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { static const StringAndValue data[]={ { "abcdef", 10 }, { "abcdepq", 200 }, { "abcdeyz", 3000 } }; - UCharTrieBuilder builder; + UCharsTrieBuilder builder; UnicodeString trieUChars; - if(!buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST, trieUChars)) { + if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { return; // buildTrie() reported an error } - UCharTrie trie(trieUChars.getBuffer()); + UCharsTrie trie(trieUChars.getBuffer()); // Go into a linear-match node. trie.next(u_a); trie.next(u_b); IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); // Truncate within the linear-match node. - UCharTrieIterator iter(trie, 2, errorCode); - if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) { + UCharsTrieIterator iter(trie, 2, errorCode); + if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) { return; } static const StringAndValue expected[]={ @@ -706,26 +706,26 @@ void UCharTrieTest::TestTruncatingIteratorFromLinearMatchShort() { checkIterator(iter.reset(), expected, LENGTHOF(expected)); } -void UCharTrieTest::TestTruncatingIteratorFromLinearMatchLong() { +void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { static const StringAndValue data[]={ { "abcdef", 10 }, { "abcdepq", 200 }, { "abcdeyz", 3000 } }; - UCharTrieBuilder builder; + UCharsTrieBuilder builder; UnicodeString trieUChars; - if(!buildTrie(data, LENGTHOF(data), builder, UDICTTRIE_BUILD_FAST, trieUChars)) { + if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { return; // buildTrie() reported an error } - UCharTrie trie(trieUChars.getBuffer()); + UCharsTrie trie(trieUChars.getBuffer()); // Go into a linear-match node. trie.next(u_a); trie.next(u_b); trie.next(u_c); IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); // Truncate after the linear-match node. - UCharTrieIterator iter(trie, 3, errorCode); - if(errorCode.logIfFailureAndReset("UCharTrieIterator(trie) constructor")) { + UCharsTrieIterator iter(trie, 3, errorCode); + if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) { return; } static const StringAndValue expected[]={ @@ -739,15 +739,15 @@ void UCharTrieTest::TestTruncatingIteratorFromLinearMatchLong() { checkIterator(iter.reset(), expected, LENGTHOF(expected)); } -void UCharTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { +void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { logln("checkData(dataLength=%d, fast)", (int)dataLength); - checkData(data, dataLength, UDICTTRIE_BUILD_FAST); + checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); logln("checkData(dataLength=%d, small)", (int)dataLength); - checkData(data, dataLength, UDICTTRIE_BUILD_SMALL); + checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); } -void UCharTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption) { - UCharTrieBuilder builder; +void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { + UCharsTrieBuilder builder; UnicodeString trieUChars; if(!buildTrie(data, dataLength, builder, buildOption, trieUChars)) { return; // buildTrie() reported an error @@ -759,8 +759,8 @@ void UCharTrieTest::checkData(const StringAndValue data[], int32_t dataLength, U checkIterator(trieUChars, data, dataLength); } -UBool UCharTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, - UCharTrieBuilder &builder, UDictTrieBuildOption buildOption, UnicodeString &result) { +UBool UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, + UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result) { IcuTestErrorCode errorCode(*this, "buildTrie()"); // Add the items to the trie builder in an interesting (not trivial, not random) order. int32_t index, step; @@ -793,9 +793,9 @@ UBool UCharTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, return errorCode.isSuccess(); } -void UCharTrieTest::checkFirst(const UnicodeString &trieUChars, - const StringAndValue data[], int32_t dataLength) { - UCharTrie trie(trieUChars.getBuffer()); +void UCharsTrieTest::checkFirst(const UnicodeString &trieUChars, + const StringAndValue data[], int32_t dataLength) { + UCharsTrie trie(trieUChars.getBuffer()); for(int32_t i=0; i1 ? expectedString[1] : 0; - UDictTrieResult firstResult=trie.first(c); - int32_t firstValue=UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1; - UDictTrieResult nextResult=trie.next(nextCp); + UStringTrieResult firstResult=trie.first(c); + int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; + UStringTrieResult nextResult=trie.next(nextCp); if(firstResult!=trie.reset().next(c) || firstResult!=trie.current() || - firstValue!=(UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1) || + firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || nextResult!=trie.next(nextCp) ) { errln("trie.first(U+%04X)!=trie.reset().next(same) for %s", @@ -818,11 +818,11 @@ void UCharTrieTest::checkFirst(const UnicodeString &trieUChars, int32_t cLength=U16_LENGTH(c); nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0; firstResult=trie.firstForCodePoint(c); - firstValue=UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1; + firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; nextResult=trie.nextForCodePoint(nextCp); if(firstResult!=trie.reset().nextForCodePoint(c) || firstResult!=trie.current() || - firstValue!=(UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1) || + firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || nextResult!=trie.nextForCodePoint(nextCp) ) { errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", @@ -831,15 +831,15 @@ void UCharTrieTest::checkFirst(const UnicodeString &trieUChars, } } -void UCharTrieTest::checkNext(const UnicodeString &trieUChars, - const StringAndValue data[], int32_t dataLength) { - UCharTrie trie(trieUChars.getBuffer()); - UCharTrie::State state; +void UCharsTrieTest::checkNext(const UnicodeString &trieUChars, + const StringAndValue data[], int32_t dataLength) { + UCharsTrie trie(trieUChars.getBuffer()); + UCharsTrie::State state; for(int32_t i=0; i #include "unicode/uperf.h" #include "unicode/utext.h" -#include "bytetrie.h" -#include "bytetriebuilder.h" +#include "bytestrie.h" +#include "bytestriebuilder.h" #include "charstr.h" #include "package.h" #include "toolutil.h" #include "triedict.h" #include "ucbuf.h" // struct ULine -#include "uchartrie.h" -#include "uchartriebuilder.h" +#include "ucharstrie.h" +#include "ucharstriebuilder.h" #include "uoptions.h" #include "uvectr32.h" @@ -259,18 +259,18 @@ public: } }; -static int32_t byteTrieLookup(const char *s, const char *nameTrieBytes) { - ByteTrie trie(nameTrieBytes); - if(UDICTTRIE_RESULT_HAS_VALUE(trie.next(s, -1))) { +static int32_t bytesTrieLookup(const char *s, const char *nameTrieBytes) { + BytesTrie trie(nameTrieBytes); + if(USTRINGTRIE_HAS_VALUE(trie.next(s, -1))) { return trie.getValue(); } else { return -1; } } -class ByteTriePackageLookup : public PackageLookup { +class BytesTriePackageLookup : public PackageLookup { public: - ByteTriePackageLookup(const DictionaryTriePerfTest &perf) + BytesTriePackageLookup(const DictionaryTriePerfTest &perf) : PackageLookup(perf) { IcuToolErrorCode errorCode("BinarySearchPackageLookup()"); int32_t count=pkg.getItemCount(); @@ -292,20 +292,20 @@ public: // NUL-terminate the name for call() to find the next one. itemNames.append(0, errorCode); } - int32_t length=builder.build(UDICTTRIE_BUILD_SMALL, errorCode).length(); - printf("size of ByteTrie: %6ld\n", (long)length); + int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, errorCode).length(); + printf("size of BytesTrie: %6ld\n", (long)length); // count+1: +1 for the last-item limit offset which we should have always had printf("size of dataOffsets:%6ld\n", (long)((count+1)*4)); printf("total index size: %6ld\n", (long)(length+(count+1)*4)); } - virtual ~ByteTriePackageLookup() {} + virtual ~BytesTriePackageLookup() {} virtual void call(UErrorCode *pErrorCode) { int32_t count=pkg.getItemCount(); - const char *nameTrieBytes=builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data(); + const char *nameTrieBytes=builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data(); const char *name=itemNames.data(); for(int32_t i=0; i=textLimit) { @@ -447,11 +447,11 @@ ucharTrieMatches(UCharTrie &trie, return numChars; } -class UCharTrieDictLookup : public DictLookup { +class UCharsTrieDictLookup : public DictLookup { public: - UCharTrieDictLookup(const DictionaryTriePerfTest &perfTest) + UCharsTrieDictLookup(const DictionaryTriePerfTest &perfTest) : DictLookup(perfTest) { - IcuToolErrorCode errorCode("UCharTrieDictLookup()"); + IcuToolErrorCode errorCode("UCharsTrieDictLookup()"); const ULine *lines=perf.getCachedLines(); int32_t numLines=perf.getNumLines(); for(int32_t i=0; i=textLimit) { @@ -616,16 +616,16 @@ byteTrieMatches(ByteTrie &trie, return numChars; } -class ByteTrieDictMatches : public ByteTrieDictLookup { +class BytesTrieDictMatches : public BytesTrieDictLookup { public: - ByteTrieDictMatches(const DictionaryTriePerfTest &perfTest) - : ByteTrieDictLookup(perfTest) {} + BytesTrieDictMatches(const DictionaryTriePerfTest &perfTest) + : BytesTrieDictLookup(perfTest) {} virtual void call(UErrorCode *pErrorCode) { if(noDict) { return; } - ByteTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data()); + BytesTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data()); UText text=UTEXT_INITIALIZER; int32_t lengths[20]; const ULine *lines=perf.getCachedLines(); @@ -637,8 +637,8 @@ public: } utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode); int32_t count=0; - byteTrieMatches(trie, &text, lines[i].len, - lengths, count, LENGTHOF(lengths)); + bytesTrieMatches(trie, &text, lines[i].len, + lengths, count, LENGTHOF(lengths)); if(count==0 || lengths[count-1]!=lines[i].len) { fprintf(stderr, "word %ld (0-based) not found\n", (long)i); } @@ -646,16 +646,16 @@ public: } }; -class ByteTrieDictContains : public ByteTrieDictLookup { +class BytesTrieDictContains : public BytesTrieDictLookup { public: - ByteTrieDictContains(const DictionaryTriePerfTest &perfTest) - : ByteTrieDictLookup(perfTest) {} + BytesTrieDictContains(const DictionaryTriePerfTest &perfTest) + : BytesTrieDictLookup(perfTest) {} virtual void call(UErrorCode *pErrorCode) { if(noDict) { return; } - ByteTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data()); + BytesTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data()); const ULine *lines=perf.getCachedLines(); int32_t numLines=perf.getNumLines(); for(int32_t i=0; i0) { - uprv_memcpy(newElements, elements, elementsLength*sizeof(ByteTrieElement)); + uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement)); } delete[] elements; elements=newElements; @@ -166,15 +164,15 @@ U_CDECL_BEGIN static int32_t U_CALLCONV compareElementStrings(const void *context, const void *left, const void *right) { const CharString *strings=reinterpret_cast(context); - const ByteTrieElement *leftElement=reinterpret_cast(left); - const ByteTrieElement *rightElement=reinterpret_cast(right); + const BytesTrieElement *leftElement=reinterpret_cast(left); + const BytesTrieElement *rightElement=reinterpret_cast(right); return leftElement->compareStringTo(*rightElement, *strings); } U_CDECL_END StringPiece -ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode) { +BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { StringPiece result; if(U_FAILURE(errorCode)) { return result; @@ -188,7 +186,7 @@ ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode) errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return result; } - uprv_sortArray(elements, elementsLength, (int32_t)sizeof(ByteTrieElement), + uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement), compareElementStrings, &strings, FALSE, // need not be a stable sort &errorCode); @@ -214,7 +212,7 @@ ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode) errorCode=U_MEMORY_ALLOCATION_ERROR; return result; } - DictTrieBuilder::build(buildOption, elementsLength, errorCode); + StringTrieBuilder::build(buildOption, elementsLength, errorCode); if(bytes==NULL) { errorCode=U_MEMORY_ALLOCATION_ERROR; } else { @@ -224,24 +222,24 @@ ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode) } int32_t -ByteTrieBuilder::getElementStringLength(int32_t i) const { +BytesTrieBuilder::getElementStringLength(int32_t i) const { return elements[i].getStringLength(strings); } UChar -ByteTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const { +BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const { return (uint8_t)elements[i].charAt(byteIndex, strings); } int32_t -ByteTrieBuilder::getElementValue(int32_t i) const { +BytesTrieBuilder::getElementValue(int32_t i) const { return elements[i].getValue(); } int32_t -ByteTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const { - const ByteTrieElement &firstElement=elements[first]; - const ByteTrieElement &lastElement=elements[last]; +BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const { + const BytesTrieElement &firstElement=elements[first]; + const BytesTrieElement &lastElement=elements[last]; int32_t minStringLength=firstElement.getStringLength(strings); while(++byteIndexwrite(builder); b.write(s, length); offset=b.write(b.getMinLinearMatch()+length-1); } -DictTrieBuilder::Node * -ByteTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, - Node *nextNode) const { +StringTrieBuilder::Node * +BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, + Node *nextNode) const { return new BTLinearMatchNode( elements[i].getString(strings).data()+byteIndex, length, @@ -318,7 +316,7 @@ ByteTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t len } UBool -ByteTrieBuilder::ensureCapacity(int32_t length) { +BytesTrieBuilder::ensureCapacity(int32_t length) { if(bytes==NULL) { return FALSE; // previous memory allocation had failed } @@ -344,7 +342,7 @@ ByteTrieBuilder::ensureCapacity(int32_t length) { } int32_t -ByteTrieBuilder::write(int32_t byte) { +BytesTrieBuilder::write(int32_t byte) { int32_t newLength=bytesLength+1; if(ensureCapacity(newLength)) { bytesLength=newLength; @@ -354,7 +352,7 @@ ByteTrieBuilder::write(int32_t byte) { } int32_t -ByteTrieBuilder::write(const char *b, int32_t length) { +BytesTrieBuilder::write(const char *b, int32_t length) { int32_t newLength=bytesLength+length; if(ensureCapacity(newLength)) { bytesLength=newLength; @@ -364,31 +362,31 @@ ByteTrieBuilder::write(const char *b, int32_t length) { } int32_t -ByteTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) { +BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) { return write(elements[i].getString(strings).data()+byteIndex, length); } int32_t -ByteTrieBuilder::writeValueAndFinal(int32_t i, UBool final) { +BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool final) { char intBytes[5]; int32_t length=1; if(i<0 || i>0xffffff) { - intBytes[0]=(char)ByteTrie::kFiveByteValueLead; + intBytes[0]=(char)BytesTrie::kFiveByteValueLead; intBytes[1]=(char)(i>>24); intBytes[2]=(char)(i>>16); intBytes[3]=(char)(i>>8); intBytes[4]=(char)i; length=5; - } else if(i<=ByteTrie::kMaxOneByteValue) { - intBytes[0]=(char)(ByteTrie::kMinOneByteValueLead+i); + } else if(i<=BytesTrie::kMaxOneByteValue) { + intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i); } else { - if(i<=ByteTrie::kMaxTwoByteValue) { - intBytes[0]=(char)(ByteTrie::kMinTwoByteValueLead+(i>>8)); + if(i<=BytesTrie::kMaxTwoByteValue) { + intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8)); } else { - if(i<=ByteTrie::kMaxThreeByteValue) { - intBytes[0]=(char)(ByteTrie::kMinThreeByteValueLead+(i>>16)); + if(i<=BytesTrie::kMaxThreeByteValue) { + intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16)); } else { - intBytes[0]=(char)ByteTrie::kFourByteValueLead; + intBytes[0]=(char)BytesTrie::kFourByteValueLead; intBytes[1]=(char)(i>>16); length=2; } @@ -401,7 +399,7 @@ ByteTrieBuilder::writeValueAndFinal(int32_t i, UBool final) { } int32_t -ByteTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { +BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { int32_t offset=write(node); if(hasValue) { offset=writeValueAndFinal(value, FALSE); @@ -410,26 +408,26 @@ ByteTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) } int32_t -ByteTrieBuilder::writeDeltaTo(int32_t jumpTarget) { +BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) { int32_t i=bytesLength-jumpTarget; char intBytes[5]; int32_t length; U_ASSERT(i>=0); - if(i<=ByteTrie::kMaxOneByteDelta) { + if(i<=BytesTrie::kMaxOneByteDelta) { length=0; - } else if(i<=ByteTrie::kMaxTwoByteDelta) { - intBytes[0]=(char)(ByteTrie::kMinTwoByteDeltaLead+(i>>8)); + } else if(i<=BytesTrie::kMaxTwoByteDelta) { + intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8)); length=1; } else { - if(i<=ByteTrie::kMaxThreeByteDelta) { - intBytes[0]=(char)(ByteTrie::kMinThreeByteDeltaLead+(i>>16)); + if(i<=BytesTrie::kMaxThreeByteDelta) { + intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16)); length=2; } else { if(i<=0xffffff) { - intBytes[0]=(char)ByteTrie::kFourByteDeltaLead; + intBytes[0]=(char)BytesTrie::kFourByteDeltaLead; length=3; } else { - intBytes[0]=(char)ByteTrie::kFiveByteDeltaLead; + intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead; intBytes[1]=(char)(i>>24); length=4; } diff --git a/icu4c/source/tools/toolutil/bytetriebuilder.h b/icu4c/source/tools/toolutil/bytestriebuilder.h similarity index 76% rename from icu4c/source/tools/toolutil/bytetriebuilder.h rename to icu4c/source/tools/toolutil/bytestriebuilder.h index 563216f6e5d..fc7e54cd0a6 100644 --- a/icu4c/source/tools/toolutil/bytetriebuilder.h +++ b/icu4c/source/tools/toolutil/bytestriebuilder.h @@ -3,42 +3,43 @@ * Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: bytetriebuilder.h +* file name: bytestriebuilder.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2010sep25 * created by: Markus W. Scherer -* -* Builder class for ByteTrie dictionary trie. */ -#ifndef __BYTETRIEBUILDER_H__ -#define __BYTETRIEBUILDER_H__ +#ifndef __BYTESTRIEBUILDER_H__ +#define __BYTESTRIEBUILDER_H__ #include "unicode/utypes.h" #include "unicode/stringpiece.h" -#include "bytetrie.h" +#include "bytestrie.h" #include "charstr.h" -#include "dicttriebuilder.h" +#include "stringtriebuilder.h" U_NAMESPACE_BEGIN -class ByteTrieElement; +class BytesTrieElement; -class U_TOOLUTIL_API ByteTrieBuilder : public DictTrieBuilder { +/** + * Builder class for BytesTrie. + */ +class U_TOOLUTIL_API BytesTrieBuilder : public StringTrieBuilder { public: - ByteTrieBuilder() + BytesTrieBuilder() : elements(NULL), elementsCapacity(0), elementsLength(0), bytes(NULL), bytesCapacity(0), bytesLength(0) {} - virtual ~ByteTrieBuilder(); + virtual ~BytesTrieBuilder(); - ByteTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode); + BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode); - StringPiece build(UDictTrieBuildOption buildOption, UErrorCode &errorCode); + StringPiece build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); - ByteTrieBuilder &clear() { + BytesTrieBuilder &clear() { strings.clear(); elementsLength=0; bytesLength=0; @@ -58,15 +59,15 @@ private: virtual UBool matchNodesCanHaveValues() const { return FALSE; } - virtual int32_t getMaxBranchLinearSubNodeLength() const { return ByteTrie::kMaxBranchLinearSubNodeLength; } - virtual int32_t getMinLinearMatch() const { return ByteTrie::kMinLinearMatch; } - virtual int32_t getMaxLinearMatchLength() const { return ByteTrie::kMaxLinearMatchLength; } + virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; } + virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; } + virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; } class BTLinearMatchNode : public LinearMatchNode { public: BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); virtual UBool operator==(const Node &other) const; - virtual void write(DictTrieBuilder &builder); + virtual void write(StringTrieBuilder &builder); private: const char *s; }; @@ -83,7 +84,7 @@ private: virtual int32_t writeDeltaTo(int32_t jumpTarget); CharString strings; - ByteTrieElement *elements; + BytesTrieElement *elements; int32_t elementsCapacity; int32_t elementsLength; @@ -96,4 +97,4 @@ private: U_NAMESPACE_END -#endif // __BYTETRIEBUILDER_H__ +#endif // __BYTESTRIEBUILDER_H__ diff --git a/icu4c/source/tools/toolutil/bytetrieiterator.cpp b/icu4c/source/tools/toolutil/bytestrieiterator.cpp similarity index 78% rename from icu4c/source/tools/toolutil/bytetrieiterator.cpp rename to icu4c/source/tools/toolutil/bytestrieiterator.cpp index eb513142cc0..a922aeb8f12 100644 --- a/icu4c/source/tools/toolutil/bytetrieiterator.cpp +++ b/icu4c/source/tools/toolutil/bytestrieiterator.cpp @@ -1,9 +1,9 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: bytetrieiterator.cpp +* file name: bytestrieiterator.cpp * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -14,22 +14,22 @@ #include "unicode/utypes.h" #include "unicode/stringpiece.h" -#include "bytetrie.h" -#include "bytetrieiterator.h" +#include "bytestrie.h" +#include "bytestrieiterator.h" #include "charstr.h" #include "uvectr32.h" U_NAMESPACE_BEGIN -ByteTrieIterator::ByteTrieIterator(const void *trieBytes, int32_t maxStringLength, - UErrorCode &errorCode) +BytesTrieIterator::BytesTrieIterator(const void *trieBytes, int32_t maxStringLength, + UErrorCode &errorCode) : bytes_(reinterpret_cast(trieBytes)), pos_(bytes_), initialPos_(bytes_), remainingMatchLength_(-1), initialRemainingMatchLength_(-1), maxLength_(maxStringLength), value_(0), stack_(errorCode) {} -ByteTrieIterator::ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength, - UErrorCode &errorCode) +BytesTrieIterator::BytesTrieIterator(const BytesTrie &trie, int32_t maxStringLength, + UErrorCode &errorCode) : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), remainingMatchLength_(trie.remainingMatchLength_), initialRemainingMatchLength_(trie.remainingMatchLength_), @@ -47,7 +47,7 @@ ByteTrieIterator::ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength } } -ByteTrieIterator &ByteTrieIterator::reset() { +BytesTrieIterator &BytesTrieIterator::reset() { pos_=initialPos_; remainingMatchLength_=initialRemainingMatchLength_; int32_t length=remainingMatchLength_+1; // Remaining match length. @@ -62,7 +62,7 @@ ByteTrieIterator &ByteTrieIterator::reset() { } UBool -ByteTrieIterator::next(UErrorCode &errorCode) { +BytesTrieIterator::next(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return FALSE; } @@ -95,14 +95,14 @@ ByteTrieIterator::next(UErrorCode &errorCode) { } for(;;) { int32_t node=*pos++; - if(node>=ByteTrie::kMinValueLead) { + if(node>=BytesTrie::kMinValueLead) { // Deliver value for the byte sequence so far. - UBool isFinal=(UBool)(node&ByteTrie::kValueIsFinal); - value_=ByteTrie::readValue(pos, node>>1); + UBool isFinal=(UBool)(node&BytesTrie::kValueIsFinal); + value_=BytesTrie::readValue(pos, node>>1); if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { pos_=NULL; } else { - pos_=ByteTrie::skipValue(pos, node); + pos_=BytesTrie::skipValue(pos, node); } sp_.set(str_.data(), str_.length()); return TRUE; @@ -110,7 +110,7 @@ ByteTrieIterator::next(UErrorCode &errorCode) { if(maxLength_>0 && str_.length()==maxLength_) { return truncateAndStop(); } - if(node0 && str_.length()+length>maxLength_) { str_.append(reinterpret_cast(pos), maxLength_-str_.length(), errorCode); @@ -134,23 +134,23 @@ ByteTrieIterator::next(UErrorCode &errorCode) { // Branch node, needs to take the first outbound edge and push state for the rest. const uint8_t * -ByteTrieIterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { - while(length>ByteTrie::kMaxBranchLinearSubNodeLength) { +BytesTrieIterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { + while(length>BytesTrie::kMaxBranchLinearSubNodeLength) { ++pos; // ignore the comparison byte // Push state for the greater-or-equal edge. - stack_.addElement((int32_t)(ByteTrie::skipDelta(pos)-bytes_), errorCode); + stack_.addElement((int32_t)(BytesTrie::skipDelta(pos)-bytes_), errorCode); stack_.addElement(((length-(length>>1))<<16)|str_.length(), errorCode); // Follow the less-than edge. length>>=1; - pos=ByteTrie::jumpByDelta(pos); + pos=BytesTrie::jumpByDelta(pos); } // List of key-value pairs where values are either final values or jump deltas. // Read the first (key, value) pair. uint8_t trieByte=*pos++; int32_t node=*pos++; - UBool isFinal=(UBool)(node&ByteTrie::kValueIsFinal); - int32_t value=ByteTrie::readValue(pos, node>>1); - pos=ByteTrie::skipValue(pos, node); + UBool isFinal=(UBool)(node&BytesTrie::kValueIsFinal); + int32_t value=BytesTrie::readValue(pos, node>>1); + pos=BytesTrie::skipValue(pos, node); stack_.addElement((int32_t)(pos-bytes_), errorCode); stack_.addElement(((length-1)<<16)|str_.length(), errorCode); str_.append((char)trieByte, errorCode); diff --git a/icu4c/source/tools/toolutil/bytetrieiterator.h b/icu4c/source/tools/toolutil/bytestrieiterator.h similarity index 82% rename from icu4c/source/tools/toolutil/bytetrieiterator.h rename to icu4c/source/tools/toolutil/bytestrieiterator.h index c985703a886..1af5ee6f249 100644 --- a/icu4c/source/tools/toolutil/bytetrieiterator.h +++ b/icu4c/source/tools/toolutil/bytestrieiterator.h @@ -1,9 +1,9 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: bytetrieiterator.h +* file name: bytestrieiterator.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -12,32 +12,32 @@ * created by: Markus W. Scherer */ -#ifndef __BYTETRIEITERATOR_H__ -#define __BYTETRIEITERATOR_H__ +#ifndef __BYTESTRIEITERATOR_H__ +#define __BYTESTRIEITERATOR_H__ /** * \file - * \brief C++ API: ByteTrie iterator for all of its (byte sequence, value) pairs. + * \brief C++ API: BytesTrie iterator for all of its (byte sequence, value) pairs. */ -// Needed if and when we change the .dat package index to a ByteTrie, +// Needed if and when we change the .dat package index to a BytesTrie, // so that icupkg can work with an input package. #include "unicode/utypes.h" #include "unicode/stringpiece.h" -#include "bytetrie.h" +#include "bytestrie.h" #include "charstr.h" #include "uvectr32.h" U_NAMESPACE_BEGIN /** - * Iterator for all of the (byte sequence, value) pairs in a ByteTrie. + * Iterator for all of the (byte sequence, value) pairs in a BytesTrie. */ -class U_TOOLUTIL_API ByteTrieIterator : public UMemory { +class U_TOOLUTIL_API BytesTrieIterator : public UMemory { public: /** - * Iterates from the root of a byte-serialized ByteTrie. + * Iterates from the root of a byte-serialized BytesTrie. * @param trieBytes The trie bytes. * @param maxStringLength If 0, the iterator returns full strings/byte sequences. * Otherwise, the iterator returns strings with this maximum length. @@ -46,10 +46,10 @@ public: * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) */ - ByteTrieIterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode); + BytesTrieIterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode); /** - * Iterates from the current state of the specified ByteTrie. + * Iterates from the current state of the specified BytesTrie. * @param trie The trie whose state will be copied for iteration. * @param maxStringLength If 0, the iterator returns full strings/byte sequences. * Otherwise, the iterator returns strings with this maximum length. @@ -58,12 +58,12 @@ public: * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) */ - ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); + BytesTrieIterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); /** * Resets this iterator to its initial state. */ - ByteTrieIterator &reset(); + BytesTrieIterator &reset(); /** * Finds the next (byte sequence, value) pair if there is one. @@ -113,7 +113,7 @@ private: // The stack stores pairs of integers for backtracking to another // outbound edge of a branch node. - // The first integer is an offset from ByteTrie.bytes. + // The first integer is an offset from BytesTrie.bytes. // The second integer has the str.length() from before the node in bits 15..0, // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.) // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24, @@ -123,4 +123,4 @@ private: U_NAMESPACE_END -#endif // __BYTETRIEITERATOR_H__ +#endif // __BYTESTRIEITERATOR_H__ diff --git a/icu4c/source/tools/toolutil/dicttriebuilder.cpp b/icu4c/source/tools/toolutil/stringtriebuilder.cpp similarity index 85% rename from icu4c/source/tools/toolutil/dicttriebuilder.cpp rename to icu4c/source/tools/toolutil/stringtriebuilder.cpp index 0acd9cc97f6..642a0ba9f26 100644 --- a/icu4c/source/tools/toolutil/dicttriebuilder.cpp +++ b/icu4c/source/tools/toolutil/stringtriebuilder.cpp @@ -3,51 +3,49 @@ * Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: dicttriebuilder.cpp +* file name: stringtriebuilder.cpp * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2010dec24 * created by: Markus W. Scherer -* -* Base class for dictionary-trie builder classes. */ #include // for 'typeid' to work #include "unicode/utypes.h" -#include "dicttriebuilder.h" +#include "stringtriebuilder.h" #include "uassert.h" #include "uhash.h" U_CDECL_BEGIN static int32_t U_CALLCONV -hashDictTrieNode(const UHashTok key) { - return U_NAMESPACE_QUALIFIER DictTrieBuilder::hashNode(key.pointer); +hashStringTrieNode(const UHashTok key) { + return U_NAMESPACE_QUALIFIER StringTrieBuilder::hashNode(key.pointer); } static UBool U_CALLCONV -equalDictTrieNodes(const UHashTok key1, const UHashTok key2) { - return U_NAMESPACE_QUALIFIER DictTrieBuilder::equalNodes(key1.pointer, key2.pointer); +equalStringTrieNodes(const UHashTok key1, const UHashTok key2) { + return U_NAMESPACE_QUALIFIER StringTrieBuilder::equalNodes(key1.pointer, key2.pointer); } U_CDECL_END U_NAMESPACE_BEGIN -DictTrieBuilder::DictTrieBuilder() : nodes(NULL) {} +StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {} -DictTrieBuilder::~DictTrieBuilder() { +StringTrieBuilder::~StringTrieBuilder() { deleteCompactBuilder(); } void -DictTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) { +StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } - nodes=uhash_openSize(hashDictTrieNode, equalDictTrieNodes, NULL, + nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL, sizeGuess, &errorCode); if(U_SUCCESS(errorCode) && nodes==NULL) { errorCode=U_MEMORY_ALLOCATION_ERROR; @@ -58,17 +56,17 @@ DictTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) } void -DictTrieBuilder::deleteCompactBuilder() { +StringTrieBuilder::deleteCompactBuilder() { uhash_close(nodes); nodes=NULL; } void -DictTrieBuilder::build(UDictTrieBuildOption buildOption, int32_t elementsLength, +StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode) { - if(buildOption==UDICTTRIE_BUILD_FAST) { + if(buildOption==USTRINGTRIE_BUILD_FAST) { writeNode(0, elementsLength, 0); - } else /* UDICTTRIE_BUILD_SMALL */ { + } else /* USTRINGTRIE_BUILD_SMALL */ { createCompactBuilder(2*elementsLength, errorCode); Node *root=makeNode(0, elementsLength, 0, errorCode); if(U_SUCCESS(errorCode)) { @@ -83,7 +81,7 @@ DictTrieBuilder::build(UDictTrieBuildOption buildOption, int32_t elementsLength, // and all strings of the [start..limit[ elements must be sorted and // have a common prefix of length unitIndex. int32_t -DictTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) { +StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) { UBool hasValue=FALSE; int32_t value=0; int32_t type; @@ -131,7 +129,7 @@ DictTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) { // starthashCode(); } UBool -DictTrieBuilder::equalNodes(const void *left, const void *right) { +StringTrieBuilder::equalNodes(const void *left, const void *right) { return *(const Node *)left==*(const Node *)right; } -UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(DictTrieBuilder) +UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(StringTrieBuilder) UBool -DictTrieBuilder::Node::operator==(const Node &other) const { +StringTrieBuilder::Node::operator==(const Node &other) const { return this==&other || (typeid(*this)==typeid(other) && hash==other.hash); } int32_t -DictTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) { +StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) { if(offset==0) { offset=edgeNumber; } return edgeNumber; } -UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(DictTrieBuilder::Node) +UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(StringTrieBuilder::Node) UBool -DictTrieBuilder::FinalValueNode::operator==(const Node &other) const { +StringTrieBuilder::FinalValueNode::operator==(const Node &other) const { if(this==&other) { return TRUE; } @@ -412,12 +410,12 @@ DictTrieBuilder::FinalValueNode::operator==(const Node &other) const { } void -DictTrieBuilder::FinalValueNode::write(DictTrieBuilder &builder) { +StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) { offset=builder.writeValueAndFinal(value, TRUE); } UBool -DictTrieBuilder::ValueNode::operator==(const Node &other) const { +StringTrieBuilder::ValueNode::operator==(const Node &other) const { if(this==&other) { return TRUE; } @@ -429,7 +427,7 @@ DictTrieBuilder::ValueNode::operator==(const Node &other) const { } UBool -DictTrieBuilder::IntermediateValueNode::operator==(const Node &other) const { +StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const { if(this==&other) { return TRUE; } @@ -441,7 +439,7 @@ DictTrieBuilder::IntermediateValueNode::operator==(const Node &other) const { } int32_t -DictTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) { +StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) { if(offset==0) { offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); } @@ -449,13 +447,13 @@ DictTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) } void -DictTrieBuilder::IntermediateValueNode::write(DictTrieBuilder &builder) { +StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) { next->write(builder); offset=builder.writeValueAndFinal(value, FALSE); } UBool -DictTrieBuilder::LinearMatchNode::operator==(const Node &other) const { +StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const { if(this==&other) { return TRUE; } @@ -467,7 +465,7 @@ DictTrieBuilder::LinearMatchNode::operator==(const Node &other) const { } int32_t -DictTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) { +StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) { if(offset==0) { offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); } @@ -475,7 +473,7 @@ DictTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) { } UBool -DictTrieBuilder::ListBranchNode::operator==(const Node &other) const { +StringTrieBuilder::ListBranchNode::operator==(const Node &other) const { if(this==&other) { return TRUE; } @@ -492,7 +490,7 @@ DictTrieBuilder::ListBranchNode::operator==(const Node &other) const { } int32_t -DictTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) { +StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) { if(offset==0) { firstEdgeNumber=edgeNumber; int32_t step=0; @@ -511,7 +509,7 @@ DictTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) { } void -DictTrieBuilder::ListBranchNode::write(DictTrieBuilder &builder) { +StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) { // Write the sub-nodes in reverse order: The jump lengths are deltas from // after their own positions, so if we wrote the minUnit sub-node first, // then its jump delta would be larger. @@ -554,7 +552,7 @@ DictTrieBuilder::ListBranchNode::write(DictTrieBuilder &builder) { } UBool -DictTrieBuilder::SplitBranchNode::operator==(const Node &other) const { +StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const { if(this==&other) { return TRUE; } @@ -566,7 +564,7 @@ DictTrieBuilder::SplitBranchNode::operator==(const Node &other) const { } int32_t -DictTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) { +StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) { if(offset==0) { firstEdgeNumber=edgeNumber; edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber); @@ -576,7 +574,7 @@ DictTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) { } void -DictTrieBuilder::SplitBranchNode::write(DictTrieBuilder &builder) { +StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) { // Encode the less-than branch first. lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder); // Encode the greater-or-equal branch last because we do not jump for it at all. @@ -588,7 +586,7 @@ DictTrieBuilder::SplitBranchNode::write(DictTrieBuilder &builder) { } UBool -DictTrieBuilder::BranchHeadNode::operator==(const Node &other) const { +StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const { if(this==&other) { return TRUE; } @@ -600,7 +598,7 @@ DictTrieBuilder::BranchHeadNode::operator==(const Node &other) const { } int32_t -DictTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) { +StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) { if(offset==0) { offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); } @@ -608,7 +606,7 @@ DictTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) { } void -DictTrieBuilder::BranchHeadNode::write(DictTrieBuilder &builder) { +StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) { next->write(builder); if(length<=builder.getMinLinearMatch()) { offset=builder.writeValueAndType(hasValue, value, length-1); diff --git a/icu4c/source/tools/toolutil/dicttriebuilder.h b/icu4c/source/tools/toolutil/stringtriebuilder.h similarity index 93% rename from icu4c/source/tools/toolutil/dicttriebuilder.h rename to icu4c/source/tools/toolutil/stringtriebuilder.h index a67ee99d5cb..b26d5777292 100644 --- a/icu4c/source/tools/toolutil/dicttriebuilder.h +++ b/icu4c/source/tools/toolutil/stringtriebuilder.h @@ -3,32 +3,33 @@ * Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: dicttriebuilder.h +* file name: stringtriebuilder.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2010dec24 * created by: Markus W. Scherer -* -* Base class for dictionary-trie builder classes. */ -#ifndef __DICTTRIEBUILDER_H__ -#define __DICTTRIEBUILDER_H__ +#ifndef __STRINGTRIEBUILDER_H__ +#define __STRINGTRIEBUILDER_H__ #include "unicode/utypes.h" #include "unicode/uobject.h" #include "uhash.h" -enum UDictTrieBuildOption { - UDICTTRIE_BUILD_FAST, - UDICTTRIE_BUILD_SMALL +enum UStringTrieBuildOption { + USTRINGTRIE_BUILD_FAST, + USTRINGTRIE_BUILD_SMALL }; U_NAMESPACE_BEGIN -class U_TOOLUTIL_API DictTrieBuilder : public UObject { +/** + * Base class for string trie builder classes. + */ +class U_TOOLUTIL_API StringTrieBuilder : public UObject { public: /** @internal */ static UBool hashNode(const void *node); @@ -36,13 +37,13 @@ public: static UBool equalNodes(const void *left, const void *right); protected: - DictTrieBuilder(); - virtual ~DictTrieBuilder(); + StringTrieBuilder(); + virtual ~StringTrieBuilder(); void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode); void deleteCompactBuilder(); - void build(UDictTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode); + void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode); int32_t writeNode(int32_t start, int32_t limit, int32_t byteIndex); int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t byteIndex, int32_t length); @@ -157,10 +158,10 @@ protected: */ virtual int32_t markRightEdgesFirst(int32_t edgeNumber); // write() must set the offset to a positive value. - virtual void write(DictTrieBuilder &builder) = 0; + virtual void write(StringTrieBuilder &builder) = 0; // See markRightEdgesFirst. inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight, - DictTrieBuilder &builder) { + StringTrieBuilder &builder) { // Note: Edge numbers are negative, lastRight<=firstRight. // If offset>0 then this node and its sub-nodes have been written already // and we need not write them again. @@ -189,7 +190,7 @@ protected: public: FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {} virtual UBool operator==(const Node &other) const; - virtual void write(DictTrieBuilder &builder); + virtual void write(StringTrieBuilder &builder); protected: int32_t value; }; @@ -214,7 +215,7 @@ protected: : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); } virtual UBool operator==(const Node &other) const; virtual int32_t markRightEdgesFirst(int32_t edgeNumber); - virtual void write(DictTrieBuilder &builder); + virtual void write(StringTrieBuilder &builder); protected: Node *next; }; @@ -243,7 +244,7 @@ protected: ListBranchNode() : BranchNode(0x444444), length(0) {} virtual UBool operator==(const Node &other) const; virtual int32_t markRightEdgesFirst(int32_t edgeNumber); - virtual void write(DictTrieBuilder &builder); + virtual void write(StringTrieBuilder &builder); // Adds a unit with a final value. void add(int32_t c, int32_t value) { units[length]=(UChar)c; @@ -275,7 +276,7 @@ protected: unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} virtual UBool operator==(const Node &other) const; virtual int32_t markRightEdgesFirst(int32_t edgeNumber); - virtual void write(DictTrieBuilder &builder); + virtual void write(StringTrieBuilder &builder); protected: UChar unit; Node *lessThan; @@ -290,7 +291,7 @@ protected: length(len), next(subNode) {} virtual UBool operator==(const Node &other) const; virtual int32_t markRightEdgesFirst(int32_t edgeNumber); - virtual void write(DictTrieBuilder &builder); + virtual void write(StringTrieBuilder &builder); protected: int32_t length; Node *next; // A branch sub-node. @@ -312,4 +313,4 @@ private: U_NAMESPACE_END -#endif // __DICTTRIEBUILDER_H__ +#endif // __STRINGTRIEBUILDER_H__ diff --git a/icu4c/source/tools/toolutil/toolutil.vcxproj b/icu4c/source/tools/toolutil/toolutil.vcxproj index 370b73f3040..5be0199d3cc 100644 --- a/icu4c/source/tools/toolutil/toolutil.vcxproj +++ b/icu4c/source/tools/toolutil/toolutil.vcxproj @@ -246,10 +246,9 @@ - - + + - @@ -268,6 +267,7 @@ + false @@ -276,9 +276,9 @@ false - - - + + + @@ -296,10 +296,9 @@ - - + + - @@ -309,12 +308,13 @@ + - - - + + + diff --git a/icu4c/source/tools/toolutil/uchartrie.cpp b/icu4c/source/tools/toolutil/ucharstrie.cpp similarity index 83% rename from icu4c/source/tools/toolutil/uchartrie.cpp rename to icu4c/source/tools/toolutil/ucharstrie.cpp index 913436687aa..44a7e8af0d9 100644 --- a/icu4c/source/tools/toolutil/uchartrie.cpp +++ b/icu4c/source/tools/toolutil/ucharstrie.cpp @@ -1,9 +1,9 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: uchartrie.h +* file name: ucharstrie.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -15,7 +15,7 @@ #include "unicode/utypes.h" #include "unicode/uobject.h" #include "uassert.h" -#include "uchartrie.h" +#include "ucharstrie.h" U_NAMESPACE_BEGIN @@ -48,20 +48,20 @@ Appendable::append(const UChar *s, int32_t length) { UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Appendable) -UDictTrieResult -UCharTrie::current() const { +UStringTrieResult +UCharsTrie::current() const { const UChar *pos=pos_; if(pos==NULL) { - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } else { int32_t node; return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : UDICTTRIE_NO_VALUE; + valueResult(node) : USTRINGTRIE_NO_VALUE; } } -UDictTrieResult -UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) { +UStringTrieResult +UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) { // Branch according to the current unit. if(length==0) { length=*pos++; @@ -83,11 +83,11 @@ UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) { // and divides length by 2. do { if(uchar==*pos++) { - UDictTrieResult result; + UStringTrieResult result; int32_t node=*pos; if(node&kValueIsFinal) { // Leave the final value for getValue() to read. - result=UDICTTRIE_HAS_FINAL_VALUE; + result=USTRINGTRIE_FINAL_VALUE; } else { // Use the non-final value as the jump delta. ++pos; @@ -104,7 +104,7 @@ UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) { // end readValue() pos+=delta; node=*pos; - result= node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE; + result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; } pos_=pos; return result; @@ -115,15 +115,15 @@ UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) { if(uchar==*pos++) { pos_=pos; int32_t node=*pos; - return node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE; + return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; } else { stop(); - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } } -UDictTrieResult -UCharTrie::nextImpl(const UChar *pos, int32_t uchar) { +UStringTrieResult +UCharsTrie::nextImpl(const UChar *pos, int32_t uchar) { int32_t node=*pos++; for(;;) { if(node=kMinValueLead) ? - valueResult(node) : UDICTTRIE_NO_VALUE; + valueResult(node) : USTRINGTRIE_NO_VALUE; } else { // No match. break; @@ -150,14 +150,14 @@ UCharTrie::nextImpl(const UChar *pos, int32_t uchar) { } } stop(); - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } -UDictTrieResult -UCharTrie::next(int32_t uchar) { +UStringTrieResult +UCharsTrie::next(int32_t uchar) { const UChar *pos=pos_; if(pos==NULL) { - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. if(length>=0) { @@ -167,24 +167,24 @@ UCharTrie::next(int32_t uchar) { pos_=pos; int32_t node; return (length<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : UDICTTRIE_NO_VALUE; + valueResult(node) : USTRINGTRIE_NO_VALUE; } else { stop(); - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } } return nextImpl(pos, uchar); } -UDictTrieResult -UCharTrie::next(const UChar *s, int32_t sLength) { +UStringTrieResult +UCharsTrie::next(const UChar *s, int32_t sLength) { if(sLength<0 ? *s==0 : sLength==0) { // Empty input. return current(); } const UChar *pos=pos_; if(pos==NULL) { - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. for(;;) { @@ -198,7 +198,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) { pos_=pos; int32_t node; return (length<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : UDICTTRIE_NO_VALUE; + valueResult(node) : USTRINGTRIE_NO_VALUE; } if(length<0) { remainingMatchLength_=length; @@ -206,7 +206,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) { } if(uchar!=*pos) { stop(); - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } ++pos; --length; @@ -218,7 +218,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) { pos_=pos; int32_t node; return (length<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : UDICTTRIE_NO_VALUE; + valueResult(node) : USTRINGTRIE_NO_VALUE; } uchar=*s++; --sLength; @@ -228,7 +228,7 @@ UCharTrie::next(const UChar *s, int32_t sLength) { } if(uchar!=*pos) { stop(); - return UDICTTRIE_NO_MATCH; + return USTRINGTRIE_NO_MATCH; } ++pos; --length; @@ -237,9 +237,9 @@ UCharTrie::next(const UChar *s, int32_t sLength) { int32_t node=*pos++; for(;;) { if(nodekMaxBranchLinearSubNodeLength) { ++pos; // ignore the comparison byte if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { @@ -322,7 +322,7 @@ UCharTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length, } UBool -UCharTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) { +UCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) { int32_t node=*pos++; for(;;) { if(nodekMaxBranchLinearSubNodeLength) { ++pos; // ignore the comparison unit getNextBranchUChars(jumpByDelta(pos), length>>1, out); diff --git a/icu4c/source/tools/toolutil/uchartrie.h b/icu4c/source/tools/toolutil/ucharstrie.h similarity index 86% rename from icu4c/source/tools/toolutil/uchartrie.h rename to icu4c/source/tools/toolutil/ucharstrie.h index 6bbff16e729..c6a4b681d13 100644 --- a/icu4c/source/tools/toolutil/uchartrie.h +++ b/icu4c/source/tools/toolutil/ucharstrie.h @@ -1,9 +1,9 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: uchartrie.h +* file name: ucharstrie.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -12,24 +12,24 @@ * created by: Markus W. Scherer */ -#ifndef __UCHARTRIE_H__ -#define __UCHARTRIE_H__ +#ifndef __UCHARSTRIE_H__ +#define __UCHARSTRIE_H__ /** * \file - * \brief C++ API: Dictionary trie for mapping Unicode strings (or 16-bit-unit sequences) + * \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences) * to integer values. */ #include "unicode/utypes.h" #include "unicode/uobject.h" #include "uassert.h" -#include "udicttrie.h" +#include "ustringtrie.h" U_NAMESPACE_BEGIN -class UCharTrieBuilder; -class UCharTrieIterator; +class UCharsTrieBuilder; +class UCharsTrieIterator; /** * Base class for objects to which Unicode characters and strings can be appended. @@ -67,34 +67,34 @@ private: }; /** - * Light-weight, non-const reader class for a UCharTrie. + * Light-weight, non-const reader class for a UCharsTrie. * Traverses a UChar-serialized data structure with minimal state, * for mapping strings (16-bit-unit sequences) to non-negative integer values. */ -class U_TOOLUTIL_API UCharTrie : public UMemory { +class U_TOOLUTIL_API UCharsTrie : public UMemory { public: - UCharTrie(const UChar *trieUChars) + UCharsTrie(const UChar *trieUChars) : uchars_(trieUChars), pos_(uchars_), remainingMatchLength_(-1) {} /** * Resets this trie to its initial state. */ - UCharTrie &reset() { + UCharsTrie &reset() { pos_=uchars_; remainingMatchLength_=-1; return *this; } /** - * UCharTrie state object, for saving a trie's current state + * UCharsTrie state object, for saving a trie's current state * and resetting the trie back to this state later. */ class State : public UMemory { public: State() { uchars=NULL; } private: - friend class UCharTrie; + friend class UCharsTrie; const UChar *uchars; const UChar *pos; @@ -105,7 +105,7 @@ public: * Saves the state of this trie. * @see resetToState */ - const UCharTrie &saveState(State &state) const { + const UCharsTrie &saveState(State &state) const { state.uchars=uchars_; state.pos=pos_; state.remainingMatchLength=remainingMatchLength_; @@ -119,7 +119,7 @@ public: * @see saveState * @see reset */ - UCharTrie &resetToState(const State &state) { + UCharsTrie &resetToState(const State &state) { if(uchars_==state.uchars && uchars_!=NULL) { pos_=state.pos; remainingMatchLength_=state.remainingMatchLength; @@ -132,14 +132,14 @@ public: * and whether another input UChar can continue a matching string. * @return The match/value Result. */ - UDictTrieResult current() const; + UStringTrieResult current() const; /** * Traverses the trie from the initial state for this input UChar. * Equivalent to reset().next(uchar). * @return The match/value Result. */ - inline UDictTrieResult first(int32_t uchar) { + inline UStringTrieResult first(int32_t uchar) { remainingMatchLength_=-1; return nextImpl(uchars_, uchar); } @@ -150,31 +150,31 @@ public: * Equivalent to reset().nextForCodePoint(cp). * @return The match/value Result. */ - inline UDictTrieResult firstForCodePoint(UChar32 cp) { + inline UStringTrieResult firstForCodePoint(UChar32 cp) { return cp<=0xffff ? first(cp) : - (first(U16_LEAD(cp))!=UDICTTRIE_NO_MATCH ? + (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ? next(U16_TRAIL(cp)) : - UDICTTRIE_NO_MATCH); + USTRINGTRIE_NO_MATCH); } /** * Traverses the trie from the current state for this input UChar. * @return The match/value Result. */ - UDictTrieResult next(int32_t uchar); + UStringTrieResult next(int32_t uchar); /** * Traverses the trie from the current state for the * one or two UTF-16 code units for this input code point. * @return The match/value Result. */ - inline UDictTrieResult nextForCodePoint(UChar32 cp) { + inline UStringTrieResult nextForCodePoint(UChar32 cp) { return cp<=0xffff ? next(cp) : - (next(U16_LEAD(cp))!=UDICTTRIE_NO_MATCH ? + (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ? next(U16_TRAIL(cp)) : - UDICTTRIE_NO_MATCH); + USTRINGTRIE_NO_MATCH); } /** @@ -183,19 +183,20 @@ public: * \code * Result result=current(); * for(each c in s) - * if((result=next(c))==UDICTTRIE_NO_MATCH) return UDICTTRIE_NO_MATCH; + * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH; + * result=next(c); * return result; * \endcode * @return The match/value Result. */ - UDictTrieResult next(const UChar *s, int32_t length); + UStringTrieResult next(const UChar *s, int32_t length); /** * Returns a matching string's value if called immediately after - * current()/first()/next() returned UDICTTRIE_HAS_VALUE or UDICTTRIE_HAS_FINAL_VALUE. + * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE. * getValue() can be called multiple times. * - * Do not call getValue() after UDICTTRIE_NO_MATCH or UDICTTRIE_NO_VALUE! + * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE! */ inline int32_t getValue() const { const UChar *pos=pos_; @@ -221,7 +222,7 @@ public: /** * Finds each UChar which continues the string from the current state. - * That is, each UChar c for which it would be next(c)!=UDICTTRIE_NO_MATCH now. + * That is, each UChar c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now. * @param out Each next UChar is appended to this object. * (Only uses the out.append(c) method.) * @return the number of UChars which continue the string from here @@ -229,8 +230,8 @@ public: int32_t getNextUChars(Appendable &out) const; private: - friend class UCharTrieBuilder; - friend class UCharTrieIterator; + friend class UCharsTrieBuilder; + friend class UCharsTrieIterator; inline void stop() { pos_=NULL; @@ -313,15 +314,15 @@ private: return pos; } - static inline UDictTrieResult valueResult(int32_t node) { - return (UDictTrieResult)(UDICTTRIE_HAS_VALUE-(node>>15)); + static inline UStringTrieResult valueResult(int32_t node) { + return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15)); } // Handles a branch node for both next(uchar) and next(string). - UDictTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar); + UStringTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar); // Requires remainingLength_<0. - UDictTrieResult nextImpl(const UChar *pos, int32_t uchar); + UStringTrieResult nextImpl(const UChar *pos, int32_t uchar); // Helper functions for hasUniqueValue(). // Recursively finds a unique value (or whether there is not a unique one) @@ -336,7 +337,7 @@ private: // getNextUChars() when pos is on a branch node. static void getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out); - // UCharTrie data structure + // UCharsTrie data structure // // The trie consists of a series of UChar-serialized nodes for incremental // Unicode string/UChar sequence matching. (UChar=16-bit unsigned integer) @@ -417,7 +418,7 @@ private: static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff - // Fixed value referencing the UCharTrie words. + // Fixed value referencing the UCharsTrie words. const UChar *uchars_; // Iterator variables. @@ -430,4 +431,4 @@ private: U_NAMESPACE_END -#endif // __UCHARTRIE_H__ +#endif // __UCHARSTRIE_H__ diff --git a/icu4c/source/tools/toolutil/uchartriebuilder.cpp b/icu4c/source/tools/toolutil/ucharstriebuilder.cpp similarity index 71% rename from icu4c/source/tools/toolutil/uchartriebuilder.cpp rename to icu4c/source/tools/toolutil/ucharstriebuilder.cpp index 3ff64efc709..155c276569d 100644 --- a/icu4c/source/tools/toolutil/uchartriebuilder.cpp +++ b/icu4c/source/tools/toolutil/ucharstriebuilder.cpp @@ -3,15 +3,13 @@ * Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: uchartriebuilder.h +* file name: ucharstriebuilder.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2010nov14 * created by: Markus W. Scherer -* -* Builder class for UCharTrie dictionary trie. */ #include "unicode/utypes.h" @@ -19,18 +17,18 @@ #include "unicode/ustring.h" #include "cmemory.h" #include "uarrsort.h" -#include "uchartrie.h" -#include "uchartriebuilder.h" +#include "ucharstrie.h" +#include "ucharstriebuilder.h" U_NAMESPACE_BEGIN /* * Note: This builder implementation stores (string, value) pairs with full copies - * of the 16-bit-unit sequences, until the UCharTrie is built. + * of the 16-bit-unit sequences, until the UCharsTrie is built. * It might(!) take less memory if we collected the data in a temporary, dynamic trie. */ -class UCharTrieElement : public UMemory { +class UCharsTrieElement : public UMemory { public: // Use compiler's default constructor, initializes nothing. @@ -50,7 +48,7 @@ public: int32_t getValue() const { return value; } - int32_t compareStringTo(const UCharTrieElement &o, const UnicodeString &strings) const; + int32_t compareStringTo(const UCharsTrieElement &o, const UnicodeString &strings) const; private: // The first strings unit contains the string length. @@ -60,8 +58,8 @@ private: }; void -UCharTrieElement::setTo(const UnicodeString &s, int32_t val, - UnicodeString &strings, UErrorCode &errorCode) { +UCharsTrieElement::setTo(const UnicodeString &s, int32_t val, + UnicodeString &strings, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } @@ -78,17 +76,17 @@ UCharTrieElement::setTo(const UnicodeString &s, int32_t val, } int32_t -UCharTrieElement::compareStringTo(const UCharTrieElement &other, const UnicodeString &strings) const { +UCharsTrieElement::compareStringTo(const UCharsTrieElement &other, const UnicodeString &strings) const { return getString(strings).compare(other.getString(strings)); } -UCharTrieBuilder::~UCharTrieBuilder() { +UCharsTrieBuilder::~UCharsTrieBuilder() { delete[] elements; uprv_free(uchars); } -UCharTrieBuilder & -UCharTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) { +UCharsTrieBuilder & +UCharsTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return *this; } @@ -105,12 +103,12 @@ UCharTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCo } else { newCapacity=4*elementsCapacity; } - UCharTrieElement *newElements=new UCharTrieElement[newCapacity]; + UCharsTrieElement *newElements=new UCharsTrieElement[newCapacity]; if(newElements==NULL) { errorCode=U_MEMORY_ALLOCATION_ERROR; } if(elementsLength>0) { - uprv_memcpy(newElements, elements, elementsLength*sizeof(UCharTrieElement)); + uprv_memcpy(newElements, elements, elementsLength*sizeof(UCharsTrieElement)); } delete[] elements; elements=newElements; @@ -128,15 +126,15 @@ U_CDECL_BEGIN static int32_t U_CALLCONV compareElementStrings(const void *context, const void *left, const void *right) { const UnicodeString *strings=reinterpret_cast(context); - const UCharTrieElement *leftElement=reinterpret_cast(left); - const UCharTrieElement *rightElement=reinterpret_cast(right); + const UCharsTrieElement *leftElement=reinterpret_cast(left); + const UCharsTrieElement *rightElement=reinterpret_cast(right); return leftElement->compareStringTo(*rightElement, *strings); } U_CDECL_END UnicodeString & -UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode) { +UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return result; } @@ -153,7 +151,7 @@ UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result, errorCode=U_MEMORY_ALLOCATION_ERROR; return result; } - uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharTrieElement), + uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement), compareElementStrings, &strings, FALSE, // need not be a stable sort &errorCode); @@ -179,7 +177,7 @@ UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result, errorCode=U_MEMORY_ALLOCATION_ERROR; return result; } - DictTrieBuilder::build(buildOption, elementsLength, errorCode); + StringTrieBuilder::build(buildOption, elementsLength, errorCode); if(uchars==NULL) { errorCode=U_MEMORY_ALLOCATION_ERROR; } else { @@ -189,24 +187,24 @@ UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result, } int32_t -UCharTrieBuilder::getElementStringLength(int32_t i) const { +UCharsTrieBuilder::getElementStringLength(int32_t i) const { return elements[i].getStringLength(strings); } UChar -UCharTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const { +UCharsTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const { return elements[i].charAt(unitIndex, strings); } int32_t -UCharTrieBuilder::getElementValue(int32_t i) const { +UCharsTrieBuilder::getElementValue(int32_t i) const { return elements[i].getValue(); } int32_t -UCharTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const { - const UCharTrieElement &firstElement=elements[first]; - const UCharTrieElement &lastElement=elements[last]; +UCharsTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const { + const UCharsTrieElement &firstElement=elements[first]; + const UCharsTrieElement &lastElement=elements[last]; int32_t minStringLength=firstElement.getStringLength(strings); while(++unitIndexwrite(builder); b.write(s, length); offset=b.writeValueAndType(hasValue, value, b.getMinLinearMatch()+length-1); } -DictTrieBuilder::Node * -UCharTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, - Node *nextNode) const { +StringTrieBuilder::Node * +UCharsTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, + Node *nextNode) const { return new UCTLinearMatchNode( elements[i].getString(strings).getBuffer()+unitIndex, length, @@ -282,7 +280,7 @@ UCharTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t le } UBool -UCharTrieBuilder::ensureCapacity(int32_t length) { +UCharsTrieBuilder::ensureCapacity(int32_t length) { if(uchars==NULL) { return FALSE; // previous memory allocation had failed } @@ -308,7 +306,7 @@ UCharTrieBuilder::ensureCapacity(int32_t length) { } int32_t -UCharTrieBuilder::write(int32_t unit) { +UCharsTrieBuilder::write(int32_t unit) { int32_t newLength=ucharsLength+1; if(ensureCapacity(newLength)) { ucharsLength=newLength; @@ -318,7 +316,7 @@ UCharTrieBuilder::write(int32_t unit) { } int32_t -UCharTrieBuilder::write(const UChar *s, int32_t length) { +UCharsTrieBuilder::write(const UChar *s, int32_t length) { int32_t newLength=ucharsLength+length; if(ensureCapacity(newLength)) { ucharsLength=newLength; @@ -328,24 +326,24 @@ UCharTrieBuilder::write(const UChar *s, int32_t length) { } int32_t -UCharTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) { +UCharsTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) { return write(elements[i].getString(strings).getBuffer()+unitIndex, length); } int32_t -UCharTrieBuilder::writeValueAndFinal(int32_t i, UBool final) { +UCharsTrieBuilder::writeValueAndFinal(int32_t i, UBool final) { UChar intUnits[3]; int32_t length; - if(i<0 || i>UCharTrie::kMaxTwoUnitValue) { - intUnits[0]=(UChar)(UCharTrie::kThreeUnitValueLead); + if(i<0 || i>UCharsTrie::kMaxTwoUnitValue) { + intUnits[0]=(UChar)(UCharsTrie::kThreeUnitValueLead); intUnits[1]=(UChar)(i>>16); intUnits[2]=(UChar)i; length=3; - } else if(i<=UCharTrie::kMaxOneUnitValue) { + } else if(i<=UCharsTrie::kMaxOneUnitValue) { intUnits[0]=(UChar)(i); length=1; } else { - intUnits[0]=(UChar)(UCharTrie::kMinTwoUnitValueLead+(i>>16)); + intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitValueLead+(i>>16)); intUnits[1]=(UChar)i; length=2; } @@ -354,22 +352,22 @@ UCharTrieBuilder::writeValueAndFinal(int32_t i, UBool final) { } int32_t -UCharTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { +UCharsTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { if(!hasValue) { return write(node); } UChar intUnits[3]; int32_t length; - if(value<0 || value>UCharTrie::kMaxTwoUnitNodeValue) { - intUnits[0]=(UChar)(UCharTrie::kThreeUnitNodeValueLead); + if(value<0 || value>UCharsTrie::kMaxTwoUnitNodeValue) { + intUnits[0]=(UChar)(UCharsTrie::kThreeUnitNodeValueLead); intUnits[1]=(UChar)(value>>16); intUnits[2]=(UChar)value; length=3; - } else if(value<=UCharTrie::kMaxOneUnitNodeValue) { + } else if(value<=UCharsTrie::kMaxOneUnitNodeValue) { intUnits[0]=(UChar)((value+1)<<6); length=1; } else { - intUnits[0]=(UChar)(UCharTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0)); + intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0)); intUnits[1]=(UChar)value; length=2; } @@ -378,18 +376,18 @@ UCharTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) } int32_t -UCharTrieBuilder::writeDeltaTo(int32_t jumpTarget) { +UCharsTrieBuilder::writeDeltaTo(int32_t jumpTarget) { int32_t i=ucharsLength-jumpTarget; UChar intUnits[3]; int32_t length; U_ASSERT(i>=0); - if(i<=UCharTrie::kMaxOneUnitDelta) { + if(i<=UCharsTrie::kMaxOneUnitDelta) { length=0; - } else if(i<=UCharTrie::kMaxTwoUnitDelta) { - intUnits[0]=(UChar)(UCharTrie::kMinTwoUnitDeltaLead+(i>>16)); + } else if(i<=UCharsTrie::kMaxTwoUnitDelta) { + intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitDeltaLead+(i>>16)); length=1; } else { - intUnits[0]=(UChar)(UCharTrie::kThreeUnitDeltaLead); + intUnits[0]=(UChar)(UCharsTrie::kThreeUnitDeltaLead); intUnits[1]=(UChar)(i>>16); length=2; } diff --git a/icu4c/source/tools/toolutil/uchartriebuilder.h b/icu4c/source/tools/toolutil/ucharstriebuilder.h similarity index 75% rename from icu4c/source/tools/toolutil/uchartriebuilder.h rename to icu4c/source/tools/toolutil/ucharstriebuilder.h index ca37e52bd31..b9cf1ffb251 100644 --- a/icu4c/source/tools/toolutil/uchartriebuilder.h +++ b/icu4c/source/tools/toolutil/ucharstriebuilder.h @@ -3,41 +3,42 @@ * Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: uchartriebuilder.h +* file name: ucharstriebuilder.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2010nov14 * created by: Markus W. Scherer -* -* Builder class for UCharTrie dictionary trie. */ -#ifndef __UCHARTRIEBUILDER_H__ -#define __UCHARTRIEBUILDER_H__ +#ifndef __UCHARSTRIEBUILDER_H__ +#define __UCHARSTRIEBUILDER_H__ #include "unicode/utypes.h" #include "unicode/unistr.h" -#include "dicttriebuilder.h" -#include "uchartrie.h" +#include "stringtriebuilder.h" +#include "ucharstrie.h" U_NAMESPACE_BEGIN -class UCharTrieElement; +class UCharsTrieElement; -class U_TOOLUTIL_API UCharTrieBuilder : public DictTrieBuilder { +/** + * Builder class for UCharsTrie. + */ +class U_TOOLUTIL_API UCharsTrieBuilder : public StringTrieBuilder { public: - UCharTrieBuilder() + UCharsTrieBuilder() : elements(NULL), elementsCapacity(0), elementsLength(0), uchars(NULL), ucharsCapacity(0), ucharsLength(0) {} - virtual ~UCharTrieBuilder(); + virtual ~UCharsTrieBuilder(); - UCharTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode); + UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode); - UnicodeString &build(UDictTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode); + UnicodeString &build(UStringTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode); - UCharTrieBuilder &clear() { + UCharsTrieBuilder &clear() { strings.remove(); elementsLength=0; ucharsLength=0; @@ -57,15 +58,15 @@ private: virtual UBool matchNodesCanHaveValues() const { return TRUE; } - virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharTrie::kMaxBranchLinearSubNodeLength; } - virtual int32_t getMinLinearMatch() const { return UCharTrie::kMinLinearMatch; } - virtual int32_t getMaxLinearMatchLength() const { return UCharTrie::kMaxLinearMatchLength; } + virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharsTrie::kMaxBranchLinearSubNodeLength; } + virtual int32_t getMinLinearMatch() const { return UCharsTrie::kMinLinearMatch; } + virtual int32_t getMaxLinearMatchLength() const { return UCharsTrie::kMaxLinearMatchLength; } class UCTLinearMatchNode : public LinearMatchNode { public: UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode); virtual UBool operator==(const Node &other) const; - virtual void write(DictTrieBuilder &builder); + virtual void write(StringTrieBuilder &builder); private: const UChar *s; }; @@ -82,7 +83,7 @@ private: virtual int32_t writeDeltaTo(int32_t jumpTarget); UnicodeString strings; - UCharTrieElement *elements; + UCharsTrieElement *elements; int32_t elementsCapacity; int32_t elementsLength; @@ -95,4 +96,4 @@ private: U_NAMESPACE_END -#endif // __UCHARTRIEBUILDER_H__ +#endif // __UCHARSTRIEBUILDER_H__ diff --git a/icu4c/source/tools/toolutil/uchartrieiterator.cpp b/icu4c/source/tools/toolutil/ucharstrieiterator.cpp similarity index 79% rename from icu4c/source/tools/toolutil/uchartrieiterator.cpp rename to icu4c/source/tools/toolutil/ucharstrieiterator.cpp index ebab551e6ae..0bea6501628 100644 --- a/icu4c/source/tools/toolutil/uchartrieiterator.cpp +++ b/icu4c/source/tools/toolutil/ucharstrieiterator.cpp @@ -1,9 +1,9 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: uchartrieiterator.h +* file name: ucharstrieiterator.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -14,22 +14,22 @@ #include "unicode/utypes.h" #include "unicode/unistr.h" -#include "uchartrie.h" -#include "uchartrieiterator.h" +#include "ucharstrie.h" +#include "ucharstrieiterator.h" #include "uvectr32.h" U_NAMESPACE_BEGIN -UCharTrieIterator::UCharTrieIterator(const UChar *trieUChars, int32_t maxStringLength, - UErrorCode &errorCode) +UCharsTrieIterator::UCharsTrieIterator(const UChar *trieUChars, int32_t maxStringLength, + UErrorCode &errorCode) : uchars_(trieUChars), pos_(uchars_), initialPos_(uchars_), remainingMatchLength_(-1), initialRemainingMatchLength_(-1), skipValue_(FALSE), maxLength_(maxStringLength), value_(0), stack_(errorCode) {} -UCharTrieIterator::UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLength, - UErrorCode &errorCode) +UCharsTrieIterator::UCharsTrieIterator(const UCharsTrie &trie, int32_t maxStringLength, + UErrorCode &errorCode) : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), remainingMatchLength_(trie.remainingMatchLength_), initialRemainingMatchLength_(trie.remainingMatchLength_), @@ -48,7 +48,7 @@ UCharTrieIterator::UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLen } } -UCharTrieIterator &UCharTrieIterator::reset() { +UCharsTrieIterator &UCharsTrieIterator::reset() { pos_=initialPos_; remainingMatchLength_=initialRemainingMatchLength_; skipValue_=FALSE; @@ -64,7 +64,7 @@ UCharTrieIterator &UCharTrieIterator::reset() { } UBool -UCharTrieIterator::next(UErrorCode &errorCode) { +UCharsTrieIterator::next(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return FALSE; } @@ -97,18 +97,18 @@ UCharTrieIterator::next(UErrorCode &errorCode) { } for(;;) { int32_t node=*pos++; - if(node>=UCharTrie::kMinValueLead) { + if(node>=UCharsTrie::kMinValueLead) { if(skipValue_) { - pos=UCharTrie::skipNodeValue(pos, node); - node&=UCharTrie::kNodeTypeMask; + pos=UCharsTrie::skipNodeValue(pos, node); + node&=UCharsTrie::kNodeTypeMask; skipValue_=FALSE; } else { // Deliver value for the string so far. UBool isFinal=(UBool)(node>>15); if(isFinal) { - value_=UCharTrie::readValue(pos, node&0x7fff); + value_=UCharsTrie::readValue(pos, node&0x7fff); } else { - value_=UCharTrie::readNodeValue(pos, node); + value_=UCharsTrie::readNodeValue(pos, node); } if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { pos_=NULL; @@ -126,7 +126,7 @@ UCharTrieIterator::next(UErrorCode &errorCode) { if(maxLength_>0 && str_.length()==maxLength_) { return truncateAndStop(); } - if(node0 && str_.length()+length>maxLength_) { str_.append(pos, maxLength_-str_.length()); return truncateAndStop(); @@ -149,23 +149,23 @@ UCharTrieIterator::next(UErrorCode &errorCode) { // Branch node, needs to take the first outbound edge and push state for the rest. const UChar * -UCharTrieIterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { - while(length>UCharTrie::kMaxBranchLinearSubNodeLength) { +UCharsTrieIterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { + while(length>UCharsTrie::kMaxBranchLinearSubNodeLength) { ++pos; // ignore the comparison unit // Push state for the greater-or-equal edge. - stack_.addElement((int32_t)(UCharTrie::skipDelta(pos)-uchars_), errorCode); + stack_.addElement((int32_t)(UCharsTrie::skipDelta(pos)-uchars_), errorCode); stack_.addElement(((length-(length>>1))<<16)|str_.length(), errorCode); // Follow the less-than edge. length>>=1; - pos=UCharTrie::jumpByDelta(pos); + pos=UCharsTrie::jumpByDelta(pos); } // List of key-value pairs where values are either final values or jump deltas. // Read the first (key, value) pair. UChar trieUnit=*pos++; int32_t node=*pos++; UBool isFinal=(UBool)(node>>15); - int32_t value=UCharTrie::readValue(pos, node&=0x7fff); - pos=UCharTrie::skipValue(pos, node); + int32_t value=UCharsTrie::readValue(pos, node&=0x7fff); + pos=UCharsTrie::skipValue(pos, node); stack_.addElement((int32_t)(pos-uchars_), errorCode); stack_.addElement(((length-1)<<16)|str_.length(), errorCode); str_.append(trieUnit); diff --git a/icu4c/source/tools/toolutil/uchartrieiterator.h b/icu4c/source/tools/toolutil/ucharstrieiterator.h similarity index 82% rename from icu4c/source/tools/toolutil/uchartrieiterator.h rename to icu4c/source/tools/toolutil/ucharstrieiterator.h index 531477cbe34..cb6fe2e0eef 100644 --- a/icu4c/source/tools/toolutil/uchartrieiterator.h +++ b/icu4c/source/tools/toolutil/ucharstrieiterator.h @@ -1,9 +1,9 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* -* file name: uchartrieiterator.h +* file name: ucharstrieiterator.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -12,28 +12,28 @@ * created by: Markus W. Scherer */ -#ifndef __UCHARTRIEITERATOR_H__ -#define __UCHARTRIEITERATOR_H__ +#ifndef __UCHARSTRIEITERATOR_H__ +#define __UCHARSTRIEITERATOR_H__ /** * \file - * \brief C++ API: UCharTrie iterator for all of its (string, value) pairs. + * \brief C++ API: UCharsTrie iterator for all of its (string, value) pairs. */ #include "unicode/utypes.h" #include "unicode/unistr.h" -#include "uchartrie.h" +#include "ucharstrie.h" #include "uvectr32.h" U_NAMESPACE_BEGIN /** - * Iterator for all of the (string, value) pairs in a UCharTrie. + * Iterator for all of the (string, value) pairs in a UCharsTrie. */ -class U_TOOLUTIL_API UCharTrieIterator : public UMemory { +class U_TOOLUTIL_API UCharsTrieIterator : public UMemory { public: /** - * Iterates from the root of a UChar-serialized UCharTrie. + * Iterates from the root of a UChar-serialized UCharsTrie. * @param trieUChars The trie UChars. * @param maxStringLength If 0, the iterator returns full strings. * Otherwise, the iterator returns strings with this maximum length. @@ -42,10 +42,10 @@ public: * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) */ - UCharTrieIterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode); + UCharsTrieIterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode); /** - * Iterates from the current state of the specified UCharTrie. + * Iterates from the current state of the specified UCharsTrie. * @param trie The trie whose state will be copied for iteration. * @param maxStringLength If 0, the iterator returns full strings. * Otherwise, the iterator returns strings with this maximum length. @@ -54,12 +54,12 @@ public: * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) */ - UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); + UCharsTrieIterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); /** * Resets this iterator to its initial state. */ - UCharTrieIterator &reset(); + UCharsTrieIterator &reset(); /** * Finds the next (string, value) pair if there is one. @@ -118,4 +118,4 @@ private: U_NAMESPACE_END -#endif // __UCHARTRIEITERATOR_H__ +#endif // __UCHARSTRIEITERATOR_H__