From 9cc27feeb4226900364d7ec6dc0a997bd5022341 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Thu, 27 Jan 2011 21:41:03 +0000 Subject: [PATCH] ICU-8167 port trie API changes from Java X-SVN-Rev: 29367 --- icu4c/source/common/bytestrie.cpp | 8 + icu4c/source/common/bytestrie.h | 57 ++- icu4c/source/common/ustringtrie.h | 3 + icu4c/source/test/intltest/bytestrietest.cpp | 259 ++++++++------ icu4c/source/test/intltest/ucharstrietest.cpp | 332 ++++++++++-------- .../test/perf/dicttrieperf/dicttrieperf.cpp | 68 ++-- .../tools/toolutil/bytestriebuilder.cpp | 94 +++-- .../source/tools/toolutil/bytestriebuilder.h | 73 +++- .../source/tools/toolutil/stringtriebuilder.h | 17 + icu4c/source/tools/toolutil/ucharstrie.cpp | 5 + icu4c/source/tools/toolutil/ucharstrie.h | 53 ++- .../tools/toolutil/ucharstriebuilder.cpp | 103 ++++-- .../source/tools/toolutil/ucharstriebuilder.h | 76 +++- 13 files changed, 766 insertions(+), 382 deletions(-) diff --git a/icu4c/source/common/bytestrie.cpp b/icu4c/source/common/bytestrie.cpp index c9049146863..ce9d7bab57b 100644 --- a/icu4c/source/common/bytestrie.cpp +++ b/icu4c/source/common/bytestrie.cpp @@ -15,11 +15,16 @@ #include "unicode/utypes.h" #include "unicode/bytestream.h" #include "unicode/uobject.h" +#include "cmemory.h" #include "uassert.h" #include "bytestrie.h" U_NAMESPACE_BEGIN +BytesTrie::~BytesTrie() { + uprv_free(ownedArray_); +} + // lead byte already shifted right by 1. int32_t BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) { @@ -178,6 +183,9 @@ BytesTrie::next(int32_t inByte) { if(pos==NULL) { return USTRINGTRIE_NO_MATCH; } + if(inByte<0) { + inByte+=0x100; + } int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. if(length>=0) { // Remaining part of a linear-match node. diff --git a/icu4c/source/common/bytestrie.h b/icu4c/source/common/bytestrie.h index ca1087d85ef..c80f26c3673 100644 --- a/icu4c/source/common/bytestrie.h +++ b/icu4c/source/common/bytestrie.h @@ -37,17 +37,47 @@ class UVector32; * Light-weight, non-const reader class for a BytesTrie. * Traverses a byte-serialized data structure with minimal state, * for mapping byte sequences to non-negative integer values. + * + * This class owns the serialized trie data only if it was constructed by + * the builder's build() method. + * The public constructor and the copy constructor only alias the data (only copy the pointer). + * There is no assignment operator. + * + * This class is not intended for public subclassing. */ class U_COMMON_API BytesTrie : public UMemory { public: /** * Constructs a BytesTrie reader instance. - * @param trieBytes The trie bytes. + * + * The trieBytes must contain a copy of a byte sequence from the BytesTrieBuilder, + * starting with the first byte of that sequence. + * The BytesTrie object will not read more bytes than + * the BytesTrieBuilder generated in the corresponding build() call. + * + * The array is not copied/cloned and must not be modified while + * the BytesTrie object is in use. + * + * @param trieBytes The byte array that contains the serialized trie. */ BytesTrie(const void *trieBytes) - : bytes_(reinterpret_cast(trieBytes)), + : ownedArray_(NULL), bytes_(reinterpret_cast(trieBytes)), pos_(bytes_), remainingMatchLength_(-1) {} + /** + * Destructor. + */ + ~BytesTrie(); + + /** + * Copy constructor, copies the other trie reader object and its state, + * but not the byte array which will be shared. (Shallow copy.) + * @param Another BytesTrie object. + */ + BytesTrie(const BytesTrie &other) + : ownedArray_(NULL), bytes_(other.bytes_), + pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {} + /** * Resets this trie to its initial state. */ @@ -108,15 +138,22 @@ public: /** * Traverses the trie from the initial state for this input byte. * Equivalent to reset().next(inByte). + * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. + * Values below -0x100 and above 0xff will never match. * @return The match/value Result. */ inline UStringTrieResult first(int32_t inByte) { remainingMatchLength_=-1; + if(inByte<0) { + inByte+=0x100; + } return nextImpl(bytes_, inByte); } /** * Traverses the trie from the current state for this input byte. + * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. + * Values below -0x100 and above 0xff will never match. * @return The match/value Result. */ UStringTrieResult next(int32_t inByte); @@ -262,6 +299,20 @@ public: private: friend class BytesTrieBuilder; + /** + * Constructs a BytesTrie reader instance. + * Unlike the public constructor which just aliases an array, + * this constructor adopts the builder's array. + * This constructor is only called by the builder. + */ + BytesTrie(void *adoptBytes, const void *trieBytes) + : ownedArray_(reinterpret_cast(adoptBytes)), + bytes_(reinterpret_cast(trieBytes)), + pos_(bytes_), remainingMatchLength_(-1) {} + + // No assignment operator. + BytesTrie &operator=(const BytesTrie &other); + inline void stop() { pos_=NULL; } @@ -407,6 +458,8 @@ private: static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff + uint8_t *ownedArray_; + // Fixed value referencing the BytesTrie bytes. const uint8_t *bytes_; diff --git a/icu4c/source/common/ustringtrie.h b/icu4c/source/common/ustringtrie.h index 7cfe6ec5e7d..af1766b074f 100644 --- a/icu4c/source/common/ustringtrie.h +++ b/icu4c/source/common/ustringtrie.h @@ -31,6 +31,9 @@ enum UStringTrieResult { /** * The input unit(s) did not continue a matching string. + * Once current()/next() return USTRINGTRIE_NO_MATCH, + * all further calls to current()/next() will also return USTRINGTRIE_NO_MATCH, + * until the trie is reset to its original state or to a saved state. */ USTRINGTRIE_NO_MATCH, /** diff --git a/icu4c/source/test/intltest/bytestrietest.cpp b/icu4c/source/test/intltest/bytestrietest.cpp index c6d6c1020f0..cbd43512e47 100644 --- a/icu4c/source/test/intltest/bytestrietest.cpp +++ b/icu4c/source/test/intltest/bytestrietest.cpp @@ -15,6 +15,7 @@ #include #include "unicode/utypes.h" +#include "unicode/localpointer.h" #include "unicode/stringpiece.h" #include "bytestrie.h" #include "bytestriebuilder.h" @@ -29,7 +30,7 @@ struct StringAndValue { class BytesTrieTest : public IntlTest { public: - BytesTrieTest() {} + BytesTrieTest(); virtual ~BytesTrieTest(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); @@ -44,7 +45,7 @@ public: void TestValuesForState(); void TestCompact(); - StringPiece buildMonthsTrie(BytesTrieBuilder &builder, UStringTrieBuildOption buildOption); + BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); void TestHasUniqueValue(); void TestGetNextBytes(); void TestIteratorFromBranch(); @@ -52,24 +53,34 @@ public: void TestTruncatingIteratorFromRoot(); void TestTruncatingIteratorFromLinearMatchShort(); void TestTruncatingIteratorFromLinearMatchLong(); + void TestIteratorFromBytes(); void checkData(const StringAndValue data[], int32_t dataLength); void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); - StringPiece buildTrie(const StringAndValue data[], int32_t dataLength, - BytesTrieBuilder &builder, UStringTrieBuildOption buildOption); - void checkFirst(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength); - void checkNext(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength); - void checkNextWithState(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength); - void checkNextString(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength); - void checkIterator(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength); + BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength, + UStringTrieBuildOption buildOption); + void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength); void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); + +private: + BytesTrieBuilder *builder_; }; extern IntlTest *createBytesTrieTest() { return new BytesTrieTest(); } +BytesTrieTest::BytesTrieTest() : builder_(NULL) { + IcuTestErrorCode errorCode(*this, "BytesTrieTest()"); + builder_=new BytesTrieBuilder(errorCode); +} + BytesTrieTest::~BytesTrieTest() { + delete builder_; } void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { @@ -94,20 +105,22 @@ void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, TESTCASE_AUTO(TestTruncatingIteratorFromRoot); TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); + TESTCASE_AUTO(TestIteratorFromBytes); TESTCASE_AUTO_END; } void BytesTrieTest::TestBuilder() { IcuTestErrorCode errorCode(*this, "TestBuilder()"); - BytesTrieBuilder builder; - builder.build(USTRINGTRIE_BUILD_FAST, errorCode); + builder_->clear(); + delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); return; } - builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); + // TODO: remove .build(...) once add() checks for duplicates. + builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { - errln("BytesTrieBuilder.build() did not detect duplicates"); + errln("BytesTrieBuilder.add() did not detect duplicates"); return; } } @@ -250,7 +263,7 @@ void BytesTrieTest::TestCompact() { checkData(data, LENGTHOF(data)); } -StringPiece BytesTrieTest::buildMonthsTrie(BytesTrieBuilder &builder, UStringTrieBuildOption buildOption) { +BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { // All types of nodes leading to the same value, // for code coverage of recursive functions. // In particular, we need a lot of branches on some single level @@ -287,111 +300,105 @@ StringPiece BytesTrieTest::buildMonthsTrie(BytesTrieBuilder &builder, UStringTri { "jun.", 6 }, { "june", 6 } }; - return buildTrie(data, LENGTHOF(data), builder, buildOption); + return buildTrie(data, LENGTHOF(data), buildOption); } void BytesTrieTest::TestHasUniqueValue() { - BytesTrieBuilder builder; - StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST); - if(sp.empty()) { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } - BytesTrie trie(sp.data()); int32_t uniqueValue; - if(trie.hasUniqueValue(uniqueValue)) { + if(trie->hasUniqueValue(uniqueValue)) { errln("unique value at root"); } - trie.next('j'); - trie.next('a'); - trie.next('n'); + trie->next('j'); + trie->next('a'); + trie->next('n'); // hasUniqueValue() directly after next() - if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=1) { + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { errln("not unique value 1 after \"jan\""); } - trie.first('j'); - trie.next('u'); - if(trie.hasUniqueValue(uniqueValue)) { + trie->first('j'); + trie->next('u'); + if(trie->hasUniqueValue(uniqueValue)) { errln("unique value after \"ju\""); } - if(trie.next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) { + if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { errln("not normal value 6 after \"jun\""); } // hasUniqueValue() after getValue() - if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=6) { + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { errln("not unique value 6 after \"jun\""); } // hasUniqueValue() from within a linear-match node - trie.first('a'); - trie.next('u'); - if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=8) { + trie->first('a'); + trie->next('u'); + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { errln("not unique value 8 after \"au\""); } } void BytesTrieTest::TestGetNextBytes() { - BytesTrieBuilder builder; - StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL); - if(sp.empty()) { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); + if(trie.isNull()) { return; // buildTrie() reported an error } - BytesTrie trie(sp.data()); char buffer[40]; CheckedArrayByteSink sink(buffer, LENGTHOF(buffer)); - int32_t count=trie.getNextBytes(sink); + int32_t count=trie->getNextBytes(sink); if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') { errln("months getNextBytes()!=[aj] at root"); } - trie.next('j'); - trie.next('a'); - trie.next('n'); + trie->next('j'); + trie->next('a'); + trie->next('n'); // getNextBytes() directly after next() - count=trie.getNextBytes(sink.Reset()); + count=trie->getNextBytes(sink.Reset()); buffer[count]=0; if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\""); } // getNextBytes() after getValue() - trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. + trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. memset(buffer, 0, sizeof(buffer)); - count=trie.getNextBytes(sink.Reset()); + count=trie->getNextBytes(sink.Reset()); if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); } // getNextBytes() from a linear-match node - trie.next('u'); + trie->next('u'); memset(buffer, 0, sizeof(buffer)); - count=trie.getNextBytes(sink.Reset()); + count=trie->getNextBytes(sink.Reset()); if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') { errln("months getNextBytes()!=[a] after \"janu\""); } - trie.next('a'); + trie->next('a'); memset(buffer, 0, sizeof(buffer)); - count=trie.getNextBytes(sink.Reset()); + count=trie->getNextBytes(sink.Reset()); if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') { errln("months getNextBytes()!=[r] after \"janua\""); } - trie.next('r'); - trie.next('y'); + trie->next('r'); + trie->next('y'); // getNextBytes() after a final match - count=trie.getNextBytes(sink.Reset()); + count=trie->getNextBytes(sink.Reset()); if(count!=0 || sink.NumberOfBytesAppended()!=0) { errln("months getNextBytes()!=[] after \"january\""); } } void BytesTrieTest::TestIteratorFromBranch() { - BytesTrieBuilder builder; - StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST); - if(sp.empty()) { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } - BytesTrie trie(sp.data()); // Go to a branch node. - trie.next('j'); - trie.next('a'); - trie.next('n'); + trie->next('j'); + trie->next('a'); + trie->next('n'); IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); - BytesTrie::Iterator iter(trie, 0, errorCode); + BytesTrie::Iterator iter(*trie, 0, errorCode); if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { return; } @@ -431,20 +438,18 @@ void BytesTrieTest::TestIteratorFromBranch() { } void BytesTrieTest::TestIteratorFromLinearMatch() { - BytesTrieBuilder builder; - StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL); - if(sp.empty()) { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); + if(trie.isNull()) { return; // buildTrie() reported an error } - BytesTrie trie(sp.data()); // Go into a linear-match node. - trie.next('j'); - trie.next('a'); - trie.next('n'); - trie.next('u'); - trie.next('a'); + trie->next('j'); + trie->next('a'); + trie->next('n'); + trie->next('u'); + trie->next('a'); IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); - BytesTrie::Iterator iter(trie, 0, errorCode); + BytesTrie::Iterator iter(*trie, 0, errorCode); if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { return; } @@ -461,13 +466,12 @@ void BytesTrieTest::TestIteratorFromLinearMatch() { } void BytesTrieTest::TestTruncatingIteratorFromRoot() { - BytesTrieBuilder builder; - StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST); - if(sp.empty()) { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); - BytesTrie::Iterator iter(sp.data(), 4, errorCode); + BytesTrie::Iterator iter(*trie, 4, errorCode); if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { return; } @@ -513,18 +517,16 @@ void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() { { "abcdepq", 200 }, { "abcdeyz", 3000 } }; - BytesTrieBuilder builder; - StringPiece sp=buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST); - if(sp.empty()) { + LocalPointer trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } - BytesTrie trie(sp.data()); // Go into a linear-match node. - trie.next('a'); - trie.next('b'); + trie->next('a'); + trie->next('b'); IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); // Truncate within the linear-match node. - BytesTrie::Iterator iter(trie, 2, errorCode); + BytesTrie::Iterator iter(*trie, 2, errorCode); if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { return; } @@ -543,19 +545,17 @@ void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() { { "abcdepq", 200 }, { "abcdeyz", 3000 } }; - BytesTrieBuilder builder; - StringPiece sp=buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST); - if(sp.empty()) { + LocalPointer trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } - BytesTrie trie(sp.data()); // Go into a linear-match node. - trie.next('a'); - trie.next('b'); - trie.next('c'); + trie->next('a'); + trie->next('b'); + trie->next('c'); IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); // Truncate after the linear-match node. - BytesTrie::Iterator iter(trie, 3, errorCode); + BytesTrie::Iterator iter(*trie, 3, errorCode); if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { return; } @@ -570,6 +570,22 @@ void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() { checkIterator(iter.reset(), expected, LENGTHOF(expected)); } +void BytesTrieTest::TestIteratorFromBytes() { + static const StringAndValue data[]={ + { "mm", 3 }, + { "mmm", 33 }, + { "mmnop", 333 } + }; + builder_->clear(); + IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()"); + for(int32_t i=0; iadd(data[i].s, data[i].value, errorCode); + } + StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode); + BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode); + checkIterator(iter, data, LENGTHOF(data)); +} + void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { logln("checkData(dataLength=%d, fast)", (int)dataLength); checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); @@ -578,20 +594,19 @@ void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { } void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { - BytesTrieBuilder builder; - StringPiece sp=buildTrie(data, dataLength, builder, buildOption); - if(sp.empty()) { + LocalPointer trie(buildTrie(data, dataLength, buildOption)); + if(trie.isNull()) { return; // buildTrie() reported an error } - checkFirst(sp, data, dataLength); - checkNext(sp, data, dataLength); - checkNextWithState(sp, data, dataLength); - checkNextString(sp, data, dataLength); - checkIterator(sp, data, dataLength); + checkFirst(*trie, data, dataLength); + checkNext(*trie, data, dataLength); + checkNextWithState(*trie, data, dataLength); + checkNextString(*trie, data, dataLength); + checkIterator(*trie, data, dataLength); } -StringPiece BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, - BytesTrieBuilder &builder, UStringTrieBuildOption buildOption) { +BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, + UStringTrieBuildOption buildOption) { IcuTestErrorCode errorCode(*this, "buildTrie()"); // Add the items to the trie builder in an interesting (not trivial, not random) order. int32_t index, step; @@ -607,47 +622,61 @@ StringPiece BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLe index=dataLength-1; step=-1; } - builder.clear(); + builder_->clear(); for(int32_t i=0; iadd(data[index].s, data[index].value, errorCode); index=(index+step)%dataLength; } - StringPiece sp(builder.build(buildOption, errorCode)); + StringPiece sp=builder_->buildStringPiece(buildOption, errorCode); + LocalPointer trie(builder_->build(buildOption, errorCode)); if(!errorCode.logIfFailureAndReset("add()/build()")) { - builder.add("zzz", 999, errorCode); + builder_->add("zzz", 999, errorCode); if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); } } logln("serialized trie size: %ld bytes\n", (long)sp.length()); - return sp; + StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode); + if(sp.data()==sp2.data()) { + errln("builder.buildStringPiece() before & after build() returned same array"); + } + if(errorCode.isFailure()) { + return NULL; + } + // Tries from either build() method should be identical but + // BytesTrie does not implement equals(). + // We just return either one. + if((dataLength&1)!=0) { + return trie.orphan(); + } else { + return new BytesTrie(sp2.data()); + } } -void BytesTrieTest::checkFirst(const StringPiece &trieBytes, +void BytesTrieTest::checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength) { - BytesTrie trie(trieBytes.data()); for(int32_t i=0; i #include "unicode/utypes.h" +#include "unicode/localpointer.h" #include "unicode/uniset.h" #include "ucharstrie.h" #include "ucharstriebuilder.h" @@ -29,7 +30,7 @@ struct StringAndValue { class UCharsTrieTest : public IntlTest { public: - UCharsTrieTest() {} + UCharsTrieTest(); virtual ~UCharsTrieTest(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); @@ -46,11 +47,10 @@ public: void TestFirstForCodePoint(); void TestNextForCodePoint(); - UBool buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result, int32_t numUniqueFirst); + UCharsTrie *buildLargeTrie(int32_t numUniqueFirst); void TestLargeTrie(); - UBool buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, - UnicodeString &result); + UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); void TestHasUniqueValue(); void TestGetNextUChars(); void TestIteratorFromBranch(); @@ -58,24 +58,34 @@ public: void TestTruncatingIteratorFromRoot(); void TestTruncatingIteratorFromLinearMatchShort(); void TestTruncatingIteratorFromLinearMatchLong(); + void TestIteratorFromUChars(); void checkData(const StringAndValue data[], int32_t dataLength); void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); - UBool buildTrie(const StringAndValue data[], int32_t dataLength, - UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result); - void checkFirst(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength); - void checkNext(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength); - void checkNextWithState(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength); - void checkNextString(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength); - void checkIterator(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength); + UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength, + UStringTrieBuildOption buildOption); + void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); + +private: + UCharsTrieBuilder *builder_; }; extern IntlTest *createUCharsTrieTest() { return new UCharsTrieTest(); } +UCharsTrieTest::UCharsTrieTest() : builder_(NULL) { + IcuTestErrorCode errorCode(*this, "UCharsTrieTest()"); + builder_=new UCharsTrieBuilder(errorCode); +} + UCharsTrieTest::~UCharsTrieTest() { + delete builder_; } void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { @@ -103,21 +113,21 @@ void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name TESTCASE_AUTO(TestTruncatingIteratorFromRoot); TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); + TESTCASE_AUTO(TestIteratorFromUChars); TESTCASE_AUTO_END; } void UCharsTrieTest::TestBuilder() { IcuTestErrorCode errorCode(*this, "TestBuilder()"); - UCharsTrieBuilder builder; - UnicodeString trieUChars; - builder.build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); + delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); return; } - builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); + // TODO: remove .build(...) once add() checks for duplicates. + builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { - errln("UCharsTrieBuilder.build() did not detect duplicates"); + errln("UCharsTrieBuilder.add() did not detect duplicates"); return; } } @@ -281,41 +291,39 @@ void UCharsTrieTest::TestNextForCodePoint() { { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, { "\\u4dff\\U000103ff", 99999 } }; - UCharsTrieBuilder builder; - UnicodeString trieUChars; - if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { + LocalPointer trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } - UCharsTrie trie(trieUChars.getBuffer()); UStringTrieResult result; - if( (result=trie.nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() || - trie.getValue()!=2000000000 + if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || + trie->getValue()!=2000000000 ) { errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); } - if( (result=trie.firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() || - trie.getValue()!=44444 + if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || + trie->getValue()!=44444 ) { errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); } - if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie.current() // no match for trail surrogate + if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate ) { errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); } - if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() || - (result=trie.nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() || - trie.getValue()!=99999 + if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || + trie->getValue()!=99999 ) { errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); } @@ -355,43 +363,41 @@ private: } // end namespace -UBool UCharsTrieTest::buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result, - int32_t numUniqueFirst) { +UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) { IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); Generator gen; - builder.clear(); + builder_->clear(); while(gen.countUniqueFirstChars()add(gen.getString(), gen.getValue(), errorCode); gen.next(); } infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex()); - builder.build(USTRINGTRIE_BUILD_FAST, result, errorCode); - logln("serialized trie size: %ld UChars\n", (long)result.length()); - return errorCode.isSuccess(); + UnicodeString trieUChars; + builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); + logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); + return new UCharsTrie(trieUChars.getBuffer()); } // Exercise a large branch node. void UCharsTrieTest::TestLargeTrie() { - UCharsTrieBuilder builder; - UnicodeString trieUChars; - if(!buildLargeTrie(builder, trieUChars, 1111)) { + LocalPointer trie(buildLargeTrie(1111)); + if(trie.isNull()) { return; // buildTrie() reported an error } - UCharsTrie trie(trieUChars.getBuffer()); Generator gen; while(gen.countUniqueFirstChars()<1111) { UnicodeString x(gen.getString()); int32_t value=gen.getValue(); if(!x.isEmpty()) { - if(trie.first(x[0])==USTRINGTRIE_NO_MATCH) { + if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) { errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n", x[0], (long)gen.getIndex()); break; } x.remove(0, 1); } - UStringTrieResult result=trie.next(x.getBuffer(), x.length()); - if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie.current() || value!=trie.getValue()) { + UStringTrieResult result=trie->next(x.getBuffer(), x.length()); + if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) { errln("next(%d chars U+%04X U+%04X)!=hasValue or " "next()!=current() or getValue() wrong " "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex()); @@ -412,8 +418,7 @@ enum { u_y=0x79 }; -UBool UCharsTrieTest::buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, - UnicodeString &result) { +UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { // All types of nodes leading to the same value, // for code coverage of recursive functions. // In particular, we need a lot of branches on some single level @@ -450,43 +455,41 @@ UBool UCharsTrieTest::buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBui { "jun.", 6 }, { "june", 6 } }; - return buildTrie(data, LENGTHOF(data), builder, buildOption, result); + return buildTrie(data, LENGTHOF(data), buildOption); } void UCharsTrieTest::TestHasUniqueValue() { - UCharsTrieBuilder builder; - UnicodeString trieUChars; - if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } - UCharsTrie trie(trieUChars.getBuffer()); int32_t uniqueValue; - if(trie.hasUniqueValue(uniqueValue)) { + if(trie->hasUniqueValue(uniqueValue)) { errln("unique value at root"); } - trie.next(u_j); - trie.next(u_a); - trie.next(u_n); + trie->next(u_j); + trie->next(u_a); + trie->next(u_n); // hasUniqueValue() directly after next() - if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=1) { + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { errln("not unique value 1 after \"jan\""); } - trie.first(u_j); - trie.next(u_u); - if(trie.hasUniqueValue(uniqueValue)) { + trie->first(u_j); + trie->next(u_u); + if(trie->hasUniqueValue(uniqueValue)) { errln("unique value after \"ju\""); } - if(trie.next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) { + if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { errln("not normal value 6 after \"jun\""); } // hasUniqueValue() after getValue() - if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=6) { + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { errln("not unique value 6 after \"jun\""); } // hasUniqueValue() from within a linear-match node - trie.first(u_a); - trie.next(u_u); - if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=8) { + trie->first(u_a); + trie->next(u_u); + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { errln("not unique value 8 after \"au\""); } } @@ -501,65 +504,61 @@ private: }; void UCharsTrieTest::TestGetNextUChars() { - UCharsTrieBuilder builder; - UnicodeString trieUChars; - if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); + if(trie.isNull()) { return; // buildTrie() reported an error } - UCharsTrie trie(trieUChars.getBuffer()); UnicodeString buffer; UnicodeStringAppendable app(buffer); - int32_t count=trie.getNextUChars(app); + int32_t count=trie->getNextUChars(app); if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) { errln("months getNextUChars()!=[aj] at root"); } - trie.next(u_j); - trie.next(u_a); - trie.next(u_n); + trie->next(u_j); + trie->next(u_a); + trie->next(u_n); // getNextUChars() directly after next() - count=trie.getNextUChars(app.reset()); + count=trie->getNextUChars(app.reset()); if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); } // getNextUChars() after getValue() - trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. - count=trie.getNextUChars(app.reset()); + trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. + count=trie->getNextUChars(app.reset()); if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); } // getNextUChars() from a linear-match node - trie.next(u_u); - count=trie.getNextUChars(app.reset()); + trie->next(u_u); + count=trie->getNextUChars(app.reset()); if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) { errln("months getNextUChars()!=[a] after \"janu\""); } - trie.next(u_a); - count=trie.getNextUChars(app.reset()); + trie->next(u_a); + count=trie->getNextUChars(app.reset()); if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) { errln("months getNextUChars()!=[r] after \"janua\""); } - trie.next(u_r); - trie.next(u_y); + trie->next(u_r); + trie->next(u_y); // getNextUChars() after a final match - count=trie.getNextUChars(app.reset()); + count=trie->getNextUChars(app.reset()); if(count!=0 || buffer.length()!=0) { errln("months getNextUChars()!=[] after \"january\""); } } void UCharsTrieTest::TestIteratorFromBranch() { - UCharsTrieBuilder builder; - UnicodeString trieUChars; - if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } - UCharsTrie trie(trieUChars.getBuffer()); // Go to a branch node. - trie.next(u_j); - trie.next(u_a); - trie.next(u_n); + trie->next(u_j); + trie->next(u_a); + trie->next(u_n); IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); - UCharsTrie::Iterator iter(trie, 0, errorCode); + UCharsTrie::Iterator iter(*trie, 0, errorCode); if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { return; } @@ -599,20 +598,18 @@ void UCharsTrieTest::TestIteratorFromBranch() { } void UCharsTrieTest::TestIteratorFromLinearMatch() { - UCharsTrieBuilder builder; - UnicodeString trieUChars; - if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); + if(trie.isNull()) { return; // buildTrie() reported an error } - UCharsTrie trie(trieUChars.getBuffer()); // Go into a linear-match node. - trie.next(u_j); - trie.next(u_a); - trie.next(u_n); - trie.next(u_u); - trie.next(u_a); + trie->next(u_j); + trie->next(u_a); + trie->next(u_n); + trie->next(u_u); + trie->next(u_a); IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); - UCharsTrie::Iterator iter(trie, 0, errorCode); + UCharsTrie::Iterator iter(*trie, 0, errorCode); if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { return; } @@ -629,13 +626,12 @@ void UCharsTrieTest::TestIteratorFromLinearMatch() { } void UCharsTrieTest::TestTruncatingIteratorFromRoot() { - UCharsTrieBuilder builder; - UnicodeString trieUChars; - if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); - UCharsTrie::Iterator iter(trieUChars.getBuffer(), 4, errorCode); + UCharsTrie::Iterator iter(*trie, 4, errorCode); if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { return; } @@ -681,18 +677,16 @@ void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { { "abcdepq", 200 }, { "abcdeyz", 3000 } }; - UCharsTrieBuilder builder; - UnicodeString trieUChars; - if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { + LocalPointer trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } - UCharsTrie trie(trieUChars.getBuffer()); // Go into a linear-match node. - trie.next(u_a); - trie.next(u_b); + trie->next(u_a); + trie->next(u_b); IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); // Truncate within the linear-match node. - UCharsTrie::Iterator iter(trie, 2, errorCode); + UCharsTrie::Iterator iter(*trie, 2, errorCode); if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { return; } @@ -711,19 +705,17 @@ void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { { "abcdepq", 200 }, { "abcdeyz", 3000 } }; - UCharsTrieBuilder builder; - UnicodeString trieUChars; - if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) { + LocalPointer trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { return; // buildTrie() reported an error } - UCharsTrie trie(trieUChars.getBuffer()); // Go into a linear-match node. - trie.next(u_a); - trie.next(u_b); - trie.next(u_c); + trie->next(u_a); + trie->next(u_b); + trie->next(u_c); IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); // Truncate after the linear-match node. - UCharsTrie::Iterator iter(trie, 3, errorCode); + UCharsTrie::Iterator iter(*trie, 3, errorCode); if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { return; } @@ -738,6 +730,23 @@ void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { checkIterator(iter.reset(), expected, LENGTHOF(expected)); } +void UCharsTrieTest::TestIteratorFromUChars() { + static const StringAndValue data[]={ + { "mm", 3 }, + { "mmm", 33 }, + { "mmnop", 333 } + }; + builder_->clear(); + IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()"); + for(int32_t i=0; iadd(data[i].s, data[i].value, errorCode); + } + UnicodeString trieUChars; + builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); + UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode); + checkIterator(iter, data, LENGTHOF(data)); +} + void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { logln("checkData(dataLength=%d, fast)", (int)dataLength); checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); @@ -746,20 +755,19 @@ void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) } void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { - UCharsTrieBuilder builder; - UnicodeString trieUChars; - if(!buildTrie(data, dataLength, builder, buildOption, trieUChars)) { + LocalPointer trie(buildTrie(data, dataLength, buildOption)); + if(trie.isNull()) { return; // buildTrie() reported an error } - checkFirst(trieUChars, data, dataLength); - checkNext(trieUChars, data, dataLength); - checkNextWithState(trieUChars, data, dataLength); - checkNextString(trieUChars, data, dataLength); - checkIterator(trieUChars, data, dataLength); + checkFirst(*trie, data, dataLength); + checkNext(*trie, data, dataLength); + checkNextWithState(*trie, data, dataLength); + checkNextString(*trie, data, dataLength); + checkIterator(*trie, data, dataLength); } -UBool UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, - UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result) { +UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, + UStringTrieBuildOption buildOption) { IcuTestErrorCode errorCode(*this, "buildTrie()"); // Add the items to the trie builder in an interesting (not trivial, not random) order. int32_t index, step; @@ -775,26 +783,42 @@ UBool UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, index=dataLength-1; step=-1; } - builder.clear(); + builder_->clear(); for(int32_t i=0; iadd(UnicodeString(data[index].s, -1, US_INV).unescape(), + data[index].value, errorCode); index=(index+step)%dataLength; } - builder.build(buildOption, result, errorCode); + UnicodeString trieUChars; + builder_->buildUnicodeString(buildOption, trieUChars, errorCode); + LocalPointer trie(builder_->build(buildOption, errorCode)); if(!errorCode.logIfFailureAndReset("add()/build()")) { - builder.add("zzz", 999, errorCode); + builder_->add("zzz", 999, errorCode); if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); } } - logln("serialized trie size: %ld UChars\n", (long)result.length()); - return errorCode.isSuccess(); + logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); + UnicodeString trieUChars2; + builder_->buildUnicodeString(buildOption, trieUChars2, errorCode); + if(trieUChars.getBuffer()==trieUChars2.getBuffer()) { + errln("builder.buildUnicodeString() before & after build() returned same array"); + } + if(errorCode.isFailure()) { + return NULL; + } + // Tries from either build() method should be identical but + // UCharsTrie does not implement equals(). + // We just return either one. + if((dataLength&1)!=0) { + return trie.orphan(); + } else { + return new UCharsTrie(trieUChars2.getBuffer()); + } } -void UCharsTrieTest::checkFirst(const UnicodeString &trieUChars, +void UCharsTrieTest::checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength) { - UCharsTrie trie(trieUChars.getBuffer()); for(int32_t i=0; i #include +#include "unicode/localpointer.h" #include "unicode/uperf.h" #include "unicode/utext.h" #include "bytestrie.h" @@ -273,6 +274,7 @@ public: BytesTriePackageLookup(const DictionaryTriePerfTest &perf) : PackageLookup(perf) { IcuToolErrorCode errorCode("BinarySearchPackageLookup()"); + builder=new BytesTrieBuilder(errorCode); int32_t count=pkg.getItemCount(); for(int32_t i=0; iadd(fullName, i, errorCode); // NUL-terminate the name for call() to find the next one. itemNames.append(0, errorCode); } - int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, errorCode).length(); + int32_t length=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode).length(); printf("size of BytesTrie: %6ld\n", (long)length); // count+1: +1 for the last-item limit offset which we should have always had printf("size of dataOffsets:%6ld\n", (long)((count+1)*4)); printf("total index size: %6ld\n", (long)(length+(count+1)*4)); } - virtual ~BytesTriePackageLookup() {} + virtual ~BytesTriePackageLookup() { + delete builder; + } virtual void call(UErrorCode *pErrorCode) { int32_t count=pkg.getItemCount(); - const char *nameTrieBytes=builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data(); + const char *nameTrieBytes=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data(); const char *name=itemNames.data(); for(int32_t i=0; iadd(UnicodeString(FALSE, lines[i].name, lines[i].len), 0, errorCode); } UnicodeString trieUChars; - int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, trieUChars, errorCode).length(); + int32_t length=builder->buildUnicodeString(USTRINGTRIE_BUILD_SMALL, trieUChars, errorCode).length(); printf("size of UCharsTrie: %6ld bytes\n", (long)length*2); + trie=builder->build(USTRINGTRIE_BUILD_SMALL, errorCode); } - virtual ~UCharsTrieDictLookup() {} + virtual ~UCharsTrieDictLookup() { + delete builder; + delete trie; + } protected: - UCharsTrieBuilder builder; + UCharsTrieBuilder *builder; + UCharsTrie *trie; }; class UCharsTrieDictMatches : public UCharsTrieDictLookup { @@ -478,8 +488,6 @@ public: : UCharsTrieDictLookup(perfTest) {} virtual void call(UErrorCode *pErrorCode) { - UnicodeString uchars; - UCharsTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer()); UText text=UTEXT_INITIALIZER; int32_t lengths[20]; const ULine *lines=perf.getCachedLines(); @@ -491,7 +499,7 @@ public: } utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode); int32_t count=0; - ucharsTrieMatches(trie, &text, lines[i].len, + ucharsTrieMatches(*trie, &text, lines[i].len, lengths, count, LENGTHOF(lengths)); if(count==0 || lengths[count-1]!=lines[i].len) { fprintf(stderr, "word %ld (0-based) not found\n", (long)i); @@ -505,17 +513,15 @@ public: UCharsTrieDictContains(const DictionaryTriePerfTest &perfTest) : UCharsTrieDictLookup(perfTest) {} - virtual void call(UErrorCode *pErrorCode) { - UnicodeString uchars; - UCharsTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer()); + virtual void call(UErrorCode * /*pErrorCode*/) { const ULine *lines=perf.getCachedLines(); int32_t numLines=perf.getNumLines(); for(int32_t i=0; ireset().next(lines[i].name, lines[i].len))) { fprintf(stderr, "word %ld (0-based) not found\n", (long)i); } } @@ -550,8 +556,9 @@ static UBool thaiWordToBytes(const UChar *s, int32_t length, class BytesTrieDictLookup : public DictLookup { public: BytesTrieDictLookup(const DictionaryTriePerfTest &perfTest) - : DictLookup(perfTest), noDict(FALSE) { + : DictLookup(perfTest), trie(NULL), noDict(FALSE) { IcuToolErrorCode errorCode("BytesTrieDictLookup()"); + builder=new BytesTrieBuilder(errorCode); CharString str; const ULine *lines=perf.getCachedLines(); int32_t numLines=perf.getNumLines(); @@ -565,18 +572,23 @@ public: noDict=TRUE; break; } - builder.add(str.toStringPiece(), 0, errorCode); + builder->add(str.toStringPiece(), 0, errorCode); } if(!noDict) { - int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, errorCode).length(); + int32_t length=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode).length(); printf("size of BytesTrie: %6ld bytes\n", (long)length); + trie=builder->build(USTRINGTRIE_BUILD_SMALL, errorCode); } } - virtual ~BytesTrieDictLookup() {} + virtual ~BytesTrieDictLookup() { + delete builder; + delete trie; + } protected: - BytesTrieBuilder builder; + BytesTrieBuilder *builder; + BytesTrie *trie; UBool noDict; }; @@ -625,7 +637,6 @@ public: if(noDict) { return; } - BytesTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data()); UText text=UTEXT_INITIALIZER; int32_t lengths[20]; const ULine *lines=perf.getCachedLines(); @@ -637,7 +648,7 @@ public: } utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode); int32_t count=0; - bytesTrieMatches(trie, &text, lines[i].len, + bytesTrieMatches(*trie, &text, lines[i].len, lengths, count, LENGTHOF(lengths)); if(count==0 || lengths[count-1]!=lines[i].len) { fprintf(stderr, "word %ld (0-based) not found\n", (long)i); @@ -651,11 +662,10 @@ public: BytesTrieDictContains(const DictionaryTriePerfTest &perfTest) : BytesTrieDictLookup(perfTest) {} - virtual void call(UErrorCode *pErrorCode) { + virtual void call(UErrorCode * /*pErrorCode*/) { if(noDict) { return; } - BytesTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data()); const ULine *lines=perf.getCachedLines(); int32_t numLines=perf.getNumLines(); for(int32_t i=0; ifirst(thaiCharToByte(line[0])); int32_t lineLength=lines[i].len; for(int32_t j=1; jnext(thaiCharToByte(line[j])); } if(!USTRINGTRIE_HAS_VALUE(result)) { fprintf(stderr, "word %ld (0-based) not found\n", (long)i); diff --git a/icu4c/source/tools/toolutil/bytestriebuilder.cpp b/icu4c/source/tools/toolutil/bytestriebuilder.cpp index 7ed702b86d6..a9639bb72f6 100644 --- a/icu4c/source/tools/toolutil/bytestriebuilder.cpp +++ b/icu4c/source/tools/toolutil/bytestriebuilder.cpp @@ -121,6 +121,10 @@ BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharStrin return diff!=0 ? diff : lengthDiff; } +BytesTrieBuilder::BytesTrieBuilder(UErrorCode & /*errorCode*/) + : elements(NULL), elementsCapacity(0), elementsLength(0), + bytes(NULL), bytesCapacity(0), bytesLength(0) {} + BytesTrieBuilder::~BytesTrieBuilder() { delete[] elements; uprv_free(bytes); @@ -170,39 +174,66 @@ compareElementStrings(const void *context, const void *left, const void *right) U_CDECL_END -StringPiece +BytesTrie * BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { - StringPiece result; - if(U_FAILURE(errorCode)) { - return result; - } - if(bytesLength>0) { - // Already built. - result.set(bytes+(bytesCapacity-bytesLength), bytesLength); - return result; - } - if(elementsLength==0) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return result; - } - uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement), - compareElementStrings, &strings, - FALSE, // need not be a stable sort - &errorCode); - if(U_FAILURE(errorCode)) { - return result; - } - // Duplicate strings are not allowed. - StringPiece prev=elements[0].getString(strings); - for(int32_t i=1; i0) { + // Already built. + return; + } + if(bytesLength==0) { + if(elementsLength==0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement), + compareElementStrings, &strings, + FALSE, // need not be a stable sort + &errorCode); + if(U_FAILURE(errorCode)) { + return; + } + // Duplicate strings are not allowed. + StringPiece prev=elements[0].getString(strings); + for(int32_t i=1; i0) { - // Already built. - result.setTo(FALSE, uchars+(ucharsCapacity-ucharsLength), ucharsLength); - return result; - } - if(elementsLength==0) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return result; - } - if(strings.isBogus()) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return result; - } - uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement), - compareElementStrings, &strings, - FALSE, // need not be a stable sort - &errorCode); - if(U_FAILURE(errorCode)) { - return result; - } - // Duplicate strings are not allowed. - UnicodeString prev=elements[0].getString(strings); - for(int32_t i=1; i0) { + // Already built. + return; + } + if(ucharsLength==0) { + if(elementsLength==0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + if(strings.isBogus()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement), + compareElementStrings, &strings, + FALSE, // need not be a stable sort + &errorCode); + if(U_FAILURE(errorCode)) { + return; + } + // Duplicate strings are not allowed. + UnicodeString prev=elements[0].getString(strings); + for(int32_t i=1; i