mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-8167 port trie API changes from Java
X-SVN-Rev: 29367
This commit is contained in:
parent
ff2821ca77
commit
9cc27feeb4
13 changed files with 766 additions and 382 deletions
|
@ -15,11 +15,16 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "cmemory.h"
|
||||
#include "uassert.h"
|
||||
#include "bytestrie.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Destructor: releases ownedArray_, the buffer adopted by the private
// builder-only constructor. The public and copy constructors initialize
// ownedArray_ to NULL because they only alias the trie bytes, so in that case
// uprv_free() is handed NULL.
BytesTrie::~BytesTrie() {
|
||||
uprv_free(ownedArray_);
|
||||
}
|
||||
|
||||
// lead byte already shifted right by 1.
|
||||
int32_t
|
||||
BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
|
||||
|
@ -178,6 +183,9 @@ BytesTrie::next(int32_t inByte) {
|
|||
if(pos==NULL) {
|
||||
return USTRINGTRIE_NO_MATCH;
|
||||
}
|
||||
if(inByte<0) {
|
||||
inByte+=0x100;
|
||||
}
|
||||
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
|
||||
if(length>=0) {
|
||||
// Remaining part of a linear-match node.
|
||||
|
|
|
@ -37,17 +37,47 @@ class UVector32;
|
|||
* Light-weight, non-const reader class for a BytesTrie.
|
||||
* Traverses a byte-serialized data structure with minimal state,
|
||||
* for mapping byte sequences to non-negative integer values.
|
||||
*
|
||||
* This class owns the serialized trie data only if it was constructed by
|
||||
* the builder's build() method.
|
||||
* The public constructor and the copy constructor only alias the data (only copy the pointer).
|
||||
* There is no assignment operator.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
*/
|
||||
class U_COMMON_API BytesTrie : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs a BytesTrie reader instance.
|
||||
* @param trieBytes The trie bytes.
|
||||
*
|
||||
* The trieBytes must contain a copy of a byte sequence from the BytesTrieBuilder,
|
||||
* starting with the first byte of that sequence.
|
||||
* The BytesTrie object will not read more bytes than
|
||||
* the BytesTrieBuilder generated in the corresponding build() call.
|
||||
*
|
||||
* The array is not copied/cloned and must not be modified while
|
||||
* the BytesTrie object is in use.
|
||||
*
|
||||
* @param trieBytes The byte array that contains the serialized trie.
|
||||
*/
|
||||
BytesTrie(const void *trieBytes)
|
||||
: bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
|
||||
: ownedArray_(NULL), bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
|
||||
pos_(bytes_), remainingMatchLength_(-1) {}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
~BytesTrie();
|
||||
|
||||
/**
|
||||
* Copy constructor, copies the other trie reader object and its state,
|
||||
* but not the byte array which will be shared. (Shallow copy.)
|
||||
* @param other Another BytesTrie object.
|
||||
*/
|
||||
BytesTrie(const BytesTrie &other)
|
||||
: ownedArray_(NULL), bytes_(other.bytes_),
|
||||
pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
|
||||
|
||||
/**
|
||||
* Resets this trie to its initial state.
|
||||
*/
|
||||
|
@ -108,15 +138,22 @@ public:
|
|||
/**
|
||||
* Traverses the trie from the initial state for this input byte.
|
||||
* Equivalent to reset().next(inByte).
|
||||
* @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
|
||||
* Values below -0x100 and above 0xff will never match.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
inline UStringTrieResult first(int32_t inByte) {
|
||||
// -1 means "not inside a linear-match node"; the field stores the
// remaining match length minus 1.
remainingMatchLength_=-1;
|
||||
// Fold the documented negative range -0x100..-1 onto 0..0xff.
// Values below -0x100 stay negative and, per the API doc, never match.
if(inByte<0) {
|
||||
inByte+=0x100;
|
||||
}
|
||||
// Match from bytes_ (the start of the serialized trie) instead of the
// current position, which is what makes this equivalent to reset().next().
return nextImpl(bytes_, inByte);
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this input byte.
|
||||
* @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
|
||||
* Values below -0x100 and above 0xff will never match.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
UStringTrieResult next(int32_t inByte);
|
||||
|
@ -262,6 +299,20 @@ public:
|
|||
private:
|
||||
friend class BytesTrieBuilder;
|
||||
|
||||
/**
|
||||
* Constructs a BytesTrie reader instance.
|
||||
* Unlike the public constructor which just aliases an array,
|
||||
* this constructor adopts the builder's array.
|
||||
* This constructor is only called by the builder.
|
||||
*/
|
||||
BytesTrie(void *adoptBytes, const void *trieBytes)
|
||||
: ownedArray_(reinterpret_cast<uint8_t *>(adoptBytes)),
|
||||
bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
|
||||
pos_(bytes_), remainingMatchLength_(-1) {}
|
||||
|
||||
// No assignment operator.
|
||||
BytesTrie &operator=(const BytesTrie &other);
|
||||
|
||||
// Clears the current position so that the reader no longer points into the trie.
// NOTE(review): next() returns USTRINGTRIE_NO_MATCH when its pos is NULL;
// presumably that pos is read from pos_ — confirm in bytestrie.cpp.
inline void stop() {
|
||||
pos_=NULL;
|
||||
}
|
||||
|
@ -407,6 +458,8 @@ private:
|
|||
static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff
|
||||
static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff
|
||||
|
||||
uint8_t *ownedArray_;
|
||||
|
||||
// Fixed value referencing the BytesTrie bytes.
|
||||
const uint8_t *bytes_;
|
||||
|
||||
|
|
|
@ -31,6 +31,9 @@
|
|||
enum UStringTrieResult {
|
||||
/**
|
||||
* The input unit(s) did not continue a matching string.
|
||||
* Once current()/next() return USTRINGTRIE_NO_MATCH,
|
||||
* all further calls to current()/next() will also return USTRINGTRIE_NO_MATCH,
|
||||
* until the trie is reset to its original state or to a saved state.
|
||||
*/
|
||||
USTRINGTRIE_NO_MATCH,
|
||||
/**
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "bytestrie.h"
|
||||
#include "bytestriebuilder.h"
|
||||
|
@ -29,7 +30,7 @@ struct StringAndValue {
|
|||
|
||||
class BytesTrieTest : public IntlTest {
|
||||
public:
|
||||
BytesTrieTest() {}
|
||||
BytesTrieTest();
|
||||
virtual ~BytesTrieTest();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
|
||||
|
@ -44,7 +45,7 @@ public:
|
|||
void TestValuesForState();
|
||||
void TestCompact();
|
||||
|
||||
StringPiece buildMonthsTrie(BytesTrieBuilder &builder, UStringTrieBuildOption buildOption);
|
||||
BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
|
||||
void TestHasUniqueValue();
|
||||
void TestGetNextBytes();
|
||||
void TestIteratorFromBranch();
|
||||
|
@ -52,24 +53,34 @@ public:
|
|||
void TestTruncatingIteratorFromRoot();
|
||||
void TestTruncatingIteratorFromLinearMatchShort();
|
||||
void TestTruncatingIteratorFromLinearMatchLong();
|
||||
void TestIteratorFromBytes();
|
||||
|
||||
void checkData(const StringAndValue data[], int32_t dataLength);
|
||||
void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
|
||||
StringPiece buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
BytesTrieBuilder &builder, UStringTrieBuildOption buildOption);
|
||||
void checkFirst(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNext(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextWithState(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextString(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
UStringTrieBuildOption buildOption);
|
||||
void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
|
||||
private:
|
||||
BytesTrieBuilder *builder_;
|
||||
};
|
||||
|
||||
// Factory hook for the test framework: returns a heap-allocated BytesTrieTest.
// NOTE(review): the caller presumably takes ownership and deletes it — confirm
// against the intltest registration code.
extern IntlTest *createBytesTrieTest() {
|
||||
return new BytesTrieTest();
|
||||
}
|
||||
|
||||
// Allocates the single BytesTrieBuilder that the test methods share through
// builder_ (they call builder_->clear() before reusing it).
BytesTrieTest::BytesTrieTest() : builder_(NULL) {
|
||||
// The error code is handed to the builder constructor so that a failure
// there can be reported through the test framework.
IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
|
||||
// NOTE(review): the result of new is not checked for NULL here.
builder_=new BytesTrieBuilder(errorCode);
|
||||
}
|
||||
|
||||
// Destroys the builder that the constructor allocated into builder_.
BytesTrieTest::~BytesTrieTest() {
|
||||
delete builder_;
|
||||
}
|
||||
|
||||
void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
|
||||
|
@ -94,20 +105,22 @@ void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
|
|||
TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
|
||||
TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
|
||||
TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
|
||||
TESTCASE_AUTO(TestIteratorFromBytes);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
void BytesTrieTest::TestBuilder() {
|
||||
IcuTestErrorCode errorCode(*this, "TestBuilder()");
|
||||
BytesTrieBuilder builder;
|
||||
builder.build(USTRINGTRIE_BUILD_FAST, errorCode);
|
||||
builder_->clear();
|
||||
delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
|
||||
if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
|
||||
errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
|
||||
return;
|
||||
}
|
||||
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
|
||||
// TODO: remove .build(...) once add() checks for duplicates.
|
||||
builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
|
||||
if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
errln("BytesTrieBuilder.build() did not detect duplicates");
|
||||
errln("BytesTrieBuilder.add() did not detect duplicates");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -250,7 +263,7 @@ void BytesTrieTest::TestCompact() {
|
|||
checkData(data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
StringPiece BytesTrieTest::buildMonthsTrie(BytesTrieBuilder &builder, UStringTrieBuildOption buildOption) {
|
||||
BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
|
||||
// All types of nodes leading to the same value,
|
||||
// for code coverage of recursive functions.
|
||||
// In particular, we need a lot of branches on some single level
|
||||
|
@ -287,111 +300,105 @@ StringPiece BytesTrieTest::buildMonthsTrie(BytesTrieBuilder &builder, UStringTri
|
|||
{ "jun.", 6 },
|
||||
{ "june", 6 }
|
||||
};
|
||||
return buildTrie(data, LENGTHOF(data), builder, buildOption);
|
||||
return buildTrie(data, LENGTHOF(data), buildOption);
|
||||
}
|
||||
|
||||
void BytesTrieTest::TestHasUniqueValue() {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST);
|
||||
if(sp.empty()) {
|
||||
LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
BytesTrie trie(sp.data());
|
||||
int32_t uniqueValue;
|
||||
if(trie.hasUniqueValue(uniqueValue)) {
|
||||
if(trie->hasUniqueValue(uniqueValue)) {
|
||||
errln("unique value at root");
|
||||
}
|
||||
trie.next('j');
|
||||
trie.next('a');
|
||||
trie.next('n');
|
||||
trie->next('j');
|
||||
trie->next('a');
|
||||
trie->next('n');
|
||||
// hasUniqueValue() directly after next()
|
||||
if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=1) {
|
||||
if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
|
||||
errln("not unique value 1 after \"jan\"");
|
||||
}
|
||||
trie.first('j');
|
||||
trie.next('u');
|
||||
if(trie.hasUniqueValue(uniqueValue)) {
|
||||
trie->first('j');
|
||||
trie->next('u');
|
||||
if(trie->hasUniqueValue(uniqueValue)) {
|
||||
errln("unique value after \"ju\"");
|
||||
}
|
||||
if(trie.next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) {
|
||||
if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
|
||||
errln("not normal value 6 after \"jun\"");
|
||||
}
|
||||
// hasUniqueValue() after getValue()
|
||||
if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=6) {
|
||||
if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
|
||||
errln("not unique value 6 after \"jun\"");
|
||||
}
|
||||
// hasUniqueValue() from within a linear-match node
|
||||
trie.first('a');
|
||||
trie.next('u');
|
||||
if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=8) {
|
||||
trie->first('a');
|
||||
trie->next('u');
|
||||
if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
|
||||
errln("not unique value 8 after \"au\"");
|
||||
}
|
||||
}
|
||||
|
||||
void BytesTrieTest::TestGetNextBytes() {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL);
|
||||
if(sp.empty()) {
|
||||
LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
BytesTrie trie(sp.data());
|
||||
char buffer[40];
|
||||
CheckedArrayByteSink sink(buffer, LENGTHOF(buffer));
|
||||
int32_t count=trie.getNextBytes(sink);
|
||||
int32_t count=trie->getNextBytes(sink);
|
||||
if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
|
||||
errln("months getNextBytes()!=[aj] at root");
|
||||
}
|
||||
trie.next('j');
|
||||
trie.next('a');
|
||||
trie.next('n');
|
||||
trie->next('j');
|
||||
trie->next('a');
|
||||
trie->next('n');
|
||||
// getNextBytes() directly after next()
|
||||
count=trie.getNextBytes(sink.Reset());
|
||||
count=trie->getNextBytes(sink.Reset());
|
||||
buffer[count]=0;
|
||||
if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
|
||||
errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
|
||||
}
|
||||
// getNextBytes() after getValue()
|
||||
trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
|
||||
trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
count=trie.getNextBytes(sink.Reset());
|
||||
count=trie->getNextBytes(sink.Reset());
|
||||
if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
|
||||
errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
|
||||
}
|
||||
// getNextBytes() from a linear-match node
|
||||
trie.next('u');
|
||||
trie->next('u');
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
count=trie.getNextBytes(sink.Reset());
|
||||
count=trie->getNextBytes(sink.Reset());
|
||||
if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
|
||||
errln("months getNextBytes()!=[a] after \"janu\"");
|
||||
}
|
||||
trie.next('a');
|
||||
trie->next('a');
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
count=trie.getNextBytes(sink.Reset());
|
||||
count=trie->getNextBytes(sink.Reset());
|
||||
if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
|
||||
errln("months getNextBytes()!=[r] after \"janua\"");
|
||||
}
|
||||
trie.next('r');
|
||||
trie.next('y');
|
||||
trie->next('r');
|
||||
trie->next('y');
|
||||
// getNextBytes() after a final match
|
||||
count=trie.getNextBytes(sink.Reset());
|
||||
count=trie->getNextBytes(sink.Reset());
|
||||
if(count!=0 || sink.NumberOfBytesAppended()!=0) {
|
||||
errln("months getNextBytes()!=[] after \"january\"");
|
||||
}
|
||||
}
|
||||
|
||||
void BytesTrieTest::TestIteratorFromBranch() {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST);
|
||||
if(sp.empty()) {
|
||||
LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
BytesTrie trie(sp.data());
|
||||
// Go to a branch node.
|
||||
trie.next('j');
|
||||
trie.next('a');
|
||||
trie.next('n');
|
||||
trie->next('j');
|
||||
trie->next('a');
|
||||
trie->next('n');
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
|
||||
BytesTrie::Iterator iter(trie, 0, errorCode);
|
||||
BytesTrie::Iterator iter(*trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
@ -431,20 +438,18 @@ void BytesTrieTest::TestIteratorFromBranch() {
|
|||
}
|
||||
|
||||
void BytesTrieTest::TestIteratorFromLinearMatch() {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL);
|
||||
if(sp.empty()) {
|
||||
LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
BytesTrie trie(sp.data());
|
||||
// Go into a linear-match node.
|
||||
trie.next('j');
|
||||
trie.next('a');
|
||||
trie.next('n');
|
||||
trie.next('u');
|
||||
trie.next('a');
|
||||
trie->next('j');
|
||||
trie->next('a');
|
||||
trie->next('n');
|
||||
trie->next('u');
|
||||
trie->next('a');
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
|
||||
BytesTrie::Iterator iter(trie, 0, errorCode);
|
||||
BytesTrie::Iterator iter(*trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
@ -461,13 +466,12 @@ void BytesTrieTest::TestIteratorFromLinearMatch() {
|
|||
}
|
||||
|
||||
void BytesTrieTest::TestTruncatingIteratorFromRoot() {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST);
|
||||
if(sp.empty()) {
|
||||
LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
|
||||
BytesTrie::Iterator iter(sp.data(), 4, errorCode);
|
||||
BytesTrie::Iterator iter(*trie, 4, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
@ -513,18 +517,16 @@ void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
|||
{ "abcdepq", 200 },
|
||||
{ "abcdeyz", 3000 }
|
||||
};
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST);
|
||||
if(sp.empty()) {
|
||||
LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
BytesTrie trie(sp.data());
|
||||
// Go into a linear-match node.
|
||||
trie.next('a');
|
||||
trie.next('b');
|
||||
trie->next('a');
|
||||
trie->next('b');
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
|
||||
// Truncate within the linear-match node.
|
||||
BytesTrie::Iterator iter(trie, 2, errorCode);
|
||||
BytesTrie::Iterator iter(*trie, 2, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
@ -543,19 +545,17 @@ void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
|||
{ "abcdepq", 200 },
|
||||
{ "abcdeyz", 3000 }
|
||||
};
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST);
|
||||
if(sp.empty()) {
|
||||
LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
BytesTrie trie(sp.data());
|
||||
// Go into a linear-match node.
|
||||
trie.next('a');
|
||||
trie.next('b');
|
||||
trie.next('c');
|
||||
trie->next('a');
|
||||
trie->next('b');
|
||||
trie->next('c');
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
|
||||
// Truncate after the linear-match node.
|
||||
BytesTrie::Iterator iter(trie, 3, errorCode);
|
||||
BytesTrie::Iterator iter(*trie, 3, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
@ -570,6 +570,22 @@ void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
|||
checkIterator(iter.reset(), expected, LENGTHOF(expected));
|
||||
}
|
||||
|
||||
// Exercises the BytesTrie::Iterator constructor that takes the raw serialized
// bytes directly (trieBytes.data()) rather than a BytesTrie object.
void BytesTrieTest::TestIteratorFromBytes() {
|
||||
// Strings sharing the prefix "mm"; the same array is later passed to
// checkIterator() as the expected iteration result.
static const StringAndValue data[]={
|
||||
{ "mm", 3 },
|
||||
{ "mmm", 33 },
|
||||
{ "mmnop", 333 }
|
||||
};
|
||||
// The builder is shared by all tests, so drop whatever a previous test added.
builder_->clear();
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
|
||||
for(int32_t i=0; i<LENGTHOF(data); ++i) {
|
||||
builder_->add(data[i].s, data[i].value, errorCode);
|
||||
}
|
||||
// buildStringPiece() yields the serialized bytes instead of a BytesTrie.
// NOTE(review): the StringPiece presumably points into builder-owned memory,
// so it should stay valid only while builder_ is left untouched — confirm.
StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
|
||||
// NOTE(review): errorCode is not checked between build and iterator
// construction; both calls receive the same errorCode object.
BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
|
||||
checkIterator(iter, data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
|
||||
logln("checkData(dataLength=%d, fast)", (int)dataLength);
|
||||
checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
|
||||
|
@ -578,20 +594,19 @@ void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
|
|||
}
|
||||
|
||||
void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
|
||||
BytesTrieBuilder builder;
|
||||
StringPiece sp=buildTrie(data, dataLength, builder, buildOption);
|
||||
if(sp.empty()) {
|
||||
LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
checkFirst(sp, data, dataLength);
|
||||
checkNext(sp, data, dataLength);
|
||||
checkNextWithState(sp, data, dataLength);
|
||||
checkNextString(sp, data, dataLength);
|
||||
checkIterator(sp, data, dataLength);
|
||||
checkFirst(*trie, data, dataLength);
|
||||
checkNext(*trie, data, dataLength);
|
||||
checkNextWithState(*trie, data, dataLength);
|
||||
checkNextString(*trie, data, dataLength);
|
||||
checkIterator(*trie, data, dataLength);
|
||||
}
|
||||
|
||||
StringPiece BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
BytesTrieBuilder &builder, UStringTrieBuildOption buildOption) {
|
||||
BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
UStringTrieBuildOption buildOption) {
|
||||
IcuTestErrorCode errorCode(*this, "buildTrie()");
|
||||
// Add the items to the trie builder in an interesting (not trivial, not random) order.
|
||||
int32_t index, step;
|
||||
|
@ -607,47 +622,61 @@ StringPiece BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLe
|
|||
index=dataLength-1;
|
||||
step=-1;
|
||||
}
|
||||
builder.clear();
|
||||
builder_->clear();
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
builder.add(data[index].s, data[index].value, errorCode);
|
||||
builder_->add(data[index].s, data[index].value, errorCode);
|
||||
index=(index+step)%dataLength;
|
||||
}
|
||||
StringPiece sp(builder.build(buildOption, errorCode));
|
||||
StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
|
||||
LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
|
||||
if(!errorCode.logIfFailureAndReset("add()/build()")) {
|
||||
builder.add("zzz", 999, errorCode);
|
||||
builder_->add("zzz", 999, errorCode);
|
||||
if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
|
||||
errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
|
||||
}
|
||||
}
|
||||
logln("serialized trie size: %ld bytes\n", (long)sp.length());
|
||||
return sp;
|
||||
StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
|
||||
if(sp.data()==sp2.data()) {
|
||||
errln("builder.buildStringPiece() before & after build() returned same array");
|
||||
}
|
||||
if(errorCode.isFailure()) {
|
||||
return NULL;
|
||||
}
|
||||
// Tries from either build() method should be identical but
|
||||
// BytesTrie does not implement equals().
|
||||
// We just return either one.
|
||||
if((dataLength&1)!=0) {
|
||||
return trie.orphan();
|
||||
} else {
|
||||
return new BytesTrie(sp2.data());
|
||||
}
|
||||
}
|
||||
|
||||
void BytesTrieTest::checkFirst(const StringPiece &trieBytes,
|
||||
void BytesTrieTest::checkFirst(BytesTrie &trie,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
BytesTrie trie(trieBytes.data());
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
int c=(uint8_t)*data[i].s;
|
||||
int c=*data[i].s;
|
||||
if(c==0) {
|
||||
continue; // skip empty string
|
||||
}
|
||||
UStringTrieResult firstResult=trie.first(c);
|
||||
int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
|
||||
UStringTrieResult nextResult=trie.next((uint8_t)data[i].s[1]);
|
||||
UStringTrieResult nextResult=trie.next(data[i].s[1]);
|
||||
if(firstResult!=trie.reset().next(c) ||
|
||||
firstResult!=trie.current() ||
|
||||
firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
|
||||
nextResult!=trie.next((uint8_t)data[i].s[1])
|
||||
nextResult!=trie.next(data[i].s[1])
|
||||
) {
|
||||
errln("trie.first(%c)!=trie.reset().next(same) for %s",
|
||||
c, data[i].s);
|
||||
}
|
||||
}
|
||||
trie.reset();
|
||||
}
|
||||
|
||||
void BytesTrieTest::checkNext(const StringPiece &trieBytes,
|
||||
void BytesTrieTest::checkNext(BytesTrie &trie,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
BytesTrie trie(trieBytes.data());
|
||||
BytesTrie::State state;
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
|
||||
|
@ -715,9 +744,8 @@ void BytesTrieTest::checkNext(const StringPiece &trieBytes,
|
|||
}
|
||||
}
|
||||
|
||||
void BytesTrieTest::checkNextWithState(const StringPiece &trieBytes,
|
||||
void BytesTrieTest::checkNextWithState(BytesTrie &trie,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
BytesTrie trie(trieBytes.data());
|
||||
BytesTrie::State noState, state;
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
if((i&1)==0) {
|
||||
|
@ -776,9 +804,8 @@ void BytesTrieTest::checkNextWithState(const StringPiece &trieBytes,
|
|||
|
||||
// next(string) is also tested in other functions,
|
||||
// but here we try to go partway through the string, and then beyond it.
|
||||
void BytesTrieTest::checkNextString(const StringPiece &trieBytes,
|
||||
void BytesTrieTest::checkNextString(BytesTrie &trie,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
BytesTrie trie(trieBytes.data());
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
const char *expectedString=data[i].s;
|
||||
int32_t stringLength=strlen(expectedString);
|
||||
|
@ -794,11 +821,11 @@ void BytesTrieTest::checkNextString(const StringPiece &trieBytes,
|
|||
}
|
||||
}
|
||||
|
||||
void BytesTrieTest::checkIterator(const StringPiece &trieBytes,
|
||||
void BytesTrieTest::checkIterator(const BytesTrie &trie,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
||||
BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trieBytes) constructor")) {
|
||||
BytesTrie::Iterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
checkIterator(iter, data, dataLength);
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "ucharstrie.h"
|
||||
#include "ucharstriebuilder.h"
|
||||
|
@ -29,7 +30,7 @@ struct StringAndValue {
|
|||
|
||||
class UCharsTrieTest : public IntlTest {
|
||||
public:
|
||||
UCharsTrieTest() {}
|
||||
UCharsTrieTest();
|
||||
virtual ~UCharsTrieTest();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
|
||||
|
@ -46,11 +47,10 @@ public:
|
|||
void TestFirstForCodePoint();
|
||||
void TestNextForCodePoint();
|
||||
|
||||
UBool buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result, int32_t numUniqueFirst);
|
||||
UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
|
||||
void TestLargeTrie();
|
||||
|
||||
UBool buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption,
|
||||
UnicodeString &result);
|
||||
UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
|
||||
void TestHasUniqueValue();
|
||||
void TestGetNextUChars();
|
||||
void TestIteratorFromBranch();
|
||||
|
@ -58,24 +58,34 @@ public:
|
|||
void TestTruncatingIteratorFromRoot();
|
||||
void TestTruncatingIteratorFromLinearMatchShort();
|
||||
void TestTruncatingIteratorFromLinearMatchLong();
|
||||
void TestIteratorFromUChars();
|
||||
|
||||
void checkData(const StringAndValue data[], int32_t dataLength);
|
||||
void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
|
||||
UBool buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result);
|
||||
void checkFirst(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNext(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextWithState(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextString(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
UStringTrieBuildOption buildOption);
|
||||
void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
|
||||
private:
|
||||
UCharsTrieBuilder *builder_;
|
||||
};
|
||||
|
||||
// Factory hook for the test framework: returns a heap-allocated UCharsTrieTest.
// NOTE(review): the caller presumably takes ownership and deletes it — confirm
// against the intltest registration code.
extern IntlTest *createUCharsTrieTest() {
|
||||
return new UCharsTrieTest();
|
||||
}
|
||||
|
||||
// Allocates the UCharsTrieBuilder that the test methods reach through builder_.
UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
|
||||
// The error code is handed to the builder constructor so that a failure
// there can be reported through the test framework.
IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
|
||||
// NOTE(review): the result of new is not checked for NULL here.
builder_=new UCharsTrieBuilder(errorCode);
|
||||
}
|
||||
|
||||
// Destroys the builder that the constructor allocated into builder_.
UCharsTrieTest::~UCharsTrieTest() {
|
||||
delete builder_;
|
||||
}
|
||||
|
||||
void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
|
||||
|
@ -103,21 +113,21 @@ void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name
|
|||
TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
|
||||
TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
|
||||
TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
|
||||
TESTCASE_AUTO(TestIteratorFromUChars);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
// Checks the builder's error reporting:
// - build() on a builder that has had nothing added in this test is expected
//   to set U_INDEX_OUTOFBOUNDS_ERROR;
// - adding the same string "=" twice (followed by build()) is expected to set
//   U_ILLEGAL_ARGUMENT_ERROR.
// Each expectation is checked via errorCode.reset(); on a mismatch the test
// logs with errln() and returns.
// NOTE(review): this span comes from a rendered diff without +/- markers, so
// statements from before and after the change appear side by side (a local
// `builder` vs. the member `builder_`, and two errln() messages for the
// duplicate check). Confirm which lines are live before relying on it.
void UCharsTrieTest::TestBuilder() {
|
||||
IcuTestErrorCode errorCode(*this, "TestBuilder()");
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
builder.build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
|
||||
delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
|
||||
if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
|
||||
errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
|
||||
return;
|
||||
}
|
||||
builder.add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
|
||||
// TODO: remove .build(...) once add() checks for duplicates.
|
||||
builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
|
||||
if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
errln("UCharsTrieBuilder.build() did not detect duplicates");
|
||||
errln("UCharsTrieBuilder.add() did not detect duplicates");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -281,41 +291,39 @@ void UCharsTrieTest::TestNextForCodePoint() {
|
|||
{ "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
|
||||
{ "\\u4dff\\U000103ff", 99999 }
|
||||
};
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
UStringTrieResult result;
|
||||
if( (result=trie.nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
|
||||
trie.getValue()!=2000000000
|
||||
if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
|
||||
trie->getValue()!=2000000000
|
||||
) {
|
||||
errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
|
||||
}
|
||||
if( (result=trie.firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
|
||||
trie.getValue()!=44444
|
||||
if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
|
||||
trie->getValue()!=44444
|
||||
) {
|
||||
errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
|
||||
}
|
||||
if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie.current() // no match for trail surrogate
|
||||
if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate
|
||||
) {
|
||||
errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
|
||||
}
|
||||
if( (result=trie.reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie.current() ||
|
||||
(result=trie.nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie.current() ||
|
||||
trie.getValue()!=99999
|
||||
if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
|
||||
(result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
|
||||
trie->getValue()!=99999
|
||||
) {
|
||||
errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
|
||||
}
|
||||
|
@ -355,43 +363,41 @@ private:
|
|||
|
||||
} // end namespace
|
||||
|
||||
// Builds a large trie for TestLargeTrie(): clears the builder, adds
// gen.getString()/gen.getValue() pairs from a Generator until
// gen.countUniqueFirstChars() reaches numUniqueFirst, serializes with
// USTRINGTRIE_BUILD_FAST and logs the serialized length in UChars.
// NOTE(review): this span comes from a rendered diff without +/- markers; both
// the UBool variant (fills the `result` out-parameter, returns
// errorCode.isSuccess()) and the UCharsTrie*-returning variant are visible.
// NOTE(review): the pointer-returning variant never inspects errorCode before
// `return new UCharsTrie(...)`, while TestLargeTrie() treats a NULL result as
// "reported an error" — confirm whether a failure check is intended here.
// NOTE(review): the returned UCharsTrie is constructed from
// trieUChars.getBuffer() although trieUChars is a local; presumably that buffer
// aliases storage kept alive by builder_ — verify against buildUnicodeString().
UBool UCharsTrieTest::buildLargeTrie(UCharsTrieBuilder &builder, UnicodeString &result,
|
||||
int32_t numUniqueFirst) {
|
||||
UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
|
||||
IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
|
||||
Generator gen;
|
||||
builder.clear();
|
||||
builder_->clear();
|
||||
while(gen.countUniqueFirstChars()<numUniqueFirst) {
|
||||
builder.add(gen.getString(), gen.getValue(), errorCode);
|
||||
builder_->add(gen.getString(), gen.getValue(), errorCode);
|
||||
gen.next();
|
||||
}
|
||||
infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
|
||||
builder.build(USTRINGTRIE_BUILD_FAST, result, errorCode);
|
||||
logln("serialized trie size: %ld UChars\n", (long)result.length());
|
||||
return errorCode.isSuccess();
|
||||
UnicodeString trieUChars;
|
||||
builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
|
||||
logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
|
||||
return new UCharsTrie(trieUChars.getBuffer());
|
||||
}
|
||||
|
||||
// Exercise a large branch node.
|
||||
void UCharsTrieTest::TestLargeTrie() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildLargeTrie(builder, trieUChars, 1111)) {
|
||||
LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
Generator gen;
|
||||
while(gen.countUniqueFirstChars()<1111) {
|
||||
UnicodeString x(gen.getString());
|
||||
int32_t value=gen.getValue();
|
||||
if(!x.isEmpty()) {
|
||||
if(trie.first(x[0])==USTRINGTRIE_NO_MATCH) {
|
||||
if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
|
||||
errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
|
||||
x[0], (long)gen.getIndex());
|
||||
break;
|
||||
}
|
||||
x.remove(0, 1);
|
||||
}
|
||||
UStringTrieResult result=trie.next(x.getBuffer(), x.length());
|
||||
if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie.current() || value!=trie.getValue()) {
|
||||
UStringTrieResult result=trie->next(x.getBuffer(), x.length());
|
||||
if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
|
||||
errln("next(%d chars U+%04X U+%04X)!=hasValue or "
|
||||
"next()!=current() or getValue() wrong "
|
||||
"for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
|
||||
|
@ -412,8 +418,7 @@ enum {
|
|||
u_y=0x79
|
||||
};
|
||||
|
||||
UBool UCharsTrieTest::buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption,
|
||||
UnicodeString &result) {
|
||||
UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
|
||||
// All types of nodes leading to the same value,
|
||||
// for code coverage of recursive functions.
|
||||
// In particular, we need a lot of branches on some single level
|
||||
|
@ -450,43 +455,41 @@ UBool UCharsTrieTest::buildMonthsTrie(UCharsTrieBuilder &builder, UStringTrieBui
|
|||
{ "jun.", 6 },
|
||||
{ "june", 6 }
|
||||
};
|
||||
return buildTrie(data, LENGTHOF(data), builder, buildOption, result);
|
||||
return buildTrie(data, LENGTHOF(data), buildOption);
|
||||
}
|
||||
|
||||
// Exercises hasUniqueValue() on the months trie built with
// USTRINGTRIE_BUILD_FAST. It is queried at the root (expected: no unique
// value), directly after next() for "jan" (expected 1), after "ju" (expected:
// none), after getValue() for "jun" (expected 6), and from within a
// linear-match node after "au" (expected 8). Each mismatch is reported with
// errln(); the test keeps going after a mismatch.
// NOTE(review): this span comes from a rendered diff without +/- markers, so
// both the `trie.` form (UCharsTrie object built from trieUChars) and the
// `trie->` form (LocalPointer<UCharsTrie>) of each statement appear — confirm
// which lines are live.
void UCharsTrieTest::TestHasUniqueValue() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
int32_t uniqueValue;
|
||||
if(trie.hasUniqueValue(uniqueValue)) {
|
||||
if(trie->hasUniqueValue(uniqueValue)) {
|
||||
errln("unique value at root");
|
||||
}
|
||||
trie.next(u_j);
|
||||
trie.next(u_a);
|
||||
trie.next(u_n);
|
||||
trie->next(u_j);
|
||||
trie->next(u_a);
|
||||
trie->next(u_n);
|
||||
// hasUniqueValue() directly after next()
|
||||
if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=1) {
|
||||
if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
|
||||
errln("not unique value 1 after \"jan\"");
|
||||
}
|
||||
trie.first(u_j);
|
||||
trie.next(u_u);
|
||||
if(trie.hasUniqueValue(uniqueValue)) {
|
||||
trie->first(u_j);
|
||||
trie->next(u_u);
|
||||
if(trie->hasUniqueValue(uniqueValue)) {
|
||||
errln("unique value after \"ju\"");
|
||||
}
|
||||
if(trie.next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie.getValue()) {
|
||||
if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
|
||||
errln("not normal value 6 after \"jun\"");
|
||||
}
|
||||
// hasUniqueValue() after getValue()
|
||||
if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=6) {
|
||||
if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
|
||||
errln("not unique value 6 after \"jun\"");
|
||||
}
|
||||
// hasUniqueValue() from within a linear-match node
|
||||
trie.first(u_a);
|
||||
trie.next(u_u);
|
||||
if(!trie.hasUniqueValue(uniqueValue) || uniqueValue!=8) {
|
||||
trie->first(u_a);
|
||||
trie->next(u_u);
|
||||
if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
|
||||
errln("not unique value 8 after \"au\"");
|
||||
}
|
||||
}
|
||||
|
@ -501,65 +504,61 @@ private:
|
|||
};
|
||||
|
||||
// Exercises getNextUChars() on the months trie built with
// USTRINGTRIE_BUILD_SMALL. The next-character set is collected into `buffer`
// through a UnicodeStringAppendable and compared with the expected count and
// contents at the root ([aj]), directly after next() for "jan", after
// getValue(), inside a linear-match node ("janu" -> [a], "janua" -> [r]), and
// after the final match "january" (empty). app.reset() is called before each
// later query. Each mismatch is reported with errln().
// NOTE(review): this span comes from a rendered diff without +/- markers, so
// both the `trie.` form (UCharsTrie object built from trieUChars) and the
// `trie->` form (LocalPointer<UCharsTrie>) of each statement appear — confirm
// which lines are live.
void UCharsTrieTest::TestGetNextUChars() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) {
|
||||
LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
UnicodeString buffer;
|
||||
UnicodeStringAppendable app(buffer);
|
||||
int32_t count=trie.getNextUChars(app);
|
||||
int32_t count=trie->getNextUChars(app);
|
||||
if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
|
||||
errln("months getNextUChars()!=[aj] at root");
|
||||
}
|
||||
trie.next(u_j);
|
||||
trie.next(u_a);
|
||||
trie.next(u_n);
|
||||
trie->next(u_j);
|
||||
trie->next(u_a);
|
||||
trie->next(u_n);
|
||||
// getNextUChars() directly after next()
|
||||
count=trie.getNextUChars(app.reset());
|
||||
count=trie->getNextUChars(app.reset());
|
||||
if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
|
||||
errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
|
||||
}
|
||||
// getNextUChars() after getValue()
|
||||
trie.getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
|
||||
count=trie.getNextUChars(app.reset());
|
||||
trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
|
||||
count=trie->getNextUChars(app.reset());
|
||||
if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
|
||||
errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
|
||||
}
|
||||
// getNextUChars() from a linear-match node
|
||||
trie.next(u_u);
|
||||
count=trie.getNextUChars(app.reset());
|
||||
trie->next(u_u);
|
||||
count=trie->getNextUChars(app.reset());
|
||||
if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
|
||||
errln("months getNextUChars()!=[a] after \"janu\"");
|
||||
}
|
||||
trie.next(u_a);
|
||||
count=trie.getNextUChars(app.reset());
|
||||
trie->next(u_a);
|
||||
count=trie->getNextUChars(app.reset());
|
||||
if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
|
||||
errln("months getNextUChars()!=[r] after \"janua\"");
|
||||
}
|
||||
trie.next(u_r);
|
||||
trie.next(u_y);
|
||||
trie->next(u_r);
|
||||
trie->next(u_y);
|
||||
// getNextUChars() after a final match
|
||||
count=trie.getNextUChars(app.reset());
|
||||
count=trie->getNextUChars(app.reset());
|
||||
if(count!=0 || buffer.length()!=0) {
|
||||
errln("months getNextUChars()!=[] after \"january\"");
|
||||
}
|
||||
}
|
||||
|
||||
void UCharsTrieTest::TestIteratorFromBranch() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
// Go to a branch node.
|
||||
trie.next(u_j);
|
||||
trie.next(u_a);
|
||||
trie.next(u_n);
|
||||
trie->next(u_j);
|
||||
trie->next(u_a);
|
||||
trie->next(u_n);
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
|
||||
UCharsTrie::Iterator iter(trie, 0, errorCode);
|
||||
UCharsTrie::Iterator iter(*trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
@ -599,20 +598,18 @@ void UCharsTrieTest::TestIteratorFromBranch() {
|
|||
}
|
||||
|
||||
void UCharsTrieTest::TestIteratorFromLinearMatch() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_SMALL, trieUChars)) {
|
||||
LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
// Go into a linear-match node.
|
||||
trie.next(u_j);
|
||||
trie.next(u_a);
|
||||
trie.next(u_n);
|
||||
trie.next(u_u);
|
||||
trie.next(u_a);
|
||||
trie->next(u_j);
|
||||
trie->next(u_a);
|
||||
trie->next(u_n);
|
||||
trie->next(u_u);
|
||||
trie->next(u_a);
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
|
||||
UCharsTrie::Iterator iter(trie, 0, errorCode);
|
||||
UCharsTrie::Iterator iter(*trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
@ -629,13 +626,12 @@ void UCharsTrieTest::TestIteratorFromLinearMatch() {
|
|||
}
|
||||
|
||||
void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildMonthsTrie(builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
|
||||
UCharsTrie::Iterator iter(trieUChars.getBuffer(), 4, errorCode);
|
||||
UCharsTrie::Iterator iter(*trie, 4, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
@ -681,18 +677,16 @@ void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
|||
{ "abcdepq", 200 },
|
||||
{ "abcdeyz", 3000 }
|
||||
};
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
// Go into a linear-match node.
|
||||
trie.next(u_a);
|
||||
trie.next(u_b);
|
||||
trie->next(u_a);
|
||||
trie->next(u_b);
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
|
||||
// Truncate within the linear-match node.
|
||||
UCharsTrie::Iterator iter(trie, 2, errorCode);
|
||||
UCharsTrie::Iterator iter(*trie, 2, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
@ -711,19 +705,17 @@ void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
|||
{ "abcdepq", 200 },
|
||||
{ "abcdeyz", 3000 }
|
||||
};
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildTrie(data, LENGTHOF(data), builder, USTRINGTRIE_BUILD_FAST, trieUChars)) {
|
||||
LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
// Go into a linear-match node.
|
||||
trie.next(u_a);
|
||||
trie.next(u_b);
|
||||
trie.next(u_c);
|
||||
trie->next(u_a);
|
||||
trie->next(u_b);
|
||||
trie->next(u_c);
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
|
||||
// Truncate after the linear-match node.
|
||||
UCharsTrie::Iterator iter(trie, 3, errorCode);
|
||||
UCharsTrie::Iterator iter(*trie, 3, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
@ -738,6 +730,23 @@ void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
|||
checkIterator(iter.reset(), expected, LENGTHOF(expected));
|
||||
}
|
||||
|
||||
void UCharsTrieTest::TestIteratorFromUChars() {
|
||||
static const StringAndValue data[]={
|
||||
{ "mm", 3 },
|
||||
{ "mmm", 33 },
|
||||
{ "mmnop", 333 }
|
||||
};
|
||||
builder_->clear();
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
|
||||
for(int32_t i=0; i<LENGTHOF(data); ++i) {
|
||||
builder_->add(data[i].s, data[i].value, errorCode);
|
||||
}
|
||||
UnicodeString trieUChars;
|
||||
builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
|
||||
UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
|
||||
checkIterator(iter, data, LENGTHOF(data));
|
||||
}
|
||||
|
||||
void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
|
||||
logln("checkData(dataLength=%d, fast)", (int)dataLength);
|
||||
checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
|
||||
|
@ -746,20 +755,19 @@ void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength)
|
|||
}
|
||||
|
||||
// Builds a trie from data[0..dataLength) with the given build option and runs
// the checkFirst(), checkNext(), checkNextWithState(), checkNextString() and
// checkIterator() helpers over it, in that order. Returns early, without
// running any check, if buildTrie() signals failure.
// NOTE(review): this span comes from a rendered diff without +/- markers, so
// both the variant passing the serialized `trieUChars` to the helpers and the
// variant passing `*trie` (from a LocalPointer<UCharsTrie>) appear — confirm
// which lines are live.
void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
|
||||
UCharsTrieBuilder builder;
|
||||
UnicodeString trieUChars;
|
||||
if(!buildTrie(data, dataLength, builder, buildOption, trieUChars)) {
|
||||
LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
|
||||
if(trie.isNull()) {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
checkFirst(trieUChars, data, dataLength);
|
||||
checkNext(trieUChars, data, dataLength);
|
||||
checkNextWithState(trieUChars, data, dataLength);
|
||||
checkNextString(trieUChars, data, dataLength);
|
||||
checkIterator(trieUChars, data, dataLength);
|
||||
checkFirst(*trie, data, dataLength);
|
||||
checkNext(*trie, data, dataLength);
|
||||
checkNextWithState(*trie, data, dataLength);
|
||||
checkNextString(*trie, data, dataLength);
|
||||
checkIterator(*trie, data, dataLength);
|
||||
}
|
||||
|
||||
UBool UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
UCharsTrieBuilder &builder, UStringTrieBuildOption buildOption, UnicodeString &result) {
|
||||
UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
|
||||
UStringTrieBuildOption buildOption) {
|
||||
IcuTestErrorCode errorCode(*this, "buildTrie()");
|
||||
// Add the items to the trie builder in an interesting (not trivial, not random) order.
|
||||
int32_t index, step;
|
||||
|
@ -775,26 +783,42 @@ UBool UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
|
|||
index=dataLength-1;
|
||||
step=-1;
|
||||
}
|
||||
builder.clear();
|
||||
builder_->clear();
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
builder.add(UnicodeString(data[index].s, -1, US_INV).unescape(),
|
||||
data[index].value, errorCode);
|
||||
builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
|
||||
data[index].value, errorCode);
|
||||
index=(index+step)%dataLength;
|
||||
}
|
||||
builder.build(buildOption, result, errorCode);
|
||||
UnicodeString trieUChars;
|
||||
builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
|
||||
LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
|
||||
if(!errorCode.logIfFailureAndReset("add()/build()")) {
|
||||
builder.add("zzz", 999, errorCode);
|
||||
builder_->add("zzz", 999, errorCode);
|
||||
if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
|
||||
errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
|
||||
}
|
||||
}
|
||||
logln("serialized trie size: %ld UChars\n", (long)result.length());
|
||||
return errorCode.isSuccess();
|
||||
logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
|
||||
UnicodeString trieUChars2;
|
||||
builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
|
||||
if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
|
||||
errln("builder.buildUnicodeString() before & after build() returned same array");
|
||||
}
|
||||
if(errorCode.isFailure()) {
|
||||
return NULL;
|
||||
}
|
||||
// Tries from either build() method should be identical but
|
||||
// UCharsTrie does not implement equals().
|
||||
// We just return either one.
|
||||
if((dataLength&1)!=0) {
|
||||
return trie.orphan();
|
||||
} else {
|
||||
return new UCharsTrie(trieUChars2.getBuffer());
|
||||
}
|
||||
}
|
||||
|
||||
void UCharsTrieTest::checkFirst(const UnicodeString &trieUChars,
|
||||
void UCharsTrieTest::checkFirst(UCharsTrie &trie,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
if(*data[i].s==0) {
|
||||
continue; // skip empty string
|
||||
|
@ -828,11 +852,11 @@ void UCharsTrieTest::checkFirst(const UnicodeString &trieUChars,
|
|||
c, data[i].s);
|
||||
}
|
||||
}
|
||||
trie.reset();
|
||||
}
|
||||
|
||||
void UCharsTrieTest::checkNext(const UnicodeString &trieUChars,
|
||||
void UCharsTrieTest::checkNext(UCharsTrie &trie,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie::State state;
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
|
||||
|
@ -905,9 +929,8 @@ void UCharsTrieTest::checkNext(const UnicodeString &trieUChars,
|
|||
}
|
||||
}
|
||||
|
||||
void UCharsTrieTest::checkNextWithState(const UnicodeString &trieUChars,
|
||||
void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
UCharsTrie::State noState, state;
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
if((i&1)==0) {
|
||||
|
@ -966,9 +989,8 @@ void UCharsTrieTest::checkNextWithState(const UnicodeString &trieUChars,
|
|||
|
||||
// next(string) is also tested in other functions,
|
||||
// but here we try to go partway through the string, and then beyond it.
|
||||
void UCharsTrieTest::checkNextString(const UnicodeString &trieUChars,
|
||||
void UCharsTrieTest::checkNextString(UCharsTrie &trie,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
UCharsTrie trie(trieUChars.getBuffer());
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
|
||||
int32_t stringLength=expectedString.length();
|
||||
|
@ -985,10 +1007,10 @@ void UCharsTrieTest::checkNextString(const UnicodeString &trieUChars,
|
|||
}
|
||||
}
|
||||
|
||||
void UCharsTrieTest::checkIterator(const UnicodeString &trieUChars,
|
||||
void UCharsTrieTest::checkIterator(UCharsTrie &trie,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
||||
UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
|
||||
UCharsTrie::Iterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/uperf.h"
|
||||
#include "unicode/utext.h"
|
||||
#include "bytestrie.h"
|
||||
|
@ -273,6 +274,7 @@ public:
|
|||
BytesTriePackageLookup(const DictionaryTriePerfTest &perf)
|
||||
: PackageLookup(perf) {
|
||||
IcuToolErrorCode errorCode("BinarySearchPackageLookup()");
|
||||
builder=new BytesTrieBuilder(errorCode);
|
||||
int32_t count=pkg.getItemCount();
|
||||
for(int32_t i=0; i<count; ++i) {
|
||||
// The Package class removes the "icudt46l/" prefix.
|
||||
|
@ -288,21 +290,23 @@ public:
|
|||
// start and limit offset of the data item.
|
||||
StringPiece fullName(itemNames.toStringPiece());
|
||||
fullName.remove_prefix(offset);
|
||||
builder.add(fullName, i, errorCode);
|
||||
builder->add(fullName, i, errorCode);
|
||||
// NUL-terminate the name for call() to find the next one.
|
||||
itemNames.append(0, errorCode);
|
||||
}
|
||||
int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, errorCode).length();
|
||||
int32_t length=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode).length();
|
||||
printf("size of BytesTrie: %6ld\n", (long)length);
|
||||
// count+1: +1 for the last-item limit offset which we should have always had
|
||||
printf("size of dataOffsets:%6ld\n", (long)((count+1)*4));
|
||||
printf("total index size: %6ld\n", (long)(length+(count+1)*4));
|
||||
}
|
||||
// Destructor.
// NOTE(review): rendered diff without +/- markers — the empty-body form and the
// form that deletes `builder` both appear. The deleting form matches `builder`
// being declared as a BytesTrieBuilder pointer that the constructor allocates
// with new.
virtual ~BytesTriePackageLookup() {}
|
||||
virtual ~BytesTriePackageLookup() {
|
||||
delete builder;
|
||||
}
|
||||
|
||||
virtual void call(UErrorCode *pErrorCode) {
|
||||
int32_t count=pkg.getItemCount();
|
||||
const char *nameTrieBytes=builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data();
|
||||
const char *nameTrieBytes=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data();
|
||||
const char *name=itemNames.data();
|
||||
for(int32_t i=0; i<count; ++i) {
|
||||
if(bytesTrieLookup(name, nameTrieBytes)<0) {
|
||||
|
@ -313,7 +317,7 @@ public:
|
|||
}
|
||||
|
||||
protected:
|
||||
BytesTrieBuilder builder;
|
||||
BytesTrieBuilder *builder;
|
||||
CharString itemNames;
|
||||
};
|
||||
|
||||
|
@ -450,8 +454,9 @@ ucharsTrieMatches(UCharsTrie &trie,
|
|||
class UCharsTrieDictLookup : public DictLookup {
|
||||
public:
|
||||
UCharsTrieDictLookup(const DictionaryTriePerfTest &perfTest)
|
||||
: DictLookup(perfTest) {
|
||||
: DictLookup(perfTest), trie(NULL) {
|
||||
IcuToolErrorCode errorCode("UCharsTrieDictLookup()");
|
||||
builder=new UCharsTrieBuilder(errorCode);
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
int32_t numLines=perf.getNumLines();
|
||||
for(int32_t i=0; i<numLines; ++i) {
|
||||
|
@ -459,17 +464,22 @@ public:
|
|||
if(lines[i].name[0]<0x41) {
|
||||
continue;
|
||||
}
|
||||
builder.add(UnicodeString(FALSE, lines[i].name, lines[i].len), 0, errorCode);
|
||||
builder->add(UnicodeString(FALSE, lines[i].name, lines[i].len), 0, errorCode);
|
||||
}
|
||||
UnicodeString trieUChars;
|
||||
int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, trieUChars, errorCode).length();
|
||||
int32_t length=builder->buildUnicodeString(USTRINGTRIE_BUILD_SMALL, trieUChars, errorCode).length();
|
||||
printf("size of UCharsTrie: %6ld bytes\n", (long)length*2);
|
||||
trie=builder->build(USTRINGTRIE_BUILD_SMALL, errorCode);
|
||||
}
|
||||
|
||||
// Destructor.
// NOTE(review): rendered diff without +/- markers — the empty-body form and the
// form that deletes `builder` and `trie` both appear. The deleting form matches
// both members being declared as pointers that the constructor assigns.
virtual ~UCharsTrieDictLookup() {}
|
||||
virtual ~UCharsTrieDictLookup() {
|
||||
delete builder;
|
||||
delete trie;
|
||||
}
|
||||
|
||||
protected:
|
||||
UCharsTrieBuilder builder;
|
||||
UCharsTrieBuilder *builder;
|
||||
UCharsTrie *trie;
|
||||
};
|
||||
|
||||
class UCharsTrieDictMatches : public UCharsTrieDictLookup {
|
||||
|
@ -478,8 +488,6 @@ public:
|
|||
: UCharsTrieDictLookup(perfTest) {}
|
||||
|
||||
virtual void call(UErrorCode *pErrorCode) {
|
||||
UnicodeString uchars;
|
||||
UCharsTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
|
||||
UText text=UTEXT_INITIALIZER;
|
||||
int32_t lengths[20];
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
|
@ -491,7 +499,7 @@ public:
|
|||
}
|
||||
utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
|
||||
int32_t count=0;
|
||||
ucharsTrieMatches(trie, &text, lines[i].len,
|
||||
ucharsTrieMatches(*trie, &text, lines[i].len,
|
||||
lengths, count, LENGTHOF(lengths));
|
||||
if(count==0 || lengths[count-1]!=lines[i].len) {
|
||||
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
|
||||
|
@ -505,17 +513,15 @@ public:
|
|||
UCharsTrieDictContains(const DictionaryTriePerfTest &perfTest)
|
||||
: UCharsTrieDictLookup(perfTest) {}
|
||||
|
||||
virtual void call(UErrorCode *pErrorCode) {
|
||||
UnicodeString uchars;
|
||||
UCharsTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
|
||||
virtual void call(UErrorCode * /*pErrorCode*/) {
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
int32_t numLines=perf.getNumLines();
|
||||
for(int32_t i=0; i<numLines; ++i) {
|
||||
// Skip comment lines (start with a character below 'A').
|
||||
// Skip comment lines (which start with a character below 'A').
|
||||
if(lines[i].name[0]<0x41) {
|
||||
continue;
|
||||
}
|
||||
if(!USTRINGTRIE_HAS_VALUE(trie.reset().next(lines[i].name, lines[i].len))) {
|
||||
if(!USTRINGTRIE_HAS_VALUE(trie->reset().next(lines[i].name, lines[i].len))) {
|
||||
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
|
||||
}
|
||||
}
|
||||
|
@ -550,8 +556,9 @@ static UBool thaiWordToBytes(const UChar *s, int32_t length,
|
|||
class BytesTrieDictLookup : public DictLookup {
|
||||
public:
|
||||
BytesTrieDictLookup(const DictionaryTriePerfTest &perfTest)
|
||||
: DictLookup(perfTest), noDict(FALSE) {
|
||||
: DictLookup(perfTest), trie(NULL), noDict(FALSE) {
|
||||
IcuToolErrorCode errorCode("BytesTrieDictLookup()");
|
||||
builder=new BytesTrieBuilder(errorCode);
|
||||
CharString str;
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
int32_t numLines=perf.getNumLines();
|
||||
|
@ -565,18 +572,23 @@ public:
|
|||
noDict=TRUE;
|
||||
break;
|
||||
}
|
||||
builder.add(str.toStringPiece(), 0, errorCode);
|
||||
builder->add(str.toStringPiece(), 0, errorCode);
|
||||
}
|
||||
if(!noDict) {
|
||||
int32_t length=builder.build(USTRINGTRIE_BUILD_SMALL, errorCode).length();
|
||||
int32_t length=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode).length();
|
||||
printf("size of BytesTrie: %6ld bytes\n", (long)length);
|
||||
trie=builder->build(USTRINGTRIE_BUILD_SMALL, errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
// Destructor.
// NOTE(review): rendered diff without +/- markers — the empty-body form and the
// form that deletes `builder` and `trie` both appear. `trie` is initialized to
// NULL in the constructor and only assigned when noDict is FALSE, so it may
// still be NULL when deleted here.
virtual ~BytesTrieDictLookup() {}
|
||||
virtual ~BytesTrieDictLookup() {
|
||||
delete builder;
|
||||
delete trie;
|
||||
}
|
||||
|
||||
protected:
|
||||
BytesTrieBuilder builder;
|
||||
BytesTrieBuilder *builder;
|
||||
BytesTrie *trie;
|
||||
UBool noDict;
|
||||
};
|
||||
|
||||
|
@ -625,7 +637,6 @@ public:
|
|||
if(noDict) {
|
||||
return;
|
||||
}
|
||||
BytesTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data());
|
||||
UText text=UTEXT_INITIALIZER;
|
||||
int32_t lengths[20];
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
|
@ -637,7 +648,7 @@ public:
|
|||
}
|
||||
utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
|
||||
int32_t count=0;
|
||||
bytesTrieMatches(trie, &text, lines[i].len,
|
||||
bytesTrieMatches(*trie, &text, lines[i].len,
|
||||
lengths, count, LENGTHOF(lengths));
|
||||
if(count==0 || lengths[count-1]!=lines[i].len) {
|
||||
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
|
||||
|
@ -651,11 +662,10 @@ public:
|
|||
BytesTrieDictContains(const DictionaryTriePerfTest &perfTest)
|
||||
: BytesTrieDictLookup(perfTest) {}
|
||||
|
||||
virtual void call(UErrorCode *pErrorCode) {
|
||||
virtual void call(UErrorCode * /*pErrorCode*/) {
|
||||
if(noDict) {
|
||||
return;
|
||||
}
|
||||
BytesTrie trie(builder.build(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data());
|
||||
const ULine *lines=perf.getCachedLines();
|
||||
int32_t numLines=perf.getNumLines();
|
||||
for(int32_t i=0; i<numLines; ++i) {
|
||||
|
@ -664,14 +674,14 @@ public:
|
|||
if(line[0]<0x41) {
|
||||
continue;
|
||||
}
|
||||
UStringTrieResult result=trie.first(thaiCharToByte(line[0]));
|
||||
UStringTrieResult result=trie->first(thaiCharToByte(line[0]));
|
||||
int32_t lineLength=lines[i].len;
|
||||
for(int32_t j=1; j<lineLength; ++j) {
|
||||
if(!USTRINGTRIE_HAS_NEXT(result)) {
|
||||
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
|
||||
break;
|
||||
}
|
||||
result=trie.next(thaiCharToByte(line[j]));
|
||||
result=trie->next(thaiCharToByte(line[j]));
|
||||
}
|
||||
if(!USTRINGTRIE_HAS_VALUE(result)) {
|
||||
fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
|
||||
|
|
|
@ -121,6 +121,10 @@ BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharStrin
|
|||
return diff!=0 ? diff : lengthDiff;
|
||||
}
|
||||
|
||||
/**
 * Constructs an empty builder: no elements have been added and
 * no byte array has been allocated for the serialized trie.
 * @param errorCode Standard ICU error code; not read or written by this constructor.
 */
BytesTrieBuilder::BytesTrieBuilder(UErrorCode & /*errorCode*/)
        : elements(NULL), elementsCapacity(0), elementsLength(0),
          bytes(NULL), bytesCapacity(0), bytesLength(0) {}
|
||||
|
||||
BytesTrieBuilder::~BytesTrieBuilder() {
|
||||
delete[] elements;
|
||||
uprv_free(bytes);
|
||||
|
@ -170,39 +174,66 @@ compareElementStrings(const void *context, const void *left, const void *right)
|
|||
|
||||
U_CDECL_END
|
||||
|
||||
StringPiece
|
||||
BytesTrie *
|
||||
BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
|
||||
StringPiece result;
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return result;
|
||||
}
|
||||
if(bytesLength>0) {
|
||||
// Already built.
|
||||
result.set(bytes+(bytesCapacity-bytesLength), bytesLength);
|
||||
return result;
|
||||
}
|
||||
if(elementsLength==0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return result;
|
||||
}
|
||||
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
|
||||
compareElementStrings, &strings,
|
||||
FALSE, // need not be a stable sort
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return result;
|
||||
}
|
||||
// Duplicate strings are not allowed.
|
||||
StringPiece prev=elements[0].getString(strings);
|
||||
for(int32_t i=1; i<elementsLength; ++i) {
|
||||
StringPiece current=elements[i].getString(strings);
|
||||
if(prev==current) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return result;
|
||||
buildBytes(buildOption, errorCode);
|
||||
BytesTrie *newTrie=NULL;
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));
|
||||
if(newTrie==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
bytes=NULL; // The new trie now owns the array.
|
||||
bytesCapacity=0;
|
||||
}
|
||||
}
|
||||
return newTrie;
|
||||
}
|
||||
|
||||
/**
 * Serializes the add()ed data via buildBytes() and returns a StringPiece
 * that refers to the builder's byte array; the array is not copied.
 * @param buildOption Build option, see UStringTrieBuildOption.
 * @param errorCode Standard ICU error code, passed through to buildBytes().
 * @return A StringPiece for the serialized trie, or a default-constructed
 *         StringPiece if errorCode indicates a failure.
 */
StringPiece
BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    buildBytes(buildOption, errorCode);
    StringPiece result;
    if(U_SUCCESS(errorCode)) {
        // The serialized trie occupies the last bytesLength bytes of the array.
        result.set(bytes+(bytesCapacity-bytesLength), bytesLength);
    }
    return result;
}
|
||||
|
||||
void
|
||||
BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
if(bytes!=NULL && bytesLength>0) {
|
||||
// Already built.
|
||||
return;
|
||||
}
|
||||
if(bytesLength==0) {
|
||||
if(elementsLength==0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return;
|
||||
}
|
||||
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
|
||||
compareElementStrings, &strings,
|
||||
FALSE, // need not be a stable sort
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
// Duplicate strings are not allowed.
|
||||
StringPiece prev=elements[0].getString(strings);
|
||||
for(int32_t i=1; i<elementsLength; ++i) {
|
||||
StringPiece current=elements[i].getString(strings);
|
||||
if(prev==current) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
prev=current;
|
||||
}
|
||||
prev=current;
|
||||
}
|
||||
// Create and byte-serialize the trie for the elements.
|
||||
bytesLength=0;
|
||||
int32_t capacity=strings.length();
|
||||
if(capacity<1024) {
|
||||
capacity=1024;
|
||||
|
@ -213,17 +244,14 @@ BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCod
|
|||
if(bytes==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
bytesCapacity=0;
|
||||
return result;
|
||||
return;
|
||||
}
|
||||
bytesCapacity=capacity;
|
||||
}
|
||||
StringTrieBuilder::build(buildOption, elementsLength, errorCode);
|
||||
if(bytes==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
result.set(bytes+(bytesCapacity-bytesLength), bytesLength);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int32_t
|
||||
|
|
|
@ -27,18 +27,78 @@ class BytesTrieElement;
|
|||
|
||||
/**
|
||||
* Builder class for BytesTrie.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
*/
|
||||
class U_TOOLUTIL_API BytesTrieBuilder : public StringTrieBuilder {
|
||||
public:
|
||||
BytesTrieBuilder()
|
||||
: elements(NULL), elementsCapacity(0), elementsLength(0),
|
||||
bytes(NULL), bytesCapacity(0), bytesLength(0) {}
|
||||
/**
|
||||
* Constructs an empty builder.
|
||||
* @param errorCode Standard ICU error code.
|
||||
*/
|
||||
BytesTrieBuilder(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~BytesTrieBuilder();
|
||||
|
||||
/**
|
||||
* Adds a (byte sequence, value) pair.
|
||||
* The byte sequence must be unique.
|
||||
* The bytes will be copied; the builder does not keep
|
||||
* a reference to the input StringPiece or its data().
|
||||
* @param s The input byte sequence.
|
||||
* @param value The value associated with this byte sequence.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
*/
|
||||
BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode);
|
||||
|
||||
StringPiece build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
/**
|
||||
* Builds a BytesTrie for the add()ed data.
|
||||
* Once built, no further data can be add()ed until clear() is called.
|
||||
*
|
||||
* This method passes ownership of the builder's internal result array to the new trie object.
|
||||
* Another call to any build() variant will re-serialize the trie.
|
||||
* After clear() has been called, a new array will be used as well.
|
||||
* @param buildOption Build option, see UStringTrieBuildOption.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return A new BytesTrie for the add()ed data.
|
||||
*/
|
||||
BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Builds a BytesTrie for the add()ed data and byte-serializes it.
|
||||
* Once built, no further data can be add()ed until clear() is called.
|
||||
*
|
||||
* Multiple calls to buildStringPiece() return StringPieces referring to the
|
||||
* builder's same byte array, without rebuilding.
|
||||
* If buildStringPiece() is called after build(), the trie will be
|
||||
* re-serialized into a new array.
|
||||
* If build() is called after buildStringPiece(), the trie object will become
|
||||
* the owner of the previously returned array.
|
||||
* After clear() has been called, a new array will be used as well.
|
||||
* @param buildOption Build option, see UStringTrieBuildOption.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data.
|
||||
*/
|
||||
StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Removes all (byte sequence, value) pairs.
|
||||
* New data can then be add()ed and a new trie can be built.
|
||||
* @return *this
|
||||
*/
|
||||
BytesTrieBuilder &clear() {
|
||||
strings.clear();
|
||||
elementsLength=0;
|
||||
|
@ -47,6 +107,11 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor
|
||||
BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator
|
||||
|
||||
void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
virtual int32_t getElementStringLength(int32_t i) const;
|
||||
virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const;
|
||||
virtual int32_t getElementValue(int32_t i) const;
|
||||
|
|
|
@ -19,8 +19,23 @@
|
|||
#include "unicode/uobject.h"
|
||||
#include "uhash.h"
|
||||
|
||||
/**
 * Build options for BytesTrieBuilder and CharsTrieBuilder.
 */
enum UStringTrieBuildOption {
    /**
     * Builds a trie quickly.
     */
    USTRINGTRIE_BUILD_FAST,
    /**
     * Builds a trie more slowly, attempting to generate
     * a shorter but equivalent serialization.
     * This build option also uses more memory.
     *
     * This option can be effective when many integer values are the same
     * and string/byte sequence suffixes can be shared.
     * Runtime speed is not expected to improve.
     */
    USTRINGTRIE_BUILD_SMALL
};
|
||||
|
||||
|
@ -28,6 +43,8 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
/**
|
||||
* Base class for string trie builder classes.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
*/
|
||||
class U_TOOLUTIL_API StringTrieBuilder : public UObject {
|
||||
public:
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "cmemory.h"
|
||||
#include "uassert.h"
|
||||
#include "ucharstrie.h"
|
||||
|
||||
|
@ -48,6 +49,10 @@ Appendable::append(const UChar *s, int32_t length) {
|
|||
|
||||
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Appendable)
|
||||
|
||||
/**
 * Destructor. Releases the UChar array if this object adopted one.
 * ownedArray_ is NULL for objects that merely alias their data
 * (public constructor, copy constructor); this relies on uprv_free()
 * accepting a NULL pointer.
 */
UCharsTrie::~UCharsTrie() {
    uprv_free(ownedArray_);
}
|
||||
|
||||
UStringTrieResult
|
||||
UCharsTrie::current() const {
|
||||
const UChar *pos=pos_;
|
||||
|
|
|
@ -71,17 +71,47 @@ private:
|
|||
* Light-weight, non-const reader class for a UCharsTrie.
|
||||
* Traverses a UChar-serialized data structure with minimal state,
|
||||
* for mapping strings (16-bit-unit sequences) to non-negative integer values.
|
||||
*
|
||||
* This class owns the serialized trie data only if it was constructed by
|
||||
* the builder's build() method.
|
||||
* The public constructor and the copy constructor only alias the data (only copy the pointer).
|
||||
* There is no assignment operator.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
*/
|
||||
class U_TOOLUTIL_API UCharsTrie : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs a UCharsTrie reader instance.
|
||||
* @param trieUChars The trie UChars.
|
||||
*
|
||||
* The trieUChars must contain a copy of a UChar sequence from the UCharsTrieBuilder,
|
||||
* starting with the first UChar of that sequence.
|
||||
* The UCharsTrie object will not read more UChars than
|
||||
* the UCharsTrieBuilder generated in the corresponding build() call.
|
||||
*
|
||||
* The array is not copied/cloned and must not be modified while
|
||||
* the UCharsTrie object is in use.
|
||||
*
|
||||
* @param trieUChars The UChar array that contains the serialized trie.
|
||||
*/
|
||||
UCharsTrie(const UChar *trieUChars)
|
||||
: uchars_(trieUChars),
|
||||
: ownedArray_(NULL), uchars_(trieUChars),
|
||||
pos_(uchars_), remainingMatchLength_(-1) {}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
~UCharsTrie();
|
||||
|
||||
/**
|
||||
* Copy constructor, copies the other trie reader object and its state,
|
||||
* but not the UChar array which will be shared. (Shallow copy.)
|
||||
* @param Another UCharsTrie object.
|
||||
*/
|
||||
UCharsTrie(const UCharsTrie &other)
|
||||
: ownedArray_(NULL), uchars_(other.uchars_),
|
||||
pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
|
||||
|
||||
/**
|
||||
* Resets this trie to its initial state.
|
||||
*/
|
||||
|
@ -142,6 +172,7 @@ public:
|
|||
/**
|
||||
* Traverses the trie from the initial state for this input UChar.
|
||||
* Equivalent to reset().next(uchar).
|
||||
* @param uchar Input char value. Values below 0 and above 0xffff will never match.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
inline UStringTrieResult first(int32_t uchar) {
|
||||
|
@ -153,6 +184,7 @@ public:
|
|||
* Traverses the trie from the initial state for the
|
||||
* one or two UTF-16 code units for this input code point.
|
||||
* Equivalent to reset().nextForCodePoint(cp).
|
||||
* @param cp A Unicode code point 0..0x10ffff.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
inline UStringTrieResult firstForCodePoint(UChar32 cp) {
|
||||
|
@ -165,6 +197,7 @@ public:
|
|||
|
||||
/**
|
||||
* Traverses the trie from the current state for this input UChar.
|
||||
* @param uchar Input char value. Values below 0 and above 0xffff will never match.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
UStringTrieResult next(int32_t uchar);
|
||||
|
@ -172,6 +205,7 @@ public:
|
|||
/**
|
||||
* Traverses the trie from the current state for the
|
||||
* one or two UTF-16 code units for this input code point.
|
||||
* @param cp A Unicode code point 0..0x10ffff.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
inline UStringTrieResult nextForCodePoint(UChar32 cp) {
|
||||
|
@ -328,6 +362,19 @@ public:
|
|||
private:
|
||||
friend class UCharsTrieBuilder;
|
||||
|
||||
/**
|
||||
* Constructs a UCharsTrie reader instance.
|
||||
* Unlike the public constructor which just aliases an array,
|
||||
* this constructor adopts the builder's array.
|
||||
* This constructor is only called by the builder.
|
||||
*/
|
||||
UCharsTrie(UChar *adoptUChars, const UChar *trieUChars)
|
||||
: ownedArray_(adoptUChars), uchars_(trieUChars),
|
||||
pos_(uchars_), remainingMatchLength_(-1) {}
|
||||
|
||||
// No assignment operator.
|
||||
UCharsTrie &operator=(const UCharsTrie &other);
|
||||
|
||||
// Clears the current read position (pos_ becomes NULL).
inline void stop() {
    pos_=NULL;
}
|
||||
|
@ -513,6 +560,8 @@ private:
|
|||
|
||||
static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff
|
||||
|
||||
UChar *ownedArray_;
|
||||
|
||||
// Fixed value referencing the UCharsTrie words.
|
||||
const UChar *uchars_;
|
||||
|
||||
|
|
|
@ -80,6 +80,10 @@ UCharsTrieElement::compareStringTo(const UCharsTrieElement &other, const Unicode
|
|||
return getString(strings).compare(other.getString(strings));
|
||||
}
|
||||
|
||||
/**
 * Constructs an empty builder: no elements have been added and
 * no UChar array has been allocated for the serialized trie.
 * @param errorCode Standard ICU error code; not read or written by this constructor.
 */
UCharsTrieBuilder::UCharsTrieBuilder(UErrorCode & /*errorCode*/)
        : elements(NULL), elementsCapacity(0), elementsLength(0),
          uchars(NULL), ucharsCapacity(0), ucharsLength(0) {}
|
||||
|
||||
UCharsTrieBuilder::~UCharsTrieBuilder() {
|
||||
delete[] elements;
|
||||
uprv_free(uchars);
|
||||
|
@ -132,42 +136,70 @@ compareElementStrings(const void *context, const void *left, const void *right)
|
|||
|
||||
U_CDECL_END
|
||||
|
||||
UnicodeString &
|
||||
UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return result;
|
||||
}
|
||||
if(ucharsLength>0) {
|
||||
// Already built.
|
||||
result.setTo(FALSE, uchars+(ucharsCapacity-ucharsLength), ucharsLength);
|
||||
return result;
|
||||
}
|
||||
if(elementsLength==0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return result;
|
||||
}
|
||||
if(strings.isBogus()) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return result;
|
||||
}
|
||||
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement),
|
||||
compareElementStrings, &strings,
|
||||
FALSE, // need not be a stable sort
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return result;
|
||||
}
|
||||
// Duplicate strings are not allowed.
|
||||
UnicodeString prev=elements[0].getString(strings);
|
||||
for(int32_t i=1; i<elementsLength; ++i) {
|
||||
UnicodeString current=elements[i].getString(strings);
|
||||
if(prev==current) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return result;
|
||||
/**
 * Builds a UCharsTrie for the add()ed data and hands the builder's
 * result array over to the new trie object.
 * @param buildOption Build option, see UStringTrieBuildOption.
 * @param errorCode Standard ICU error code, passed through to buildUChars().
 *                  Set to U_MEMORY_ALLOCATION_ERROR if the trie object
 *                  cannot be allocated.
 * @return A new UCharsTrie which owns the serialized data,
 *         or NULL if errorCode indicates a failure.
 */
UCharsTrie *
UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    buildUChars(buildOption, errorCode);
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    // The serialized trie occupies the last ucharsLength units of the array.
    UCharsTrie *newTrie=new UCharsTrie(uchars, uchars+(ucharsCapacity-ucharsLength));
    if(newTrie==NULL) {
        // The builder keeps its array; nothing was handed over.
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uchars=NULL;  // The new trie now owns the array.
    ucharsCapacity=0;
    return newTrie;
}
|
||||
|
||||
/**
 * Serializes the add()ed data via buildUChars() and sets result to refer to
 * the builder's UChar array.
 * @param buildOption Build option, see UStringTrieBuildOption.
 * @param result Set to the serialized trie on success; left unchanged
 *               if errorCode indicates a failure.
 * @param errorCode Standard ICU error code, passed through to buildUChars().
 * @return result
 */
UnicodeString &
UCharsTrieBuilder::buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result,
                                      UErrorCode &errorCode) {
    buildUChars(buildOption, errorCode);
    if(U_SUCCESS(errorCode)) {
        // The serialized trie occupies the last ucharsLength units of the array.
        result.setTo(FALSE, uchars+(ucharsCapacity-ucharsLength), ucharsLength);
    }
    return result;
}
|
||||
|
||||
void
|
||||
UCharsTrieBuilder::buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
if(uchars!=NULL && ucharsLength>0) {
|
||||
// Already built.
|
||||
return;
|
||||
}
|
||||
if(ucharsLength==0) {
|
||||
if(elementsLength==0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return;
|
||||
}
|
||||
if(strings.isBogus()) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement),
|
||||
compareElementStrings, &strings,
|
||||
FALSE, // need not be a stable sort
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
// Duplicate strings are not allowed.
|
||||
UnicodeString prev=elements[0].getString(strings);
|
||||
for(int32_t i=1; i<elementsLength; ++i) {
|
||||
UnicodeString current=elements[i].getString(strings);
|
||||
if(prev==current) {
|
||||
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
prev.fastCopyFrom(current);
|
||||
}
|
||||
prev.fastCopyFrom(current);
|
||||
}
|
||||
// Create and UChar-serialize the trie for the elements.
|
||||
ucharsLength=0;
|
||||
int32_t capacity=strings.length();
|
||||
if(capacity<1024) {
|
||||
capacity=1024;
|
||||
|
@ -178,17 +210,14 @@ UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UnicodeString &resu
|
|||
if(uchars==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
ucharsCapacity=0;
|
||||
return result;
|
||||
return;
|
||||
}
|
||||
ucharsCapacity=capacity;
|
||||
}
|
||||
StringTrieBuilder::build(buildOption, elementsLength, errorCode);
|
||||
if(uchars==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
result.setTo(FALSE, uchars+(ucharsCapacity-ucharsLength), ucharsLength);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int32_t
|
||||
|
|
|
@ -26,18 +26,81 @@ class UCharsTrieElement;
|
|||
|
||||
/**
|
||||
* Builder class for UCharsTrie.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
*/
|
||||
class U_TOOLUTIL_API UCharsTrieBuilder : public StringTrieBuilder {
|
||||
public:
|
||||
UCharsTrieBuilder()
|
||||
: elements(NULL), elementsCapacity(0), elementsLength(0),
|
||||
uchars(NULL), ucharsCapacity(0), ucharsLength(0) {}
|
||||
/**
|
||||
* Constructs an empty builder.
|
||||
* @param errorCode Standard ICU error code.
|
||||
*/
|
||||
UCharsTrieBuilder(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~UCharsTrieBuilder();
|
||||
|
||||
/**
|
||||
* Adds a (string, value) pair.
|
||||
* The string must be unique.
|
||||
* The string contents will be copied; the builder does not keep
|
||||
* a reference to the input UnicodeString or its buffer.
|
||||
* @param s The input string.
|
||||
* @param value The value associated with this string.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
*/
|
||||
UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);
|
||||
|
||||
UnicodeString &build(UStringTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode);
|
||||
/**
|
||||
* Builds a UCharsTrie for the add()ed data.
|
||||
* Once built, no further data can be add()ed until clear() is called.
|
||||
*
|
||||
* This method passes ownership of the builder's internal result array to the new trie object.
|
||||
* Another call to any build() variant will re-serialize the trie.
|
||||
* After clear() has been called, a new array will be used as well.
|
||||
* @param buildOption Build option, see UStringTrieBuildOption.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return A new UCharsTrie for the add()ed data.
|
||||
*/
|
||||
UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Builds a UCharsTrie for the add()ed data and UChar-serializes it.
|
||||
* Once built, no further data can be add()ed until clear() is called.
|
||||
*
|
||||
* Multiple calls to buildUnicodeString() set the UnicodeStrings to the
|
||||
* builder's same UChar array, without rebuilding.
|
||||
* If buildUnicodeString() is called after build(), the trie will be
|
||||
* re-serialized into a new array.
|
||||
* If build() is called after buildUnicodeString(), the trie object will become
|
||||
* the owner of the previously returned array.
|
||||
* After clear() has been called, a new array will be used as well.
|
||||
* @param buildOption Build option, see UStringTrieBuildOption.
|
||||
* @param result A UnicodeString which will be set to the UChar-serialized
|
||||
* UCharsTrie for the add()ed data.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return result
|
||||
*/
|
||||
UnicodeString &buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Removes all (string, value) pairs.
|
||||
* New data can then be add()ed and a new trie can be built.
|
||||
* @return *this
|
||||
*/
|
||||
UCharsTrieBuilder &clear() {
|
||||
strings.remove();
|
||||
elementsLength=0;
|
||||
|
@ -46,6 +109,11 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
UCharsTrieBuilder(const UCharsTrieBuilder &other); // no copy constructor
|
||||
UCharsTrieBuilder &operator=(const UCharsTrieBuilder &other); // no assignment operator
|
||||
|
||||
void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
virtual int32_t getElementStringLength(int32_t i) const;
|
||||
virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const;
|
||||
virtual int32_t getElementValue(int32_t i) const;
|
||||
|
|
Loading…
Add table
Reference in a new issue