mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-20735 simpler state saving for C++ string tries
This commit is contained in:
parent
8646872f68
commit
527ff9f7ac
6 changed files with 200 additions and 6 deletions
|
@ -97,6 +97,39 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the state of this trie as a 64-bit integer.
|
||||
* The state value is never 0.
|
||||
*
|
||||
* @return opaque state value
|
||||
* @see resetToState64
|
||||
* @draft ICU 65
|
||||
*/
|
||||
uint64_t getState64() const {
|
||||
return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
|
||||
(uint64_t)(pos_ - bytes_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this trie to the saved state.
|
||||
* Unlike resetToState(State), the 64-bit state value
|
||||
* must be from getState64() from the same trie object or
|
||||
* from one initialized the exact same way.
|
||||
* Because of no validation, this method is faster.
|
||||
*
|
||||
* @param state The opaque trie state value from getState64().
|
||||
* @return *this
|
||||
* @see getState64
|
||||
* @see resetToState
|
||||
* @see reset
|
||||
* @draft ICU 65
|
||||
*/
|
||||
BytesTrie &resetToState64(uint64_t state) {
|
||||
remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2;
|
||||
pos_ = bytes_ + (state & kState64PosMask);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* BytesTrie state object, for saving a trie's current state
|
||||
* and resetting the trie back to this state later.
|
||||
|
@ -505,6 +538,13 @@ private:
|
|||
static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff
|
||||
static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff
|
||||
|
||||
// Bit layout of the value returned by getState64():
// remainingMatchLength_ ranges over -1..14, where 14=(kMaxLinearMatchLength=0x10)-2,
// so at least 5 bits are needed for it.
// It is stored with a bias of +2, as a positive value 1..16=kMaxLinearMatchLength,
// in the 5 bits above the position offset.
static constexpr int32_t kState64RemainingShift = 64 - 5;
static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
|
||||
|
||||
uint8_t *ownedArray_;
|
||||
|
||||
// Fixed value referencing the BytesTrie bytes.
|
||||
|
|
|
@ -97,6 +97,39 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the state of this trie as a 64-bit integer.
|
||||
* The state value is never 0.
|
||||
*
|
||||
* @return opaque state value
|
||||
* @see resetToState64
|
||||
* @draft ICU 65
|
||||
*/
|
||||
uint64_t getState64() const {
|
||||
return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
|
||||
(uint64_t)(pos_ - uchars_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this trie to the saved state.
|
||||
* Unlike resetToState(State), the 64-bit state value
|
||||
* must be from getState64() from the same trie object or
|
||||
* from one initialized the exact same way.
|
||||
* Because of no validation, this method is faster.
|
||||
*
|
||||
* @param state The opaque trie state value from getState64().
|
||||
* @return *this
|
||||
* @see getState64
|
||||
* @see resetToState
|
||||
* @see reset
|
||||
* @draft ICU 65
|
||||
*/
|
||||
UCharsTrie &resetToState64(uint64_t state) {
|
||||
remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2;
|
||||
pos_ = uchars_ + (state & kState64PosMask);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* UCharsTrie state object, for saving a trie's current state
|
||||
* and resetting the trie back to this state later.
|
||||
|
@ -563,6 +596,13 @@ private:
|
|||
|
||||
static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff
|
||||
|
||||
// Bit layout of the value returned by getState64():
// remainingMatchLength_ ranges over -1..14, where 14=(kMaxLinearMatchLength=0x10)-2,
// so at least 5 bits are needed for it.
// It is stored with a bias of +2, as a positive value 1..16=kMaxLinearMatchLength,
// in the 5 bits above the position offset.
static constexpr int32_t kState64RemainingShift = 64 - 5;
static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
|
||||
|
||||
char16_t *ownedArray_;
|
||||
|
||||
// Fixed value referencing the UCharsTrie words.
|
||||
|
|
|
@ -64,6 +64,7 @@ public:
|
|||
void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextWithState64(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
|
@ -613,6 +614,7 @@ void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, U
|
|||
checkFirst(*trie, data, dataLength);
|
||||
checkNext(*trie, data, dataLength);
|
||||
checkNextWithState(*trie, data, dataLength);
|
||||
checkNextWithState64(*trie, data, dataLength);
|
||||
checkNextString(*trie, data, dataLength);
|
||||
checkIterator(*trie, data, dataLength);
|
||||
}
|
||||
|
@ -825,6 +827,61 @@ void BytesTrieTest::checkNextWithState(BytesTrie &trie,
|
|||
}
|
||||
}
|
||||
|
||||
// Exercises getState64()/resetToState64():
// for each data string, walks the first third of it, saves the 64-bit state,
// forces a mismatch, restores the state, and checks that the rest of the string
// still leads to the expected value -- both directly after the restore
// and again after a second restore.
void BytesTrieTest::checkNextWithState64(BytesTrie &trie,
                                         const StringAndValue data[], int32_t dataLength) {
    assertTrue("trie(initial state).getState64()!=0", trie.getState64() != 0);
    for(int32_t index=0; index<dataLength; ++index) {
        const StringAndValue &entry=data[index];
        const char *expected=entry.s;
        const int32_t fullLength=static_cast<int32_t>(strlen(expected));
        const int32_t prefixLength=fullLength/3;
        const char *rest=expected+prefixLength;
        const int32_t restLength=fullLength-prefixLength;

        // Step through the prefix one byte at a time.
        for(int32_t k=0; k<prefixLength; ++k) {
            const UStringTrieResult step=trie.next(expected[k]);
            if(!USTRINGTRIE_MATCHES(step)) {
                errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", entry.s);
                return;
            }
        }

        // Remember where we are, and what the trie reports at this point.
        const uint64_t savedState=trie.getState64();
        assertTrue("trie.getState64()!=0", savedState != 0);
        const UStringTrieResult resultAtState=trie.current();
        const int32_t valueAtState=
            USTRINGTRIE_HAS_VALUE(resultAtState) ? trie.getValue() : -99;

        UStringTrieResult result=trie.next(0);  // mismatch
        if(!(result==USTRINGTRIE_NO_MATCH && result==trie.current())) {
            errln("trie.next(0) matched after part of %s", entry.s);
        }

        // Restoring must bring back the same current() result and, if any, the same value.
        bool restored=(resultAtState==trie.resetToState64(savedState).current());
        if(restored && USTRINGTRIE_HAS_VALUE(resultAtState)) {
            restored=(valueAtState==trie.getValue());
        }
        if(!restored) {
            errln("trie.next(part of %s) changes current()/getValue() after "
                  "getState64/next(0)/resetToState64",
                  entry.s);
        } else {
            result=trie.next(rest, restLength);
            if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie.current()) {
                errln("trie.next(rest of %s) does not seem to contain %s after "
                      "getState64/next(0)/resetToState64",
                      entry.s, entry.s);
            } else {
                // Restore once more, this time after having consumed the rest.
                result=trie.resetToState64(savedState).next(rest, restLength);
                if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie.current()) {
                    errln("trie does not seem to contain %s after getState64/next(rest)/resetToState64",
                          entry.s);
                } else {
                    const int32_t actualValue=trie.getValue();
                    if(actualValue!=entry.value) {
                        errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
                              entry.s,
                              static_cast<long>(actualValue), static_cast<long>(actualValue),
                              static_cast<long>(entry.value), static_cast<long>(entry.value));
                    }
                }
            }
        }
        trie.reset();
    }
}
|
||||
|
||||
// next(string) is also tested in other functions,
|
||||
// but here we try to go partway through the string, and then beyond it.
|
||||
void BytesTrieTest::checkNextString(BytesTrie &trie,
|
||||
|
|
|
@ -71,6 +71,7 @@ public:
|
|||
void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextWithState64(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
|
@ -762,6 +763,7 @@ void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength,
|
|||
checkFirst(*trie, data, dataLength);
|
||||
checkNext(*trie, data, dataLength);
|
||||
checkNextWithState(*trie, data, dataLength);
|
||||
checkNextWithState64(*trie, data, dataLength);
|
||||
checkNextString(*trie, data, dataLength);
|
||||
checkIterator(*trie, data, dataLength);
|
||||
}
|
||||
|
@ -987,6 +989,61 @@ void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
|
|||
}
|
||||
}
|
||||
|
||||
// Exercises getState64()/resetToState64():
// for each data string, walks the first third of it, saves the 64-bit state,
// forces a mismatch, restores the state, and checks that the rest of the string
// still leads to the expected value -- both directly after the restore
// and again after a second restore.
void UCharsTrieTest::checkNextWithState64(UCharsTrie &trie,
                                          const StringAndValue data[], int32_t dataLength) {
    assertTrue("trie(initial state).getState64()!=0", trie.getState64() != 0);
    for(int32_t index=0; index<dataLength; ++index) {
        const StringAndValue &entry=data[index];
        UnicodeString expected=UnicodeString(entry.s, -1, US_INV).unescape();
        const int32_t fullLength=expected.length();
        const int32_t prefixLength=fullLength/3;
        const int32_t restLength=fullLength-prefixLength;

        // Step through the prefix one code unit at a time.
        for(int32_t k=0; k<prefixLength; ++k) {
            const UStringTrieResult step=trie.next(expected[k]);
            if(!USTRINGTRIE_MATCHES(step)) {
                errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", entry.s);
                return;
            }
        }

        // Remember where we are, and what the trie reports at this point.
        const uint64_t savedState=trie.getState64();
        assertTrue("trie.getState64()!=0", savedState != 0);
        const UStringTrieResult resultAtState=trie.current();
        const int32_t valueAtState=
            USTRINGTRIE_HAS_VALUE(resultAtState) ? trie.getValue() : -99;

        UStringTrieResult result=trie.next(0);  // mismatch
        if(!(result==USTRINGTRIE_NO_MATCH && result==trie.current())) {
            errln("trie.next(0) matched after part of %s", entry.s);
        }

        // Restoring must bring back the same current() result and, if any, the same value.
        bool restored=(resultAtState==trie.resetToState64(savedState).current());
        if(restored && USTRINGTRIE_HAS_VALUE(resultAtState)) {
            restored=(valueAtState==trie.getValue());
        }
        if(!restored) {
            errln("trie.next(part of %s) changes current()/getValue() after "
                  "getState64/next(0)/resetToState64",
                  entry.s);
        } else {
            result=trie.next(expected.getTerminatedBuffer()+prefixLength, restLength);
            if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie.current()) {
                errln("trie.next(rest of %s) does not seem to contain %s after "
                      "getState64/next(0)/resetToState64",
                      entry.s, entry.s);
            } else {
                // Restore once more, this time after having consumed the rest.
                result=trie.resetToState64(savedState).
                            next(expected.getTerminatedBuffer()+prefixLength, restLength);
                if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie.current()) {
                    errln("trie does not seem to contain %s after getState64/next(rest)/resetToState64",
                          entry.s);
                } else {
                    const int32_t actualValue=trie.getValue();
                    if(actualValue!=entry.value) {
                        errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
                              entry.s,
                              static_cast<long>(actualValue), static_cast<long>(actualValue),
                              static_cast<long>(entry.value), static_cast<long>(entry.value));
                    }
                }
            }
        }
        trie.reset();
    }
}
|
||||
|
||||
// next(string) is also tested in other functions,
|
||||
// but here we try to go partway through the string, and then beyond it.
|
||||
void UCharsTrieTest::checkNextString(UCharsTrie &trie,
|
||||
|
|
|
@ -768,16 +768,16 @@ public class BytesTrieTest extends TestFmwk {
|
|||
(resultAtState.hasValue() && valueAtState!=trie.getValue())
|
||||
) {
|
||||
errln("trie.next(part of "+data[i].s+") changes current()/getValue() after "+
|
||||
"saveState/next(0)/resetToState");
|
||||
"getState64/next(0)/resetToState64");
|
||||
} else if(!(result=trie.next(expectedString, partialLength, stringLength)).hasValue() ||
|
||||
result!=trie.current()) {
|
||||
errln("trie.next(rest of "+data[i].s+") does not seem to contain "+data[i].s+" after "+
|
||||
"saveState/next(0)/resetToState");
|
||||
"getState64/next(0)/resetToState64");
|
||||
} else if(!(result=trie.resetToState64(state).
|
||||
next(expectedString, partialLength, stringLength)).hasValue() ||
|
||||
result!=trie.current()) {
|
||||
errln("trie does not seem to contain "+data[i].s+
|
||||
" after saveState/next(rest)/resetToState");
|
||||
" after getState64/next(rest)/resetToState64");
|
||||
} else if(trie.getValue()!=data[i].value) {
|
||||
errln(String.format("trie value for %s is %d=0x%x instead of expected %d=0x%x",
|
||||
data[i].s,
|
||||
|
|
|
@ -914,16 +914,16 @@ public class CharsTrieTest extends TestFmwk {
|
|||
(resultAtState.hasValue() && valueAtState!=trie.getValue())
|
||||
) {
|
||||
errln("trie.next(part of "+data[i].s+") changes current()/getValue() after "+
|
||||
"saveState/next(0)/resetToState");
|
||||
"getState64/next(0)/resetToState64");
|
||||
} else if(!(result=trie.next(expectedString, partialLength, stringLength)).hasValue() ||
|
||||
result!=trie.current()) {
|
||||
errln("trie.next(rest of "+data[i].s+") does not seem to contain "+data[i].s+" after "+
|
||||
"saveState/next(0)/resetToState");
|
||||
"getState64/next(0)/resetToState64");
|
||||
} else if(!(result=trie.resetToState64(state).
|
||||
next(expectedString, partialLength, stringLength)).hasValue() ||
|
||||
result!=trie.current()) {
|
||||
errln("trie does not seem to contain "+data[i].s+
|
||||
" after saveState/next(rest)/resetToState");
|
||||
" after getState64/next(rest)/resetToState64");
|
||||
} else if(trie.getValue()!=data[i].value) {
|
||||
errln(String.format("trie value for %s is %d=0x%x instead of expected %d=0x%x",
|
||||
data[i].s,
|
||||
|
|
Loading…
Add table
Reference in a new issue