mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 15:05:53 +00:00
ICU-13307 C++ Edits::mergeAndAppend(ab, bc); map indexes only from inside spans not empty deletions/insertions; make Edits copyable and Edits::Iterator default-constructible
X-SVN-Rev: 40333
This commit is contained in:
parent
32f20ec9b4
commit
837280a366
5 changed files with 683 additions and 73 deletions
|
@ -33,20 +33,85 @@ const int32_t LENGTH_IN_2TRAIL = 62;
|
|||
|
||||
} // namespace
|
||||
|
||||
Edits::~Edits() {
|
||||
if(array != stackArray) {
|
||||
void Edits::releaseArray() U_NOEXCEPT {
|
||||
if (array != stackArray) {
|
||||
uprv_free(array);
|
||||
}
|
||||
}
|
||||
|
||||
void Edits::reset() {
|
||||
Edits &Edits::copyArray(const Edits &other) {
|
||||
if (U_FAILURE(errorCode_)) {
|
||||
length = delta = numChanges = 0;
|
||||
return *this;
|
||||
}
|
||||
if (length > capacity) {
|
||||
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)length * 2);
|
||||
if (newArray == nullptr) {
|
||||
length = delta = numChanges = 0;
|
||||
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return *this;
|
||||
}
|
||||
releaseArray();
|
||||
array = newArray;
|
||||
capacity = length;
|
||||
}
|
||||
if (length > 0) {
|
||||
uprv_memcpy(array, other.array, (size_t)length * 2);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
|
||||
if (U_FAILURE(errorCode_)) {
|
||||
length = delta = numChanges = 0;
|
||||
return *this;
|
||||
}
|
||||
releaseArray();
|
||||
if (length > STACK_CAPACITY) {
|
||||
array = src.array;
|
||||
capacity = src.capacity;
|
||||
src.array = src.stackArray;
|
||||
src.capacity = STACK_CAPACITY;
|
||||
src.reset();
|
||||
return *this;
|
||||
}
|
||||
array = stackArray;
|
||||
capacity = STACK_CAPACITY;
|
||||
if (length > 0) {
|
||||
uprv_memcpy(array, src.array, (size_t)length * 2);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Edits &Edits::operator=(const Edits &other) {
|
||||
length = other.length;
|
||||
delta = other.delta;
|
||||
numChanges = other.numChanges;
|
||||
errorCode_ = other.errorCode_;
|
||||
return copyArray(other);
|
||||
}
|
||||
|
||||
Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
|
||||
length = src.length;
|
||||
delta = src.delta;
|
||||
numChanges = src.numChanges;
|
||||
errorCode_ = src.errorCode_;
|
||||
return moveArray(src);
|
||||
}
|
||||
|
||||
Edits::~Edits() {
|
||||
releaseArray();
|
||||
}
|
||||
|
||||
void Edits::reset() U_NOEXCEPT {
|
||||
length = delta = numChanges = 0;
|
||||
errorCode_ = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
void Edits::addUnchanged(int32_t unchangedLength) {
|
||||
if(U_FAILURE(errorCode) || unchangedLength == 0) { return; }
|
||||
if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; }
|
||||
if(unchangedLength < 0) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
// Merge into previous unchanged-text record, if any.
|
||||
|
@ -72,7 +137,7 @@ void Edits::addUnchanged(int32_t unchangedLength) {
|
|||
}
|
||||
|
||||
void Edits::addReplace(int32_t oldLength, int32_t newLength) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
if(U_FAILURE(errorCode_)) { return; }
|
||||
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
|
||||
// Replacement of short oldLength text units by same-length new text.
|
||||
// Merge into previous short-replacement record, if any.
|
||||
|
@ -88,7 +153,7 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
|
|||
}
|
||||
|
||||
if(oldLength < 0 || newLength < 0) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
if (oldLength == 0 && newLength == 0) {
|
||||
|
@ -100,7 +165,7 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
|
|||
if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
|
||||
(newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
|
||||
// Integer overflow or underflow.
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return;
|
||||
}
|
||||
delta += newDelta;
|
||||
|
@ -151,7 +216,7 @@ UBool Edits::growArray() {
|
|||
} else if (capacity == INT32_MAX) {
|
||||
// Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
|
||||
// with a result-string-buffer overflow.
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
} else if (capacity >= (INT32_MAX / 2)) {
|
||||
newCapacity = INT32_MAX;
|
||||
|
@ -160,18 +225,16 @@ UBool Edits::growArray() {
|
|||
}
|
||||
// Grow by at least 5 units so that a maximal change record will fit.
|
||||
if ((newCapacity - capacity) < 5) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
|
||||
if (newArray == NULL) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
uprv_memcpy(newArray, array, (size_t)length * 2);
|
||||
if (array != stackArray) {
|
||||
uprv_free(array);
|
||||
}
|
||||
releaseArray();
|
||||
array = newArray;
|
||||
capacity = newCapacity;
|
||||
return TRUE;
|
||||
|
@ -179,11 +242,157 @@ UBool Edits::growArray() {
|
|||
|
||||
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
|
||||
if (U_FAILURE(outErrorCode)) { return TRUE; }
|
||||
if (U_SUCCESS(errorCode)) { return FALSE; }
|
||||
outErrorCode = errorCode;
|
||||
if (U_SUCCESS(errorCode_)) { return FALSE; }
|
||||
outErrorCode = errorCode_;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
|
||||
if (copyErrorTo(errorCode)) { return *this; }
|
||||
// Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
|
||||
// Parallel iteration over both Edits.
|
||||
Iterator abIter = ab.getFineIterator();
|
||||
Iterator bcIter = bc.getFineIterator();
|
||||
UBool abHasNext = TRUE, bcHasNext = TRUE;
|
||||
// Copy iterator state into local variables, so that we can modify and subdivide spans.
|
||||
// ab old & new length, bc old & new length
|
||||
int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
|
||||
// When we have different-intermediate-length changes, we accumulate a larger change.
|
||||
int32_t pending_aLength = 0, pending_cLength = 0;
|
||||
for (;;) {
|
||||
// At this point, for each of the two iterators:
|
||||
// Either we are done with the locally cached current edit,
|
||||
// and its intermediate-string length has been reset,
|
||||
// or we will continue to work with a truncated remainder of this edit.
|
||||
//
|
||||
// If the current edit is done, and the iterator has not yet reached the end,
|
||||
// then we fetch the next edit. This is true for at least one of the iterators.
|
||||
//
|
||||
// Normally it does not matter whether we fetch from ab and then bc or vice versa.
|
||||
// However, the result is observably different when
|
||||
// ab deletions meet bc insertions at the same intermediate-string index.
|
||||
// Some users expect the bc insertions to come first, so we fetch from bc first.
|
||||
if (bc_bLength == 0) {
|
||||
if (bcHasNext && (bcHasNext = bcIter.next(errorCode))) {
|
||||
bc_bLength = bcIter.oldLength();
|
||||
cLength = bcIter.newLength();
|
||||
if (bc_bLength == 0) {
|
||||
// insertion
|
||||
if (ab_bLength == 0 || !abIter.hasChange()) {
|
||||
addReplace(pending_aLength, pending_cLength + cLength);
|
||||
pending_aLength = pending_cLength = 0;
|
||||
} else {
|
||||
pending_cLength += cLength;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// else see if the other iterator is done, too.
|
||||
}
|
||||
if (ab_bLength == 0) {
|
||||
if (abHasNext && (abHasNext = abIter.next(errorCode))) {
|
||||
aLength = abIter.oldLength();
|
||||
ab_bLength = abIter.newLength();
|
||||
if (ab_bLength == 0) {
|
||||
// deletion
|
||||
if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
|
||||
addReplace(pending_aLength + aLength, pending_cLength);
|
||||
pending_aLength = pending_cLength = 0;
|
||||
} else {
|
||||
pending_aLength += aLength;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} else if (bc_bLength == 0) {
|
||||
// Both iterators are done at the same time:
|
||||
// The intermediate-string lengths match.
|
||||
break;
|
||||
} else {
|
||||
// The ab output string is shorter than the bc input string.
|
||||
if (!copyErrorTo(errorCode)) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
if (bc_bLength == 0) {
|
||||
// The bc input string is shorter than the ab output string.
|
||||
if (!copyErrorTo(errorCode)) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
// Done fetching: ab_bLength > 0 && bc_bLength > 0
|
||||
|
||||
// The current state has two parts:
|
||||
// - Past: We accumulate a longer ac edit in the "pending" variables.
|
||||
// - Current: We have copies of the current ab/bc edits in local variables.
|
||||
// At least one side is newly fetched.
|
||||
// One side might be a truncated remainder of an edit we fetched earlier.
|
||||
|
||||
if (!abIter.hasChange() && !bcIter.hasChange()) {
|
||||
// An unchanged span all the way from string a to string c.
|
||||
if (pending_aLength != 0 || pending_cLength != 0) {
|
||||
addReplace(pending_aLength, pending_cLength);
|
||||
pending_aLength = pending_cLength = 0;
|
||||
}
|
||||
int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
|
||||
addUnchanged(unchangedLength);
|
||||
ab_bLength = aLength -= unchangedLength;
|
||||
bc_bLength = cLength -= unchangedLength;
|
||||
// At least one of the unchanged spans is now empty.
|
||||
continue;
|
||||
}
|
||||
if (!abIter.hasChange() && bcIter.hasChange()) {
|
||||
// Unchanged a->b but changed b->c.
|
||||
if (ab_bLength >= bc_bLength) {
|
||||
// Split the longer unchanged span into change + remainder.
|
||||
addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
|
||||
pending_aLength = pending_cLength = 0;
|
||||
aLength = ab_bLength -= bc_bLength;
|
||||
bc_bLength = 0;
|
||||
continue;
|
||||
}
|
||||
// Handle the shorter unchanged span below like a change.
|
||||
} else if (abIter.hasChange() && !bcIter.hasChange()) {
|
||||
// Changed a->b and then unchanged b->c.
|
||||
if (ab_bLength <= bc_bLength) {
|
||||
// Split the longer unchanged span into change + remainder.
|
||||
addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
|
||||
pending_aLength = pending_cLength = 0;
|
||||
cLength = bc_bLength -= ab_bLength;
|
||||
ab_bLength = 0;
|
||||
continue;
|
||||
}
|
||||
// Handle the shorter unchanged span below like a change.
|
||||
} else { // both abIter.hasChange() && bcIter.hasChange()
|
||||
if (ab_bLength == bc_bLength) {
|
||||
// Changes on both sides up to the same position. Emit & reset.
|
||||
addReplace(pending_aLength + aLength, pending_cLength + cLength);
|
||||
pending_aLength = pending_cLength = 0;
|
||||
ab_bLength = bc_bLength = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Accumulate the a->c change, reset the shorter side,
|
||||
// keep a remainder of the longer one.
|
||||
pending_aLength += aLength;
|
||||
pending_cLength += cLength;
|
||||
if (ab_bLength < bc_bLength) {
|
||||
bc_bLength -= ab_bLength;
|
||||
cLength = ab_bLength = 0;
|
||||
} else { // ab_bLength > bc_bLength
|
||||
ab_bLength -= bc_bLength;
|
||||
aLength = bc_bLength = 0;
|
||||
}
|
||||
}
|
||||
if (pending_aLength != 0 || pending_cLength != 0) {
|
||||
addReplace(pending_aLength, pending_cLength);
|
||||
}
|
||||
copyErrorTo(errorCode);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
|
||||
array(a), index(0), length(len), remaining(0),
|
||||
onlyChanges_(oc), coarse(crs),
|
||||
|
@ -308,12 +517,7 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
|
|||
spanStart = destIndex;
|
||||
spanLength = newLength_;
|
||||
}
|
||||
// If we are at the start or limit of an empty span, then we search from
|
||||
// the start of the string so that we always return
|
||||
// the first of several consecutive empty spans, for consistent results.
|
||||
// We do not currently track the properties of the previous span,
|
||||
// so for now we always reset if we are at the start of the current span.
|
||||
if (i <= spanStart) {
|
||||
if (i < spanStart) {
|
||||
// Reset the iterator to the start.
|
||||
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
|
||||
} else if (i < (spanStart + spanLength)) {
|
||||
|
@ -328,8 +532,8 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
|
|||
spanStart = destIndex;
|
||||
spanLength = newLength_;
|
||||
}
|
||||
if (i == spanStart || i < (spanStart + spanLength)) {
|
||||
// The index is in the current span, or at an empty one.
|
||||
if (i < (spanStart + spanLength)) {
|
||||
// The index is in the current span.
|
||||
return 0;
|
||||
}
|
||||
if (remaining > 0) {
|
||||
|
|
|
@ -37,18 +37,60 @@ public:
|
|||
*/
|
||||
Edits() :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
|
||||
errorCode(U_ZERO_ERROR) {}
|
||||
errorCode_(U_ZERO_ERROR) {}
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @param other source edits
|
||||
* @draft ICU 60
|
||||
*/
|
||||
Edits(const Edits &other) :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(other.length),
|
||||
delta(other.delta), numChanges(other.numChanges),
|
||||
errorCode_(other.errorCode_) {
|
||||
copyArray(other);
|
||||
}
|
||||
/**
|
||||
* Move constructor, might leave src empty.
|
||||
* This object will have the same contents that the source object had.
|
||||
* @param src source edits
|
||||
* @draft ICU 60
|
||||
*/
|
||||
Edits(Edits &&src) U_NOEXCEPT :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(src.length),
|
||||
delta(src.delta), numChanges(src.numChanges),
|
||||
errorCode_(src.errorCode_) {
|
||||
moveArray(src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
~Edits();
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
* @param other source edits
|
||||
* @return *this
|
||||
* @draft ICU 60
|
||||
*/
|
||||
Edits &operator=(const Edits &other);
|
||||
|
||||
/**
|
||||
* Move assignment operator, might leave src empty.
|
||||
* This object will have the same contents that the source object had.
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source edits
|
||||
* @return *this
|
||||
* @draft ICU 60
|
||||
*/
|
||||
Edits &operator=(Edits &&src) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Resets the data but may not release memory.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void reset();
|
||||
void reset() U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Adds a record for an unchanged segment of text.
|
||||
|
@ -99,6 +141,15 @@ public:
|
|||
* @draft ICU 59
|
||||
*/
|
||||
struct U_COMMON_API Iterator U_FINAL : public UMemory {
|
||||
/**
|
||||
* Default constructor, empty iterator.
|
||||
* @draft ICU 60
|
||||
*/
|
||||
Iterator() :
|
||||
array(nullptr), index(0), length(0),
|
||||
remaining(0), onlyChanges_(FALSE), coarse(FALSE),
|
||||
changed(FALSE), oldLength_(0), newLength_(0),
|
||||
srcIndex(0), replIndex(0), destIndex(0) {}
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @draft ICU 59
|
||||
|
@ -309,9 +360,39 @@ public:
|
|||
return Iterator(array, length, FALSE, FALSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the two input Edits and appends the result to this object.
|
||||
*
|
||||
* Consider two string transformations (for example, normalization and case mapping)
|
||||
* where each records Edits in addition to writing an output string.<br>
|
||||
* Edits ab reflect how substrings of input string a
|
||||
* map to substrings of intermediate string b.<br>
|
||||
* Edits bc reflect how substrings of intermediate string b
|
||||
* map to substrings of output string c.<br>
|
||||
* This function merges ab and bc such that the additional edits
|
||||
* recorded in this object reflect how substrings of input string a
|
||||
* map to substrings of output string c.
|
||||
*
|
||||
* If unrelated Edits are passed in where the output string of the first
|
||||
* has a different length than the input string of the second,
|
||||
* then a U_ILLEGAL_ARGUMENT_ERROR is reported.
|
||||
*
|
||||
* @param ab reflects how substrings of input string a
|
||||
* map to substrings of intermediate string b.
|
||||
* @param bc reflects how substrings of intermediate string b
|
||||
* map to substrings of output string c.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return *this, with the merged edits appended
|
||||
* @draft ICU 60
|
||||
*/
|
||||
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
Edits(const Edits &) = delete;
|
||||
Edits &operator=(const Edits &) = delete;
|
||||
void releaseArray() U_NOEXCEPT;
|
||||
Edits &copyArray(const Edits &other);
|
||||
Edits &moveArray(Edits &src) U_NOEXCEPT;
|
||||
|
||||
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
|
||||
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
|
||||
|
@ -325,7 +406,7 @@ private:
|
|||
int32_t length;
|
||||
int32_t delta;
|
||||
int32_t numChanges;
|
||||
UErrorCode errorCode;
|
||||
UErrorCode errorCode_;
|
||||
uint16_t stackArray[STACK_CAPACITY];
|
||||
};
|
||||
|
||||
|
|
|
@ -57,6 +57,8 @@ public:
|
|||
void TestMalformedUTF8();
|
||||
void TestBufferOverflow();
|
||||
void TestEdits();
|
||||
void TestCopyMoveEdits();
|
||||
void TestMergeEdits();
|
||||
void TestCaseMapWithEdits();
|
||||
void TestCaseMapUTF8WithEdits();
|
||||
void TestLongUnicodeString();
|
||||
|
@ -94,6 +96,8 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
|||
TESTCASE_AUTO(TestMalformedUTF8);
|
||||
TESTCASE_AUTO(TestBufferOverflow);
|
||||
TESTCASE_AUTO(TestEdits);
|
||||
TESTCASE_AUTO(TestCopyMoveEdits);
|
||||
TESTCASE_AUTO(TestMergeEdits);
|
||||
TESTCASE_AUTO(TestCaseMapWithEdits);
|
||||
TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
|
||||
TESTCASE_AUTO(TestLongUnicodeString);
|
||||
|
@ -966,6 +970,225 @@ void StringCaseTest::TestEdits() {
|
|||
assertFalse("reset then iterator", ei.next(errorCode));
|
||||
}
|
||||
|
||||
void StringCaseTest::TestCopyMoveEdits() {
|
||||
IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
|
||||
// Exceed the stack array capacity.
|
||||
Edits a;
|
||||
for (int32_t i = 0; i < 250; ++i) {
|
||||
a.addReplace(i % 10, (i % 10) + 1);
|
||||
}
|
||||
assertEquals("a: many edits, length delta", 250, a.lengthDelta());
|
||||
|
||||
// copy
|
||||
Edits b(a);
|
||||
assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
|
||||
assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
|
||||
TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);
|
||||
|
||||
// assign
|
||||
Edits c;
|
||||
c.addUnchanged(99);
|
||||
c.addReplace(88, 77);
|
||||
c = b;
|
||||
assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
|
||||
assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
|
||||
TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);
|
||||
|
||||
// move constructor empties object with heap array
|
||||
Edits d(std::move(a));
|
||||
assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
|
||||
assertFalse("a moved away: no more hasChanges", a.hasChanges());
|
||||
TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
|
||||
Edits empty;
|
||||
TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);
|
||||
|
||||
// move assignment empties object with heap array
|
||||
Edits e;
|
||||
e.addReplace(0, 1000);
|
||||
e = std::move(b);
|
||||
assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
|
||||
assertFalse("b moved away: no more hasChanges", b.hasChanges());
|
||||
TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
|
||||
TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);
|
||||
|
||||
// Edits::Iterator default constructor.
|
||||
Edits::Iterator iter;
|
||||
assertFalse("Edits::Iterator().next()", iter.next(errorCode));
|
||||
assertSuccess("Edits::Iterator().next()", errorCode);
|
||||
iter = e.getFineChangesIterator();
|
||||
assertTrue("iter.next()", iter.next(errorCode));
|
||||
assertSuccess("iter.next()", errorCode);
|
||||
assertTrue("iter.hasChange()", iter.hasChange());
|
||||
assertEquals("iter.newLength()", 1, iter.newLength());
|
||||
}
|
||||
|
||||
void StringCaseTest::TestMergeEdits() {
|
||||
// For debugging, set -v to see matching edits up to a failure.
|
||||
IcuTestErrorCode errorCode(*this, "TestMergeEdits");
|
||||
Edits ab, bc, ac, expected_ac;
|
||||
|
||||
// Simple: Two parallel non-changes.
|
||||
ab.addUnchanged(2);
|
||||
bc.addUnchanged(2);
|
||||
expected_ac.addUnchanged(2);
|
||||
|
||||
// Simple: Two aligned changes.
|
||||
ab.addReplace(3, 2);
|
||||
bc.addReplace(2, 1);
|
||||
expected_ac.addReplace(3, 1);
|
||||
|
||||
// Unequal non-changes.
|
||||
ab.addUnchanged(5);
|
||||
bc.addUnchanged(3);
|
||||
expected_ac.addUnchanged(3);
|
||||
// ab ahead by 2
|
||||
|
||||
// Overlapping changes accumulate until they share a boundary.
|
||||
ab.addReplace(4, 3);
|
||||
bc.addReplace(3, 2);
|
||||
ab.addReplace(4, 3);
|
||||
bc.addReplace(3, 2);
|
||||
ab.addReplace(4, 3);
|
||||
bc.addReplace(3, 2);
|
||||
bc.addUnchanged(4);
|
||||
expected_ac.addReplace(14, 8);
|
||||
// bc ahead by 2
|
||||
|
||||
// Balance out intermediate-string lengths.
|
||||
ab.addUnchanged(2);
|
||||
expected_ac.addUnchanged(2);
|
||||
|
||||
// Insert something and delete it: Should disappear.
|
||||
ab.addReplace(0, 5);
|
||||
ab.addReplace(0, 2);
|
||||
bc.addReplace(7, 0);
|
||||
|
||||
// Parallel change to make a new boundary.
|
||||
ab.addReplace(1, 2);
|
||||
bc.addReplace(2, 3);
|
||||
expected_ac.addReplace(1, 3);
|
||||
|
||||
// Multiple ab deletions should remain separate at the boundary.
|
||||
ab.addReplace(1, 0);
|
||||
ab.addReplace(2, 0);
|
||||
ab.addReplace(3, 0);
|
||||
expected_ac.addReplace(1, 0);
|
||||
expected_ac.addReplace(2, 0);
|
||||
expected_ac.addReplace(3, 0);
|
||||
|
||||
// Unequal non-changes can be split for another boundary.
|
||||
ab.addUnchanged(2);
|
||||
bc.addUnchanged(1);
|
||||
expected_ac.addUnchanged(1);
|
||||
// ab ahead by 1
|
||||
|
||||
// Multiple bc insertions should create a boundary and remain separate.
|
||||
bc.addReplace(0, 4);
|
||||
bc.addReplace(0, 5);
|
||||
bc.addReplace(0, 6);
|
||||
expected_ac.addReplace(0, 4);
|
||||
expected_ac.addReplace(0, 5);
|
||||
expected_ac.addReplace(0, 6);
|
||||
// ab ahead by 1
|
||||
|
||||
// Multiple ab deletions in the middle of a bc change are merged.
|
||||
bc.addReplace(2, 2);
|
||||
// bc ahead by 1
|
||||
ab.addReplace(1, 0);
|
||||
ab.addReplace(2, 0);
|
||||
ab.addReplace(3, 0);
|
||||
ab.addReplace(4, 1);
|
||||
expected_ac.addReplace(11, 2);
|
||||
|
||||
// Multiple bc insertions in the middle of an ab change are merged.
|
||||
ab.addReplace(5, 6);
|
||||
bc.addReplace(3, 3);
|
||||
// ab ahead by 3
|
||||
bc.addReplace(0, 4);
|
||||
bc.addReplace(0, 5);
|
||||
bc.addReplace(0, 6);
|
||||
bc.addReplace(3, 7);
|
||||
expected_ac.addReplace(5, 25);
|
||||
|
||||
// Delete around a deletion.
|
||||
ab.addReplace(4, 4);
|
||||
ab.addReplace(3, 0);
|
||||
ab.addUnchanged(2);
|
||||
bc.addReplace(2, 2);
|
||||
bc.addReplace(4, 0);
|
||||
expected_ac.addReplace(9, 2);
|
||||
|
||||
// Insert into an insertion.
|
||||
ab.addReplace(0, 2);
|
||||
bc.addReplace(1, 1);
|
||||
bc.addReplace(0, 8);
|
||||
bc.addUnchanged(4);
|
||||
expected_ac.addReplace(0, 10);
|
||||
// bc ahead by 3
|
||||
|
||||
// Balance out intermediate-string lengths.
|
||||
ab.addUnchanged(3);
|
||||
expected_ac.addUnchanged(3);
|
||||
|
||||
// Deletions meet insertions.
|
||||
// Output order is arbitrary in principle, but we expect insertions first
|
||||
// and want to keep it that way.
|
||||
ab.addReplace(2, 0);
|
||||
ab.addReplace(4, 0);
|
||||
ab.addReplace(6, 0);
|
||||
bc.addReplace(0, 1);
|
||||
bc.addReplace(0, 3);
|
||||
bc.addReplace(0, 5);
|
||||
expected_ac.addReplace(0, 1);
|
||||
expected_ac.addReplace(0, 3);
|
||||
expected_ac.addReplace(0, 5);
|
||||
expected_ac.addReplace(2, 0);
|
||||
expected_ac.addReplace(4, 0);
|
||||
expected_ac.addReplace(6, 0);
|
||||
|
||||
// End with a non-change, so that further edits are never reordered.
|
||||
ab.addUnchanged(1);
|
||||
bc.addUnchanged(1);
|
||||
expected_ac.addUnchanged(1);
|
||||
|
||||
ac.mergeAndAppend(ab, bc, errorCode);
|
||||
assertSuccess("ab+bc", errorCode);
|
||||
if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Append more Edits.
|
||||
Edits ab2, bc2;
|
||||
ab2.addUnchanged(5);
|
||||
bc2.addReplace(1, 2);
|
||||
bc2.addUnchanged(4);
|
||||
expected_ac.addReplace(1, 2);
|
||||
expected_ac.addUnchanged(4);
|
||||
ac.mergeAndAppend(ab2, bc2, errorCode);
|
||||
assertSuccess("ab2+bc2", errorCode);
|
||||
if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Append empty edits.
|
||||
Edits empty;
|
||||
ac.mergeAndAppend(empty, empty, errorCode);
|
||||
assertSuccess("empty+empty", errorCode);
|
||||
if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Error: Append more edits with mismatched intermediate-string lengths.
|
||||
Edits mismatch;
|
||||
mismatch.addReplace(1, 1);
|
||||
ac.mergeAndAppend(ab2, mismatch, errorCode);
|
||||
assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
|
||||
errorCode.reset();
|
||||
ac.mergeAndAppend(mismatch, bc2, errorCode);
|
||||
assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
|
||||
errorCode.reset();
|
||||
}
|
||||
|
||||
void StringCaseTest::TestCaseMapWithEdits() {
|
||||
IcuTestErrorCode errorCode(*this, "TestEdits");
|
||||
UChar dest[20];
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
**********************************************************************
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/edits.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
@ -65,6 +67,100 @@ UnicodeString TestUtility::hex(const uint8_t* bytes, int32_t len) {
|
|||
return buf;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
UnicodeString printOneEdit(const Edits::Iterator &ei) {
|
||||
if (ei.hasChange()) {
|
||||
return UnicodeString() + ei.oldLength() + u"->" + ei.newLength();
|
||||
} else {
|
||||
return UnicodeString() + ei.oldLength() + u"=" + ei.newLength();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps indexes according to the expected edits.
|
||||
* A destination index can occur multiple times when there are source deletions.
|
||||
* Map according to the last occurrence, normally in a non-empty destination span.
|
||||
* Simplest is to search from the back.
|
||||
*/
|
||||
int32_t srcIndexFromDest(const EditChange expected[], int32_t expLength,
|
||||
int32_t srcLength, int32_t destLength, int32_t index) {
|
||||
int32_t srcIndex = srcLength;
|
||||
int32_t destIndex = destLength;
|
||||
int32_t i = expLength;
|
||||
while (index < destIndex && i > 0) {
|
||||
--i;
|
||||
int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
|
||||
int32_t prevDestIndex = destIndex - expected[i].newLength;
|
||||
if (index == prevDestIndex) {
|
||||
return prevSrcIndex;
|
||||
} else if (index > prevDestIndex) {
|
||||
if (expected[i].change) {
|
||||
// In a change span, map to its end.
|
||||
return srcIndex;
|
||||
} else {
|
||||
// In an unchanged span, offset within it.
|
||||
return prevSrcIndex + (index - prevDestIndex);
|
||||
}
|
||||
}
|
||||
srcIndex = prevSrcIndex;
|
||||
destIndex = prevDestIndex;
|
||||
}
|
||||
// index is outside the string.
|
||||
return srcIndex;
|
||||
}
|
||||
|
||||
int32_t destIndexFromSrc(const EditChange expected[], int32_t expLength,
|
||||
int32_t srcLength, int32_t destLength, int32_t index) {
|
||||
int32_t srcIndex = srcLength;
|
||||
int32_t destIndex = destLength;
|
||||
int32_t i = expLength;
|
||||
while (index < srcIndex && i > 0) {
|
||||
--i;
|
||||
int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
|
||||
int32_t prevDestIndex = destIndex - expected[i].newLength;
|
||||
if (index == prevSrcIndex) {
|
||||
return prevDestIndex;
|
||||
} else if (index > prevSrcIndex) {
|
||||
if (expected[i].change) {
|
||||
// In a change span, map to its end.
|
||||
return destIndex;
|
||||
} else {
|
||||
// In an unchanged span, offset within it.
|
||||
return prevDestIndex + (index - prevSrcIndex);
|
||||
}
|
||||
}
|
||||
srcIndex = prevSrcIndex;
|
||||
destIndex = prevDestIndex;
|
||||
}
|
||||
// index is outside the string.
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// For debugging, set -v to see matching edits up to a failure.
|
||||
UBool TestUtility::checkEqualEdits(IntlTest &test, const UnicodeString &name,
|
||||
const Edits &e1, const Edits &e2, UErrorCode &errorCode) {
|
||||
Edits::Iterator ei1 = e1.getFineIterator();
|
||||
Edits::Iterator ei2 = e2.getFineIterator();
|
||||
UBool ok = TRUE;
|
||||
for (int32_t i = 0; ok; ++i) {
|
||||
UBool ei1HasNext = ei1.next(errorCode);
|
||||
UBool ei2HasNext = ei2.next(errorCode);
|
||||
ok &= test.assertEquals(name + u" next()[" + i + u"]" + __LINE__,
|
||||
ei1HasNext, ei2HasNext);
|
||||
ok &= test.assertSuccess(name + u" errorCode[" + i + u"]" + __LINE__, errorCode);
|
||||
ok &= test.assertEquals(name + u" edit[" + i + u"]" + __LINE__,
|
||||
printOneEdit(ei1), printOneEdit(ei2));
|
||||
if (!ei1HasNext || !ei2HasNext) {
|
||||
break;
|
||||
}
|
||||
test.logln();
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
void TestUtility::checkEditsIter(
|
||||
IntlTest &test,
|
||||
const UnicodeString &name,
|
||||
|
@ -77,8 +173,6 @@ void TestUtility::checkEditsIter(
|
|||
int32_t expSrcIndex = 0;
|
||||
int32_t expDestIndex = 0;
|
||||
int32_t expReplIndex = 0;
|
||||
int32_t expSrcIndexFromDest = 0; // for sourceIndexFromDestinationIndex()
|
||||
int32_t expDestIndexFromSrc = 0; // for destinationIndexFromSourceIndex()
|
||||
for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
|
||||
const EditChange &expect = expected[expIndex];
|
||||
UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
|
||||
|
@ -92,7 +186,7 @@ void TestUtility::checkEditsIter(
|
|||
test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
|
||||
}
|
||||
|
||||
if (expect.oldLength > 0 && expDestIndex == expDestIndexFromSrc) {
|
||||
if (expect.oldLength > 0) {
|
||||
test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
|
||||
|
@ -108,7 +202,7 @@ void TestUtility::checkEditsIter(
|
|||
}
|
||||
}
|
||||
|
||||
if (expect.newLength > 0 && expSrcIndex == expSrcIndexFromDest) {
|
||||
if (expect.newLength > 0) {
|
||||
test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
|
||||
|
@ -124,45 +218,11 @@ void TestUtility::checkEditsIter(
|
|||
}
|
||||
}
|
||||
|
||||
// Span starts.
|
||||
test.assertEquals(name + u":" + __LINE__, expDestIndexFromSrc,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
|
||||
test.assertEquals(name + u":" + __LINE__, expSrcIndexFromDest,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
|
||||
|
||||
// Inside unchanged span map offsets 1:1.
|
||||
if (!expect.change && expect.oldLength >= 2) {
|
||||
test.assertEquals(name + u":" + __LINE__, expDestIndex + 1,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
|
||||
test.assertEquals(name + u":" + __LINE__, expSrcIndex + 1,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
|
||||
}
|
||||
|
||||
// Inside change span map to the span limit.
|
||||
int32_t expSrcLimit = expSrcIndex + expect.oldLength;
|
||||
int32_t expDestLimit = expDestIndex + expect.newLength;
|
||||
if (expect.change) {
|
||||
if (expect.oldLength >= 2) {
|
||||
test.assertEquals(name + u":" + __LINE__, expDestLimit,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
|
||||
}
|
||||
if (expect.newLength >= 2) {
|
||||
test.assertEquals(name + u":" + __LINE__, expSrcLimit,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
|
||||
}
|
||||
}
|
||||
|
||||
expSrcIndex = expSrcLimit;
|
||||
expDestIndex = expDestLimit;
|
||||
expSrcIndex += expect.oldLength;
|
||||
expDestIndex += expect.newLength;
|
||||
if (expect.change) {
|
||||
expReplIndex += expect.newLength;
|
||||
}
|
||||
if (expect.newLength > 0) {
|
||||
expSrcIndexFromDest = expSrcIndex;
|
||||
}
|
||||
if (expect.oldLength > 0) {
|
||||
expDestIndexFromSrc = expDestIndex;
|
||||
}
|
||||
}
|
||||
UnicodeString msg = UnicodeString(name).append(u" end");
|
||||
test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode));
|
||||
|
@ -175,8 +235,47 @@ void TestUtility::checkEditsIter(
|
|||
|
||||
test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
|
||||
test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
|
||||
test.assertEquals(name + u":" + __LINE__, expDestIndex,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
|
||||
test.assertEquals(name + u":" + __LINE__, expSrcIndex,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
|
||||
|
||||
// Check mapping of all indexes against a simple implementation
|
||||
// that works on the expected changes.
|
||||
// Iterate once forward, once backward, to cover more runtime conditions.
|
||||
int32_t srcLength = expSrcIndex;
|
||||
int32_t destLength = expDestIndex;
|
||||
std::vector<int32_t> srcIndexes;
|
||||
std::vector<int32_t> destIndexes;
|
||||
srcIndexes.push_back(-1);
|
||||
destIndexes.push_back(-1);
|
||||
int32_t srcIndex = 0;
|
||||
int32_t destIndex = 0;
|
||||
// Collect the source and destination indexes to probe below.
// For each expected edit with a non-empty source (or destination) span,
// record the span start and, if the span has more than one unit,
// also the index one past the start, which lies inside the span.
for (int32_t i = 0; i < expLength; ++i) {
    const EditChange &expect = expected[i];
    if (expect.oldLength > 0) {
        srcIndexes.push_back(srcIndex);
        if (expect.oldLength > 1) {
            srcIndexes.push_back(srcIndex + 1);
        }
    }
    if (expect.newLength > 0) {
        destIndexes.push_back(destIndex);
        // Mirror the source side: step inside the span only when it has
        // more than one unit. For a one-unit span, destIndex + 1 is the
        // start of the next destination span (or destLength), which is
        // recorded anyway, so testing "> 0" here only added a duplicate.
        if (expect.newLength > 1) {
            destIndexes.push_back(destIndex + 1);
        }
    }
    // Advance to the start of the next edit on both sides.
    srcIndex += expect.oldLength;
    destIndex += expect.newLength;
}
|
||||
srcIndexes.push_back(srcLength);
|
||||
destIndexes.push_back(destLength);
|
||||
srcIndexes.push_back(srcLength + 1);
|
||||
destIndexes.push_back(destLength + 1);
|
||||
std::reverse(destIndexes.begin(), destIndexes.end());
|
||||
for (int32_t i : srcIndexes) {
|
||||
test.assertEquals(name + u" destIndexFromSrc(" + i + u"):" + __LINE__,
|
||||
destIndexFromSrc(expected, expLength, srcLength, destLength, i),
|
||||
ei2.destinationIndexFromSourceIndex(i, errorCode));
|
||||
}
|
||||
for (int32_t i : destIndexes) {
|
||||
test.assertEquals(name + u" srcIndexFromDest(" + i + u"):" + __LINE__,
|
||||
srcIndexFromDest(expected, expLength, srcLength, destLength, i),
|
||||
ei2.sourceIndexFromDestinationIndex(i, errorCode));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,6 +37,9 @@ public:
|
|||
|
||||
static UnicodeString hex(const uint8_t* bytes, int32_t len);
|
||||
|
||||
static UBool checkEqualEdits(IntlTest &test, const UnicodeString &name,
|
||||
const Edits &e1, const Edits &e2, UErrorCode &errorCode);
|
||||
|
||||
static void checkEditsIter(
|
||||
IntlTest &test, const UnicodeString &name,
|
||||
Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
|
||||
|
|
Loading…
Add table
Reference in a new issue