ICU-13307 C++ Edits::mergeAndAppend(ab, bc); map indexes only from inside spans, not from empty deletions/insertions; make Edits copyable and Edits::Iterator default-constructible

X-SVN-Rev: 40333
This commit is contained in:
Markus Scherer 2017-08-16 19:19:30 +00:00
parent 32f20ec9b4
commit 837280a366
5 changed files with 683 additions and 73 deletions

View file

@ -33,20 +33,85 @@ const int32_t LENGTH_IN_2TRAIL = 62;
} // namespace
Edits::~Edits() {
if(array != stackArray) {
// Frees the current buffer unless it is the object's own inline stackArray.
// Does not touch array or capacity; callers reassign them afterwards as needed.
void Edits::releaseArray() U_NOEXCEPT {
    const bool usesInlineBuffer = (array == stackArray);
    if (!usesInlineBuffer) {
        uprv_free(array);
    }
}
void Edits::reset() {
// Copies the first `length` 16-bit units of other.array into this object's array.
// The callers in this file set length, delta, numChanges and errorCode_ from `other`
// before calling this function.
// If errorCode_ is a failure, or if a larger buffer cannot be allocated,
// the counters are zeroed (the latter case also sets U_MEMORY_ALLOCATION_ERROR).
// Returns *this.
Edits &Edits::copyArray(const Edits &other) {
    if (U_FAILURE(errorCode_)) {
        length = delta = numChanges = 0;
        return *this;
    }
    // Each array unit is a uint16_t, hence two bytes per unit.
    const size_t numBytes = (size_t)length * 2;
    if (capacity < length) {
        // The current buffer is too small: allocate exactly as many units as needed.
        uint16_t *buffer = (uint16_t *)uprv_malloc(numBytes);
        if (buffer == nullptr) {
            length = delta = numChanges = 0;
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        releaseArray();
        array = buffer;
        capacity = length;
    }
    if (length > 0) {
        uprv_memcpy(array, other.array, numBytes);
    }
    return *this;
}
// Transfers the array contents of src into this object.
// The callers in this file set length, delta, numChanges and errorCode_ from `src`
// before calling this function.
// - Error state: the counters are zeroed and nothing is transferred.
// - Contents fit into the inline stackArray: they are copied and src is left as it was.
// - Otherwise: src's array pointer is taken over, and src is switched back to
//   its own stackArray and reset().
// Returns *this.
Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
    if (U_FAILURE(errorCode_)) {
        length = delta = numChanges = 0;
        return *this;
    }
    releaseArray();
    if (length <= STACK_CAPACITY) {
        // Small enough for the inline buffer: copy rather than steal.
        array = stackArray;
        capacity = STACK_CAPACITY;
        if (length > 0) {
            uprv_memcpy(array, src.array, (size_t)length * 2);
        }
    } else {
        // More units than fit into a stackArray: take over src's buffer.
        array = src.array;
        capacity = src.capacity;
        src.array = src.stackArray;
        src.capacity = STACK_CAPACITY;
        src.reset();
    }
    return *this;
}
// Copy assignment: makes this object an independent copy of `other`.
// Copies the counters and the error code, then the array contents via copyArray().
// Returns *this.
Edits &Edits::operator=(const Edits &other) {
    if (this == &other) {
        // Self-assignment is a no-op. Without this guard copyArray() would call
        // uprv_memcpy() with identical source and destination buffers, and would
        // wipe the contents if this object is in an error state.
        return *this;
    }
    length = other.length;
    delta = other.delta;
    numChanges = other.numChanges;
    errorCode_ = other.errorCode_;
    return copyArray(other);
}
// Move assignment: takes over the state of src; see moveArray() for how the
// array itself is transferred and when src is emptied.
// The counters and the error code must be set before moveArray() runs,
// because moveArray() reads this object's errorCode_ and length.
// Returns *this.
Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
    errorCode_ = src.errorCode_;
    numChanges = src.numChanges;
    delta = src.delta;
    length = src.length;
    Edits &self = moveArray(src);
    return self;
}
// Destructor: gives back the array buffer if it is not the inline stackArray.
Edits::~Edits() { releaseArray(); }
// Clears the recorded edits and the error state.
// The array buffer and its capacity are left untouched.
void Edits::reset() U_NOEXCEPT {
    numChanges = 0;
    delta = 0;
    length = 0;
    errorCode_ = U_ZERO_ERROR;
}
void Edits::addUnchanged(int32_t unchangedLength) {
if(U_FAILURE(errorCode) || unchangedLength == 0) { return; }
if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; }
if(unchangedLength < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// Merge into previous unchanged-text record, if any.
@ -72,7 +137,7 @@ void Edits::addUnchanged(int32_t unchangedLength) {
}
void Edits::addReplace(int32_t oldLength, int32_t newLength) {
if(U_FAILURE(errorCode)) { return; }
if(U_FAILURE(errorCode_)) { return; }
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
// Replacement of short oldLength text units by same-length new text.
// Merge into previous short-replacement record, if any.
@ -88,7 +153,7 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
}
if(oldLength < 0 || newLength < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (oldLength == 0 && newLength == 0) {
@ -100,7 +165,7 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
(newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
// Integer overflow or underflow.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
delta += newDelta;
@ -151,7 +216,7 @@ UBool Edits::growArray() {
} else if (capacity == INT32_MAX) {
// Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
// with a result-string-buffer overflow.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
} else if (capacity >= (INT32_MAX / 2)) {
newCapacity = INT32_MAX;
@ -160,18 +225,16 @@ UBool Edits::growArray() {
}
// Grow by at least 5 units so that a maximal change record will fit.
if ((newCapacity - capacity) < 5) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
}
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
if (newArray == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return FALSE;
}
uprv_memcpy(newArray, array, (size_t)length * 2);
if (array != stackArray) {
uprv_free(array);
}
releaseArray();
array = newArray;
capacity = newCapacity;
return TRUE;
@ -179,11 +242,157 @@ UBool Edits::growArray() {
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
if (U_FAILURE(outErrorCode)) { return TRUE; }
if (U_SUCCESS(errorCode)) { return FALSE; }
outErrorCode = errorCode;
if (U_SUCCESS(errorCode_)) { return FALSE; }
outErrorCode = errorCode_;
return TRUE;
}
// Merges ab (edits from string a to string b) with bc (edits from string b to string c)
// and appends the combined a->c edits to this object via addReplace()/addUnchanged().
//
// Returns *this immediately if copyErrorTo(errorCode) reports a failure on entry.
// If the b-side lengths of ab and bc do not add up to the same total,
// errorCode is set to U_ILLEGAL_ARGUMENT_ERROR (unless copyErrorTo() reports a failure first);
// edits appended before the mismatch was detected are not removed.
//
// NOTE(review): ab and bc are read through iterators over their arrays while this
// object is being appended to; passing *this as ab or bc looks unsafe -- confirm.
Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
if (copyErrorTo(errorCode)) { return *this; }
// Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
// Parallel iteration over both Edits.
Iterator abIter = ab.getFineIterator();
Iterator bcIter = bc.getFineIterator();
UBool abHasNext = TRUE, bcHasNext = TRUE;
// Copy iterator state into local variables, so that we can modify and subdivide spans.
// ab old & new length, bc old & new length
int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
// When we have different-intermediate-length changes, we accumulate a larger change.
int32_t pending_aLength = 0, pending_cLength = 0;
for (;;) {
// At this point, for each of the two iterators:
// Either we are done with the locally cached current edit,
// and its intermediate-string length has been reset,
// or we will continue to work with a truncated remainder of this edit.
//
// If the current edit is done, and the iterator has not yet reached the end,
// then we fetch the next edit. This is true for at least one of the iterators.
//
// Normally it does not matter whether we fetch from ab and then bc or vice versa.
// However, the result is observably different when
// ab deletions meet bc insertions at the same intermediate-string index.
// Some users expect the bc insertions to come first, so we fetch from bc first.
if (bc_bLength == 0) {
if (bcHasNext && (bcHasNext = bcIter.next(errorCode))) {
bc_bLength = bcIter.oldLength();
cLength = bcIter.newLength();
if (bc_bLength == 0) {
// insertion
// Emit it right away when it sits on an ab edit boundary or inside an
// unchanged ab span; otherwise fold it into the pending larger change.
if (ab_bLength == 0 || !abIter.hasChange()) {
addReplace(pending_aLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
} else {
pending_cLength += cLength;
}
continue;
}
}
// else see if the other iterator is done, too.
}
if (ab_bLength == 0) {
if (abHasNext && (abHasNext = abIter.next(errorCode))) {
aLength = abIter.oldLength();
ab_bLength = abIter.newLength();
if (ab_bLength == 0) {
// deletion
// Emit it right away when the current bc edit has not been partially consumed
// or is an unchanged span; otherwise fold it into the pending larger change.
if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
addReplace(pending_aLength + aLength, pending_cLength);
pending_aLength = pending_cLength = 0;
} else {
pending_aLength += aLength;
}
continue;
}
} else if (bc_bLength == 0) {
// Both iterators are done at the same time:
// The intermediate-string lengths match.
break;
} else {
// The ab output string is shorter than the bc input string.
if (!copyErrorTo(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
}
if (bc_bLength == 0) {
// The bc input string is shorter than the ab output string.
if (!copyErrorTo(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
// Done fetching: ab_bLength > 0 && bc_bLength > 0
// The current state has two parts:
// - Past: We accumulate a longer ac edit in the "pending" variables.
// - Current: We have copies of the current ab/bc edits in local variables.
// At least one side is newly fetched.
// One side might be a truncated remainder of an edit we fetched earlier.
if (!abIter.hasChange() && !bcIter.hasChange()) {
// An unchanged span all the way from string a to string c.
if (pending_aLength != 0 || pending_cLength != 0) {
addReplace(pending_aLength, pending_cLength);
pending_aLength = pending_cLength = 0;
}
int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
addUnchanged(unchangedLength);
// For unchanged spans the old and new lengths are kept equal,
// so both members of each pair shrink together.
ab_bLength = aLength -= unchangedLength;
bc_bLength = cLength -= unchangedLength;
// At least one of the unchanged spans is now empty.
continue;
}
if (!abIter.hasChange() && bcIter.hasChange()) {
// Unchanged a->b but changed b->c.
if (ab_bLength >= bc_bLength) {
// Split the longer unchanged span into change + remainder.
addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
aLength = ab_bLength -= bc_bLength;
bc_bLength = 0;
continue;
}
// Handle the shorter unchanged span below like a change.
} else if (abIter.hasChange() && !bcIter.hasChange()) {
// Changed a->b and then unchanged b->c.
if (ab_bLength <= bc_bLength) {
// Split the longer unchanged span into change + remainder.
addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
pending_aLength = pending_cLength = 0;
cLength = bc_bLength -= ab_bLength;
ab_bLength = 0;
continue;
}
// Handle the shorter unchanged span below like a change.
} else { // both abIter.hasChange() && bcIter.hasChange()
if (ab_bLength == bc_bLength) {
// Changes on both sides up to the same position. Emit & reset.
addReplace(pending_aLength + aLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
ab_bLength = bc_bLength = 0;
continue;
}
}
// Accumulate the a->c change, reset the shorter side,
// keep a remainder of the longer one.
pending_aLength += aLength;
pending_cLength += cLength;
if (ab_bLength < bc_bLength) {
bc_bLength -= ab_bLength;
cLength = ab_bLength = 0;
} else { // ab_bLength > bc_bLength
ab_bLength -= bc_bLength;
aLength = bc_bLength = 0;
}
}
// Flush whatever change was still being accumulated when both iterators finished.
if (pending_aLength != 0 || pending_cLength != 0) {
addReplace(pending_aLength, pending_cLength);
}
// Report an error that addReplace()/addUnchanged() may have recorded in this object.
copyErrorTo(errorCode);
return *this;
}
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
array(a), index(0), length(len), remaining(0),
onlyChanges_(oc), coarse(crs),
@ -308,12 +517,7 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
spanStart = destIndex;
spanLength = newLength_;
}
// If we are at the start or limit of an empty span, then we search from
// the start of the string so that we always return
// the first of several consecutive empty spans, for consistent results.
// We do not currently track the properties of the previous span,
// so for now we always reset if we are at the start of the current span.
if (i <= spanStart) {
if (i < spanStart) {
// Reset the iterator to the start.
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
} else if (i < (spanStart + spanLength)) {
@ -328,8 +532,8 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
spanStart = destIndex;
spanLength = newLength_;
}
if (i == spanStart || i < (spanStart + spanLength)) {
// The index is in the current span, or at an empty one.
if (i < (spanStart + spanLength)) {
// The index is in the current span.
return 0;
}
if (remaining > 0) {

View file

@ -37,18 +37,60 @@ public:
*/
Edits() :
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
errorCode(U_ZERO_ERROR) {}
errorCode_(U_ZERO_ERROR) {}
/**
* Copy constructor.
* Starts out on the inline stack array with the counters and error code of
* the source object, then lets copyArray() copy the source's array contents.
* @param other source edits
* @draft ICU 60
*/
Edits(const Edits &other) :
array(stackArray), capacity(STACK_CAPACITY), length(other.length),
delta(other.delta), numChanges(other.numChanges),
errorCode_(other.errorCode_) {
copyArray(other);
}
/**
* Move constructor, might leave src empty.
* This object will have the same contents that the source object had.
* Starts out on the inline stack array with the counters and error code of
* the source object, then lets moveArray() transfer the array contents.
* @param src source edits
* @draft ICU 60
*/
Edits(Edits &&src) U_NOEXCEPT :
array(stackArray), capacity(STACK_CAPACITY), length(src.length),
delta(src.delta), numChanges(src.numChanges),
errorCode_(src.errorCode_) {
moveArray(src);
}
/**
* Destructor.
* @draft ICU 59
*/
~Edits();
/**
* Assignment operator.
* @param other source edits
* @return *this
* @draft ICU 60
*/
Edits &operator=(const Edits &other);
/**
* Move assignment operator, might leave src empty.
* This object will have the same contents that the source object had.
* The behavior is undefined if *this and src are the same object.
* @param src source edits
* @return *this
* @draft ICU 60
*/
Edits &operator=(Edits &&src) U_NOEXCEPT;
/**
* Resets the data but may not release memory.
* @draft ICU 59
*/
void reset();
void reset() U_NOEXCEPT;
/**
* Adds a record for an unchanged segment of text.
@ -99,6 +141,15 @@ public:
* @draft ICU 59
*/
struct U_COMMON_API Iterator U_FINAL : public UMemory {
/**
* Default constructor, empty iterator.
* Initializes the array pointer to nullptr, the flags to FALSE,
* and all lengths and indexes to 0.
* @draft ICU 60
*/
Iterator() :
array(nullptr), index(0), length(0),
remaining(0), onlyChanges_(FALSE), coarse(FALSE),
changed(FALSE), oldLength_(0), newLength_(0),
srcIndex(0), replIndex(0), destIndex(0) {}
/**
* Copy constructor.
* @draft ICU 59
@ -309,9 +360,39 @@ public:
return Iterator(array, length, FALSE, FALSE);
}
/**
* Merges the two input Edits and appends the result to this object.
*
* Consider two string transformations (for example, normalization and case mapping)
* where each records Edits in addition to writing an output string.<br>
* Edits ab reflect how substrings of input string a
* map to substrings of intermediate string b.<br>
* Edits bc reflect how substrings of intermediate string b
* map to substrings of output string c.<br>
* This function merges ab and bc such that the additional edits
* recorded in this object reflect how substrings of input string a
* map to substrings of output string c.
*
* If unrelated Edits are passed in where the output string of the first
* has a different length than the input string of the second,
* then a U_ILLEGAL_ARGUMENT_ERROR is reported.
*
* @param ab reflects how substrings of input string a
* map to substrings of intermediate string b.
* @param bc reflects how substrings of intermediate string b
* map to substrings of output string c.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return *this, with the merged edits appended
* @draft ICU 60
*/
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
private:
Edits(const Edits &) = delete;
Edits &operator=(const Edits &) = delete;
void releaseArray() U_NOEXCEPT;
Edits &copyArray(const Edits &other);
Edits &moveArray(Edits &src) U_NOEXCEPT;
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
@ -325,7 +406,7 @@ private:
int32_t length;
int32_t delta;
int32_t numChanges;
UErrorCode errorCode;
UErrorCode errorCode_;
uint16_t stackArray[STACK_CAPACITY];
};

View file

@ -57,6 +57,8 @@ public:
void TestMalformedUTF8();
void TestBufferOverflow();
void TestEdits();
void TestCopyMoveEdits();
void TestMergeEdits();
void TestCaseMapWithEdits();
void TestCaseMapUTF8WithEdits();
void TestLongUnicodeString();
@ -94,6 +96,8 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestMalformedUTF8);
TESTCASE_AUTO(TestBufferOverflow);
TESTCASE_AUTO(TestEdits);
TESTCASE_AUTO(TestCopyMoveEdits);
TESTCASE_AUTO(TestMergeEdits);
TESTCASE_AUTO(TestCaseMapWithEdits);
TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
TESTCASE_AUTO(TestLongUnicodeString);
@ -966,6 +970,225 @@ void StringCaseTest::TestEdits() {
assertFalse("reset then iterator", ei.next(errorCode));
}
// Tests the Edits copy constructor, copy assignment, move constructor and
// move assignment, as well as the default-constructed Edits::Iterator.
// The moved-from objects a and b are inspected on purpose after the move.
void StringCaseTest::TestCopyMoveEdits() {
IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
// Exceed the stack array capacity.
Edits a;
for (int32_t i = 0; i < 250; ++i) {
// Each replacement grows the text by one unit, for a total length delta of 250.
a.addReplace(i % 10, (i % 10) + 1);
}
assertEquals("a: many edits, length delta", 250, a.lengthDelta());
// copy
Edits b(a);
assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);
// assign
Edits c;
// Give c some contents first, so that the assignment has to replace them.
c.addUnchanged(99);
c.addReplace(88, 77);
c = b;
assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);
// move constructor empties object with heap array
Edits d(std::move(a));
assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
assertFalse("a moved away: no more hasChanges", a.hasChanges());
TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
Edits empty;
TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);
// move assignment empties object with heap array
Edits e;
// Give e some contents first, so that the move assignment has to replace them.
e.addReplace(0, 1000);
e = std::move(b);
assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
assertFalse("b moved away: no more hasChanges", b.hasChanges());
TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);
// Edits::Iterator default constructor.
Edits::Iterator iter;
assertFalse("Edits::Iterator().next()", iter.next(errorCode));
assertSuccess("Edits::Iterator().next()", errorCode);
// A default-constructed iterator can be assigned a real one afterwards.
iter = e.getFineChangesIterator();
assertTrue("iter.next()", iter.next(errorCode));
assertSuccess("iter.next()", errorCode);
assertTrue("iter.hasChange()", iter.hasChange());
// The first edit recorded above was addReplace(0, 1).
assertEquals("iter.newLength()", 1, iter.newLength());
}
// Tests Edits::mergeAndAppend().
// Builds ab (a->b) and bc (b->c) edit lists scenario by scenario, together with
// the a->c edits that the merge is expected to produce, then merges once and compares.
// Afterwards checks appending to a non-empty result, merging empty edits,
// and the error for mismatched intermediate-string lengths.
void StringCaseTest::TestMergeEdits() {
// For debugging, set -v to see matching edits up to a failure.
IcuTestErrorCode errorCode(*this, "TestMergeEdits");
Edits ab, bc, ac, expected_ac;
// Simple: Two parallel non-changes.
ab.addUnchanged(2);
bc.addUnchanged(2);
expected_ac.addUnchanged(2);
// Simple: Two aligned changes.
ab.addReplace(3, 2);
bc.addReplace(2, 1);
expected_ac.addReplace(3, 1);
// Unequal non-changes.
ab.addUnchanged(5);
bc.addUnchanged(3);
expected_ac.addUnchanged(3);
// ab ahead by 2
// Overlapping changes accumulate until they share a boundary.
ab.addReplace(4, 3);
bc.addReplace(3, 2);
ab.addReplace(4, 3);
bc.addReplace(3, 2);
ab.addReplace(4, 3);
bc.addReplace(3, 2);
bc.addUnchanged(4);
expected_ac.addReplace(14, 8);
// bc ahead by 2
// Balance out intermediate-string lengths.
ab.addUnchanged(2);
expected_ac.addUnchanged(2);
// Insert something and delete it: Should disappear.
ab.addReplace(0, 5);
ab.addReplace(0, 2);
bc.addReplace(7, 0);
// Parallel change to make a new boundary.
ab.addReplace(1, 2);
bc.addReplace(2, 3);
expected_ac.addReplace(1, 3);
// Multiple ab deletions should remain separate at the boundary.
ab.addReplace(1, 0);
ab.addReplace(2, 0);
ab.addReplace(3, 0);
expected_ac.addReplace(1, 0);
expected_ac.addReplace(2, 0);
expected_ac.addReplace(3, 0);
// Unequal non-changes can be split for another boundary.
ab.addUnchanged(2);
bc.addUnchanged(1);
expected_ac.addUnchanged(1);
// ab ahead by 1
// Multiple bc insertions should create a boundary and remain separate.
bc.addReplace(0, 4);
bc.addReplace(0, 5);
bc.addReplace(0, 6);
expected_ac.addReplace(0, 4);
expected_ac.addReplace(0, 5);
expected_ac.addReplace(0, 6);
// ab ahead by 1
// Multiple ab deletions in the middle of a bc change are merged.
bc.addReplace(2, 2);
// bc ahead by 1
ab.addReplace(1, 0);
ab.addReplace(2, 0);
ab.addReplace(3, 0);
ab.addReplace(4, 1);
expected_ac.addReplace(11, 2);
// Multiple bc insertions in the middle of an ab change are merged.
ab.addReplace(5, 6);
bc.addReplace(3, 3);
// ab ahead by 3
bc.addReplace(0, 4);
bc.addReplace(0, 5);
bc.addReplace(0, 6);
bc.addReplace(3, 7);
expected_ac.addReplace(5, 25);
// Delete around a deletion.
ab.addReplace(4, 4);
ab.addReplace(3, 0);
ab.addUnchanged(2);
bc.addReplace(2, 2);
bc.addReplace(4, 0);
expected_ac.addReplace(9, 2);
// Insert into an insertion.
ab.addReplace(0, 2);
bc.addReplace(1, 1);
bc.addReplace(0, 8);
bc.addUnchanged(4);
expected_ac.addReplace(0, 10);
// bc ahead by 3
// Balance out intermediate-string lengths.
ab.addUnchanged(3);
expected_ac.addUnchanged(3);
// Deletions meet insertions.
// Output order is arbitrary in principle, but we expect insertions first
// and want to keep it that way.
ab.addReplace(2, 0);
ab.addReplace(4, 0);
ab.addReplace(6, 0);
bc.addReplace(0, 1);
bc.addReplace(0, 3);
bc.addReplace(0, 5);
expected_ac.addReplace(0, 1);
expected_ac.addReplace(0, 3);
expected_ac.addReplace(0, 5);
expected_ac.addReplace(2, 0);
expected_ac.addReplace(4, 0);
expected_ac.addReplace(6, 0);
// End with a non-change, so that further edits are never reordered.
ab.addUnchanged(1);
bc.addUnchanged(1);
expected_ac.addUnchanged(1);
// Merge everything built above in one call and compare edit by edit.
ac.mergeAndAppend(ab, bc, errorCode);
assertSuccess("ab+bc", errorCode);
if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
return;
}
// Append more Edits.
Edits ab2, bc2;
ab2.addUnchanged(5);
bc2.addReplace(1, 2);
bc2.addUnchanged(4);
expected_ac.addReplace(1, 2);
expected_ac.addUnchanged(4);
ac.mergeAndAppend(ab2, bc2, errorCode);
assertSuccess("ab2+bc2", errorCode);
if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
return;
}
// Append empty edits.
Edits empty;
ac.mergeAndAppend(empty, empty, errorCode);
assertSuccess("empty+empty", errorCode);
if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
return;
}
// Error: Append more edits with mismatched intermediate-string lengths.
Edits mismatch;
mismatch.addReplace(1, 1);
// ab2 produces 5 intermediate units but mismatch consumes only 1.
ac.mergeAndAppend(ab2, mismatch, errorCode);
assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
errorCode.reset();
// mismatch produces 1 intermediate unit but bc2 consumes 5.
ac.mergeAndAppend(mismatch, bc2, errorCode);
assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
errorCode.reset();
}
void StringCaseTest::TestCaseMapWithEdits() {
IcuTestErrorCode errorCode(*this, "TestEdits");
UChar dest[20];

View file

@ -10,6 +10,8 @@
**********************************************************************
*/
#include <algorithm>
#include <vector>
#include "unicode/utypes.h"
#include "unicode/edits.h"
#include "unicode/unistr.h"
@ -65,6 +67,100 @@ UnicodeString TestUtility::hex(const uint8_t* bytes, int32_t len) {
return buf;
}
namespace {
// Formats the iterator's current edit for test messages:
// "old->new" for a change, "old=new" for an unchanged span.
UnicodeString printOneEdit(const Edits::Iterator &ei) {
    const char16_t *separator = ei.hasChange() ? u"->" : u"=";
    return UnicodeString() + ei.oldLength() + separator + ei.newLength();
}
/**
* Maps indexes according to the expected edits.
* A destination index can occur multiple times when there are source deletions.
* Map according to the last occurrence, normally in a non-empty destination span.
* Simplest is to search from the back.
*/
int32_t srcIndexFromDest(const EditChange expected[], int32_t expLength,
int32_t srcLength, int32_t destLength, int32_t index) {
int32_t srcIndex = srcLength;
int32_t destIndex = destLength;
int32_t i = expLength;
while (index < destIndex && i > 0) {
--i;
int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
int32_t prevDestIndex = destIndex - expected[i].newLength;
if (index == prevDestIndex) {
return prevSrcIndex;
} else if (index > prevDestIndex) {
if (expected[i].change) {
// In a change span, map to its end.
return srcIndex;
} else {
// In an unchanged span, offset within it.
return prevSrcIndex + (index - prevDestIndex);
}
}
srcIndex = prevSrcIndex;
destIndex = prevDestIndex;
}
// index is outside the string.
return srcIndex;
}
int32_t destIndexFromSrc(const EditChange expected[], int32_t expLength,
int32_t srcLength, int32_t destLength, int32_t index) {
int32_t srcIndex = srcLength;
int32_t destIndex = destLength;
int32_t i = expLength;
while (index < srcIndex && i > 0) {
--i;
int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
int32_t prevDestIndex = destIndex - expected[i].newLength;
if (index == prevSrcIndex) {
return prevDestIndex;
} else if (index > prevSrcIndex) {
if (expected[i].change) {
// In a change span, map to its end.
return destIndex;
} else {
// In an unchanged span, offset within it.
return prevDestIndex + (index - prevSrcIndex);
}
}
srcIndex = prevSrcIndex;
destIndex = prevDestIndex;
}
// index is outside the string.
return destIndex;
}
} // namespace
// Walks the fine-grained iterators of e1 and e2 in parallel and asserts that
// both yield the same sequence of edits and end at the same time.
// Stops at the first failed assertion. Returns TRUE if everything matched.
// For debugging, set -v to see matching edits up to a failure.
UBool TestUtility::checkEqualEdits(IntlTest &test, const UnicodeString &name,
                                   const Edits &e1, const Edits &e2, UErrorCode &errorCode) {
    Edits::Iterator iter1 = e1.getFineIterator();
    Edits::Iterator iter2 = e2.getFineIterator();
    UBool ok = TRUE;
    int32_t i = 0;
    while (ok) {
        UBool has1 = iter1.next(errorCode);
        UBool has2 = iter2.next(errorCode);
        ok &= test.assertEquals(name + u" next()[" + i + u"]" + __LINE__,
                                has1, has2);
        ok &= test.assertSuccess(name + u" errorCode[" + i + u"]" + __LINE__, errorCode);
        ok &= test.assertEquals(name + u" edit[" + i + u"]" + __LINE__,
                                printOneEdit(iter1), printOneEdit(iter2));
        if (!(has1 && has2)) {
            // At least one iterator is exhausted.
            break;
        }
        test.logln();
        ++i;
    }
    return ok;
}
void TestUtility::checkEditsIter(
IntlTest &test,
const UnicodeString &name,
@ -77,8 +173,6 @@ void TestUtility::checkEditsIter(
int32_t expSrcIndex = 0;
int32_t expDestIndex = 0;
int32_t expReplIndex = 0;
int32_t expSrcIndexFromDest = 0; // for sourceIndexFromDestinationIndex()
int32_t expDestIndexFromSrc = 0; // for destinationIndexFromSourceIndex()
for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
const EditChange &expect = expected[expIndex];
UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
@ -92,7 +186,7 @@ void TestUtility::checkEditsIter(
test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
}
if (expect.oldLength > 0 && expDestIndex == expDestIndexFromSrc) {
if (expect.oldLength > 0) {
test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
@ -108,7 +202,7 @@ void TestUtility::checkEditsIter(
}
}
if (expect.newLength > 0 && expSrcIndex == expSrcIndexFromDest) {
if (expect.newLength > 0) {
test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
@ -124,45 +218,11 @@ void TestUtility::checkEditsIter(
}
}
// Span starts.
test.assertEquals(name + u":" + __LINE__, expDestIndexFromSrc,
ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
test.assertEquals(name + u":" + __LINE__, expSrcIndexFromDest,
ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
// Inside unchanged span map offsets 1:1.
if (!expect.change && expect.oldLength >= 2) {
test.assertEquals(name + u":" + __LINE__, expDestIndex + 1,
ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
test.assertEquals(name + u":" + __LINE__, expSrcIndex + 1,
ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
}
// Inside change span map to the span limit.
int32_t expSrcLimit = expSrcIndex + expect.oldLength;
int32_t expDestLimit = expDestIndex + expect.newLength;
if (expect.change) {
if (expect.oldLength >= 2) {
test.assertEquals(name + u":" + __LINE__, expDestLimit,
ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
}
if (expect.newLength >= 2) {
test.assertEquals(name + u":" + __LINE__, expSrcLimit,
ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
}
}
expSrcIndex = expSrcLimit;
expDestIndex = expDestLimit;
expSrcIndex += expect.oldLength;
expDestIndex += expect.newLength;
if (expect.change) {
expReplIndex += expect.newLength;
}
if (expect.newLength > 0) {
expSrcIndexFromDest = expSrcIndex;
}
if (expect.oldLength > 0) {
expDestIndexFromSrc = expDestIndex;
}
}
UnicodeString msg = UnicodeString(name).append(u" end");
test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode));
@ -175,8 +235,47 @@ void TestUtility::checkEditsIter(
test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
test.assertEquals(name + u":" + __LINE__, expDestIndex,
ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
test.assertEquals(name + u":" + __LINE__, expSrcIndex,
ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
// Check mapping of all indexes against a simple implementation
// that works on the expected changes.
// Iterate once forward, once backward, to cover more runtime conditions.
int32_t srcLength = expSrcIndex;
int32_t destLength = expDestIndex;
std::vector<int32_t> srcIndexes;
std::vector<int32_t> destIndexes;
srcIndexes.push_back(-1);
destIndexes.push_back(-1);
int32_t srcIndex = 0;
int32_t destIndex = 0;
// Collect the indexes to probe: for every non-empty span its start index and,
// if the span is longer than one unit, the index just inside it.
for (int32_t i = 0; i < expLength; ++i) {
    if (expected[i].oldLength > 0) {
        srcIndexes.push_back(srcIndex);
        if (expected[i].oldLength > 1) {
            srcIndexes.push_back(srcIndex + 1);
        }
    }
    if (expected[i].newLength > 0) {
        destIndexes.push_back(destIndex);
        // Was "> 0", which repeated the enclosing condition and so was always true.
        // Mirror the source side: destIndex + 1 is inside the span only if newLength > 1.
        if (expected[i].newLength > 1) {
            destIndexes.push_back(destIndex + 1);
        }
    }
    srcIndex += expected[i].oldLength;
    destIndex += expected[i].newLength;
}
srcIndexes.push_back(srcLength);
destIndexes.push_back(destLength);
srcIndexes.push_back(srcLength + 1);
destIndexes.push_back(destLength + 1);
std::reverse(destIndexes.begin(), destIndexes.end());
for (int32_t i : srcIndexes) {
test.assertEquals(name + u" destIndexFromSrc(" + i + u"):" + __LINE__,
destIndexFromSrc(expected, expLength, srcLength, destLength, i),
ei2.destinationIndexFromSourceIndex(i, errorCode));
}
for (int32_t i : destIndexes) {
test.assertEquals(name + u" srcIndexFromDest(" + i + u"):" + __LINE__,
srcIndexFromDest(expected, expLength, srcLength, destLength, i),
ei2.sourceIndexFromDestinationIndex(i, errorCode));
}
}

View file

@ -37,6 +37,9 @@ public:
static UnicodeString hex(const uint8_t* bytes, int32_t len);
static UBool checkEqualEdits(IntlTest &test, const UnicodeString &name,
const Edits &e1, const Edits &e2, UErrorCode &errorCode);
static void checkEditsIter(
IntlTest &test, const UnicodeString &name,
Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators