mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-13270 icu::Edits add numberOfChanges(); Edits::Iterator add findDestinationIndex(), destinationIndexFromSourceIndex(), sourceIndexFromDestinationIndex()
X-SVN-Rev: 40286
This commit is contained in:
parent
112d214d29
commit
9a3a03c417
7 changed files with 487 additions and 94 deletions
|
@ -40,7 +40,7 @@ Edits::~Edits() {
|
|||
}
|
||||
|
||||
void Edits::reset() {
|
||||
length = delta = 0;
|
||||
length = delta = numChanges = 0;
|
||||
}
|
||||
|
||||
void Edits::addUnchanged(int32_t unchangedLength) {
|
||||
|
@ -76,6 +76,7 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
|
|||
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
|
||||
// Replacement of short oldLength text units by same-length new text.
|
||||
// Merge into previous short-replacement record, if any.
|
||||
++numChanges;
|
||||
int32_t last = lastUnit();
|
||||
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
|
||||
(last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
|
||||
|
@ -93,6 +94,7 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
|
|||
if (oldLength == 0 && newLength == 0) {
|
||||
return;
|
||||
}
|
||||
++numChanges;
|
||||
int32_t newDelta = newLength - oldLength;
|
||||
if (newDelta != 0) {
|
||||
if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
|
||||
|
@ -182,18 +184,6 @@ UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
UBool Edits::hasChanges() const {
|
||||
if (delta != 0) {
|
||||
return TRUE;
|
||||
}
|
||||
for (int32_t i = 0; i < length; ++i) {
|
||||
if (array[i] > MAX_UNCHANGED) {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
|
||||
array(a), index(0), length(len), remaining(0),
|
||||
onlyChanges_(oc), coarse(crs),
|
||||
|
@ -308,39 +298,97 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
|
||||
if (i < srcIndex) {
|
||||
int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode) || i < 0) { return -1; }
|
||||
int32_t spanStart, spanLength;
|
||||
if (findSource) { // find source index
|
||||
spanStart = srcIndex;
|
||||
spanLength = oldLength_;
|
||||
} else { // find destination index
|
||||
spanStart = destIndex;
|
||||
spanLength = newLength_;
|
||||
}
|
||||
// If we are at the start or limit of an empty span, then we search from
|
||||
// the start of the string so that we always return
|
||||
// the first of several consecutive empty spans, for consistent results.
|
||||
// We do not currently track the properties of the previous span,
|
||||
// so for now we always reset if we are at the start of the current span.
|
||||
if (i <= spanStart) {
|
||||
// Reset the iterator to the start.
|
||||
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
|
||||
} else if (i < (srcIndex + oldLength_)) {
|
||||
} else if (i < (spanStart + spanLength)) {
|
||||
// The index is in the current span.
|
||||
return TRUE;
|
||||
return 0;
|
||||
}
|
||||
while (next(FALSE, errorCode)) {
|
||||
if (i < (srcIndex + oldLength_)) {
|
||||
// The index is in the current span.
|
||||
return TRUE;
|
||||
if (findSource) {
|
||||
spanStart = srcIndex;
|
||||
spanLength = oldLength_;
|
||||
} else {
|
||||
spanStart = destIndex;
|
||||
spanLength = newLength_;
|
||||
}
|
||||
if (i == spanStart || i < (spanStart + spanLength)) {
|
||||
// The index is in the current span, or at an empty one.
|
||||
return 0;
|
||||
}
|
||||
if (remaining > 0) {
|
||||
// Is the index in one of the remaining compressed edits?
|
||||
// srcIndex is the start of the current span, before the remaining ones.
|
||||
int32_t len = (remaining + 1) * oldLength_;
|
||||
if (i < (srcIndex + len)) {
|
||||
int32_t n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining
|
||||
len = n * oldLength_;
|
||||
// spanStart is the start of the current span, before the remaining ones.
|
||||
int32_t len = (remaining + 1) * spanLength;
|
||||
if (i < (spanStart + len)) {
|
||||
int32_t n = (i - spanStart) / spanLength; // 1 <= n <= remaining
|
||||
len = n * spanLength;
|
||||
srcIndex += len;
|
||||
replIndex += len;
|
||||
destIndex += len;
|
||||
remaining -= n;
|
||||
return TRUE;
|
||||
return 0;
|
||||
}
|
||||
// Make next() skip all of these edits at once.
|
||||
oldLength_ = newLength_ = len;
|
||||
remaining = 0;
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
|
||||
int32_t where = findIndex(i, TRUE, errorCode);
|
||||
if (where < 0) {
|
||||
// Error or before the string.
|
||||
return 0;
|
||||
}
|
||||
if (where > 0 || i == srcIndex) {
|
||||
// At or after string length, or at start of the found span.
|
||||
return destIndex;
|
||||
}
|
||||
if (changed) {
|
||||
// In a change span, map to its end.
|
||||
return destIndex + newLength_;
|
||||
} else {
|
||||
// In an unchanged span, offset 1:1 within it.
|
||||
return destIndex + (i - srcIndex);
|
||||
}
|
||||
}
|
||||
|
||||
int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
|
||||
int32_t where = findIndex(i, FALSE, errorCode);
|
||||
if (where < 0) {
|
||||
// Error or before the string.
|
||||
return 0;
|
||||
}
|
||||
if (where > 0 || i == destIndex) {
|
||||
// At or after string length, or at start of the found span.
|
||||
return srcIndex;
|
||||
}
|
||||
if (changed) {
|
||||
// In a change span, map to its end.
|
||||
return srcIndex + oldLength_;
|
||||
} else {
|
||||
// In an unchanged span, offset within it.
|
||||
return srcIndex + (i - destIndex);
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -36,7 +36,7 @@ public:
|
|||
* @draft ICU 59
|
||||
*/
|
||||
Edits() :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
|
||||
errorCode(U_ZERO_ERROR) {}
|
||||
/**
|
||||
* Destructor.
|
||||
|
@ -66,6 +66,9 @@ public:
|
|||
* Sets the UErrorCode if an error occurred while recording edits.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @param outErrorCode Set to an error code if it does not contain one already
|
||||
* and an error occurred while recording edits.
|
||||
* Otherwise unchanged.
|
||||
* @return TRUE if U_FAILURE(outErrorCode)
|
||||
* @draft ICU 59
|
||||
*/
|
||||
|
@ -81,7 +84,13 @@ public:
|
|||
* @return TRUE if there are any change edits
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool hasChanges() const;
|
||||
UBool hasChanges() const { return numChanges != 0; }
|
||||
|
||||
/**
|
||||
* @return the number of change edits
|
||||
* @draft ICU 60
|
||||
*/
|
||||
int32_t numberOfChanges() const { return numChanges; }
|
||||
|
||||
/**
|
||||
* Access to the list of edits.
|
||||
|
@ -103,6 +112,9 @@ public:
|
|||
|
||||
/**
|
||||
* Advances to the next edit.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return TRUE if there is another edit
|
||||
* @draft ICU 59
|
||||
*/
|
||||
|
@ -121,10 +133,86 @@ public:
|
|||
* if the source index is out of bounds for the source string.
|
||||
*
|
||||
* @param i source index
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return TRUE if the edit for the source index was found
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
|
||||
UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
|
||||
return findIndex(i, TRUE, errorCode) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the edit that contains the destination index.
|
||||
* The destination index may be found in a non-change
|
||||
* even if normal iteration would skip non-changes.
|
||||
* Normal iteration can continue from a found edit.
|
||||
*
|
||||
* The iterator state before this search logically does not matter.
|
||||
* (It may affect the performance of the search.)
|
||||
*
|
||||
* The iterator state after this search is undefined
|
||||
* if the destination index is out of bounds for the destination string.
|
||||
*
|
||||
* @param i destination index
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return TRUE if the edit for the destination index was found
|
||||
* @draft ICU 60
|
||||
*/
|
||||
UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
|
||||
return findIndex(i, FALSE, errorCode) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the destination index corresponding to the given source index.
|
||||
* If the source index is inside a change edit (not at its start),
|
||||
* then the destination index at the end of that edit is returned,
|
||||
* since there is no information about index mapping inside a change edit.
|
||||
*
|
||||
* (This means that indexes to the start and middle of an edit,
|
||||
* for example around a grapheme cluster, are mapped to indexes
|
||||
* encompassing the entire edit.
|
||||
* The alternative, mapping an interior index to the start,
|
||||
* would map such an interval to an empty one.)
|
||||
*
|
||||
* This operation will usually but not always modify this object.
|
||||
* The iterator state after this search is undefined.
|
||||
*
|
||||
* @param i source index
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return destination index; undefined if i is not 0..string length
|
||||
* @draft ICU 60
|
||||
*/
|
||||
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns the source index corresponding to the given destination index.
|
||||
* If the destination index is inside a change edit (not at its start),
|
||||
* then the source index at the end of that edit is returned,
|
||||
* since there is no information about index mapping inside a change edit.
|
||||
*
|
||||
* (This means that indexes to the start and middle of an edit,
|
||||
* for example around a grapheme cluster, are mapped to indexes
|
||||
* encompassing the entire edit.
|
||||
* The alternative, mapping an interior index to the start,
|
||||
* would map such an interval to an empty one.)
|
||||
*
|
||||
* This operation will usually but not always modify this object.
|
||||
* The iterator state after this search is undefined.
|
||||
*
|
||||
* @param i destination index
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return source index; undefined if i is not 0..string length
|
||||
* @draft ICU 60
|
||||
*/
|
||||
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
|
||||
|
@ -170,6 +258,8 @@ public:
|
|||
void updateIndexes();
|
||||
UBool noNext();
|
||||
UBool next(UBool onlyChanges, UErrorCode &errorCode);
|
||||
/** @return -1: error or i<0; 0: found; 1: i>=string length */
|
||||
int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
|
||||
|
||||
const uint16_t *array;
|
||||
int32_t index, length;
|
||||
|
@ -234,6 +324,7 @@ private:
|
|||
int32_t capacity;
|
||||
int32_t length;
|
||||
int32_t delta;
|
||||
int32_t numChanges;
|
||||
UErrorCode errorCode;
|
||||
uint16_t stackArray[STACK_CAPACITY];
|
||||
};
|
||||
|
|
|
@ -906,13 +906,15 @@ void StringCaseTest::TestBufferOverflow() {
|
|||
void StringCaseTest::TestEdits() {
|
||||
IcuTestErrorCode errorCode(*this, "TestEdits");
|
||||
Edits edits;
|
||||
assertFalse("new Edits", edits.hasChanges());
|
||||
assertFalse("new Edits hasChanges", edits.hasChanges());
|
||||
assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
|
||||
assertEquals("new Edits", 0, edits.lengthDelta());
|
||||
edits.addUnchanged(1); // multiple unchanged ranges are combined
|
||||
edits.addUnchanged(10000); // too long, and they are split
|
||||
edits.addReplace(0, 0);
|
||||
edits.addUnchanged(2);
|
||||
assertFalse("unchanged 10003", edits.hasChanges());
|
||||
assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
|
||||
assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
|
||||
assertEquals("unchanged 10003", 0, edits.lengthDelta());
|
||||
edits.addReplace(1, 1); // multiple short equal-length edits are compressed
|
||||
edits.addUnchanged(0);
|
||||
|
@ -922,7 +924,8 @@ void StringCaseTest::TestEdits() {
|
|||
edits.addReplace(100, 0);
|
||||
edits.addReplace(3000, 4000); // variable-length encoding
|
||||
edits.addReplace(100000, 100000);
|
||||
assertTrue("some edits", edits.hasChanges());
|
||||
assertTrue("some edits hasChanges", edits.hasChanges());
|
||||
assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
|
||||
assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
|
||||
UErrorCode outErrorCode = U_ZERO_ERROR;
|
||||
assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
|
||||
|
@ -956,7 +959,8 @@ void StringCaseTest::TestEdits() {
|
|||
fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
|
||||
|
||||
edits.reset();
|
||||
assertFalse("reset", edits.hasChanges());
|
||||
assertFalse("reset hasChanges", edits.hasChanges());
|
||||
assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
|
||||
assertEquals("reset", 0, edits.lengthDelta());
|
||||
Edits::Iterator ei = edits.getCoarseChangesIterator();
|
||||
assertFalse("reset then iterator", ei.next(errorCode));
|
||||
|
|
|
@ -71,32 +71,35 @@ void TestUtility::checkEditsIter(
|
|||
Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
|
||||
const EditChange expected[], int32_t expLength, UBool withUnchanged,
|
||||
UErrorCode &errorCode) {
|
||||
test.assertFalse(name, ei2.findSourceIndex(-1, errorCode));
|
||||
test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(-1, errorCode));
|
||||
test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(-1, errorCode));
|
||||
|
||||
int32_t expSrcIndex = 0;
|
||||
int32_t expDestIndex = 0;
|
||||
int32_t expReplIndex = 0;
|
||||
int32_t expSrcIndexFromDest = 0; // for sourceIndexFromDestinationIndex()
|
||||
int32_t expDestIndexFromSrc = 0; // for destinationIndexFromSourceIndex()
|
||||
for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
|
||||
const EditChange &expect = expected[expIndex];
|
||||
UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
|
||||
if (withUnchanged || expect.change) {
|
||||
test.assertTrue(msg, ei1.next(errorCode));
|
||||
test.assertEquals(msg, expect.change, ei1.hasChange());
|
||||
test.assertEquals(msg, expect.oldLength, ei1.oldLength());
|
||||
test.assertEquals(msg, expect.newLength, ei1.newLength());
|
||||
test.assertEquals(msg, expSrcIndex, ei1.sourceIndex());
|
||||
test.assertEquals(msg, expDestIndex, ei1.destinationIndex());
|
||||
test.assertEquals(msg, expReplIndex, ei1.replacementIndex());
|
||||
test.assertTrue(msg + u":" + __LINE__, ei1.next(errorCode));
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.change, ei1.hasChange());
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei1.oldLength());
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei1.newLength());
|
||||
test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex());
|
||||
test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex());
|
||||
test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
|
||||
}
|
||||
|
||||
if (expect.oldLength > 0) {
|
||||
test.assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode));
|
||||
test.assertEquals(msg, expect.change, ei2.hasChange());
|
||||
test.assertEquals(msg, expect.oldLength, ei2.oldLength());
|
||||
test.assertEquals(msg, expect.newLength, ei2.newLength());
|
||||
test.assertEquals(msg, expSrcIndex, ei2.sourceIndex());
|
||||
test.assertEquals(msg, expDestIndex, ei2.destinationIndex());
|
||||
test.assertEquals(msg, expReplIndex, ei2.replacementIndex());
|
||||
if (expect.oldLength > 0 && expDestIndex == expDestIndexFromSrc) {
|
||||
test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength());
|
||||
test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex());
|
||||
test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex());
|
||||
test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex());
|
||||
if (!withUnchanged) {
|
||||
// For some iterators, move past the current range
|
||||
// so that findSourceIndex() has to look before the current index.
|
||||
|
@ -105,20 +108,75 @@ void TestUtility::checkEditsIter(
|
|||
}
|
||||
}
|
||||
|
||||
expSrcIndex += expect.oldLength;
|
||||
expDestIndex += expect.newLength;
|
||||
if (expect.newLength > 0 && expSrcIndex == expSrcIndexFromDest) {
|
||||
test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
|
||||
test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength());
|
||||
test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex());
|
||||
test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex());
|
||||
test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex());
|
||||
if (!withUnchanged) {
|
||||
// For some iterators, move past the current range
|
||||
// so that findDestinationIndex() has to look before the current index.
|
||||
ei2.next(errorCode);
|
||||
ei2.next(errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
// Span starts.
|
||||
test.assertEquals(name + u":" + __LINE__, expDestIndexFromSrc,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
|
||||
test.assertEquals(name + u":" + __LINE__, expSrcIndexFromDest,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
|
||||
|
||||
// Inside unchanged span map offsets 1:1.
|
||||
if (!expect.change && expect.oldLength >= 2) {
|
||||
test.assertEquals(name + u":" + __LINE__, expDestIndex + 1,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
|
||||
test.assertEquals(name + u":" + __LINE__, expSrcIndex + 1,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
|
||||
}
|
||||
|
||||
// Inside change span map to the span limit.
|
||||
int32_t expSrcLimit = expSrcIndex + expect.oldLength;
|
||||
int32_t expDestLimit = expDestIndex + expect.newLength;
|
||||
if (expect.change) {
|
||||
if (expect.oldLength >= 2) {
|
||||
test.assertEquals(name + u":" + __LINE__, expDestLimit,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
|
||||
}
|
||||
if (expect.newLength >= 2) {
|
||||
test.assertEquals(name + u":" + __LINE__, expSrcLimit,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
|
||||
}
|
||||
}
|
||||
|
||||
expSrcIndex = expSrcLimit;
|
||||
expDestIndex = expDestLimit;
|
||||
if (expect.change) {
|
||||
expReplIndex += expect.newLength;
|
||||
}
|
||||
if (expect.newLength > 0) {
|
||||
expSrcIndexFromDest = expSrcIndex;
|
||||
}
|
||||
if (expect.oldLength > 0) {
|
||||
expDestIndexFromSrc = expDestIndex;
|
||||
}
|
||||
}
|
||||
UnicodeString msg = UnicodeString(name).append(u" end");
|
||||
test.assertFalse(msg, ei1.next(errorCode));
|
||||
test.assertFalse(msg, ei1.hasChange());
|
||||
test.assertEquals(msg, 0, ei1.oldLength());
|
||||
test.assertEquals(msg, 0, ei1.newLength());
|
||||
test.assertEquals(msg, expSrcIndex, ei1.sourceIndex());
|
||||
test.assertEquals(msg, expDestIndex, ei1.destinationIndex());
|
||||
test.assertEquals(msg, expReplIndex, ei1.replacementIndex());
|
||||
test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode));
|
||||
test.assertFalse(msg + u":" + __LINE__, ei1.hasChange());
|
||||
test.assertEquals(msg + u":" + __LINE__, 0, ei1.oldLength());
|
||||
test.assertEquals(msg + u":" + __LINE__, 0, ei1.newLength());
|
||||
test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex());
|
||||
test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex());
|
||||
test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
|
||||
|
||||
test.assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode));
|
||||
test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
|
||||
test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
|
||||
test.assertEquals(name + u":" + __LINE__, expDestIndex,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
|
||||
test.assertEquals(name + u":" + __LINE__, expSrcIndex,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
|
||||
}
|
||||
|
|
|
@ -1562,6 +1562,8 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
|
|||
{ TRUE, 6, 3 }, // 가\u3133→ 갃
|
||||
{ FALSE, 2, 2 } // 2 spaces
|
||||
};
|
||||
assertTrue("normalizeUTF8 with Edits hasChanges", edits.hasChanges());
|
||||
assertEquals("normalizeUTF8 with Edits numberOfChanges", 9, edits.numberOfChanges());
|
||||
TestUtility::checkEditsIter(*this, u"normalizeUTF8 with Edits",
|
||||
edits.getFineIterator(), edits.getFineIterator(),
|
||||
expectedChanges, UPRV_LENGTHOF(expectedChanges),
|
||||
|
@ -1577,6 +1579,8 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
|
|||
nfkc_cf->normalizeUTF8(U_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);
|
||||
assertSuccess("normalizeUTF8 omit unchanged", errorCode.get());
|
||||
assertEquals("normalizeUTF8 omit unchanged", expected.c_str(), result.c_str());
|
||||
assertTrue("normalizeUTF8 omit unchanged hasChanges", edits.hasChanges());
|
||||
assertEquals("normalizeUTF8 omit unchanged numberOfChanges", 9, edits.numberOfChanges());
|
||||
TestUtility::checkEditsIter(*this, u"normalizeUTF8 omit unchanged",
|
||||
edits.getFineIterator(), edits.getFineIterator(),
|
||||
expectedChanges, UPRV_LENGTHOF(expectedChanges),
|
||||
|
@ -1604,6 +1608,8 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
|
|||
{ TRUE, 6, 3 }, // 가\u3133→ 갃
|
||||
{ FALSE, 2, 2 } // 2 spaces
|
||||
};
|
||||
assertTrue("filtered normalizeUTF8 hasChanges", edits.hasChanges());
|
||||
assertEquals("filtered normalizeUTF8 numberOfChanges", 7, edits.numberOfChanges());
|
||||
TestUtility::checkEditsIter(*this, u"filtered normalizeUTF8",
|
||||
edits.getFineIterator(), edits.getFineIterator(),
|
||||
filteredChanges, UPRV_LENGTHOF(filteredChanges),
|
||||
|
@ -1621,6 +1627,8 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
|
|||
fn2.normalizeUTF8(U_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);
|
||||
assertSuccess("filtered normalizeUTF8 omit unchanged", errorCode.get());
|
||||
assertEquals("filtered normalizeUTF8 omit unchanged", expected.c_str(), result.c_str());
|
||||
assertTrue("filtered normalizeUTF8 omit unchanged hasChanges", edits.hasChanges());
|
||||
assertEquals("filtered normalizeUTF8 omit unchanged numberOfChanges", 7, edits.numberOfChanges());
|
||||
TestUtility::checkEditsIter(*this, u"filtered normalizeUTF8 omit unchanged",
|
||||
edits.getFineIterator(), edits.getFineIterator(),
|
||||
filteredChanges, UPRV_LENGTHOF(filteredChanges),
|
||||
|
|
|
@ -36,6 +36,7 @@ public final class Edits {
|
|||
private char[] array;
|
||||
private int length;
|
||||
private int delta;
|
||||
private int numChanges;
|
||||
|
||||
/**
|
||||
* Constructs an empty object.
|
||||
|
@ -52,7 +53,7 @@ public final class Edits {
|
|||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public void reset() {
|
||||
length = delta = 0;
|
||||
length = delta = numChanges = 0;
|
||||
}
|
||||
|
||||
private void setLastUnit(int last) {
|
||||
|
@ -105,6 +106,7 @@ public final class Edits {
|
|||
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
|
||||
// Replacement of short oldLength text units by same-length new text.
|
||||
// Merge into previous short-replacement record, if any.
|
||||
++numChanges;
|
||||
int last = lastUnit();
|
||||
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
|
||||
(last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
|
||||
|
@ -123,6 +125,7 @@ public final class Edits {
|
|||
if (oldLength == 0 && newLength == 0) {
|
||||
return;
|
||||
}
|
||||
++numChanges;
|
||||
int newDelta = newLength - oldLength;
|
||||
if (newDelta != 0) {
|
||||
if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
|
||||
|
@ -202,17 +205,14 @@ public final class Edits {
|
|||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public boolean hasChanges() {
|
||||
if (delta != 0) {
|
||||
return true;
|
||||
}
|
||||
for (int i = 0; i < length; ++i) {
|
||||
if (array[i] > MAX_UNCHANGED) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
public boolean hasChanges() { return numChanges != 0; }
|
||||
|
||||
/**
|
||||
* @return the number of change edits
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public int numberOfChanges() { return numChanges; }
|
||||
|
||||
/**
|
||||
* Access to the list of edits.
|
||||
|
@ -374,38 +374,162 @@ public final class Edits {
|
|||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public boolean findSourceIndex(int i) {
|
||||
if (i < 0) { return false; }
|
||||
if (i < srcIndex) {
|
||||
return findIndex(i, true) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the edit that contains the destination index.
|
||||
* The destination index may be found in a non-change
|
||||
* even if normal iteration would skip non-changes.
|
||||
* Normal iteration can continue from a found edit.
|
||||
*
|
||||
* <p>The iterator state before this search logically does not matter.
|
||||
* (It may affect the performance of the search.)
|
||||
*
|
||||
* <p>The iterator state after this search is undefined
|
||||
* if the destination index is out of bounds for the destination string.
|
||||
*
|
||||
* @param i destination index
|
||||
* @return true if the edit for the destination index was found
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public boolean findDestinationIndex(int i) {
|
||||
return findIndex(i, false) == 0;
|
||||
}
|
||||
|
||||
/** @return -1: error or i<0; 0: found; 1: i>=string length */
|
||||
private int findIndex(int i, boolean findSource) {
|
||||
if (i < 0) { return -1; }
|
||||
int spanStart, spanLength;
|
||||
if (findSource) { // find source index
|
||||
spanStart = srcIndex;
|
||||
spanLength = oldLength_;
|
||||
} else { // find destination index
|
||||
spanStart = destIndex;
|
||||
spanLength = newLength_;
|
||||
}
|
||||
// If we are at the start or limit of an empty span, then we search from
|
||||
// the start of the string so that we always return
|
||||
// the first of several consecutive empty spans, for consistent results.
|
||||
// We do not currently track the properties of the previous span,
|
||||
// so for now we always reset if we are at the start of the current span.
|
||||
if (i <= spanStart) {
|
||||
// Reset the iterator to the start.
|
||||
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
|
||||
} else if (i < (srcIndex + oldLength_)) {
|
||||
} else if (i < (spanStart + spanLength)) {
|
||||
// The index is in the current span.
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
while (next(false)) {
|
||||
if (i < (srcIndex + oldLength_)) {
|
||||
// The index is in the current span.
|
||||
return true;
|
||||
if (findSource) {
|
||||
spanStart = srcIndex;
|
||||
spanLength = oldLength_;
|
||||
} else {
|
||||
spanStart = destIndex;
|
||||
spanLength = newLength_;
|
||||
}
|
||||
if (i == spanStart || i < (spanStart + spanLength)) {
|
||||
// The index is in the current span, or at an empty one.
|
||||
return 0;
|
||||
}
|
||||
if (remaining > 0) {
|
||||
// Is the index in one of the remaining compressed edits?
|
||||
// srcIndex is the start of the current span, before the remaining ones.
|
||||
int len = (remaining + 1) * oldLength_;
|
||||
if (i < (srcIndex + len)) {
|
||||
int n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining
|
||||
len = n * oldLength_;
|
||||
// spanStart is the start of the current span, before the remaining ones.
|
||||
int len = (remaining + 1) * spanLength;
|
||||
if (i < (spanStart + len)) {
|
||||
int n = (i - spanStart) / spanLength; // 1 <= n <= remaining
|
||||
len = n * spanLength;
|
||||
srcIndex += len;
|
||||
replIndex += len;
|
||||
destIndex += len;
|
||||
remaining -= n;
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
// Make next() skip all of these edits at once.
|
||||
oldLength_ = newLength_ = len;
|
||||
remaining = 0;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the destination index corresponding to the given source index.
|
||||
* If the source index is inside a change edit (not at its start),
|
||||
* then the destination index at the end of that edit is returned,
|
||||
* since there is no information about index mapping inside a change edit.
|
||||
*
|
||||
* <p>(This means that indexes to the start and middle of an edit,
|
||||
* for example around a grapheme cluster, are mapped to indexes
|
||||
* encompassing the entire edit.
|
||||
* The alternative, mapping an interior index to the start,
|
||||
* would map such an interval to an empty one.)
|
||||
*
|
||||
* <p>This operation will usually but not always modify this object.
|
||||
* The iterator state after this search is undefined.
|
||||
*
|
||||
* @param i source index
|
||||
* @return destination index; undefined if i is not 0..string length
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public int destinationIndexFromSourceIndex(int i) {
|
||||
int where = findIndex(i, true);
|
||||
if (where < 0) {
|
||||
// Error or before the string.
|
||||
return 0;
|
||||
}
|
||||
if (where > 0 || i == srcIndex) {
|
||||
// At or after string length, or at start of the found span.
|
||||
return destIndex;
|
||||
}
|
||||
if (changed) {
|
||||
// In a change span, map to its end.
|
||||
return destIndex + newLength_;
|
||||
} else {
|
||||
// In an unchanged span, offset 1:1 within it.
|
||||
return destIndex + (i - srcIndex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the source index corresponding to the given destination index.
|
||||
* If the destination index is inside a change edit (not at its start),
|
||||
* then the source index at the end of that edit is returned,
|
||||
* since there is no information about index mapping inside a change edit.
|
||||
*
|
||||
* <p>(This means that indexes to the start and middle of an edit,
|
||||
* for example around a grapheme cluster, are mapped to indexes
|
||||
* encompassing the entire edit.
|
||||
* The alternative, mapping an interior index to the start,
|
||||
* would map such an interval to an empty one.)
|
||||
*
|
||||
* <p>This operation will usually but not always modify this object.
|
||||
* The iterator state after this search is undefined.
|
||||
*
|
||||
* @param i destination index
|
||||
* @return source index; undefined if i is not 0..string length
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public int sourceIndexFromDestinationIndex(int i) {
|
||||
int where = findIndex(i, false);
|
||||
if (where < 0) {
|
||||
// Error or before the string.
|
||||
return 0;
|
||||
}
|
||||
if (where > 0 || i == destIndex) {
|
||||
// At or after string length, or at start of the found span.
|
||||
return srcIndex;
|
||||
}
|
||||
if (changed) {
|
||||
// In a change span, map to its end.
|
||||
return srcIndex + oldLength_;
|
||||
} else {
|
||||
// In an unchanged span, offset within it.
|
||||
return srcIndex + (i - destIndex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -781,10 +781,13 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
String name, Edits.Iterator ei1, Edits.Iterator ei2, // two equal iterators
|
||||
EditChange[] expected, boolean withUnchanged) {
|
||||
assertFalse(name, ei2.findSourceIndex(-1));
|
||||
assertFalse(name, ei2.findDestinationIndex(-1));
|
||||
|
||||
int expSrcIndex = 0;
|
||||
int expDestIndex = 0;
|
||||
int expReplIndex = 0;
|
||||
int expSrcIndexFromDest = 0; // for sourceIndexFromDestinationIndex()
|
||||
int expDestIndexFromSrc = 0; // for destinationIndexFromSourceIndex()
|
||||
for (int expIndex = 0; expIndex < expected.length; ++expIndex) {
|
||||
EditChange expect = expected[expIndex];
|
||||
String msg = name + ' ' + expIndex;
|
||||
|
@ -798,7 +801,7 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
assertEquals(msg, expReplIndex, ei1.replacementIndex());
|
||||
}
|
||||
|
||||
if (expect.oldLength > 0) {
|
||||
if (expect.oldLength > 0 && expDestIndex == expDestIndexFromSrc) {
|
||||
assertTrue(msg, ei2.findSourceIndex(expSrcIndex));
|
||||
assertEquals(msg, expect.change, ei2.hasChange());
|
||||
assertEquals(msg, expect.oldLength, ei2.oldLength());
|
||||
|
@ -814,11 +817,61 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
}
|
||||
}
|
||||
|
||||
expSrcIndex += expect.oldLength;
|
||||
expDestIndex += expect.newLength;
|
||||
if (expect.newLength > 0 && expSrcIndex == expSrcIndexFromDest) {
|
||||
assertTrue(msg, ei2.findDestinationIndex(expDestIndex));
|
||||
assertEquals(msg, expect.change, ei2.hasChange());
|
||||
assertEquals(msg, expect.oldLength, ei2.oldLength());
|
||||
assertEquals(msg, expect.newLength, ei2.newLength());
|
||||
assertEquals(msg, expSrcIndex, ei2.sourceIndex());
|
||||
assertEquals(msg, expDestIndex, ei2.destinationIndex());
|
||||
assertEquals(msg, expReplIndex, ei2.replacementIndex());
|
||||
if (!withUnchanged) {
|
||||
// For some iterators, move past the current range
|
||||
// so that findSourceIndex() has to look before the current index.
|
||||
ei2.next();
|
||||
ei2.next();
|
||||
}
|
||||
}
|
||||
|
||||
// Span starts.
|
||||
assertEquals(name, expDestIndexFromSrc,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex));
|
||||
assertEquals(name, expSrcIndexFromDest,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex));
|
||||
|
||||
// Inside unchanged span map offsets 1:1.
|
||||
if (!expect.change && expect.oldLength >= 2) {
|
||||
assertEquals(name, expDestIndex + 1,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex + 1));
|
||||
assertEquals(name, expSrcIndex + 1,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex + 1));
|
||||
}
|
||||
|
||||
// Inside change span map to the span limit.
|
||||
int expSrcLimit = expSrcIndex + expect.oldLength;
|
||||
int expDestLimit = expDestIndex + expect.newLength;
|
||||
if (expect.change) {
|
||||
if (expect.oldLength >= 2) {
|
||||
assertEquals(name, expDestLimit,
|
||||
ei2.destinationIndexFromSourceIndex(expSrcIndex + 1));
|
||||
}
|
||||
if (expect.newLength >= 2) {
|
||||
assertEquals(name, expSrcLimit,
|
||||
ei2.sourceIndexFromDestinationIndex(expDestIndex + 1));
|
||||
}
|
||||
}
|
||||
|
||||
expSrcIndex = expSrcLimit;
|
||||
expDestIndex = expDestLimit;
|
||||
if (expect.change) {
|
||||
expReplIndex += expect.newLength;
|
||||
}
|
||||
if (expect.newLength > 0) {
|
||||
expSrcIndexFromDest = expSrcIndex;
|
||||
}
|
||||
if (expect.oldLength > 0) {
|
||||
expDestIndexFromSrc = expDestIndex;
|
||||
}
|
||||
}
|
||||
String msg = name + " end";
|
||||
assertFalse(msg, ei1.next());
|
||||
|
@ -830,18 +883,23 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
assertEquals(msg, expReplIndex, ei1.replacementIndex());
|
||||
|
||||
assertFalse(name, ei2.findSourceIndex(expSrcIndex));
|
||||
assertFalse(name, ei2.findDestinationIndex(expDestIndex));
|
||||
assertEquals(name, expDestIndex, ei2.destinationIndexFromSourceIndex(expSrcIndex));
|
||||
assertEquals(name, expSrcIndex, ei2.sourceIndexFromDestinationIndex(expDestIndex));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestEdits() {
|
||||
Edits edits = new Edits();
|
||||
assertFalse("new Edits", edits.hasChanges());
|
||||
assertFalse("new Edits hasChanges", edits.hasChanges());
|
||||
assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
|
||||
assertEquals("new Edits", 0, edits.lengthDelta());
|
||||
edits.addUnchanged(1); // multiple unchanged ranges are combined
|
||||
edits.addUnchanged(10000); // too long, and they are split
|
||||
edits.addReplace(0, 0);
|
||||
edits.addUnchanged(2);
|
||||
assertFalse("unchanged 10003", edits.hasChanges());
|
||||
assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
|
||||
assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
|
||||
assertEquals("unchanged 10003", 0, edits.lengthDelta());
|
||||
edits.addReplace(1, 1); // multiple short equal-length edits are compressed
|
||||
edits.addUnchanged(0);
|
||||
|
@ -851,7 +909,8 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
edits.addReplace(100, 0);
|
||||
edits.addReplace(3000, 4000); // variable-length encoding
|
||||
edits.addReplace(100000, 100000);
|
||||
assertTrue("some edits", edits.hasChanges());
|
||||
assertTrue("some edits hasChanges", edits.hasChanges());
|
||||
assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
|
||||
assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
|
||||
|
||||
EditChange[] coarseExpectedChanges = new EditChange[] {
|
||||
|
@ -883,7 +942,8 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
fineExpectedChanges, false);
|
||||
|
||||
edits.reset();
|
||||
assertFalse("reset", edits.hasChanges());
|
||||
assertFalse("reset hasChanges", edits.hasChanges());
|
||||
assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
|
||||
assertEquals("reset", 0, edits.lengthDelta());
|
||||
Edits.Iterator ei = edits.getCoarseChangesIterator();
|
||||
assertFalse("reset then iterator", ei.next());
|
||||
|
|
Loading…
Add table
Reference in a new issue