ICU-13629 Improving API docs for Edits and Edits.Iterator.

X-SVN-Rev: 41363
This commit is contained in:
Shane Carr 2018-05-09 00:06:53 +00:00
parent 1fe1497d88
commit 5c969e791f
5 changed files with 595 additions and 46 deletions

View file

@ -4,10 +4,12 @@
// edits.cpp
// created: 2017feb08 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/edits.h"
#include "unicode/unistr.h"
#include "unicode/utypes.h"
#include "cmemory.h"
#include "uassert.h"
#include "util.h"
U_NAMESPACE_BEGIN
@ -773,4 +775,29 @@ int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &
}
}
/**
 * Appends a debugging description of the edit this iterator currently points to.
 * A change edit is rendered as "{ src[a..b] ⇝ dest[c..d], repl[e..f] }";
 * a no-change edit is rendered as "{ src[a..b] ≡ dest[c..d] (no-change) }".
 *
 * @param sb the string that receives the description
 * @return sb
 */
UnicodeString& Edits::Iterator::toString(UnicodeString& sb) const {
    // Source span.
    sb.append(u"{ src[", -1);
    ICU_Utility::appendNumber(sb, srcIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, srcIndex + oldLength_);

    // Destination span; the connector distinguishes change edits from no-change edits.
    sb.append(changed ? u"] ⇝ dest[" : u"] ≡ dest[", -1);
    ICU_Utility::appendNumber(sb, destIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, destIndex + newLength_);

    if (!changed) {
        sb.append(u"] (no-change) }", -1);
        return sb;
    }

    // Replacement span: reported for change edits only.
    sb.append(u"], repl[", -1);
    ICU_Utility::appendNumber(sb, replIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, replIndex + newLength_);
    sb.append(u"] }", -1);
    return sb;
}
U_NAMESPACE_END

View file

@ -17,10 +17,57 @@
U_NAMESPACE_BEGIN
class UnicodeString;
/**
* Records lengths of string edits but not replacement text.
* Supports replacements, insertions, deletions in linear progression.
* Does not support moving/reordering of text.
* Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
* in linear progression. Does not support moving/reordering of text.
*
* There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
* instances of this class using {@link #addReplace(int, int)} (for change edits) and
* {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity,
* whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
* mapping between code points in the source and destination strings.
*
* After all edits have been added, instances of this class should be considered immutable, and an
* {@link Edits::Iterator} can be used for queries.
*
* There are four flavors of Edits::Iterator:
*
* <ul>
* <li>{@link #getFineIterator()} retains full granularity of change edits.
* <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
* next() on the iterator, skips over no-change edits (unchanged regions).
* <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
* edits are automatically merged during the construction phase.)
* <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
* calling next() on the iterator, skips over no-change edits (unchanged regions).
* </ul>
*
* For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
* following fine edits:
* <ul>
* <li>abc ⇨ abc (no-change)
* <li>ß ⇨ ss (change)
* <li>D ⇨ d (change)
* <li>e ⇨ e (no-change)
* <li>F ⇨ f (change)
* </ul>
* and the following coarse edits (note how adjacent change edits get merged together):
* <ul>
* <li>abc ⇨ abc (no-change)
* <li>ßD ⇨ ssd (change)
* <li>e ⇨ e (no-change)
* <li>F ⇨ f (change)
* </ul>
*
* The "fine changes" and "coarse changes" iterators will step through only the change edits when their
* {@link Edits::Iterator#next()} methods are called. They behave identically to the plain "fine" and
* "coarse" iterators (which do not skip no-change edits) when their
* {@link Edits::Iterator#findSourceIndex(int)} or {@link Edits::Iterator#findDestinationIndex(int)}
* methods are used to walk through the string.
*
* For examples of how to use this class, see the test <code>TestCaseMapEditsIteratorDocs</code> in
* StringCaseTest (C++) or in UCharacterCaseTest.java (Java).
*
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
@ -91,13 +138,13 @@ public:
void reset() U_NOEXCEPT;
/**
* Adds a record for an unchanged segment of text.
* Adds a no-change edit: a record for an unchanged segment of text.
* Normally called from inside ICU string transformation functions, not user code.
* @stable ICU 59
*/
void addUnchanged(int32_t unchangedLength);
/**
* Adds a record for a text replacement/insertion/deletion.
* Adds a change edit: a record for a text replacement/insertion/deletion.
* Normally called from inside ICU string transformation functions, not user code.
* @stable ICU 59
*/
@ -136,6 +183,18 @@ public:
/**
* Access to the list of edits.
*
* At any moment in time, an instance of this class points to a single edit: a "window" into a span
* of the source string and the corresponding span of the destination string. The source string span
* starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
* span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
*
* The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)},
* and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator
* to make it point to the corresponding edit.
*
* For more information, see the documentation for {@link Edits}.
*
* @see getCoarseIterator
* @see getFineIterator
* @stable ICU 59
@ -162,7 +221,7 @@ public:
Iterator &operator=(const Iterator &other) = default;
/**
* Advances to the next edit.
* Advances the iterator to the next edit.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
@ -172,9 +231,9 @@ public:
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
/**
* Finds the edit that contains the source index.
* The source index may be found in a non-change
* even if normal iteration would skip non-changes.
* Moves the iterator to the edit that contains the source index.
* The source index may be found in a no-change edit
* even if normal iteration would skip no-change edits.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
@ -196,9 +255,9 @@ public:
#ifndef U_HIDE_DRAFT_API
/**
* Finds the edit that contains the destination index.
* The destination index may be found in a non-change
* even if normal iteration would skip non-changes.
* Moves the iterator to the edit that contains the destination index.
* The destination index may be found in a no-change edit
* even if normal iteration would skip no-change edits.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
@ -219,7 +278,7 @@ public:
}
/**
* Returns the destination index corresponding to the given source index.
* Computes the destination index corresponding to the given source index.
* If the source index is inside a change edit (not at its start),
* then the destination index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
@ -243,7 +302,7 @@ public:
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
/**
* Returns the source index corresponding to the given destination index.
* Computes the source index corresponding to the given destination index.
* If the destination index is inside a change edit (not at its start),
* then the source index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
@ -268,17 +327,27 @@ public:
#endif // U_HIDE_DRAFT_API
/**
* Returns whether the edit currently represented by the iterator is a change edit.
*
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
* FALSE if oldLength units remain unchanged.
* @stable ICU 59
*/
UBool hasChange() const { return changed; }
/**
* The length of the current span in the source string, which starts at {@link #sourceIndex}.
*
* @return the number of units in the original string which are replaced or remain unchanged.
* @stable ICU 59
*/
int32_t oldLength() const { return oldLength_; }
/**
* The length of the current span in the destination string, which starts at
* {@link #destinationIndex}, or in the replacement string, which starts at
* {@link #replacementIndex}.
*
* @return the number of units in the modified string, if hasChange() is TRUE.
* Same as oldLength if hasChange() is FALSE.
* @stable ICU 59
@ -286,22 +355,47 @@ public:
int32_t newLength() const { return newLength_; }
/**
* The start index of the current span in the source string; the span has length
* {@link #oldLength}.
*
* @return the current index into the source string
* @stable ICU 59
*/
int32_t sourceIndex() const { return srcIndex; }
/**
* The start index of the current span in the replacement string; the span has length
* {@link #newLength}. Well-defined only if the current edit is a change edit.
* <p>
* The <em>replacement string</em> is the concatenation of all substrings of the destination
* string corresponding to change edits.
* <p>
* This method is intended to be used together with operations that write only replacement
* characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified
* in-place.
*
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
* @stable ICU 59
*/
int32_t replacementIndex() const { return replIndex; }
/**
* The start index of the current span in the destination string; the span has length
* {@link #newLength}.
*
* @return the current index into the full destination string
* @stable ICU 59
*/
int32_t destinationIndex() const { return destIndex; }
/**
* A string representation of the current edit represented by the iterator for debugging. You
* should not depend on the contents of the return string.
* @param appendTo The string to which the representation is appended.
* @return appendTo, after the representation has been appended to it.
* @internal
*/
UnicodeString& toString(UnicodeString& appendTo) const;
private:
friend class Edits;
@ -330,8 +424,10 @@ public:
};
/**
* Returns an Iterator for coarse-grained changes for simple string updates.
* Skips non-changes.
* Returns an Iterator for coarse-grained change edits
* (adjacent change edits are treated as one).
* Can be used to perform simple string updates.
* Skips no-change edits.
* @return an Iterator that merges adjacent changes.
* @stable ICU 59
*/
@ -340,7 +436,10 @@ public:
}
/**
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
* Returns an Iterator for coarse-grained change and no-change edits
* (adjacent change edits are treated as one).
* Can be used to perform simple string updates.
* Adjacent change edits are treated as one edit.
* @return an Iterator that merges adjacent changes.
* @stable ICU 59
*/
@ -349,8 +448,10 @@ public:
}
/**
* Returns an Iterator for fine-grained changes for modifying styled text.
* Skips non-changes.
* Returns an Iterator for fine-grained change edits
* (full granularity of change edits is retained).
* Can be used for modifying styled text.
* Skips no-change edits.
* @return an Iterator that separates adjacent changes.
* @stable ICU 59
*/
@ -359,7 +460,9 @@ public:
}
/**
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
* Returns an Iterator for fine-grained change and no-change edits
* (full granularity of change edits is retained).
* Can be used for modifying styled text.
* @return an Iterator that separates adjacent changes.
* @stable ICU 59
*/

View file

@ -67,6 +67,7 @@ public:
void TestLongUnicodeString();
void TestBug13127();
void TestInPlaceTitle();
void TestCaseMapEditsIteratorDocs();
private:
void assertGreekUpper(const char16_t *s, const char16_t *expected);
@ -111,6 +112,7 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestBug13127);
TESTCASE_AUTO(TestInPlaceTitle);
#endif
TESTCASE_AUTO(TestCaseMapEditsIteratorDocs);
TESTCASE_AUTO_END;
}
@ -1532,4 +1534,154 @@ void StringCaseTest::TestInPlaceTitle() {
assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length);
assertEquals("u_strToTitle(in-place)", expected, s);
}
void StringCaseTest::TestCaseMapEditsIteratorDocs() {
IcuTestErrorCode status(*this, "TestCaseMapEditsIteratorDocs");
const char16_t* input = u"abcßDeF";
int32_t inputLength = u_strlen(input);
// output: "abcssdef"
char16_t output[10];
Edits edits;
CaseMap::fold(0, input, -1, output, 10, &edits, status);
static const char16_t* fineIteratorExpected[] = {
u"{ src[0..3] ≡ dest[0..3] (no-change) }",
u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
u"{ src[5..6] ≡ dest[6..7] (no-change) }",
u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
};
static const char16_t* fineChangesIteratorExpected[] = {
u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
};
static const char16_t* coarseIteratorExpected[] = {
u"{ src[0..3] ≡ dest[0..3] (no-change) }",
u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
u"{ src[5..6] ≡ dest[6..7] (no-change) }",
u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
};
static const char16_t* coarseChangesIteratorExpected[] = {
u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
};
// Expected destination indices when source index is queried
static int32_t expectedDestFineEditIndices[] = {0, 0, 0, 3, 5, 6, 7};
static int32_t expectedDestCoarseEditIndices[] = {0, 0, 0, 3, 3, 6, 7};
static int32_t expectedDestFineStringIndices[] = {0, 1, 2, 3, 5, 6, 7};
static int32_t expectedDestCoarseStringIndices[] = {0, 1, 2, 3, 6, 6, 7};
// Expected source indices when destination index is queried
static int32_t expectedSrcFineEditIndices[] = { 0, 0, 0, 3, 3, 4, 5, 6 };
static int32_t expectedSrcCoarseEditIndices[] = { 0, 0, 0, 3, 3, 3, 5, 6 };
static int32_t expectedSrcFineStringIndices[] = { 0, 1, 2, 3, 4, 4, 5, 6 };
static int32_t expectedSrcCoarseStringIndices[] = { 0, 1, 2, 3, 5, 5, 5, 6 };
// Demonstrate the iterator next() method:
Edits::Iterator fineIterator = edits.getFineIterator();
int i = 0;
UnicodeString toString;
while (fineIterator.next(status)) {
UnicodeString expected = fineIteratorExpected[i++];
assertEquals(UnicodeString(u"Iteration #") + i,
expected,
fineIterator.toString(toString.remove()));
}
Edits::Iterator fineChangesIterator = edits.getFineChangesIterator();
i = 0;
while (fineChangesIterator.next(status)) {
UnicodeString expected = fineChangesIteratorExpected[i++];
assertEquals(UnicodeString(u"Iteration #") + i,
expected,
fineChangesIterator.toString(toString.remove()));
}
Edits::Iterator coarseIterator = edits.getCoarseIterator();
i = 0;
while (coarseIterator.next(status)) {
UnicodeString expected = coarseIteratorExpected[i++];
assertEquals(UnicodeString(u"Iteration #") + i,
expected,
coarseIterator.toString(toString.remove()));
}
Edits::Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
i = 0;
while (coarseChangesIterator.next(status)) {
UnicodeString expected = coarseChangesIteratorExpected[i++];
assertEquals(UnicodeString(u"Iteration #") + i,
expected,
coarseChangesIterator.toString(toString.remove()));
}
// Demonstrate the iterator indexing methods:
// fineIterator should have the same behavior as fineChangesIterator, and
// coarseIterator should have the same behavior as coarseChangesIterator.
for (int32_t srcIndex=0; srcIndex<inputLength; srcIndex++) {
fineIterator.findSourceIndex(srcIndex, status);
fineChangesIterator.findSourceIndex(srcIndex, status);
coarseIterator.findSourceIndex(srcIndex, status);
coarseChangesIterator.findSourceIndex(srcIndex, status);
assertEquals(UnicodeString("Source index: ") + srcIndex,
expectedDestFineEditIndices[srcIndex],
fineIterator.destinationIndex());
assertEquals(UnicodeString("Source index: ") + srcIndex,
expectedDestFineEditIndices[srcIndex],
fineChangesIterator.destinationIndex());
assertEquals(UnicodeString("Source index: ") + srcIndex,
expectedDestCoarseEditIndices[srcIndex],
coarseIterator.destinationIndex());
assertEquals(UnicodeString("Source index: ") + srcIndex,
expectedDestCoarseEditIndices[srcIndex],
coarseChangesIterator.destinationIndex());
assertEquals(UnicodeString("Source index: ") + srcIndex,
expectedDestFineStringIndices[srcIndex],
fineIterator.destinationIndexFromSourceIndex(srcIndex, status));
assertEquals(UnicodeString("Source index: ") + srcIndex,
expectedDestFineStringIndices[srcIndex],
fineChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
assertEquals(UnicodeString("Source index: ") + srcIndex,
expectedDestCoarseStringIndices[srcIndex],
coarseIterator.destinationIndexFromSourceIndex(srcIndex, status));
assertEquals(UnicodeString("Source index: ") + srcIndex,
expectedDestCoarseStringIndices[srcIndex],
coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
}
for (int32_t destIndex=0; destIndex<inputLength; destIndex++) {
fineIterator.findDestinationIndex(destIndex, status);
fineChangesIterator.findDestinationIndex(destIndex, status);
coarseIterator.findDestinationIndex(destIndex, status);
coarseChangesIterator.findDestinationIndex(destIndex, status);
assertEquals(UnicodeString("Destination index: ") + destIndex,
expectedSrcFineEditIndices[destIndex],
fineIterator.sourceIndex());
assertEquals(UnicodeString("Destination index: ") + destIndex,
expectedSrcFineEditIndices[destIndex],
fineChangesIterator.sourceIndex());
assertEquals(UnicodeString("Destination index: ") + destIndex,
expectedSrcCoarseEditIndices[destIndex],
coarseIterator.sourceIndex());
assertEquals(UnicodeString("Destination index: ") + destIndex,
expectedSrcCoarseEditIndices[destIndex],
coarseChangesIterator.sourceIndex());
assertEquals(UnicodeString("Destination index: ") + destIndex,
expectedSrcFineStringIndices[destIndex],
fineIterator.sourceIndexFromDestinationIndex(destIndex, status));
assertEquals(UnicodeString("Destination index: ") + destIndex,
expectedSrcFineStringIndices[destIndex],
fineChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
assertEquals(UnicodeString("Destination index: ") + destIndex,
expectedSrcCoarseStringIndices[destIndex],
coarseIterator.sourceIndexFromDestinationIndex(destIndex, status));
assertEquals(UnicodeString("Destination index: ") + destIndex,
expectedSrcCoarseStringIndices[destIndex],
coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
}
}
#endif

View file

@ -6,9 +6,54 @@ import java.nio.BufferOverflowException;
import java.util.Arrays;
/**
* Records lengths of string edits but not replacement text.
* Supports replacements, insertions, deletions in linear progression.
* Does not support moving/reordering of text.
* Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
* in linear progression. Does not support moving/reordering of text.
* <p>
* There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
* instances of this class using {@link #addReplace(int, int)} (for change edits) and
* {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity,
* whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
* mapping between code points in the source and destination strings.
* <p>
* After all edits have been added, instances of this class should be considered immutable, and an
* {@link Edits.Iterator} can be used for queries.
* <p>
* There are four flavors of Edits.Iterator:
* <p>
* <ul>
* <li>{@link #getFineIterator()} retains full granularity of change edits.
* <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
* next() on the iterator, skips over no-change edits (unchanged regions).
* <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
* edits are automatically merged during the construction phase.)
* <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
* calling next() on the iterator, skips over no-change edits (unchanged regions).
* </ul>
* <p>
* For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
* following fine edits:
* <ul>
* <li>abc ⇨ abc (no-change)
* <li>ß ⇨ ss (change)
* <li>D ⇨ d (change)
* <li>e ⇨ e (no-change)
* <li>F ⇨ f (change)
* </ul>
* and the following coarse edits (note how adjacent change edits get merged together):
* <ul>
* <li>abc ⇨ abc (no-change)
* <li>ßD ⇨ ssd (change)
* <li>e ⇨ e (no-change)
* <li>F ⇨ f (change)
* </ul>
* <p>
* The "fine changes" and "coarse changes" iterators will step through only the change edits when their
* {@link Edits.Iterator#next()} methods are called. They behave identically to the plain "fine" and
* "coarse" iterators (which do not skip no-change edits) when their
* {@link Edits.Iterator#findSourceIndex(int)} or {@link Edits.Iterator#findDestinationIndex(int)}
* methods are used to walk through the string.
* <p>
* For examples of how to use this class, see the test <code>TestCaseMapEditsIteratorDocs</code> in
* UCharacterCaseTest.java.
*
* @stable ICU 59
*/
@ -61,7 +106,7 @@ public final class Edits {
}
/**
* Adds a record for an unchanged segment of text.
* Adds a no-change edit: a record for an unchanged segment of text.
* Normally called from inside ICU string transformation functions, not user code.
* @stable ICU 59
*/
@ -93,7 +138,7 @@ public final class Edits {
}
/**
* Adds a record for a text replacement/insertion/deletion.
* Adds a change edit: a record for a text replacement/insertion/deletion.
* Normally called from inside ICU string transformation functions, not user code.
* @stable ICU 59
*/
@ -210,6 +255,20 @@ public final class Edits {
/**
* Access to the list of edits.
* <p>
* At any moment in time, an instance of this class points to a single edit: a "window" into a span
* of the source string and the corresponding span of the destination string. The source string span
* starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
* span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
* <p>
* The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)},
* and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator
* to make it point to the corresponding edit.
* <p>
* For more information, see the documentation for {@link Edits}.
* <p>
* Note: Although this class is called "Iterator", it does not implement {@link java.util.Iterator}.
*
* @see #getCoarseIterator
* @see #getFineIterator
* @stable ICU 59
@ -281,7 +340,7 @@ public final class Edits {
}
/**
* Advances to the next edit.
* Advances the iterator to the next edit.
* @return true if there is another edit
* @stable ICU 59
*/
@ -489,9 +548,9 @@ public final class Edits {
}
/**
* Finds the edit that contains the source index.
* The source index may be found in a non-change
* even if normal iteration would skip non-changes.
* Moves the iterator to the edit that contains the source index.
* The source index may be found in a no-change edit
* even if normal iteration would skip no-change edits.
* Normal iteration can continue from a found edit.
*
* <p>The iterator state before this search logically does not matter.
@ -509,9 +568,9 @@ public final class Edits {
}
/**
* Finds the edit that contains the destination index.
* The destination index may be found in a non-change
* even if normal iteration would skip non-changes.
* Moves the iterator to the edit that contains the destination index.
* The destination index may be found in a no-change edit
* even if normal iteration would skip no-change edits.
* Normal iteration can continue from a found edit.
*
* <p>The iterator state before this search logically does not matter.
@ -617,7 +676,7 @@ public final class Edits {
}
/**
* Returns the destination index corresponding to the given source index.
* Computes the destination index corresponding to the given source index.
* If the source index is inside a change edit (not at its start),
* then the destination index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
@ -656,7 +715,7 @@ public final class Edits {
}
/**
* Returns the source index corresponding to the given destination index.
* Computes the source index corresponding to the given destination index.
* If the destination index is inside a change edit (not at its start),
* then the source index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
@ -695,44 +754,106 @@ public final class Edits {
}
/**
* Returns whether the edit currently represented by the iterator is a change edit.
*
* @return true if this edit replaces oldLength() units with newLength() different ones.
* false if oldLength units remain unchanged.
* @stable ICU 59
*/
public boolean hasChange() { return changed; }
/**
* @return the number of units in the original string which are replaced or remain unchanged.
* The length of the current span in the source string, which starts at {@link #sourceIndex}.
*
* @return the number of units in the source string which are replaced or remain unchanged.
* @stable ICU 59
*/
public int oldLength() { return oldLength_; }
/**
* @return the number of units in the modified string, if hasChange() is true.
* Same as oldLength if hasChange() is false.
* The length of the current span in the destination string, which starts at
* {@link #destinationIndex}, or in the replacement string, which starts at
* {@link #replacementIndex}.
*
* @return the number of units in the destination string, if hasChange() is true. Same as
* oldLength if hasChange() is false.
* @stable ICU 59
*/
public int newLength() { return newLength_; }
/**
* The start index of the current span in the source string; the span has length
* {@link #oldLength}.
*
* @return the current index into the source string
* @stable ICU 59
*/
public int sourceIndex() { return srcIndex; }
/**
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
* The start index of the current span in the replacement string; the span has length
* {@link #newLength}. Well-defined only if the current edit is a change edit.
* <p>
* The <em>replacement string</em> is the concatenation of all substrings of the destination
* string corresponding to change edits.
* <p>
* This method is intended to be used together with operations that write only replacement
* characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified
* in-place.
*
* @return the current index into the replacement-characters-only string, not counting unchanged
* spans
* @stable ICU 59
*/
public int replacementIndex() { return replIndex; }
/**
* The start index of the current span in the destination string; the span has length
* {@link #newLength}.
*
* @return the current index into the full destination string
* @stable ICU 59
*/
public int destinationIndex() { return destIndex; }
/**
 * Describes, for debugging, the edit at which this iterator is currently positioned.
 * The text starts with the superclass's <code>toString()</code> result, followed by the
 * source and destination spans and, for change edits, the replacement span.
 * Callers should not depend on the exact contents of the returned string.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(super.toString());

    // Source span.
    text.append("{ src[").append(srcIndex).append("..").append(srcIndex + oldLength_);

    // Destination span; the connector distinguishes change edits from no-change edits.
    text.append(changed ? "] ⇝ dest[" : "] ≡ dest[");
    text.append(destIndex).append("..").append(destIndex + newLength_);

    if (!changed) {
        return text.append("] (no-change) }").toString();
    }

    // Replacement span: reported for change edits only.
    text.append("], repl[").append(replIndex).append("..").append(replIndex + newLength_);
    return text.append("] }").toString();
}
};
/**
* Returns an Iterator for coarse-grained changes for simple string updates.
* Skips non-changes.
* Returns an Iterator for coarse-grained change edits
* (adjacent change edits are treated as one).
* Can be used to perform simple string updates.
* Skips no-change edits.
* @return an Iterator that merges adjacent changes.
* @stable ICU 59
*/
@ -741,7 +862,10 @@ public final class Edits {
}
/**
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
* Returns an Iterator for coarse-grained change and no-change edits
* (adjacent change edits are treated as one).
* Can be used to perform simple string updates.
* Adjacent change edits are treated as one edit.
* @return an Iterator that merges adjacent changes.
* @stable ICU 59
*/
@ -750,8 +874,10 @@ public final class Edits {
}
/**
* Returns an Iterator for fine-grained changes for modifying styled text.
* Skips non-changes.
* Returns an Iterator for fine-grained change edits
* (full granularity of change edits is retained).
* Can be used for modifying styled text.
* Skips no-change edits.
* @return an Iterator that separates adjacent changes.
* @stable ICU 59
*/
@ -760,7 +886,9 @@ public final class Edits {
}
/**
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
* Returns an Iterator for fine-grained change and no-change edits
* (full granularity of change edits is retained).
* Can be used for modifying styled text.
* @return an Iterator that separates adjacent changes.
* @stable ICU 59
*/

View file

@ -1350,6 +1350,145 @@ public final class UCharacterCaseTest extends TestFmwk
CaseMap.fold().turkic().apply("IßtanBul"));
}
/**
 * Exercises the Edits.Iterator behavior on the case folding of "abcßDeF" to "abcssdef":
 * the four iterator flavors (fine, fine-changes, coarse, coarse-changes) are stepped with
 * next(), and then the index lookup methods are checked for every source index and for
 * every destination index.
 */
@Test
public void TestCaseMapEditsIteratorDocs() {
    String input = "abcßDeF";
    // output: "abcssdef"

    StringBuilder sb = new StringBuilder();
    Edits edits = new Edits();
    CaseMap.fold().apply(input, sb, edits);
    // The expectedSrc* tables below have one entry per output char; pin the output so the
    // destination-index loop bound and those tables stay in sync.
    assertEquals("case-folded output", "abcssdef", sb.toString());

    String[] fineIteratorExpected = {
            "{ src[0..3] ≡ dest[0..3] (no-change) }",
            "{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
            "{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
            "{ src[5..6] ≡ dest[6..7] (no-change) }",
            "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
    };
    String[] fineChangesIteratorExpected = {
            "{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
            "{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
            "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
    };
    String[] coarseIteratorExpected = {
            "{ src[0..3] ≡ dest[0..3] (no-change) }",
            "{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
            "{ src[5..6] ≡ dest[6..7] (no-change) }",
            "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
    };
    String[] coarseChangesIteratorExpected = {
            "{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
            "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
    };

    // Expected destination indices when source index is queried
    int[] expectedDestFineEditIndices = {0, 0, 0, 3, 5, 6, 7};
    int[] expectedDestCoarseEditIndices = {0, 0, 0, 3, 3, 6, 7};
    int[] expectedDestFineStringIndices = {0, 1, 2, 3, 5, 6, 7};
    int[] expectedDestCoarseStringIndices = {0, 1, 2, 3, 6, 6, 7};

    // Expected source indices when destination index is queried
    int[] expectedSrcFineEditIndices = { 0, 0, 0, 3, 3, 4, 5, 6 };
    int[] expectedSrcCoarseEditIndices = { 0, 0, 0, 3, 3, 3, 5, 6 };
    int[] expectedSrcFineStringIndices = { 0, 1, 2, 3, 4, 4, 5, 6 };
    int[] expectedSrcCoarseStringIndices = { 0, 1, 2, 3, 5, 5, 5, 6 };

    // Demonstrate the iterator next() method:
    Edits.Iterator fineIterator = edits.getFineIterator();
    int i = 0;
    while (fineIterator.next()) {
        String expected = fineIteratorExpected[i++];
        assertEquals("Iteration #" + i, expected, editsIteratorDescription(fineIterator));
    }
    Edits.Iterator fineChangesIterator = edits.getFineChangesIterator();
    i = 0;
    while (fineChangesIterator.next()) {
        String expected = fineChangesIteratorExpected[i++];
        assertEquals("Iteration #" + i, expected, editsIteratorDescription(fineChangesIterator));
    }
    Edits.Iterator coarseIterator = edits.getCoarseIterator();
    i = 0;
    while (coarseIterator.next()) {
        String expected = coarseIteratorExpected[i++];
        assertEquals("Iteration #" + i, expected, editsIteratorDescription(coarseIterator));
    }
    Edits.Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
    i = 0;
    while (coarseChangesIterator.next()) {
        String expected = coarseChangesIteratorExpected[i++];
        assertEquals("Iteration #" + i, expected, editsIteratorDescription(coarseChangesIterator));
    }

    // Demonstrate the iterator indexing methods:
    // fineIterator should have the same behavior as fineChangesIterator, and
    // coarseIterator should have the same behavior as coarseChangesIterator.
    for (int srcIndex=0; srcIndex<input.length(); srcIndex++) {
        fineIterator.findSourceIndex(srcIndex);
        fineChangesIterator.findSourceIndex(srcIndex);
        coarseIterator.findSourceIndex(srcIndex);
        coarseChangesIterator.findSourceIndex(srcIndex);

        assertEquals("Source index: " + srcIndex,
                expectedDestFineEditIndices[srcIndex],
                fineIterator.destinationIndex());
        assertEquals("Source index: " + srcIndex,
                expectedDestFineEditIndices[srcIndex],
                fineChangesIterator.destinationIndex());
        assertEquals("Source index: " + srcIndex,
                expectedDestCoarseEditIndices[srcIndex],
                coarseIterator.destinationIndex());
        assertEquals("Source index: " + srcIndex,
                expectedDestCoarseEditIndices[srcIndex],
                coarseChangesIterator.destinationIndex());

        assertEquals("Source index: " + srcIndex,
                expectedDestFineStringIndices[srcIndex],
                fineIterator.destinationIndexFromSourceIndex(srcIndex));
        assertEquals("Source index: " + srcIndex,
                expectedDestFineStringIndices[srcIndex],
                fineChangesIterator.destinationIndexFromSourceIndex(srcIndex));
        assertEquals("Source index: " + srcIndex,
                expectedDestCoarseStringIndices[srcIndex],
                coarseIterator.destinationIndexFromSourceIndex(srcIndex));
        assertEquals("Source index: " + srcIndex,
                expectedDestCoarseStringIndices[srcIndex],
                coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex));
    }

    // Walk every destination index. The bound is the output length (8), not the input
    // length (7), so that the last entry of each expectedSrc* table is checked as well.
    for (int destIndex=0; destIndex<sb.length(); destIndex++) {
        fineIterator.findDestinationIndex(destIndex);
        fineChangesIterator.findDestinationIndex(destIndex);
        coarseIterator.findDestinationIndex(destIndex);
        coarseChangesIterator.findDestinationIndex(destIndex);

        assertEquals("Destination index: " + destIndex,
                expectedSrcFineEditIndices[destIndex],
                fineIterator.sourceIndex());
        assertEquals("Destination index: " + destIndex,
                expectedSrcFineEditIndices[destIndex],
                fineChangesIterator.sourceIndex());
        assertEquals("Destination index: " + destIndex,
                expectedSrcCoarseEditIndices[destIndex],
                coarseIterator.sourceIndex());
        assertEquals("Destination index: " + destIndex,
                expectedSrcCoarseEditIndices[destIndex],
                coarseChangesIterator.sourceIndex());

        assertEquals("Destination index: " + destIndex,
                expectedSrcFineStringIndices[destIndex],
                fineIterator.sourceIndexFromDestinationIndex(destIndex));
        assertEquals("Destination index: " + destIndex,
                expectedSrcFineStringIndices[destIndex],
                fineChangesIterator.sourceIndexFromDestinationIndex(destIndex));
        assertEquals("Destination index: " + destIndex,
                expectedSrcCoarseStringIndices[destIndex],
                coarseIterator.sourceIndexFromDestinationIndex(destIndex));
        assertEquals("Destination index: " + destIndex,
                expectedSrcCoarseStringIndices[destIndex],
                coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex));
    }
}

/**
 * Returns the "{ src[...] ... }" part of an Edits.Iterator's toString() output.
 * The text before the first '{' (the superclass's toString() prefix, whose length varies
 * with the printed hash value) is dropped rather than skipped by a fixed character count.
 */
private static String editsIteratorDescription(Edits.Iterator iterator) {
    String text = iterator.toString();
    int start = text.indexOf('{');
    return start < 0 ? text : text.substring(start);
}
// private data members - test data --------------------------------------
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");