mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-13629 Improving API docs for Edits and Edits.Iterator.
X-SVN-Rev: 41363
This commit is contained in:
parent
1fe1497d88
commit
5c969e791f
5 changed files with 595 additions and 46 deletions
|
@ -4,10 +4,12 @@
|
|||
// edits.cpp
|
||||
// created: 2017feb08 Markus W. Scherer
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/edits.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "uassert.h"
|
||||
#include "util.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -773,4 +775,29 @@ int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &
|
|||
}
|
||||
}
|
||||
|
||||
// Appends a debugging description of the edit this iterator currently points to
// onto sb and returns sb. The shape of the text is
//   { src[a..b] ⇝ dest[c..d], repl[e..f] }     for a change edit, and
//   { src[a..b] ≡ dest[c..d] (no-change) }      for a no-change edit.
UnicodeString& Edits::Iterator::toString(UnicodeString& sb) const {
    // Source span: start index and limit (start + oldLength_).
    sb.append(u"{ src[", -1);
    ICU_Utility::appendNumber(sb, srcIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, srcIndex + oldLength_);

    // The relation symbol distinguishes change edits from no-change edits.
    sb.append(changed ? u"] ⇝ dest[" : u"] ≡ dest[", -1);

    // Destination span: start index and limit (start + newLength_).
    ICU_Utility::appendNumber(sb, destIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, destIndex + newLength_);

    if (!changed) {
        // No replacement span is printed for a no-change edit.
        sb.append(u"] (no-change) }", -1);
        return sb;
    }

    // Replacement span: start index and limit (start + newLength_).
    sb.append(u"], repl[", -1);
    ICU_Utility::appendNumber(sb, replIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, replIndex + newLength_);
    sb.append(u"] }", -1);
    return sb;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -17,10 +17,57 @@
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeString;
|
||||
|
||||
/**
|
||||
* Records lengths of string edits but not replacement text.
|
||||
* Supports replacements, insertions, deletions in linear progression.
|
||||
* Does not support moving/reordering of text.
|
||||
* Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
|
||||
* in linear progression. Does not support moving/reordering of text.
|
||||
*
|
||||
* There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
|
||||
* instances of this class using {@link #addReplace(int, int)} (for change edits) and
|
||||
* {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity,
|
||||
* whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
|
||||
* mapping between code points in the source and destination strings.
|
||||
*
|
||||
* After all edits have been added, instances of this class should be considered immutable, and an
|
||||
* {@link Edits::Iterator} can be used for queries.
|
||||
*
|
||||
* There are four flavors of Edits::Iterator:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link #getFineIterator()} retains full granularity of change edits.
|
||||
* <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
|
||||
* next() on the iterator, skips over no-change edits (unchanged regions).
|
||||
* <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
|
||||
* edits are automatically merged during the construction phase.)
|
||||
* <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
|
||||
* calling next() on the iterator, skips over no-change edits (unchanged regions).
|
||||
* </ul>
|
||||
*
|
||||
* For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
|
||||
* following fine edits:
|
||||
* <ul>
|
||||
* <li>abc ⇨ abc (no-change)
|
||||
* <li>ß ⇨ ss (change)
|
||||
* <li>D ⇨ d (change)
|
||||
* <li>e ⇨ e (no-change)
|
||||
* <li>F ⇨ f (change)
|
||||
* </ul>
|
||||
* and the following coarse edits (note how adjacent change edits get merged together):
|
||||
* <ul>
|
||||
* <li>abc ⇨ abc (no-change)
|
||||
* <li>ßD ⇨ ssd (change)
|
||||
* <li>e ⇨ e (no-change)
|
||||
* <li>F ⇨ f (change)
|
||||
* </ul>
|
||||
*
|
||||
* The "fine changes" and "coarse changes" iterators will step through only the change edits when their
|
||||
* {@link Edits::Iterator#next()} methods are called. They behave identically to the fine and coarse iterators when
|
||||
* their {@link Edits::Iterator#findSourceIndex(int)} or {@link Edits::Iterator#findDestinationIndex(int)}
|
||||
* methods are used to walk through the string.
|
||||
*
|
||||
* For examples of how to use this class, see the test <code>TestCaseMapEditsIteratorDocs</code> in
|
||||
* StringCaseTest (C++) or UCharacterCaseTest.java (Java).
|
||||
*
|
||||
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
|
||||
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
|
||||
|
@ -91,13 +138,13 @@ public:
|
|||
void reset() U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Adds a record for an unchanged segment of text.
|
||||
* Adds a no-change edit: a record for an unchanged segment of text.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
void addUnchanged(int32_t unchangedLength);
|
||||
/**
|
||||
* Adds a record for a text replacement/insertion/deletion.
|
||||
* Adds a change edit: a record for a text replacement/insertion/deletion.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -136,6 +183,18 @@ public:
|
|||
|
||||
/**
|
||||
* Access to the list of edits.
|
||||
*
|
||||
* At any moment in time, an instance of this class points to a single edit: a "window" into a span
|
||||
* of the source string and the corresponding span of the destination string. The source string span
|
||||
* starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
|
||||
* span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
|
||||
*
|
||||
* The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)},
|
||||
* and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator
|
||||
* to make it point to the corresponding edit.
|
||||
*
|
||||
* For more information, see the documentation for {@link Edits}.
|
||||
*
|
||||
* @see getCoarseIterator
|
||||
* @see getFineIterator
|
||||
* @stable ICU 59
|
||||
|
@ -162,7 +221,7 @@ public:
|
|||
Iterator &operator=(const Iterator &other) = default;
|
||||
|
||||
/**
|
||||
* Advances to the next edit.
|
||||
* Advances the iterator to the next edit.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
|
@ -172,9 +231,9 @@ public:
|
|||
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
|
||||
|
||||
/**
|
||||
* Finds the edit that contains the source index.
|
||||
* The source index may be found in a non-change
|
||||
* even if normal iteration would skip non-changes.
|
||||
* Moves the iterator to the edit that contains the source index.
|
||||
* The source index may be found in a no-change edit
|
||||
* even if normal iteration would skip no-change edits.
|
||||
* Normal iteration can continue from a found edit.
|
||||
*
|
||||
* The iterator state before this search logically does not matter.
|
||||
|
@ -196,9 +255,9 @@ public:
|
|||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Finds the edit that contains the destination index.
|
||||
* The destination index may be found in a non-change
|
||||
* even if normal iteration would skip non-changes.
|
||||
* Moves the iterator to the edit that contains the destination index.
|
||||
* The destination index may be found in a no-change edit
|
||||
* even if normal iteration would skip no-change edits.
|
||||
* Normal iteration can continue from a found edit.
|
||||
*
|
||||
* The iterator state before this search logically does not matter.
|
||||
|
@ -219,7 +278,7 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the destination index corresponding to the given source index.
|
||||
* Computes the destination index corresponding to the given source index.
|
||||
* If the source index is inside a change edit (not at its start),
|
||||
* then the destination index at the end of that edit is returned,
|
||||
* since there is no information about index mapping inside a change edit.
|
||||
|
@ -243,7 +302,7 @@ public:
|
|||
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns the source index corresponding to the given destination index.
|
||||
* Computes the source index corresponding to the given destination index.
|
||||
* If the destination index is inside a change edit (not at its start),
|
||||
* then the source index at the end of that edit is returned,
|
||||
* since there is no information about index mapping inside a change edit.
|
||||
|
@ -268,17 +327,27 @@ public:
|
|||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Returns whether the edit currently represented by the iterator is a change edit.
|
||||
*
|
||||
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
|
||||
* FALSE if oldLength units remain unchanged.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool hasChange() const { return changed; }
|
||||
|
||||
/**
|
||||
* The length of the current span in the source string, which starts at {@link #sourceIndex}.
|
||||
*
|
||||
* @return the number of units in the original string which are replaced or remain unchanged.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t oldLength() const { return oldLength_; }
|
||||
|
||||
/**
|
||||
* The length of the current span in the destination string, which starts at
|
||||
* {@link #destinationIndex}, or in the replacement string, which starts at
|
||||
* {@link #replacementIndex}.
|
||||
*
|
||||
* @return the number of units in the modified string, if hasChange() is TRUE.
|
||||
* Same as oldLength if hasChange() is FALSE.
|
||||
* @stable ICU 59
|
||||
|
@ -286,22 +355,47 @@ public:
|
|||
int32_t newLength() const { return newLength_; }
|
||||
|
||||
/**
|
||||
* The start index of the current span in the source string; the span has length
|
||||
* {@link #oldLength}.
|
||||
*
|
||||
* @return the current index into the source string
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t sourceIndex() const { return srcIndex; }
|
||||
|
||||
/**
|
||||
* The start index of the current span in the replacement string; the span has length
|
||||
* {@link #newLength}. Well-defined only if the current edit is a change edit.
|
||||
* <p>
|
||||
* The <em>replacement string</em> is the concatenation of all substrings of the destination
|
||||
* string corresponding to change edits.
|
||||
* <p>
|
||||
* This method is intended to be used together with operations that write only replacement
|
||||
* characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified
|
||||
* in-place.
|
||||
*
|
||||
* @return the current index into the replacement-characters-only string,
|
||||
* not counting unchanged spans
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t replacementIndex() const { return replIndex; }
|
||||
|
||||
/**
|
||||
* The start index of the current span in the destination string; the span has length
|
||||
* {@link #newLength}.
|
||||
*
|
||||
* @return the current index into the full destination string
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t destinationIndex() const { return destIndex; }
|
||||
|
||||
/**
|
||||
* A string representation of the current edit represented by the iterator for debugging. You
|
||||
* should not depend on the contents of the return string.
|
||||
* @internal
|
||||
*/
|
||||
UnicodeString& toString(UnicodeString& appendTo) const;
|
||||
|
||||
private:
|
||||
friend class Edits;
|
||||
|
||||
|
@ -330,8 +424,10 @@ public:
|
|||
};
|
||||
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained changes for simple string updates.
|
||||
* Skips non-changes.
|
||||
* Returns an Iterator for coarse-grained change edits
|
||||
* (adjacent change edits are treated as one).
|
||||
* Can be used to perform simple string updates.
|
||||
* Skips no-change edits.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -340,7 +436,10 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
|
||||
* Returns an Iterator for coarse-grained change and no-change edits
|
||||
* (adjacent change edits are treated as one).
|
||||
* Can be used to perform simple string updates.
|
||||
* Adjacent change edits are treated as one edit.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -349,8 +448,10 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for fine-grained changes for modifying styled text.
|
||||
* Skips non-changes.
|
||||
* Returns an Iterator for fine-grained change edits
|
||||
* (full granularity of change edits is retained).
|
||||
* Can be used for modifying styled text.
|
||||
* Skips no-change edits.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -359,7 +460,9 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
|
||||
* Returns an Iterator for fine-grained change and no-change edits
|
||||
* (full granularity of change edits is retained).
|
||||
* Can be used for modifying styled text.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
|
|
@ -67,6 +67,7 @@ public:
|
|||
void TestLongUnicodeString();
|
||||
void TestBug13127();
|
||||
void TestInPlaceTitle();
|
||||
void TestCaseMapEditsIteratorDocs();
|
||||
|
||||
private:
|
||||
void assertGreekUpper(const char16_t *s, const char16_t *expected);
|
||||
|
@ -111,6 +112,7 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
|||
TESTCASE_AUTO(TestBug13127);
|
||||
TESTCASE_AUTO(TestInPlaceTitle);
|
||||
#endif
|
||||
TESTCASE_AUTO(TestCaseMapEditsIteratorDocs);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -1532,4 +1534,154 @@ void StringCaseTest::TestInPlaceTitle() {
|
|||
assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length);
|
||||
assertEquals("u_strToTitle(in-place)", expected, s);
|
||||
}
|
||||
|
||||
void StringCaseTest::TestCaseMapEditsIteratorDocs() {
|
||||
IcuTestErrorCode status(*this, "TestCaseMapEditsIteratorDocs");
|
||||
const char16_t* input = u"abcßDeF";
|
||||
int32_t inputLength = u_strlen(input);
|
||||
// output: "abcssdef"
|
||||
|
||||
char16_t output[10];
|
||||
Edits edits;
|
||||
CaseMap::fold(0, input, -1, output, 10, &edits, status);
|
||||
|
||||
static const char16_t* fineIteratorExpected[] = {
|
||||
u"{ src[0..3] ≡ dest[0..3] (no-change) }",
|
||||
u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
|
||||
u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
|
||||
u"{ src[5..6] ≡ dest[6..7] (no-change) }",
|
||||
u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
|
||||
};
|
||||
static const char16_t* fineChangesIteratorExpected[] = {
|
||||
u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
|
||||
u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
|
||||
u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
|
||||
};
|
||||
static const char16_t* coarseIteratorExpected[] = {
|
||||
u"{ src[0..3] ≡ dest[0..3] (no-change) }",
|
||||
u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
|
||||
u"{ src[5..6] ≡ dest[6..7] (no-change) }",
|
||||
u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
|
||||
};
|
||||
static const char16_t* coarseChangesIteratorExpected[] = {
|
||||
u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
|
||||
u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
|
||||
};
|
||||
|
||||
// Expected destination indices when source index is queried
|
||||
static int32_t expectedDestFineEditIndices[] = {0, 0, 0, 3, 5, 6, 7};
|
||||
static int32_t expectedDestCoarseEditIndices[] = {0, 0, 0, 3, 3, 6, 7};
|
||||
static int32_t expectedDestFineStringIndices[] = {0, 1, 2, 3, 5, 6, 7};
|
||||
static int32_t expectedDestCoarseStringIndices[] = {0, 1, 2, 3, 6, 6, 7};
|
||||
|
||||
// Expected source indices when destination index is queried
|
||||
static int32_t expectedSrcFineEditIndices[] = { 0, 0, 0, 3, 3, 4, 5, 6 };
|
||||
static int32_t expectedSrcCoarseEditIndices[] = { 0, 0, 0, 3, 3, 3, 5, 6 };
|
||||
static int32_t expectedSrcFineStringIndices[] = { 0, 1, 2, 3, 4, 4, 5, 6 };
|
||||
static int32_t expectedSrcCoarseStringIndices[] = { 0, 1, 2, 3, 5, 5, 5, 6 };
|
||||
|
||||
// Demonstrate the iterator next() method:
|
||||
Edits::Iterator fineIterator = edits.getFineIterator();
|
||||
int i = 0;
|
||||
UnicodeString toString;
|
||||
while (fineIterator.next(status)) {
|
||||
UnicodeString expected = fineIteratorExpected[i++];
|
||||
assertEquals(UnicodeString(u"Iteration #") + i,
|
||||
expected,
|
||||
fineIterator.toString(toString.remove()));
|
||||
}
|
||||
Edits::Iterator fineChangesIterator = edits.getFineChangesIterator();
|
||||
i = 0;
|
||||
while (fineChangesIterator.next(status)) {
|
||||
UnicodeString expected = fineChangesIteratorExpected[i++];
|
||||
assertEquals(UnicodeString(u"Iteration #") + i,
|
||||
expected,
|
||||
fineChangesIterator.toString(toString.remove()));
|
||||
}
|
||||
Edits::Iterator coarseIterator = edits.getCoarseIterator();
|
||||
i = 0;
|
||||
while (coarseIterator.next(status)) {
|
||||
UnicodeString expected = coarseIteratorExpected[i++];
|
||||
assertEquals(UnicodeString(u"Iteration #") + i,
|
||||
expected,
|
||||
coarseIterator.toString(toString.remove()));
|
||||
}
|
||||
Edits::Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
|
||||
i = 0;
|
||||
while (coarseChangesIterator.next(status)) {
|
||||
UnicodeString expected = coarseChangesIteratorExpected[i++];
|
||||
assertEquals(UnicodeString(u"Iteration #") + i,
|
||||
expected,
|
||||
coarseChangesIterator.toString(toString.remove()));
|
||||
}
|
||||
|
||||
// Demonstrate the iterator indexing methods:
|
||||
// fineIterator should have the same behavior as fineChangesIterator, and
|
||||
// coarseIterator should have the same behavior as coarseChangesIterator.
|
||||
for (int32_t srcIndex=0; srcIndex<inputLength; srcIndex++) {
|
||||
fineIterator.findSourceIndex(srcIndex, status);
|
||||
fineChangesIterator.findSourceIndex(srcIndex, status);
|
||||
coarseIterator.findSourceIndex(srcIndex, status);
|
||||
coarseChangesIterator.findSourceIndex(srcIndex, status);
|
||||
|
||||
assertEquals(UnicodeString("Source index: ") + srcIndex,
|
||||
expectedDestFineEditIndices[srcIndex],
|
||||
fineIterator.destinationIndex());
|
||||
assertEquals(UnicodeString("Source index: ") + srcIndex,
|
||||
expectedDestFineEditIndices[srcIndex],
|
||||
fineChangesIterator.destinationIndex());
|
||||
assertEquals(UnicodeString("Source index: ") + srcIndex,
|
||||
expectedDestCoarseEditIndices[srcIndex],
|
||||
coarseIterator.destinationIndex());
|
||||
assertEquals(UnicodeString("Source index: ") + srcIndex,
|
||||
expectedDestCoarseEditIndices[srcIndex],
|
||||
coarseChangesIterator.destinationIndex());
|
||||
|
||||
assertEquals(UnicodeString("Source index: ") + srcIndex,
|
||||
expectedDestFineStringIndices[srcIndex],
|
||||
fineIterator.destinationIndexFromSourceIndex(srcIndex, status));
|
||||
assertEquals(UnicodeString("Source index: ") + srcIndex,
|
||||
expectedDestFineStringIndices[srcIndex],
|
||||
fineChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
|
||||
assertEquals(UnicodeString("Source index: ") + srcIndex,
|
||||
expectedDestCoarseStringIndices[srcIndex],
|
||||
coarseIterator.destinationIndexFromSourceIndex(srcIndex, status));
|
||||
assertEquals(UnicodeString("Source index: ") + srcIndex,
|
||||
expectedDestCoarseStringIndices[srcIndex],
|
||||
coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
|
||||
}
|
||||
for (int32_t destIndex=0; destIndex<inputLength; destIndex++) {
|
||||
fineIterator.findDestinationIndex(destIndex, status);
|
||||
fineChangesIterator.findDestinationIndex(destIndex, status);
|
||||
coarseIterator.findDestinationIndex(destIndex, status);
|
||||
coarseChangesIterator.findDestinationIndex(destIndex, status);
|
||||
|
||||
assertEquals(UnicodeString("Destination index: ") + destIndex,
|
||||
expectedSrcFineEditIndices[destIndex],
|
||||
fineIterator.sourceIndex());
|
||||
assertEquals(UnicodeString("Destination index: ") + destIndex,
|
||||
expectedSrcFineEditIndices[destIndex],
|
||||
fineChangesIterator.sourceIndex());
|
||||
assertEquals(UnicodeString("Destination index: ") + destIndex,
|
||||
expectedSrcCoarseEditIndices[destIndex],
|
||||
coarseIterator.sourceIndex());
|
||||
assertEquals(UnicodeString("Destination index: ") + destIndex,
|
||||
expectedSrcCoarseEditIndices[destIndex],
|
||||
coarseChangesIterator.sourceIndex());
|
||||
|
||||
assertEquals(UnicodeString("Destination index: ") + destIndex,
|
||||
expectedSrcFineStringIndices[destIndex],
|
||||
fineIterator.sourceIndexFromDestinationIndex(destIndex, status));
|
||||
assertEquals(UnicodeString("Destination index: ") + destIndex,
|
||||
expectedSrcFineStringIndices[destIndex],
|
||||
fineChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
|
||||
assertEquals(UnicodeString("Destination index: ") + destIndex,
|
||||
expectedSrcCoarseStringIndices[destIndex],
|
||||
coarseIterator.sourceIndexFromDestinationIndex(destIndex, status));
|
||||
assertEquals(UnicodeString("Destination index: ") + destIndex,
|
||||
expectedSrcCoarseStringIndices[destIndex],
|
||||
coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -6,9 +6,54 @@ import java.nio.BufferOverflowException;
|
|||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Records lengths of string edits but not replacement text.
|
||||
* Supports replacements, insertions, deletions in linear progression.
|
||||
* Does not support moving/reordering of text.
|
||||
* Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
|
||||
* in linear progression. Does not support moving/reordering of text.
|
||||
* <p>
|
||||
* There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
|
||||
* instances of this class using {@link #addReplace(int, int)} (for change edits) and
|
||||
* {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity,
|
||||
* whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
|
||||
* mapping between code points in the source and destination strings.
|
||||
* <p>
|
||||
* After all edits have been added, instances of this class should be considered immutable, and an
|
||||
* {@link Edits.Iterator} can be used for queries.
|
||||
* <p>
|
||||
* There are four flavors of Edits.Iterator:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link #getFineIterator()} retains full granularity of change edits.
|
||||
* <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
|
||||
* next() on the iterator, skips over no-change edits (unchanged regions).
|
||||
* <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
|
||||
* edits are automatically merged during the construction phase.)
|
||||
* <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
|
||||
* calling next() on the iterator, skips over no-change edits (unchanged regions).
|
||||
* </ul>
|
||||
* <p>
|
||||
* For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
|
||||
* following fine edits:
|
||||
* <ul>
|
||||
* <li>abc ⇨ abc (no-change)
|
||||
* <li>ß ⇨ ss (change)
|
||||
* <li>D ⇨ d (change)
|
||||
* <li>e ⇨ e (no-change)
|
||||
* <li>F ⇨ f (change)
|
||||
* </ul>
|
||||
* and the following coarse edits (note how adjacent change edits get merged together):
|
||||
* <ul>
|
||||
* <li>abc ⇨ abc (no-change)
|
||||
* <li>ßD ⇨ ssd (change)
|
||||
* <li>e ⇨ e (no-change)
|
||||
* <li>F ⇨ f (change)
|
||||
* </ul>
|
||||
* <p>
|
||||
* The "fine changes" and "coarse changes" iterators will step through only the change edits when their
|
||||
* {@link Edits.Iterator#next()} methods are called. They behave identically to the fine and coarse iterators when
|
||||
* their {@link Edits.Iterator#findSourceIndex(int)} or {@link Edits.Iterator#findDestinationIndex(int)}
|
||||
* methods are used to walk through the string.
|
||||
* <p>
|
||||
* For examples of how to use this class, see the test <code>TestCaseMapEditsIteratorDocs</code> in
|
||||
* UCharacterCaseTest.java.
|
||||
*
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -61,7 +106,7 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Adds a record for an unchanged segment of text.
|
||||
* Adds a no-change edit: a record for an unchanged segment of text.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -93,7 +138,7 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Adds a record for a text replacement/insertion/deletion.
|
||||
* Adds a change edit: a record for a text replacement/insertion/deletion.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -210,6 +255,20 @@ public final class Edits {
|
|||
|
||||
/**
|
||||
* Access to the list of edits.
|
||||
* <p>
|
||||
* At any moment in time, an instance of this class points to a single edit: a "window" into a span
|
||||
* of the source string and the corresponding span of the destination string. The source string span
|
||||
* starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
|
||||
* span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
|
||||
* <p>
|
||||
* The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)},
|
||||
* and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator
|
||||
* to make it point to the corresponding edit.
|
||||
* <p>
|
||||
* For more information, see the documentation for {@link Edits}.
|
||||
* <p>
|
||||
* Note: Although this class is called "Iterator", it does not implement {@link java.util.Iterator}.
|
||||
*
|
||||
* @see #getCoarseIterator
|
||||
* @see #getFineIterator
|
||||
* @stable ICU 59
|
||||
|
@ -281,7 +340,7 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Advances to the next edit.
|
||||
* Advances the iterator to the next edit.
|
||||
* @return true if there is another edit
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -489,9 +548,9 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Finds the edit that contains the source index.
|
||||
* The source index may be found in a non-change
|
||||
* even if normal iteration would skip non-changes.
|
||||
* Moves the iterator to the edit that contains the source index.
|
||||
* The source index may be found in a no-change edit
|
||||
* even if normal iteration would skip no-change edits.
|
||||
* Normal iteration can continue from a found edit.
|
||||
*
|
||||
* <p>The iterator state before this search logically does not matter.
|
||||
|
@ -509,9 +568,9 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Finds the edit that contains the destination index.
|
||||
* The destination index may be found in a non-change
|
||||
* even if normal iteration would skip non-changes.
|
||||
* Moves the iterator to the edit that contains the destination index.
|
||||
* The destination index may be found in a no-change edit
|
||||
* even if normal iteration would skip no-change edits.
|
||||
* Normal iteration can continue from a found edit.
|
||||
*
|
||||
* <p>The iterator state before this search logically does not matter.
|
||||
|
@ -617,7 +676,7 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the destination index corresponding to the given source index.
|
||||
* Computes the destination index corresponding to the given source index.
|
||||
* If the source index is inside a change edit (not at its start),
|
||||
* then the destination index at the end of that edit is returned,
|
||||
* since there is no information about index mapping inside a change edit.
|
||||
|
@ -656,7 +715,7 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the source index corresponding to the given destination index.
|
||||
* Computes the source index corresponding to the given destination index.
|
||||
* If the destination index is inside a change edit (not at its start),
|
||||
* then the source index at the end of that edit is returned,
|
||||
* since there is no information about index mapping inside a change edit.
|
||||
|
@ -695,44 +754,106 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns whether the edit currently represented by the iterator is a change edit.
|
||||
*
|
||||
* @return true if this edit replaces oldLength() units with newLength() different ones.
|
||||
* false if oldLength units remain unchanged.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
public boolean hasChange() { return changed; }
|
||||
|
||||
/**
|
||||
* @return the number of units in the original string which are replaced or remain unchanged.
|
||||
* The length of the current span in the source string, which starts at {@link #sourceIndex}.
|
||||
*
|
||||
* @return the number of units in the source string which are replaced or remain unchanged.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
public int oldLength() { return oldLength_; }
|
||||
|
||||
/**
|
||||
* @return the number of units in the modified string, if hasChange() is true.
|
||||
* Same as oldLength if hasChange() is false.
|
||||
* The length of the current span in the destination string, which starts at
|
||||
* {@link #destinationIndex}, or in the replacement string, which starts at
|
||||
* {@link #replacementIndex}.
|
||||
*
|
||||
* @return the number of units in the destination string, if hasChange() is true. Same as
|
||||
* oldLength if hasChange() is false.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
public int newLength() { return newLength_; }
|
||||
|
||||
/**
|
||||
* The start index of the current span in the source string; the span has length
|
||||
* {@link #oldLength}.
|
||||
*
|
||||
* @return the current index into the source string
|
||||
* @stable ICU 59
|
||||
*/
|
||||
public int sourceIndex() { return srcIndex; }
|
||||
|
||||
/**
|
||||
* @return the current index into the replacement-characters-only string,
|
||||
* not counting unchanged spans
|
||||
* The start index of the current span in the replacement string; the span has length
|
||||
* {@link #newLength}. Well-defined only if the current edit is a change edit.
|
||||
* <p>
|
||||
* The <em>replacement string</em> is the concatenation of all substrings of the destination
|
||||
* string corresponding to change edits.
|
||||
* <p>
|
||||
* This method is intended to be used together with operations that write only replacement
|
||||
* characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified
|
||||
* in-place.
|
||||
*
|
||||
* @return the current index into the replacement-characters-only string, not counting unchanged
|
||||
* spans
|
||||
* @stable ICU 59
|
||||
*/
|
||||
public int replacementIndex() { return replIndex; }
|
||||
|
||||
/**
|
||||
* The start index of the current span in the destination string; the span has length
|
||||
* {@link #newLength}.
|
||||
*
|
||||
* @return the current index into the full destination string
|
||||
* @stable ICU 59
|
||||
*/
|
||||
public int destinationIndex() { return destIndex; }
|
||||
|
||||
/**
|
||||
* A string representation of the current edit represented by the iterator for debugging. You
|
||||
* should not depend on the contents of the return string.
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(super.toString());
|
||||
sb.append("{ src[");
|
||||
sb.append(srcIndex);
|
||||
sb.append("..");
|
||||
sb.append(srcIndex + oldLength_);
|
||||
if (changed) {
|
||||
sb.append("] ⇝ dest[");
|
||||
} else {
|
||||
sb.append("] ≡ dest[");
|
||||
}
|
||||
sb.append(destIndex);
|
||||
sb.append("..");
|
||||
sb.append(destIndex + newLength_);
|
||||
if (changed) {
|
||||
sb.append("], repl[");
|
||||
sb.append(replIndex);
|
||||
sb.append("..");
|
||||
sb.append(replIndex + newLength_);
|
||||
sb.append("] }");
|
||||
} else {
|
||||
sb.append("] (no-change) }");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained change edits
|
||||
* (adjacent change edits are treated as one).
|
||||
* Can be used to perform simple string updates.
|
||||
* Skips no-change edits.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -741,7 +862,10 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained change and no-change edits
|
||||
* (adjacent change edits are treated as one).
|
||||
* Can be used to perform simple string updates.
|
||||
* Adjacent change edits are treated as one edit.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -750,8 +874,10 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for fine-grained change edits
|
||||
* (full granularity of change edits is retained).
|
||||
* Can be used for modifying styled text.
|
||||
* Skips no-change edits.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -760,7 +886,9 @@ public final class Edits {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for fine-grained change and no-change edits
|
||||
* (full granularity of change edits is retained).
|
||||
* Can be used for modifying styled text.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
|
|
@ -1350,6 +1350,145 @@ public final class UCharacterCaseTest extends TestFmwk
|
|||
CaseMap.fold().turkic().apply("IßtanBul"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestCaseMapEditsIteratorDocs() {
|
||||
String input = "abcßDeF";
|
||||
// output: "abcssdef"
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Edits edits = new Edits();
|
||||
CaseMap.fold().apply(input, sb, edits);
|
||||
|
||||
String[] fineIteratorExpected = {
|
||||
"{ src[0..3] ≡ dest[0..3] (no-change) }",
|
||||
"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
|
||||
"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
|
||||
"{ src[5..6] ≡ dest[6..7] (no-change) }",
|
||||
"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
|
||||
};
|
||||
String[] fineChangesIteratorExpected = {
|
||||
"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
|
||||
"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
|
||||
"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
|
||||
};
|
||||
String[] coarseIteratorExpected = {
|
||||
"{ src[0..3] ≡ dest[0..3] (no-change) }",
|
||||
"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
|
||||
"{ src[5..6] ≡ dest[6..7] (no-change) }",
|
||||
"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
|
||||
};
|
||||
String[] coarseChangesIteratorExpected = {
|
||||
"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
|
||||
"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
|
||||
};
|
||||
|
||||
// Expected destination indices when source index is queried
|
||||
int[] expectedDestFineEditIndices = {0, 0, 0, 3, 5, 6, 7};
|
||||
int[] expectedDestCoarseEditIndices = {0, 0, 0, 3, 3, 6, 7};
|
||||
int[] expectedDestFineStringIndices = {0, 1, 2, 3, 5, 6, 7};
|
||||
int[] expectedDestCoarseStringIndices = {0, 1, 2, 3, 6, 6, 7};
|
||||
|
||||
// Expected source indices when destination index is queried
|
||||
int[] expectedSrcFineEditIndices = { 0, 0, 0, 3, 3, 4, 5, 6 };
|
||||
int[] expectedSrcCoarseEditIndices = { 0, 0, 0, 3, 3, 3, 5, 6 };
|
||||
int[] expectedSrcFineStringIndices = { 0, 1, 2, 3, 4, 4, 5, 6 };
|
||||
int[] expectedSrcCoarseStringIndices = { 0, 1, 2, 3, 5, 5, 5, 6 };
|
||||
|
||||
// Demonstrate the iterator next() method:
|
||||
Edits.Iterator fineIterator = edits.getFineIterator();
|
||||
int i = 0;
|
||||
while (fineIterator.next()) {
|
||||
String expected = fineIteratorExpected[i++];
|
||||
assertEquals("Iteration #" + i, expected, fineIterator.toString().substring(40));
|
||||
}
|
||||
Edits.Iterator fineChangesIterator = edits.getFineChangesIterator();
|
||||
i = 0;
|
||||
while (fineChangesIterator.next()) {
|
||||
String expected = fineChangesIteratorExpected[i++];
|
||||
assertEquals("Iteration #" + i, expected, fineChangesIterator.toString().substring(40));
|
||||
}
|
||||
Edits.Iterator coarseIterator = edits.getCoarseIterator();
|
||||
i = 0;
|
||||
while (coarseIterator.next()) {
|
||||
String expected = coarseIteratorExpected[i++];
|
||||
assertEquals("Iteration #" + i, expected, coarseIterator.toString().substring(40));
|
||||
}
|
||||
Edits.Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
|
||||
i = 0;
|
||||
while (coarseChangesIterator.next()) {
|
||||
String expected = coarseChangesIteratorExpected[i++];
|
||||
assertEquals("Iteration #" + i, expected, coarseChangesIterator.toString().substring(40));
|
||||
}
|
||||
|
||||
// Demonstrate the iterator indexing methods:
|
||||
// fineIterator should have the same behavior as fineChangesIterator, and
|
||||
// coarseIterator should have the same behavior as coarseChangesIterator.
|
||||
for (int srcIndex=0; srcIndex<input.length(); srcIndex++) {
|
||||
fineIterator.findSourceIndex(srcIndex);
|
||||
fineChangesIterator.findSourceIndex(srcIndex);
|
||||
coarseIterator.findSourceIndex(srcIndex);
|
||||
coarseChangesIterator.findSourceIndex(srcIndex);
|
||||
|
||||
assertEquals("Source index: " + srcIndex,
|
||||
expectedDestFineEditIndices[srcIndex],
|
||||
fineIterator.destinationIndex());
|
||||
assertEquals("Source index: " + srcIndex,
|
||||
expectedDestFineEditIndices[srcIndex],
|
||||
fineChangesIterator.destinationIndex());
|
||||
assertEquals("Source index: " + srcIndex,
|
||||
expectedDestCoarseEditIndices[srcIndex],
|
||||
coarseIterator.destinationIndex());
|
||||
assertEquals("Source index: " + srcIndex,
|
||||
expectedDestCoarseEditIndices[srcIndex],
|
||||
coarseChangesIterator.destinationIndex());
|
||||
|
||||
assertEquals("Source index: " + srcIndex,
|
||||
expectedDestFineStringIndices[srcIndex],
|
||||
fineIterator.destinationIndexFromSourceIndex(srcIndex));
|
||||
assertEquals("Source index: " + srcIndex,
|
||||
expectedDestFineStringIndices[srcIndex],
|
||||
fineChangesIterator.destinationIndexFromSourceIndex(srcIndex));
|
||||
assertEquals("Source index: " + srcIndex,
|
||||
expectedDestCoarseStringIndices[srcIndex],
|
||||
coarseIterator.destinationIndexFromSourceIndex(srcIndex));
|
||||
assertEquals("Source index: " + srcIndex,
|
||||
expectedDestCoarseStringIndices[srcIndex],
|
||||
coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex));
|
||||
}
|
||||
for (int destIndex=0; destIndex<input.length(); destIndex++) {
|
||||
fineIterator.findDestinationIndex(destIndex);
|
||||
fineChangesIterator.findDestinationIndex(destIndex);
|
||||
coarseIterator.findDestinationIndex(destIndex);
|
||||
coarseChangesIterator.findDestinationIndex(destIndex);
|
||||
|
||||
assertEquals("Destination index: " + destIndex,
|
||||
expectedSrcFineEditIndices[destIndex],
|
||||
fineIterator.sourceIndex());
|
||||
assertEquals("Destination index: " + destIndex,
|
||||
expectedSrcFineEditIndices[destIndex],
|
||||
fineChangesIterator.sourceIndex());
|
||||
assertEquals("Destination index: " + destIndex,
|
||||
expectedSrcCoarseEditIndices[destIndex],
|
||||
coarseIterator.sourceIndex());
|
||||
assertEquals("Destination index: " + destIndex,
|
||||
expectedSrcCoarseEditIndices[destIndex],
|
||||
coarseChangesIterator.sourceIndex());
|
||||
|
||||
assertEquals("Destination index: " + destIndex,
|
||||
expectedSrcFineStringIndices[destIndex],
|
||||
fineIterator.sourceIndexFromDestinationIndex(destIndex));
|
||||
assertEquals("Destination index: " + destIndex,
|
||||
expectedSrcFineStringIndices[destIndex],
|
||||
fineChangesIterator.sourceIndexFromDestinationIndex(destIndex));
|
||||
assertEquals("Destination index: " + destIndex,
|
||||
expectedSrcCoarseStringIndices[destIndex],
|
||||
coarseIterator.sourceIndexFromDestinationIndex(destIndex));
|
||||
assertEquals("Destination index: " + destIndex,
|
||||
expectedSrcCoarseStringIndices[destIndex],
|
||||
coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex));
|
||||
}
|
||||
}
|
||||
|
||||
// private data members - test data --------------------------------------
|
||||
|
||||
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
|
||||
|
|
Loading…
Add table
Reference in a new issue