mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-09 07:22:11 +00:00
ICU-13337 public string option for not resetting an Edits object: U_EDITS_NO_RESET
X-SVN-Rev: 40394
This commit is contained in:
parent
52417e2221
commit
321f467190
10 changed files with 62 additions and 34 deletions
|
@ -22,11 +22,11 @@
|
|||
|
||||
#include "unicode/edits.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "cpputils.h"
|
||||
#include "ustr_imp.h" // U_EDITS_NO_RESET
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
|
|
@ -20,10 +20,10 @@
|
|||
|
||||
#include "unicode/edits.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "cpputils.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "ustr_imp.h" // U_EDITS_NO_RESET
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
#include "unicode/edits.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "cstring.h"
|
||||
|
@ -30,7 +31,6 @@
|
|||
#include "normalizer2impl.h"
|
||||
#include "uassert.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "ustr_imp.h" // U_EDITS_NO_RESET
|
||||
|
||||
using icu::Normalizer2Impl;
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "unicode/brkiter.h"
|
||||
#include "unicode/casemap.h"
|
||||
#include "unicode/edits.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
|
|
@ -36,7 +36,7 @@ public:
|
|||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
|
@ -48,7 +48,8 @@ public:
|
|||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
|
@ -71,7 +72,7 @@ public:
|
|||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
|
@ -83,7 +84,8 @@ public:
|
|||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
|
@ -112,7 +114,7 @@ public:
|
|||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
|
||||
* U_TITLECASE_NO_LOWERCASE,
|
||||
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
|
||||
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
|
||||
|
@ -132,7 +134,8 @@ public:
|
|||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
|
@ -161,7 +164,7 @@ public:
|
|||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
|
||||
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
|
@ -174,7 +177,8 @@ public:
|
|||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
|
@ -197,7 +201,7 @@ public:
|
|||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
|
@ -209,7 +213,8 @@ public:
|
|||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
|
@ -232,7 +237,7 @@ public:
|
|||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
|
@ -244,7 +249,8 @@ public:
|
|||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
|
@ -273,7 +279,7 @@ public:
|
|||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
|
||||
* U_TITLECASE_NO_LOWERCASE,
|
||||
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
|
||||
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
|
||||
|
@ -293,7 +299,8 @@ public:
|
|||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
|
@ -321,7 +328,7 @@ public:
|
|||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
|
||||
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
|
@ -334,7 +341,8 @@ public:
|
|||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
|
|
|
@ -228,14 +228,15 @@ public:
|
|||
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
||||
* Otherwise currently converts to & from UTF-16 and does not support edits.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src Source UTF-8 string.
|
||||
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
|
||||
* sink.Flush() is called at the end.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first. edits can be nullptr.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
|
@ -545,14 +546,15 @@ public:
|
|||
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
||||
* Otherwise currently converts to & from UTF-16 and does not support edits.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src Source UTF-8 string.
|
||||
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
|
||||
* sink.Flush() is called at the end.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first. edits can be nullptr.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
|
|
|
@ -134,6 +134,17 @@
|
|||
*/
|
||||
#define U_TITLECASE_ADJUST_TO_CASED 0x400
|
||||
|
||||
/**
|
||||
* Option for string transformation functions to not first reset the Edits object.
|
||||
* Used for example in some case-mapping and normalization functions.
|
||||
*
|
||||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @see Normalizer2
|
||||
* @draft ICU 60
|
||||
*/
|
||||
#define U_EDITS_NO_RESET 0x2000
|
||||
|
||||
/**
|
||||
* Omit unchanged text when recording how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
|
@ -182,7 +193,6 @@
|
|||
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
|
||||
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
|
||||
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
|
||||
// ustr_imp.h #define U_EDITS_NO_RESET 0x2000
|
||||
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
|
||||
|
||||
#endif // __STRINGOPTIONS_H__
|
||||
|
|
|
@ -25,11 +25,6 @@
|
|||
*/
|
||||
#define _STRNCMP_STYLE 0x1000
|
||||
|
||||
/**
|
||||
* Internal option for string transformation functions to not first reset the Edits object.
|
||||
*/
|
||||
#define U_EDITS_NO_RESET 0x2000
|
||||
|
||||
/**
|
||||
* Compare two strings in code point order or code unit order.
|
||||
* Works in strcmp style (both lengths -1),
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "unicode/brkiter.h"
|
||||
#include "unicode/casemap.h"
|
||||
#include "unicode/edits.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/ubrk.h"
|
||||
|
|
|
@ -1272,18 +1272,23 @@ void StringCaseTest::TestCaseMapWithEdits() {
|
|||
TRUE, errorCode);
|
||||
#endif
|
||||
|
||||
edits.reset();
|
||||
length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
|
||||
// Neither explicit nor automatic edits.reset(). Edits should be appended.
|
||||
length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
|
||||
u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
|
||||
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
|
||||
static const EditChange foldExpectedChanges[] = {
|
||||
// From titlecasing.
|
||||
{ FALSE, 1, 1 },
|
||||
{ TRUE, 1, 1 },
|
||||
{ FALSE, 10, 10 },
|
||||
// From case folding.
|
||||
{ TRUE, 1, 1 },
|
||||
{ TRUE, 1, 2 },
|
||||
{ FALSE, 3, 3 },
|
||||
{ TRUE, 1, 1 },
|
||||
{ FALSE, 2, 2 }
|
||||
};
|
||||
TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
|
||||
TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)",
|
||||
edits.getFineIterator(), edits.getFineIterator(),
|
||||
foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
|
||||
TRUE, errorCode);
|
||||
|
@ -1348,12 +1353,18 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
|
|||
TRUE, errorCode);
|
||||
#endif
|
||||
|
||||
edits.reset();
|
||||
length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
|
||||
// Neither explicit nor automatic edits.reset(). Edits should be appended.
|
||||
length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET |
|
||||
U_FOLD_CASE_EXCLUDE_SPECIAL_I,
|
||||
u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
|
||||
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
|
||||
UnicodeString::fromUTF8(StringPiece(dest, length)));
|
||||
static const EditChange foldExpectedChanges[] = {
|
||||
// From titlecasing.
|
||||
{ FALSE, 1, 1 },
|
||||
{ TRUE, 1, 1 },
|
||||
{ FALSE, 10, 10 },
|
||||
// From case folding.
|
||||
{ TRUE, 1, 2 },
|
||||
{ TRUE, 2, 2 },
|
||||
{ FALSE, 3, 3 },
|
||||
|
|
Loading…
Add table
Reference in a new issue