ICU-13337 public string option for not resetting an Edits object: U_EDITS_NO_RESET

X-SVN-Rev: 40394
This commit is contained in:
Markus Scherer 2017-09-13 16:15:28 +00:00
parent 52417e2221
commit 321f467190
10 changed files with 62 additions and 34 deletions

View file

@ -22,11 +22,11 @@
#include "unicode/edits.h"
#include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cpputils.h"
#include "ustr_imp.h" // U_EDITS_NO_RESET
U_NAMESPACE_BEGIN

View file

@ -20,10 +20,10 @@
#include "unicode/edits.h"
#include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/unistr.h"
#include "cpputils.h"
#include "normalizer2impl.h"
#include "ustr_imp.h" // U_EDITS_NO_RESET
U_NAMESPACE_BEGIN

View file

@ -22,6 +22,7 @@
#include "unicode/edits.h"
#include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cstring.h"
@ -30,7 +31,6 @@
#include "normalizer2impl.h"
#include "uassert.h"
#include "ucln_cmn.h"
#include "ustr_imp.h" // U_EDITS_NO_RESET
using icu::Normalizer2Impl;

View file

@ -22,6 +22,7 @@
#include "unicode/brkiter.h"
#include "unicode/casemap.h"
#include "unicode/edits.h"
#include "unicode/stringoptions.h"
#include "unicode/ubrk.h"
#include "unicode/uloc.h"
#include "unicode/ustring.h"

View file

@ -36,7 +36,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@ -48,7 +48,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -71,7 +72,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@ -83,7 +84,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -112,7 +114,7 @@ public:
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
@ -132,7 +134,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -161,7 +164,7 @@ public:
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@ -174,7 +177,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -197,7 +201,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@ -209,7 +213,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -232,7 +237,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@ -244,7 +249,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -273,7 +279,7 @@ public:
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
@ -293,7 +299,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -321,7 +328,7 @@ public:
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@ -334,7 +341,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.

View file

@ -228,14 +228,15 @@ public:
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* Otherwise currently converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be nullptr.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be nullptr.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
@ -545,14 +546,15 @@ public:
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* Otherwise currently converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be nullptr.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be nullptr.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with

View file

@ -134,6 +134,17 @@
*/
#define U_TITLECASE_ADJUST_TO_CASED 0x400
/**
* Option for string transformation functions to not first reset the Edits object.
* When this bit is included in the options, a function that would otherwise
* call edits->reset() first skips that call, so that the edits it records
* are appended to whatever the Edits object already contains.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @draft ICU 60
*/
#define U_EDITS_NO_RESET 0x2000
/**
* Omit unchanged text when recording how source substrings
* relate to changed and unchanged result substrings.
@ -182,7 +193,6 @@
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
// ustr_imp.h #define U_EDITS_NO_RESET 0x2000
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
#endif // __STRINGOPTIONS_H__

View file

@ -25,11 +25,6 @@
*/
#define _STRNCMP_STYLE 0x1000
/**
* Internal option for string transformation functions to not first reset the Edits object.
*/
#define U_EDITS_NO_RESET 0x2000
/**
* Compare two strings in code point order or code unit order.
* Works in strcmp style (both lengths -1),

View file

@ -24,6 +24,7 @@
#include "unicode/brkiter.h"
#include "unicode/casemap.h"
#include "unicode/edits.h"
#include "unicode/stringoptions.h"
#include "unicode/ustring.h"
#include "unicode/ucasemap.h"
#include "unicode/ubrk.h"

View file

@ -1272,18 +1272,23 @@ void StringCaseTest::TestCaseMapWithEdits() {
TRUE, errorCode);
#endif
edits.reset();
length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
// Neither explicit nor automatic edits.reset(). Edits should be appended.
length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
static const EditChange foldExpectedChanges[] = {
// From titlecasing.
{ FALSE, 1, 1 },
{ TRUE, 1, 1 },
{ FALSE, 10, 10 },
// From case folding.
{ TRUE, 1, 1 },
{ TRUE, 1, 2 },
{ FALSE, 3, 3 },
{ TRUE, 1, 1 },
{ FALSE, 2, 2 }
};
TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)",
edits.getFineIterator(), edits.getFineIterator(),
foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
TRUE, errorCode);
@ -1348,12 +1353,18 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
TRUE, errorCode);
#endif
edits.reset();
length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
// Neither explicit nor automatic edits.reset(). Edits should be appended.
length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET |
U_FOLD_CASE_EXCLUDE_SPECIAL_I,
u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
static const EditChange foldExpectedChanges[] = {
// From titlecasing.
{ FALSE, 1, 1 },
{ TRUE, 1, 1 },
{ FALSE, 10, 10 },
// From case folding.
{ TRUE, 1, 2 },
{ TRUE, 2, 2 },
{ FALSE, 3, 3 },