ICU-12410 add ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter, ...), change UErrorCode pointer to reference internally and in new C++ API functions, add ucasemap_foldCaseWithEdits(), change UCaseMap.iter to C++ BreakIterator

X-SVN-Rev: 39551
This commit is contained in:
Markus Scherer 2017-01-09 23:52:12 +00:00
parent 00f2e12b65
commit 7035c31b6d
10 changed files with 308 additions and 156 deletions

View file

@ -32,14 +32,13 @@ U_NAMESPACE_USE
U_CAPI const UBreakIterator * U_EXPORT2
ucasemap_getBreakIterator(const UCaseMap *csm) {
return csm->iter;
return reinterpret_cast<UBreakIterator *>(csm->iter);
}
U_CAPI void U_EXPORT2
ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) {
// Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
delete reinterpret_cast<BreakIterator *>(csm->iter);
csm->iter=iterToAdopt;
delete csm->iter;
csm->iter=reinterpret_cast<BreakIterator *>(iterToAdopt);
}
U_CAPI int32_t U_EXPORT2
@ -49,15 +48,13 @@ ucasemap_utf8ToTitle(UCaseMap *csm,
UErrorCode *pErrorCode) {
UText utext=UTEXT_INITIALIZER;
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
if(csm->iter==NULL) {
csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode);
}
if(U_FAILURE(*pErrorCode)) {
return 0;
}
if(csm->iter==NULL) {
csm->iter=ubrk_open(UBRK_WORD, csm->locale,
NULL, 0,
pErrorCode);
}
ubrk_setUText(csm->iter, &utext, pErrorCode);
csm->iter->setText(&utext, *pErrorCode);
int32_t length=ucasemap_mapUTF8(csm,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,

View file

@ -22,15 +22,14 @@
#define __UCASEMAP_H__
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/localpointer.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/uobject.h"
#endif // U_SHOW_CPLUSPLUS_API
#include "unicode/ustring.h"
/**
* \file
* \brief C API: Unicode case mapping functions using a UCaseMap service object.
@ -89,6 +88,8 @@ ucasemap_close(UCaseMap *csm);
U_NAMESPACE_BEGIN
class BreakIterator;
/**
* \class LocalUCaseMapPointer
* "Smart pointer" class, closes a UCaseMap via ucasemap_close().
@ -107,6 +108,9 @@ U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
* Supports replacements, insertions, deletions in linear progression.
* Does not support moving/reordering of text.
*
* An Edits object tracks a separate UErrorCode, but ICU case mapping functions
* merge any such errors into their API's UErrorCode.
*
* @internal ICU 59 technology preview
*/
class Edits final : public UMemory {
@ -154,21 +158,24 @@ public:
/**
* Adds a record for an unchanged segment of text.
* Normally called from inside ICU case mapping functions, not user code.
* @internal ICU 59 technology preview
*/
void addUnchanged(int32_t unchangedLength);
/**
* Adds a record for a text replacement/insertion/deletion.
* Normally called from inside ICU case mapping functions, not user code.
* @internal ICU 59 technology preview
*/
void addReplace(int32_t oldLength, int32_t newLength);
/**
* Sets the UErrorCode if an error occurred while recording edits.
* Preserves older error codes in the outErrorCode.
* Normally called from inside ICU case mapping functions, not user code.
* @return TRUE if U_FAILURE(outErrorCode)
* @internal ICU 59 technology preview
*/
UBool setErrorCode(UErrorCode &outErrorCode);
UBool copyErrorTo(UErrorCode &outErrorCode);
/**
* How much longer is the new text compared with the old text?
@ -463,7 +470,7 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). Can be NULL.
* @param pErrorCode Must be a valid pointer to an error code value,
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
@ -476,7 +483,7 @@ ucasemap_toLowerWithEdits(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode);
UErrorCode &errorCode);
/**
* Uppercases the characters in a UTF-16 string and optionally records edits.
@ -495,7 +502,7 @@ ucasemap_toLowerWithEdits(const UCaseMap *csm,
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). Can be NULL.
* @param pErrorCode Must be a valid pointer to an error code value,
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
@ -508,7 +515,99 @@ ucasemap_toUpperWithEdits(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode);
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/**
* Titlecases a UTF-16 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* Titlecasing uses a break iterator to find the first characters of words
* that are to be titlecased. It titlecases those characters and lowercases
* all others. (This can be modified with ucasemap_setOptions().)
*
* The titlecase break iterator can be provided to customize for arbitrary
* styles, using rules and dictionaries beyond the standard iterators.
* The standard titlecase iterator for the root locale implements the
* algorithm of Unicode TR 21.
*
* This function uses only the setText(), first() and next() methods of the
* provided break iterator.
*
* @param csm UCaseMap service object.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string and used one or more times for iteration.
* If NULL, then a clone of ucasemap_getBreakIterator() is used.
* If that is NULL too, then a word break iterator for the locale is used
* (or something equivalent).
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents is undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). Can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
*
* @see u_strToTitle
* @internal ICU 59 technology preview
*/
U_CAPI int32_t U_EXPORT2
ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
#endif // UCONFIG_NO_BREAK_ITERATION
/**
* Case-folds the characters in a UTF-16 string and optionally records edits.
*
* Case-folding is locale-independent and not context-sensitive,
* but there is an option for whether to include or exclude mappings for dotted I
* and dotless i that are marked with 'T' in CaseFolding.txt.
*
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param csm UCaseMap service object.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents is undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). Can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
*
* @see u_strFoldCase
* @see ucasemap_setOptions
* @see U_FOLD_CASE_DEFAULT
* @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
* @internal ICU 59 technology preview
*/
U_CAPI int32_t U_EXPORT2
ucasemap_foldCaseWithEdits(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
#endif // U_HIDE_INTERNAL_API
#endif // U_SHOW_CPLUSPLUS_API
@ -600,7 +699,7 @@ ucasemap_toTitle(UCaseMap *csm,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
#endif // UCONFIG_NO_BREAK_ITERATION
/**
* Lowercase the characters in a UTF-8 string.
@ -762,10 +861,13 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
*/
typedef int32_t U_CALLCONV
UStringCaseMapper(const UCaseMap *csm,
#if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter,
#endif
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode);
UErrorCode &errorCode);
#endif // U_SHOW_CPLUSPLUS_API
#endif

View file

@ -3573,7 +3573,11 @@ private:
* as in ustr_imp.h for ustrcase_map().
*/
UnicodeString &
caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
caseMap(const UCaseMap *csm,
#if !UCONFIG_NO_BREAK_ITERATION
BreakIterator *iter,
#endif
UStringCaseMapper *stringCaseMapper);
// ref counting
void addRef(void);

View file

@ -88,7 +88,7 @@ UnicodeString::doCaseCompare(int32_t start,
//========================================
UnicodeString &
UnicodeString::caseMap(const UCaseMap *csm,
UnicodeString::caseMap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
UStringCaseMapper *stringCaseMapper) {
if(isEmpty() || !isWritable()) {
// nothing to do
@ -121,7 +121,9 @@ UnicodeString::caseMap(const UCaseMap *csm,
buffer = fUnion.fStackFields.fBuffer;
capacity = US_STACKBUF_SIZE;
}
newLength = stringCaseMapper(csm, buffer, capacity, oldArray, oldLength, NULL, &errorCode);
newLength = stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
buffer, capacity,
oldArray, oldLength, NULL, errorCode);
if (U_SUCCESS(errorCode)) {
setLength(newLength);
return *this;
@ -140,22 +142,18 @@ UnicodeString::caseMap(const UCaseMap *csm,
Edits edits;
edits.setWriteUnchanged(FALSE);
UChar replacementChars[200];
stringCaseMapper(csm, replacementChars, UPRV_LENGTHOF(replacementChars),
oldArray, oldLength, &edits, &errorCode);
UErrorCode editsError = U_ZERO_ERROR;
if (edits.setErrorCode(editsError)) {
setToBogus();
return *this;
}
newLength = oldLength + edits.lengthDelta();
stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
replacementChars, UPRV_LENGTHOF(replacementChars),
oldArray, oldLength, &edits, errorCode);
if (U_SUCCESS(errorCode)) {
// Grow the buffer at most once, not for multiple doReplace() calls.
newLength = oldLength + edits.lengthDelta();
if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) {
return *this;
}
for (Edits::Iterator iter = edits.getCoarseChangesIterator(); iter.next(errorCode);) {
doReplace(iter.destinationIndex(), iter.oldLength(),
replacementChars, iter.replacementIndex(), iter.newLength());
for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) {
doReplace(ei.destinationIndex(), ei.oldLength(),
replacementChars, ei.replacementIndex(), ei.newLength());
}
if (U_FAILURE(errorCode)) {
setToBogus();
@ -163,6 +161,7 @@ UnicodeString::caseMap(const UCaseMap *csm,
return *this;
} else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
// common overflow handling below
newLength = oldLength + edits.lengthDelta();
} else {
setToBogus();
return *this;
@ -179,8 +178,9 @@ UnicodeString::caseMap(const UCaseMap *csm,
return *this;
}
errorCode = U_ZERO_ERROR;
newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
oldArray, oldLength, NULL, &errorCode);
newLength = stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
getArrayStart(), getCapacity(),
oldArray, oldLength, NULL, errorCode);
if (bufferToDelete) {
uprv_free(bufferToDelete);
}
@ -197,7 +197,7 @@ UnicodeString::foldCase(uint32_t options) {
UCaseMap csm=UCASEMAP_INITIALIZER;
csm.csp=ucase_getSingleton();
csm.options=options;
return caseMap(&csm, ustrcase_internalFold);
return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
}
U_NAMESPACE_END

View file

@ -54,7 +54,7 @@ UnicodeString &
UnicodeString::toLower(const Locale &locale) {
UCaseMap csm=UCASEMAP_INITIALIZER;
setTempCaseMap(&csm, locale.getName());
return caseMap(&csm, ustrcase_internalToLower);
return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
}
UnicodeString &
@ -66,7 +66,7 @@ UnicodeString &
UnicodeString::toUpper(const Locale &locale) {
UCaseMap csm=UCASEMAP_INITIALIZER;
setTempCaseMap(&csm, locale.getName());
return caseMap(&csm, ustrcase_internalToUpper);
return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
}
U_NAMESPACE_END

View file

@ -22,22 +22,11 @@
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
#include "unicode/ubrk.h"
#include "unicode/unistr.h"
#include "unicode/ustring.h"
#include "cmemory.h"
#include "ustr_imp.h"
static int32_t U_CALLCONV
unistr_case_internalToTitle(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode) {
ubrk_setText(csm->iter, src, srcLength, pErrorCode);
return ustrcase_internalToTitle(csm, dest, destCapacity, src, srcLength, edits, pErrorCode);
}
/*
* Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
* Do this fast because it is called with every function call.
@ -80,8 +69,8 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t
return *this;
}
}
csm.iter=reinterpret_cast<UBreakIterator *>(bi);
caseMap(&csm, unistr_case_internalToTitle);
bi->setText(*this);
caseMap(&csm, bi, ustrcase_internalToTitle);
if(titleIter==NULL) {
delete bi;
}

View file

@ -114,7 +114,7 @@ uprv_loadPropsData(UErrorCode *errorCode);*/
struct UCaseMap {
const UCaseProps *csp;
#if !UCONFIG_NO_BREAK_ITERATION
UBreakIterator *iter; /* We adopt the iterator, so we own it. */
icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
#endif
char locale[32];
int32_t locCache;
@ -123,8 +123,16 @@ struct UCaseMap {
/*
 * Helper macros that let one set of case-mapping signatures and call sites
 * compile both with and without break iteration support.
 *
 * UCASEMAP_INITIALIZER            Aggregate initializer for struct UCaseMap. It has one
 *                                 fewer NULL when break iteration is configured out,
 *                                 because the iter member is then not part of the struct.
 * UCASEMAP_BREAK_ITERATOR_PARAM   Named "iter" parameter for a parameter list.
 * UCASEMAP_BREAK_ITERATOR_UNUSED  Same parameter without a name, for functions that
 *                                 accept but do not use the iterator.
 * UCASEMAP_BREAK_ITERATOR         Forwards the caller's "iter" as an argument.
 * UCASEMAP_BREAK_ITERATOR_NULL    Passes NULL in the iterator argument position.
 *
 * Each of the four BREAK_ITERATOR macros carries its own trailing comma, so it is
 * written directly after the preceding argument with no comma in the source text.
 * With UCONFIG_NO_BREAK_ITERATION they all expand to nothing.
 */
#if UCONFIG_NO_BREAK_ITERATION
# define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 }
# define UCASEMAP_BREAK_ITERATOR_PARAM
# define UCASEMAP_BREAK_ITERATOR_UNUSED
# define UCASEMAP_BREAK_ITERATOR
# define UCASEMAP_BREAK_ITERATOR_NULL
#else
# define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
# define UCASEMAP_BREAK_ITERATOR iter,
# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
#endif
U_CFUNC void
@ -132,51 +140,52 @@ ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(const UCaseMap *csm,
ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode);
UErrorCode &errorCode);
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(const UCaseMap *csm,
ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode);
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(const UCaseMap *csm,
icu::BreakIterator *iter,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode);
UErrorCode &errorCode);
#endif
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(const UCaseMap *csm,
ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode);
UErrorCode &errorCode);
/**
* Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
* Implements argument checking.
*/
U_CFUNC int32_t
ustrcase_map(const UCaseMap *csm,
ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
icu::Edits *edits,
UErrorCode *pErrorCode);
UErrorCode &errorCode);
/**
* Common string case mapping implementation for old-fashioned u_strToXyz() functions
@ -184,11 +193,11 @@ ustrcase_map(const UCaseMap *csm,
* Implements argument checking and internally works with an intermediate buffer if necessary.
*/
U_CFUNC int32_t
ustrcase_mapWithOverlap(const UCaseMap *csm,
ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
UErrorCode *pErrorCode);
UErrorCode &errorCode);
/**
* UTF-8 string case mapping function type, used by ucasemap_mapUTF8().

View file

@ -22,6 +22,7 @@
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
#include "unicode/localpointer.h"
#include "unicode/ubrk.h"
#include "unicode/ucasemap.h"
#include "cmemory.h"
@ -57,20 +58,51 @@ u_strToTitle(UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
UCaseMap csm=UCASEMAP_INITIALIZER;
setTempCaseMap(&csm, locale);
icu::LocalPointer<icu::BreakIterator> ownedIter;
icu::BreakIterator *iter;
if(titleIter!=NULL) {
ubrk_setText(csm.iter=titleIter, src, srcLength, pErrorCode);
iter=reinterpret_cast<icu::BreakIterator *>(titleIter);
} else {
csm.iter=ubrk_open(UBRK_WORD, csm.locale, src, srcLength, pErrorCode);
iter=icu::BreakIterator::createWordInstance(icu::Locale(csm.locale), *pErrorCode);
ownedIter.adoptInstead(iter);
}
int32_t length=ustrcase_mapWithOverlap(
&csm,
if(U_FAILURE(*pErrorCode)) {
return 0;
}
icu::UnicodeString s(srcLength<0, src, srcLength);
iter->setText(s);
return ustrcase_mapWithOverlap(
&csm, iter,
dest, destCapacity,
src, srcLength,
ustrcase_internalToTitle, pErrorCode);
if(titleIter==NULL && csm.iter!=NULL) {
ubrk_close(csm.iter);
ustrcase_internalToTitle, *pErrorCode);
}
U_CAPI int32_t U_EXPORT2
ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode) {
icu::LocalPointer<icu::BreakIterator> ownedIter;
if(iter==NULL) {
if(csm->iter!=NULL) {
iter=csm->iter->clone();
} else {
iter=icu::BreakIterator::createWordInstance(icu::Locale(csm->locale), errorCode);
}
ownedIter.adoptInsteadAndCheckErrorCode(iter, errorCode);
}
return length;
if(U_FAILURE(errorCode)) {
return 0;
}
icu::UnicodeString s(srcLength<0, src, srcLength);
iter->setText(s);
return ustrcase_map(
csm, iter,
dest, destCapacity,
src, srcLength,
ustrcase_internalToTitle, edits, errorCode);
}
U_CAPI int32_t U_EXPORT2
@ -78,16 +110,19 @@ ucasemap_toTitle(UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) {
if(csm->iter!=NULL) {
ubrk_setText(csm->iter, src, srcLength, pErrorCode);
} else {
csm->iter=ubrk_open(UBRK_WORD, csm->locale, src, srcLength, pErrorCode);
if(csm->iter==NULL) {
csm->iter=icu::BreakIterator::createWordInstance(icu::Locale(csm->locale), *pErrorCode);
}
if(U_FAILURE(*pErrorCode)) {
return 0;
}
icu::UnicodeString s(srcLength<0, src, srcLength);
csm->iter->setText(s);
return ustrcase_map(
csm,
csm, csm->iter,
dest, destCapacity,
src, srcLength,
ustrcase_internalToTitle, NULL, pErrorCode);
ustrcase_internalToTitle, NULL, *pErrorCode);
}
#endif // !UCONFIG_NO_BREAK_ITERATION

View file

@ -207,7 +207,7 @@ UBool Edits::growArray() {
return TRUE;
}
UBool Edits::setErrorCode(UErrorCode &outErrorCode) {
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
if (U_FAILURE(outErrorCode)) { return TRUE; }
if (U_SUCCESS(errorCode)) { return FALSE; }
outErrorCode = errorCode;
@ -374,6 +374,22 @@ UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
return FALSE;
}
namespace {
int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
Edits *edits, UErrorCode &errorCode) {
if (U_SUCCESS(errorCode)) {
if (destIndex > destCapacity) {
errorCode = U_BUFFER_OVERFLOW_ERROR;
} else if (edits != NULL) {
edits->copyErrorTo(errorCode);
}
}
return destIndex;
}
} // namespace
U_NAMESPACE_END
U_NAMESPACE_USE
@ -527,7 +543,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
const UChar *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit,
icu::Edits *edits,
UErrorCode *pErrorCode) {
UErrorCode &errorCode) {
int32_t locCache=csm->locCache;
/* case mapping loop */
@ -544,33 +560,26 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, edits);
if (destIndex < 0) {
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
if(destIndex>destCapacity) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
return destIndex;
}
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(const UCaseMap *csm,
ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode) {
if(U_FAILURE(*pErrorCode)) {
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return 0;
}
// Use the C++ abstract base class to minimize dependencies.
// TODO: Change UCaseMap.iter to store a BreakIterator directly.
BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
/* set up local variables */
int32_t locCache=csm->locCache;
UCaseContext csc=UCASECONTEXT_INITIALIZER;
@ -586,9 +595,9 @@ ustrcase_internalToTitle(const UCaseMap *csm,
int32_t idx;
if(isFirstIndex) {
isFirstIndex=FALSE;
idx=bi->first();
idx=iter->first();
} else {
idx=bi->next();
idx=iter->next();
}
if(idx==UBRK_DONE || idx>srcLength) {
idx=srcLength;
@ -632,7 +641,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+prev, titleStart-prev, edits);
if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
@ -647,7 +656,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
destIndex=appendResult(dest, destIndex, destCapacity, c, s,
titleLimit-titleStart, edits);
if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
@ -658,7 +667,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
(src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
if(edits!=NULL) {
@ -677,11 +686,11 @@ ustrcase_internalToTitle(const UCaseMap *csm,
dest+destIndex, destCapacity-destIndex,
src, &csc,
titleLimit, idx,
edits, pErrorCode);
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
*pErrorCode=U_ZERO_ERROR;
edits, errorCode);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
}
if(U_FAILURE(*pErrorCode)) {
if(U_FAILURE(errorCode)) {
return destIndex;
}
} else {
@ -689,7 +698,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+titleLimit, idx-titleLimit, edits);
if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
@ -700,10 +709,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
prev=idx;
}
if(destIndex>destCapacity) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
return destIndex;
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
#endif // !UCONFIG_NO_BREAK_ITERATION
@ -1191,7 +1197,7 @@ int32_t toUpper(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
Edits *edits,
UErrorCode *pErrorCode) {
UErrorCode &errorCode) {
int32_t locCache = UCASE_LOC_GREEK;
int32_t destIndex=0;
uint32_t state = 0;
@ -1310,7 +1316,7 @@ int32_t toUpper(const UCaseMap *csm,
--numYpogegrammeni;
}
if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
@ -1320,7 +1326,7 @@ int32_t toUpper(const UCaseMap *csm,
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
nextIndex - i, edits);
if (destIndex < 0) {
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
@ -1328,10 +1334,7 @@ int32_t toUpper(const UCaseMap *csm,
state = nextState;
}
if(destIndex>destCapacity) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
return destIndex;
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
} // namespace GreekUpper
@ -1340,47 +1343,49 @@ U_NAMESPACE_END
/* functions available in the common library (for unistr_case.cpp) */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(const UCaseMap *csm,
ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode) {
UErrorCode &errorCode) {
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
return _caseMap(
int32_t destIndex = _caseMap(
csm, ucase_toFullLower,
dest, destCapacity,
src, &csc, 0, srcLength,
edits, pErrorCode);
edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(const UCaseMap *csm,
ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode) {
UErrorCode &errorCode) {
int32_t locCache = csm->locCache;
if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, pErrorCode);
return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, errorCode);
}
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
return _caseMap(
int32_t destIndex = _caseMap(
csm, ucase_toFullUpper,
dest, destCapacity,
src, &csc, 0, srcLength,
edits, pErrorCode);
edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(const UCaseMap *csm,
ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode) {
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex = 0;
int32_t destIndex = 0;
@ -1393,29 +1398,25 @@ ustrcase_internalFold(const UCaseMap *csm,
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, edits);
if (destIndex < 0) {
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
// TODO: are these internal functions called where destIndex>destCapacity is not already checked? (see u_terminateUChars())
if (destIndex > destCapacity) {
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
}
return destIndex;
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
U_CFUNC int32_t
ustrcase_map(const UCaseMap *csm,
ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
icu::Edits *edits,
UErrorCode *pErrorCode) {
UErrorCode &errorCode) {
int32_t destLength;
/* check argument values */
if(U_FAILURE(*pErrorCode)) {
if(U_FAILURE(errorCode)) {
return 0;
}
if( destCapacity<0 ||
@ -1423,7 +1424,7 @@ ustrcase_map(const UCaseMap *csm,
src==NULL ||
srcLength<-1
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
@ -1437,27 +1438,28 @@ ustrcase_map(const UCaseMap *csm,
((src>=dest && src<(dest+destCapacity)) ||
(dest>=src && dest<(src+srcLength)))
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, edits, pErrorCode);
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
dest, destCapacity, src, srcLength, edits, errorCode);
return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
}
U_CFUNC int32_t
ustrcase_mapWithOverlap(const UCaseMap *csm,
ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
UErrorCode *pErrorCode) {
UErrorCode &errorCode) {
UChar buffer[300];
UChar *temp;
int32_t destLength;
/* check argument values */
if(U_FAILURE(*pErrorCode)) {
if(U_FAILURE(errorCode)) {
return 0;
}
if( destCapacity<0 ||
@ -1465,7 +1467,7 @@ ustrcase_mapWithOverlap(const UCaseMap *csm,
src==NULL ||
srcLength<-1
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
@ -1487,7 +1489,7 @@ ustrcase_mapWithOverlap(const UCaseMap *csm,
/* allocate a buffer */
temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
if(temp==NULL) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
errorCode=U_MEMORY_ALLOCATION_ERROR;
return 0;
}
}
@ -1495,10 +1497,11 @@ ustrcase_mapWithOverlap(const UCaseMap *csm,
temp=dest;
}
destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, NULL, pErrorCode);
destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
temp, destCapacity, src, srcLength, NULL, errorCode);
if(temp!=dest) {
/* copy the result string to the destination buffer */
if (U_SUCCESS(*pErrorCode) && 0 < destLength && destLength <= destCapacity) {
if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) {
u_memmove(dest, temp, destLength);
}
if(temp!=buffer) {
@ -1506,7 +1509,7 @@ ustrcase_mapWithOverlap(const UCaseMap *csm,
}
}
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
}
/* public API functions */
@ -1520,10 +1523,10 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
csm.csp=ucase_getSingleton();
csm.options=options;
return ustrcase_mapWithOverlap(
&csm,
&csm, UCASEMAP_BREAK_ITERATOR_NULL
dest, destCapacity,
src, srcLength,
ustrcase_internalFold, pErrorCode);
ustrcase_internalFold, *pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@ -1531,12 +1534,12 @@ ucasemap_toLowerWithEdits(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode) {
UErrorCode &errorCode) {
return ustrcase_map(
csm,
csm, UCASEMAP_BREAK_ITERATOR_NULL
dest, destCapacity,
src, srcLength,
ustrcase_internalToLower, edits, pErrorCode);
ustrcase_internalToLower, edits, errorCode);
}
U_CAPI int32_t U_EXPORT2
@ -1544,12 +1547,25 @@ ucasemap_toUpperWithEdits(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode *pErrorCode) {
UErrorCode &errorCode) {
return ustrcase_map(
csm,
csm, UCASEMAP_BREAK_ITERATOR_NULL
dest, destCapacity,
src, srcLength,
ustrcase_internalToUpper, edits, pErrorCode);
ustrcase_internalToUpper, edits, errorCode);
}
/*
 * Case-folds a UTF-16 string and optionally records edits.
 * Delegates to ustrcase_map() with ustrcase_internalFold as the mapper.
 * No break iterator is supplied: the iterator argument position is filled
 * by UCASEMAP_BREAK_ITERATOR_NULL.
 */
U_CAPI int32_t U_EXPORT2
ucasemap_foldCaseWithEdits(const UCaseMap *csm,
                           UChar *dest, int32_t destCapacity,
                           const UChar *src, int32_t srcLength,
                           icu::Edits *edits,
                           UErrorCode &errorCode) {
    int32_t foldedLength = ustrcase_map(
        csm, UCASEMAP_BREAK_ITERATOR_NULL
        dest, destCapacity,
        src, srcLength,
        ustrcase_internalFold, edits, errorCode);
    return foldedLength;
}
/* case-insensitive string comparisons -------------------------------------- */

View file

@ -91,10 +91,10 @@ u_strToLower(UChar *dest, int32_t destCapacity,
UCaseMap csm=UCASEMAP_INITIALIZER;
setTempCaseMap(&csm, locale);
return ustrcase_mapWithOverlap(
&csm,
&csm, UCASEMAP_BREAK_ITERATOR_NULL
dest, destCapacity,
src, srcLength,
ustrcase_internalToLower, pErrorCode);
ustrcase_internalToLower, *pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@ -105,8 +105,8 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
UCaseMap csm=UCASEMAP_INITIALIZER;
setTempCaseMap(&csm, locale);
return ustrcase_mapWithOverlap(
&csm,
&csm, UCASEMAP_BREAK_ITERATOR_NULL
dest, destCapacity,
src, srcLength,
ustrcase_internalToUpper, pErrorCode);
ustrcase_internalToUpper, *pErrorCode);
}