diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp index 45ac1757e82..ef8e80e0557 100644 --- a/icu4c/source/common/ucasemap.cpp +++ b/icu4c/source/common/ucasemap.cpp @@ -37,16 +37,28 @@ U_NAMESPACE_USE -using icu::internal::CaseMapFriend; - /* UCaseMap service object -------------------------------------------------- */ +UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) : +#if !UCONFIG_NO_BREAK_ITERATION + iter(NULL), +#endif + locCache(UCASE_LOC_UNKNOWN), options(opts) { + ucasemap_setLocale(this, localeID, pErrorCode); +} + +UCaseMap::~UCaseMap() { +#if !UCONFIG_NO_BREAK_ITERATION + delete iter; +#endif +} + U_CAPI UCaseMap * U_EXPORT2 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) { if(U_FAILURE(*pErrorCode)) { return NULL; } - CaseMap *csm = new CaseMap(locale, options, *pErrorCode); + UCaseMap *csm = new UCaseMap(locale, options, pErrorCode); if(csm==NULL) { *pErrorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; @@ -54,24 +66,22 @@ ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) { delete csm; return NULL; } - return CaseMapFriend::toUCaseMap(*csm); + return csm; } U_CAPI void U_EXPORT2 ucasemap_close(UCaseMap *csm) { - if(csm!=NULL) { - delete CaseMapFriend::fromUCaseMap(csm); - } + delete csm; } U_CAPI const char * U_EXPORT2 ucasemap_getLocale(const UCaseMap *csm) { - return CaseMapFriend::localeID(*CaseMapFriend::fromUCaseMap(csm)); + return csm->locale; } U_CAPI uint32_t U_EXPORT2 ucasemap_getOptions(const UCaseMap *csm) { - return CaseMapFriend::options(*CaseMapFriend::fromUCaseMap(csm)); + return csm->options; } U_CAPI void U_EXPORT2 @@ -79,7 +89,28 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { if(U_FAILURE(*pErrorCode)) { return; } - CaseMapFriend::setLocale(*CaseMapFriend::fromUCaseMap(csm), locale, *pErrorCode); + if (locale != NULL && *locale == 0) { + csm->locale[0] = 0; + csm->locCache = UCASE_LOC_ROOT; + return; + } + + int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) { + *pErrorCode=U_ZERO_ERROR; + /* we only really need the language code for case mappings */ + length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); + } + if(length==sizeof(csm->locale)) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + if(U_SUCCESS(*pErrorCode)) { + csm->locCache=UCASE_LOC_UNKNOWN; + ucase_getCaseLocale(csm->locale, &csm->locCache); + } else { + csm->locale[0]=0; + csm->locCache = UCASE_LOC_ROOT; + } } U_CAPI void U_EXPORT2 @@ -87,7 +118,7 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) { if(U_FAILURE(*pErrorCode)) { return; } - CaseMapFriend::setOptions(*CaseMapFriend::fromUCaseMap(csm), options); + csm->options=options; } /* UTF-8 string case mappings ----------------------------------------------- */ @@ -233,7 +264,7 @@ utf8_caseContextIterator(void *context, int8_t dir) { * context [0..srcLength[ into account. */ static int32_t -_caseMap(const CaseMap &csm, UCaseMapFull *map, +_caseMap(int32_t caseLocale, uint32_t /* TODO: options */, UCaseMapFull *map, uint8_t *dest, int32_t destCapacity, const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, @@ -241,9 +272,6 @@ _caseMap(const CaseMap &csm, UCaseMapFull *map, const UChar *s = NULL; UChar32 c, c2 = 0; int32_t srcIndex, destIndex; - int32_t locCache; - - locCache = CaseMapFriend::caseLocale(csm); /* case mapping loop */ srcIndex=srcStart; @@ -261,7 +289,7 @@ _caseMap(const CaseMap &csm, UCaseMapFull *map, } continue; } - c=map(NULL, c, utf8_caseContextIterator, csc, &s, NULL, &locCache); + c=map(NULL, c, utf8_caseContextIterator, csc, &s, NULL, &caseLocale); if((destIndex=0) { csc.cpStart=titleStart; csc.cpLimit=titleLimit; - c=ucase_toFullTitle(NULL, c, utf8_caseContextIterator, &csc, &s, NULL, &locCache); + c=ucase_toFullTitle(NULL, c, utf8_caseContextIterator, &csc, &s, NULL, &caseLocale); destIndex=appendResult(dest, destIndex, destCapacity, c, s); } else { // Malformed UTF-8. @@ -376,7 +404,7 @@ ucasemap_internalUTF8ToTitle(const CaseMap &csm, BreakIterator *iter, /* Special case Dutch IJ titlecasing */ if (titleStart+1 < idx && - locCache == UCASE_LOC_DUTCH && + caseLocale == UCASE_LOC_DUTCH && (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); @@ -384,11 +412,11 @@ ucasemap_internalUTF8ToTitle(const CaseMap &csm, BreakIterator *iter, } /* lowercase [titleLimit..index[ */ if(titleLimit=0) { const UChar *s; UChar32 c2 = 0; - c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &locCache); + c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &caseLocale); if((destIndexlocCache, csm->options, UCASEMAP_BREAK_ITERATOR_NULL (uint8_t *)dest, destCapacity, (const uint8_t *)src, srcLength, ucasemap_internalUTF8ToLower, pErrorCode); @@ -710,7 +736,7 @@ ucasemap_utf8ToUpper(const UCaseMap *csm, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { return ucasemap_mapUTF8( - *CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL + csm->locCache, csm->options, UCASEMAP_BREAK_ITERATOR_NULL (uint8_t *)dest, destCapacity, (const uint8_t *)src, srcLength, ucasemap_internalUTF8ToUpper, pErrorCode); @@ -722,7 +748,7 @@ ucasemap_utf8FoldCase(const UCaseMap *csm, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { return ucasemap_mapUTF8( - *CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL + UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL (uint8_t *)dest, destCapacity, (const uint8_t *)src, srcLength, ucasemap_internalUTF8Fold, pErrorCode); diff --git a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp index 592227681cd..908c1e3d333 100644 --- a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp +++ b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp @@ -30,12 +30,9 @@ U_NAMESPACE_USE -using icu::internal::CaseMapFriend; - U_CAPI const UBreakIterator * U_EXPORT2 ucasemap_getBreakIterator(const UCaseMap *csm) { - return reinterpret_cast( - CaseMapFriend::iter(*CaseMapFriend::fromUCaseMap(csm))); + return reinterpret_cast(csm->iter); } U_CAPI void U_EXPORT2 @@ -43,31 +40,29 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode if(U_FAILURE(*pErrorCode)) { return; } - CaseMapFriend::adoptIter(*CaseMapFriend::fromUCaseMap(csm), - reinterpret_cast(iterToAdopt)); + delete csm->iter; + csm->iter=reinterpret_cast(iterToAdopt); } U_CAPI int32_t U_EXPORT2 -ucasemap_utf8ToTitle(UCaseMap *ucsm, +ucasemap_utf8ToTitle(UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { if (U_FAILURE(*pErrorCode)) { return 0; } - CaseMap &csm = *CaseMapFriend::fromUCaseMap(ucsm); UText utext=UTEXT_INITIALIZER; utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); - if (CaseMapFriend::iter(csm) == NULL) { - CaseMapFriend::adoptIter( - csm, BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode)); + if(csm->iter==NULL) { + csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode); } if (U_FAILURE(*pErrorCode)) { return 0; } - CaseMapFriend::mutableIter(csm)->setText(&utext, *pErrorCode); - int32_t length=ucasemap_mapUTF8(csm, - CaseMapFriend::mutableIter(csm), + csm->iter->setText(&utext, *pErrorCode); + int32_t length=ucasemap_mapUTF8( + csm->locCache, csm->options, csm->iter, (uint8_t *)dest, destCapacity, (const uint8_t *)src, srcLength, ucasemap_internalUTF8ToTitle, pErrorCode); diff --git a/icu4c/source/common/unicode/ucasemap.h b/icu4c/source/common/unicode/ucasemap.h index b38c8ce9a93..45f4b070d1a 100644 --- a/icu4c/source/common/unicode/ucasemap.h +++ b/icu4c/source/common/unicode/ucasemap.h @@ -25,7 +25,6 @@ #include "unicode/localpointer.h" #if U_SHOW_CPLUSPLUS_API -#include "unicode/locid.h" #include "unicode/uobject.h" #endif // U_SHOW_CPLUSPLUS_API @@ -111,8 +110,8 @@ U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close); * Supports replacements, insertions, deletions in linear progression. * Does not support moving/reordering of text. * - * An Edits object tracks a separate UErrorCode, but ICU case mapping functions - * merge any such errors into their API's UErrorCode. + * An Edits object tracks a separate UErrorCode, but ICU string transformation functions + * (e.g., case mapping functions) merge any such errors into their API's UErrorCode. * * @draft ICU 59 */ @@ -124,7 +123,11 @@ public: */ Edits() : array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), - omit(FALSE), errorCode(U_ZERO_ERROR) {} + errorCode(U_ZERO_ERROR) {} + /** + * Destructor. + * @draft ICU 59 + */ ~Edits(); /** @@ -133,48 +136,22 @@ public: */ void reset(); - /** - * Controls whether the case mapping function is to write or omit - * characters that do not change. - * The complete result can be computed by applying just the changes - * to the original string. - * @see omitUnchanged - * @see writeUnchanged - * @draft ICU 59 - */ - Edits &setWriteUnchanged(UBool write) { - omit = !write; - return *this; - } - /** - * @return TRUE if the case mapping function is to omit characters that do not change. - * @see setWriteUnchanged - * @draft ICU 59 - */ - UBool omitUnchanged() const { return omit; } - /** - * @return TRUE if the case mapping function is to write characters that do not change. - * @see setWriteUnchanged - * @draft ICU 59 - */ - UBool writeUnchanged() const { return !omit; } - /** * Adds a record for an unchanged segment of text. - * Normally called from inside ICU case mapping functions, not user code. + * Normally called from inside ICU string transformation functions, not user code. * @draft ICU 59 */ void addUnchanged(int32_t unchangedLength); /** * Adds a record for a text replacement/insertion/deletion. - * Normally called from inside ICU case mapping functions, not user code. + * Normally called from inside ICU string transformation functions, not user code. * @draft ICU 59 */ void addReplace(int32_t oldLength, int32_t newLength); /** * Sets the UErrorCode if an error occurred while recording edits. * Preserves older error codes in the outErrorCode. - * Normally called from inside ICU case mapping functions, not user code. + * Normally called from inside ICU string transformation functions, not user code. * @return TRUE if U_FAILURE(outErrorCode) * @draft ICU 59 */ @@ -199,6 +176,17 @@ public: * @draft ICU 59 */ struct Iterator final : public UMemory { + /** + * Copy constructor. + * @draft ICU 59 + */ + Iterator(const Iterator &other) = default; + /** + * Assignment operator. + * @draft ICU 59 + */ + Iterator &operator=(const Iterator &other) = default; + /** * Advances to the next edit. * @return TRUE if there is another edit @@ -332,59 +320,36 @@ private: int32_t capacity; int32_t length; int32_t delta; - UBool omit; UErrorCode errorCode; uint16_t stackArray[STACK_CAPACITY]; }; -namespace internal { -/** @internal ICU implementation detail */ -class CaseMapFriend; -} // namespace internal - +/** + * Low-level C++ case mapping functions. + * + * @draft ICU 59 + */ class U_COMMON_API CaseMap final : public UMemory { public: /** - * Constructor for the root locale and options. - * Explicitly construct with Locale::getDefault() for the default locale. - * @draft ICU 59 - */ - inline CaseMap(uint32_t options, UErrorCode &errorCode); - /** - * Constructor for locale and options. - * @draft ICU 59 - */ - CaseMap(const Locale &locale, uint32_t options, UErrorCode &errorCode); - /** - * Constructor for locale ID and options. - * @draft ICU 59 - */ - CaseMap(const char *localeID, uint32_t options, UErrorCode &errorCode); - - /** - * Destructor. - * @draft ICU 59 - */ - ~CaseMap(); - -// TODO: reverse src & dest? C vs. C++ conventions - - /** - * Lowercases the characters in a UTF-16 string and optionally records edits. + * Lowercases a UTF-16 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). Can be NULL. + * and getting only changes (if any). + * This function calls edits->reset() first. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful - or in case of a buffer overflow, @@ -393,39 +358,44 @@ public: * @see u_strToLower * @draft ICU 59 */ - int32_t toLower(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - Edits *edits, - UErrorCode &errorCode) const; + static int32_t toLower( + const char *locale, uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); /** - * Uppercases the characters in a UTF-16 string and optionally records edits. + * Uppercases a UTF-16 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). Can be NULL. + * and getting only changes (if any). + * This function calls edits->reset() first. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful - or in case of a buffer overflow, * in which case it will be greater than destCapacity. * - * @see u_strToLower + * @see u_strToUpper * @draft ICU 59 */ - int32_t toUpper(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - Edits *edits, - UErrorCode &errorCode) const; + static int32_t toUpper( + const char *locale, uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); #if !UCONFIG_NO_BREAK_ITERATION @@ -437,48 +407,46 @@ public: * * Titlecasing uses a break iterator to find the first characters of words * that are to be titlecased. It titlecases those characters and lowercases - * all others. (This can be modified with ucasemap_setOptions().) - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. + * all others. (This can be modified with options bits.) * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, + * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. * @param iter A break iterator to find the first characters of words that are to be titlecased. - * It is set to the source string and used one or more times for iteration. + * It is set to the source string (setText()) + * and used one or more times for iteration (first() and next()). * If NULL, then a word break iterator for the locale is used * (or something equivalent). + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). Can be NULL. + * and getting only changes (if any). + * This function calls edits->reset() first. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful - or in case of a buffer overflow, * in which case it will be greater than destCapacity. * * @see u_strToTitle + * @see ucasemap_toTitle * @draft ICU 59 */ - int32_t toTitle(BreakIterator *iter, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - Edits *edits, - UErrorCode &errorCode) const; + static int32_t toTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); #endif // UCONFIG_NO_BREAK_ITERATION /** - * Case-folds the characters in a UTF-16 string and optionally records edits. + * Case-folds a UTF-16 string and optionally records edits. * * Case-folding is locale-independent and not context-sensitive, * but there is an option for whether to include or exclude mappings for dotted I @@ -487,57 +455,45 @@ public: * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, + * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). Can be NULL. + * and getting only changes (if any). + * This function calls edits->reset() first. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful - or in case of a buffer overflow, * in which case it will be greater than destCapacity. * * @see u_strFoldCase - * @see ucasemap_setOptions - * @see U_FOLD_CASE_DEFAULT - * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I * @draft ICU 59 */ - int32_t foldCase(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - Edits *edits, - UErrorCode &errorCode) const; + static int32_t foldCase( + uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); private: - friend class internal::CaseMapFriend; - + CaseMap() = delete; CaseMap(const CaseMap &other) = delete; CaseMap &operator=(const CaseMap &other) = delete; - - CaseMap(const Locale &loc, int32_t caseLoc, uint32_t opts, UErrorCode &errorCode); - - void setCaseLocale(const char *localeID); - void setLocale(const char *localeID, UErrorCode &errorCode); - - int32_t caseLocale; - uint32_t options; - Locale locale; -#if !UCONFIG_NO_BREAK_ITERATION - BreakIterator *iter; // owned; only set by old C-style API -#endif }; -CaseMap::CaseMap(uint32_t opts, UErrorCode & /*errorCode*/) : - caseLocale(/* UCASE_LOC_ROOT = */ 1), options(opts), locale(Locale::getRoot()) -#if !UCONFIG_NO_BREAK_ITERATION - , iter(NULL) -#endif - {} +/** + * Omit unchanged text when case-mapping with Edits. + * + * @draft ICU 59 + */ +#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000 #endif // U_HIDE_DRAFT_API diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index ed994dbd6b4..7dfd4d44bb0 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -59,7 +59,6 @@ U_NAMESPACE_BEGIN #if !UCONFIG_NO_BREAK_ITERATION class BreakIterator; // unicode/brkiter.h #endif -class CaseMap; class Edits; U_NAMESPACE_END @@ -72,7 +71,7 @@ U_NAMESPACE_END * @internal */ typedef int32_t U_CALLCONV -UStringCaseMapper(const icu::CaseMap &csm, +UStringCaseMapper(int32_t caseLocale, uint32_t options, #if !UCONFIG_NO_BREAK_ITERATION icu::BreakIterator *iter, #endif @@ -3596,7 +3595,7 @@ private: * as in ustr_imp.h for ustrcase_map(). */ UnicodeString & - caseMap(const CaseMap &csm, + caseMap(int32_t caseLocale, uint32_t options, #if !UCONFIG_NO_BREAK_ITERATION BreakIterator *iter, #endif diff --git a/icu4c/source/common/unistr_case.cpp b/icu4c/source/common/unistr_case.cpp index a3b80b0666c..3b8d150b730 100644 --- a/icu4c/source/common/unistr_case.cpp +++ b/icu4c/source/common/unistr_case.cpp @@ -88,7 +88,7 @@ UnicodeString::doCaseCompare(int32_t start, //======================================== UnicodeString & -UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM +UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM UStringCaseMapper *stringCaseMapper) { if(isEmpty() || !isWritable()) { // nothing to do @@ -121,7 +121,7 @@ UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM buffer = fUnion.fStackFields.fBuffer; capacity = US_STACKBUF_SIZE; } - newLength = stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR buffer, capacity, oldArray, oldLength, NULL, errorCode); if (U_SUCCESS(errorCode)) { @@ -140,9 +140,8 @@ UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM // and often does not change its length. oldArray = getArrayStart(); Edits edits; - edits.setWriteUnchanged(FALSE); UChar replacementChars[200]; - stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + stringCaseMapper(caseLocale, options | UCASEMAP_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR replacementChars, UPRV_LENGTHOF(replacementChars), oldArray, oldLength, &edits, errorCode); if (U_SUCCESS(errorCode)) { @@ -178,7 +177,7 @@ UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM return *this; } errorCode = U_ZERO_ERROR; - newLength = stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR getArrayStart(), getCapacity(), oldArray, oldLength, NULL, errorCode); if (bufferToDelete) { @@ -194,9 +193,7 @@ UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM UnicodeString & UnicodeString::foldCase(uint32_t options) { - UErrorCode errorCode = U_ZERO_ERROR; - CaseMap csm(options, errorCode); - return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold); + return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold); } U_NAMESPACE_END diff --git a/icu4c/source/common/unistr_case_locale.cpp b/icu4c/source/common/unistr_case_locale.cpp index 2a08c9b3180..46ada884137 100644 --- a/icu4c/source/common/unistr_case_locale.cpp +++ b/icu4c/source/common/unistr_case_locale.cpp @@ -31,26 +31,26 @@ U_NAMESPACE_BEGIN UnicodeString & UnicodeString::toLower() { - return toLower(Locale::getDefault()); + return caseMap(ustrcase_getCaseLocale(NULL), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); } UnicodeString & UnicodeString::toLower(const Locale &locale) { - UErrorCode errorCode = U_ZERO_ERROR; - CaseMap csm(locale, 0, errorCode); - return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); + return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); } UnicodeString & UnicodeString::toUpper() { - return toUpper(Locale::getDefault()); + return caseMap(ustrcase_getCaseLocale(NULL), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); } UnicodeString & UnicodeString::toUpper(const Locale &locale) { - UErrorCode errorCode = U_ZERO_ERROR; - CaseMap csm(locale, 0, errorCode); - return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); + return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); } U_NAMESPACE_END diff --git a/icu4c/source/common/unistr_titlecase_brkiter.cpp b/icu4c/source/common/unistr_titlecase_brkiter.cpp index 24f60c1e289..d04233c3497 100644 --- a/icu4c/source/common/unistr_titlecase_brkiter.cpp +++ b/icu4c/source/common/unistr_titlecase_brkiter.cpp @@ -41,10 +41,9 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { UnicodeString & UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) { - UErrorCode errorCode = U_ZERO_ERROR; - CaseMap csm(locale, options, errorCode); BreakIterator *bi=titleIter; if(bi==NULL) { + UErrorCode errorCode=U_ZERO_ERROR; bi=BreakIterator::createWordInstance(locale, errorCode); if(U_FAILURE(errorCode)) { setToBogus(); @@ -52,7 +51,7 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t } } bi->setText(*this); - caseMap(csm, bi, ustrcase_internalToTitle); + caseMap(ustrcase_getCaseLocale(locale.getBaseName()), options, bi, ustrcase_internalToTitle); if(titleIter==NULL) { delete bi; } diff --git a/icu4c/source/common/ustr_imp.h b/icu4c/source/common/ustr_imp.h index b6d160856fe..b1382ff7c7b 100644 --- a/icu4c/source/common/ustr_imp.h +++ b/icu4c/source/common/ustr_imp.h @@ -104,7 +104,7 @@ uprv_loadPropsData(UErrorCode *errorCode);*/ #ifdef __cplusplus // TODO: Consider moving these case mapping definitions -// into a new internal header like casemap_imp.h. +// into a new internal header like ucasemap_imp.h. #include "unicode/unistr.h" // for UStringCaseMapper @@ -113,39 +113,25 @@ uprv_loadPropsData(UErrorCode *errorCode);*/ * ustring.h/ustrcase.c and UnicodeString case mapping functions. */ -/** Avoid public @internal CaseMap methods. Define only one CaseMap friend. */ -class icu::internal::CaseMapFriend final /* all static */ { -public: - static UCaseMap *toUCaseMap(icu::CaseMap &csm) { - return reinterpret_cast(&csm); - } - - static const icu::CaseMap *fromUCaseMap(const UCaseMap *csm) { - return reinterpret_cast(csm); - } - static icu::CaseMap *fromUCaseMap(UCaseMap *csm) { - return reinterpret_cast(csm); +struct UCaseMap : public icu::UMemory { + /** Implements most of ucasemap_open(). */ + UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode); + /** Root locale. */ + UCaseMap(uint32_t opts) : +#if !UCONFIG_NO_BREAK_ITERATION + iter(NULL), +#endif + locCache(/* UCASE_LOC_ROOT= */ 1), options(opts) { + locale[0] = 0; } + ~UCaseMap(); #if !UCONFIG_NO_BREAK_ITERATION - static const icu::BreakIterator *iter(const icu::CaseMap &csm) { return csm.iter; } - static icu::BreakIterator *mutableIter(icu::CaseMap &csm) { return csm.iter; } - static void adoptIter(icu::CaseMap &csm, icu::BreakIterator *iter); + icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */ #endif - - static const icu::Locale &locale(const icu::CaseMap &csm) { return csm.locale; } - static const char *localeID(const icu::CaseMap &csm) { return csm.locale.getName(); } - static void setLocale(icu::CaseMap &csm, const char *localeID, UErrorCode &errorCode) { - csm.setLocale(localeID, errorCode); - } - - static int32_t caseLocale(const icu::CaseMap &csm) { return csm.caseLocale; } - - static uint32_t options(const icu::CaseMap &csm) { return csm.options; } - static void setOptions(icu::CaseMap &csm, uint32_t options) { csm.options = options; } - -private: - CaseMapFriend() = delete; + char locale[32]; + int32_t locCache; + uint32_t options; }; #if UCONFIG_NO_BREAK_ITERATION @@ -162,9 +148,13 @@ private: # define UCASEMAP_BREAK_ITERATOR_NULL NULL, #endif +U_CFUNC int32_t +ustrcase_getCaseLocale(const char *locale); + +// TODO: swap src / dest if approved for new public api /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToLower(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -172,7 +162,7 @@ ustrcase_internalToLower(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToUpper(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -182,7 +172,7 @@ ustrcase_internalToUpper(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToTitle(const icu::CaseMap &csm, +ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, icu::BreakIterator *iter, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, @@ -193,7 +183,7 @@ ustrcase_internalToTitle(const icu::CaseMap &csm, /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalFold(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -204,7 +194,7 @@ ustrcase_internalFold(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM * Implements argument checking. */ U_CFUNC int32_t -ustrcase_map(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, @@ -217,7 +207,7 @@ ustrcase_map(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM * Implements argument checking and internally works with an intermediate buffer if necessary. */ U_CFUNC int32_t -ustrcase_mapWithOverlap(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, @@ -231,7 +221,7 @@ ustrcase_mapWithOverlap(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM * src and dest must not overlap. */ typedef int32_t U_CALLCONV -UTF8CaseMapper(const icu::CaseMap &csm, +UTF8CaseMapper(int32_t caseLocale, uint32_t options, #if !UCONFIG_NO_BREAK_ITERATION icu::BreakIterator *iter, #endif @@ -243,7 +233,7 @@ UTF8CaseMapper(const icu::CaseMap &csm, /** Implements UTF8CaseMapper. */ U_CFUNC int32_t U_CALLCONV -ucasemap_internalUTF8ToTitle(const icu::CaseMap &csm, +ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options, icu::BreakIterator *iter, uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength, @@ -256,7 +246,7 @@ ucasemap_internalUTF8ToTitle(const icu::CaseMap &csm, * for UTF-8 string case mapping as a common function. */ U_CFUNC int32_t -ucasemap_mapUTF8(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM +ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength, UTF8CaseMapper *stringCaseMapper, diff --git a/icu4c/source/common/ustr_titlecase_brkiter.cpp b/icu4c/source/common/ustr_titlecase_brkiter.cpp index e70c863ee24..695ea0be06a 100644 --- a/icu4c/source/common/ustr_titlecase_brkiter.cpp +++ b/icu4c/source/common/ustr_titlecase_brkiter.cpp @@ -31,15 +31,8 @@ U_NAMESPACE_USE -using icu::internal::CaseMapFriend; - // TODO: create casemap.cpp -void icu::internal::CaseMapFriend::adoptIter(CaseMap &csm, BreakIterator *iter) { - delete csm.iter; - csm.iter = iter; -} - /* functions available in the common library (for unistr_case.cpp) */ /* public API functions */ @@ -50,13 +43,13 @@ u_strToTitle(UChar *dest, int32_t destCapacity, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode) { - CaseMap csm(locale, 0, *pErrorCode); + LocalPointer ownedIter; BreakIterator *iter; if(titleIter!=NULL) { iter=reinterpret_cast(titleIter); } else { - iter=BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode); - CaseMapFriend::adoptIter(csm, iter); + iter=BreakIterator::createWordInstance(Locale(locale), *pErrorCode); + ownedIter.adoptInstead(iter); } if(U_FAILURE(*pErrorCode)) { return 0; @@ -64,7 +57,7 @@ u_strToTitle(UChar *dest, int32_t destCapacity, UnicodeString s(srcLength<0, src, srcLength); iter->setText(s); return ustrcase_mapWithOverlap( - csm, iter, + ustrcase_getCaseLocale(locale), 0, iter, dest, destCapacity, src, srcLength, ustrcase_internalToTitle, *pErrorCode); @@ -72,27 +65,23 @@ u_strToTitle(UChar *dest, int32_t destCapacity, U_NAMESPACE_BEGIN -int32_t CaseMap::toTitle(BreakIterator *it, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - Edits *edits, - UErrorCode &errorCode) const { +int32_t CaseMap::toTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { LocalPointer ownedIter; - if(it==NULL) { - if(iter!=NULL) { - it=iter->clone(); - } else { - it=BreakIterator::createWordInstance(locale, errorCode); - } - ownedIter.adoptInsteadAndCheckErrorCode(it, errorCode); + if(iter==NULL) { + iter=BreakIterator::createWordInstance(Locale(locale), errorCode); + ownedIter.adoptInstead(iter); } if(U_FAILURE(errorCode)) { return 0; } UnicodeString s(srcLength<0, src, srcLength); - it->setText(s); + iter->setText(s); return ustrcase_map( - *this, it, + ustrcase_getCaseLocale(locale), options, iter, dest, destCapacity, src, srcLength, ustrcase_internalToTitle, edits, errorCode); @@ -101,25 +90,23 @@ int32_t CaseMap::toTitle(BreakIterator *it, U_NAMESPACE_END U_CAPI int32_t U_EXPORT2 -ucasemap_toTitle(UCaseMap *ucsm, +ucasemap_toTitle(UCaseMap *csm, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode) { if (U_FAILURE(*pErrorCode)) { return 0; } - CaseMap &csm = *CaseMapFriend::fromUCaseMap(ucsm); - if (CaseMapFriend::iter(csm) == NULL) { - CaseMapFriend::adoptIter( - csm, BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode)); + if (csm->iter == NULL) { + csm->iter = BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode); } if (U_FAILURE(*pErrorCode)) { return 0; } UnicodeString s(srcLength<0, src, srcLength); - CaseMapFriend::mutableIter(csm)->setText(s); + csm->iter->setText(s); return ustrcase_map( - csm, CaseMapFriend::mutableIter(csm), + csm->locCache, csm->options, csm->iter, dest, destCapacity, src, srcLength, ustrcase_internalToTitle, NULL, *pErrorCode); diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index 1b0424e14eb..15a1fc711e3 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -32,10 +32,6 @@ #include "ustr_imp.h" #include "uassert.h" -U_NAMESPACE_USE - -using icu::internal::CaseMapFriend; - U_NAMESPACE_BEGIN namespace { @@ -404,7 +400,7 @@ U_NAMESPACE_USE static inline int32_t appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, int32_t result, const UChar *s, - int32_t cpLength, icu::Edits *edits) { + int32_t cpLength, uint32_t options, icu::Edits *edits) { UChar32 c; int32_t length; @@ -413,7 +409,7 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, /* (not) original code point */ if(edits!=NULL) { edits->addUnchanged(cpLength); - if(edits->omitUnchanged()) { + if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) { return destIndex; } } @@ -486,11 +482,11 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { static inline int32_t appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity, - const UChar *s, int32_t length, icu::Edits *edits) { + const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { if(length>0) { if(edits!=NULL) { edits->addUnchanged(length); - if(edits->omitUnchanged()) { + if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) { return destIndex; } } @@ -542,14 +538,12 @@ utf16_caseContextIterator(void *context, int8_t dir) { * context [0..srcLength[ into account. */ static int32_t -_caseMap(const CaseMap &csm, UCaseMapFull *map, +_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, UChar *dest, int32_t destCapacity, const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, icu::Edits *edits, UErrorCode &errorCode) { - int32_t locCache = CaseMapFriend::caseLocale(csm); - /* case mapping loop */ int32_t srcIndex=srcStart; int32_t destIndex=0; @@ -560,9 +554,9 @@ _caseMap(const CaseMap &csm, UCaseMapFull *map, U16_NEXT(src, srcIndex, srcLimit, c); csc->cpLimit=srcIndex; const UChar *s; - c=map(NULL, c, utf16_caseContextIterator, csc, &s, NULL, &locCache); + c=map(NULL, c, utf16_caseContextIterator, csc, &s, NULL, &caseLocale); destIndex = appendResult(dest, destIndex, destCapacity, c, s, - srcIndex - cpStart, edits); + srcIndex - cpStart, options, edits); if (destIndex < 0) { errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return 0; @@ -575,7 +569,7 @@ _caseMap(const CaseMap &csm, UCaseMapFull *map, #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC int32_t U_CALLCONV -ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter, +ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -585,7 +579,6 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter, } /* set up local variables */ - int32_t locCache=CaseMapFriend::caseLocale(csm); UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; @@ -626,7 +619,7 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter, int32_t titleLimit=prev; UChar32 c; U16_NEXT(src, titleLimit, idx, c); - if((CaseMapFriend::options(csm)&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) { + if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) { /* Adjust the titlecasing index (titleStart) to the next cased character. */ for(;;) { titleStart=titleLimit; @@ -643,7 +636,7 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter, } } destIndex=appendUnchanged(dest, destIndex, destCapacity, - src+prev, titleStart-prev, edits); + src+prev, titleStart-prev, options, edits); if(destIndex<0) { errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; @@ -656,9 +649,9 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter, csc.cpLimit=titleLimit; const UChar *s; c=ucase_toFullTitle(NULL, c, utf16_caseContextIterator, &csc, &s, - NULL, &locCache); + NULL, &caseLocale); destIndex=appendResult(dest, destIndex, destCapacity, c, s, - titleLimit-titleStart, edits); + titleLimit-titleStart, options, edits); if(destIndex<0) { errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; @@ -666,7 +659,7 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter, /* Special case Dutch IJ titlecasing */ if (titleStart+1 < idx && - locCache == UCASE_LOC_DUTCH && + caseLocale == UCASE_LOC_DUTCH && (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); @@ -682,11 +675,11 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter, /* lowercase [titleLimit..index[ */ if(titleLimitaddUnchanged(oldLength); } // Write unchanged text? - change = edits->writeUnchanged(); + change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0; } } @@ -1326,9 +1318,9 @@ int32_t toUpper(const CaseMap & /* unused csm */, } } else { const UChar *s; - c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &locCache); + c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &caseLocale); destIndex = appendResult(dest, destIndex, destCapacity, c, s, - nextIndex - i, edits); + nextIndex - i, options, edits); if (destIndex < 0) { errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return 0; @@ -1347,7 +1339,7 @@ U_NAMESPACE_END /* functions available in the common library (for unistr_case.cpp) */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED +ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -1356,7 +1348,7 @@ ustrcase_internalToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED csc.p=(void *)src; csc.limit=srcLength; int32_t destIndex = _caseMap( - csm, ucase_toFullLower, + caseLocale, options, ucase_toFullLower, dest, destCapacity, src, &csc, 0, srcLength, edits, errorCode); @@ -1364,20 +1356,19 @@ ustrcase_internalToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED } U_CFUNC int32_t U_CALLCONV -ustrcase_internalToUpper(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED +ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode) { - int32_t locCache = CaseMapFriend::caseLocale(csm); - if (locCache == UCASE_LOC_GREEK) { - return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, errorCode); + if (caseLocale == UCASE_LOC_GREEK) { + return GreekUpper::toUpper(caseLocale, options, dest, destCapacity, src, srcLength, edits, errorCode); } UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; int32_t destIndex = _caseMap( - csm, ucase_toFullUpper, + caseLocale, options, ucase_toFullUpper, dest, destCapacity, src, &csc, 0, srcLength, edits, errorCode); @@ -1385,7 +1376,7 @@ ustrcase_internalToUpper(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED } U_CFUNC int32_t U_CALLCONV -ustrcase_internalFold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED +ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -1398,9 +1389,9 @@ ustrcase_internalFold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED UChar32 c; U16_NEXT(src, srcIndex, srcLength, c); const UChar *s; - c = ucase_toFullFolding(NULL, c, &s, CaseMapFriend::options(csm)); + c = ucase_toFullFolding(NULL, c, &s, options); destIndex = appendResult(dest, destIndex, destCapacity, c, s, - srcIndex - cpStart, edits); + srcIndex - cpStart, options, edits); if (destIndex < 0) { errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return 0; @@ -1411,7 +1402,7 @@ ustrcase_internalFold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED } U_CFUNC int32_t -ustrcase_map(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, @@ -1446,13 +1437,16 @@ ustrcase_map(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM return 0; } - destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + if(edits!=NULL) { + edits->reset(); + } + destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR dest, destCapacity, src, srcLength, edits, errorCode); return u_terminateUChars(dest, destCapacity, destLength, &errorCode); } U_CFUNC int32_t -ustrcase_mapWithOverlap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, @@ -1501,7 +1495,7 @@ ustrcase_mapWithOverlap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM temp=dest; } - destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR temp, destCapacity, src, srcLength, NULL, errorCode); if(temp!=dest) { /* copy the result string to the destination buffer */ @@ -1524,7 +1518,7 @@ u_strFoldCase(UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode) { return ustrcase_mapWithOverlap( - CaseMap(options, *pErrorCode), UCASEMAP_BREAK_ITERATOR_NULL + UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, ustrcase_internalFold, *pErrorCode); @@ -1532,34 +1526,13 @@ u_strFoldCase(UChar *dest, int32_t destCapacity, U_NAMESPACE_BEGIN -int32_t CaseMap::toLower(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) const { +int32_t CaseMap::foldCase( + uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { return ustrcase_map( - *this, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ustrcase_internalToLower, edits, errorCode); -} - -int32_t CaseMap::toUpper(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) const { - return ustrcase_map( - *this, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ustrcase_internalToUpper, edits, errorCode); -} - -int32_t CaseMap::foldCase(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) const { - return ustrcase_map( - *this, UCASEMAP_BREAK_ITERATOR_NULL + UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, ustrcase_internalFold, edits, errorCode); diff --git a/icu4c/source/common/ustrcase_locale.cpp b/icu4c/source/common/ustrcase_locale.cpp index b71f5dcad53..7cf9301736e 100644 --- a/icu4c/source/common/ustrcase_locale.cpp +++ b/icu4c/source/common/ustrcase_locale.cpp @@ -26,63 +26,18 @@ #include "ucase.h" #include "ustr_imp.h" -U_NAMESPACE_BEGIN - -// TODO: new casemap_locale.cpp - -CaseMap::CaseMap(const Locale &loc, int32_t caseLoc, uint32_t opts, UErrorCode & /*errorCode*/) : - caseLocale(caseLoc), options(opts), locale(loc) -#if !UCONFIG_NO_BREAK_ITERATION - , iter(NULL) -#endif - { - if (caseLoc == 0) { // UCASE_LOC_UNKNOWN - setCaseLocale(locale.getBaseName()); +U_CFUNC int32_t +ustrcase_getCaseLocale(const char *locale) { + if (locale == NULL) { + locale = uloc_getDefault(); } -} - -CaseMap::CaseMap(const Locale &locale, uint32_t options, UErrorCode &errorCode) : - CaseMap(locale, /* UCASE_LOC_UNKNOWN = */ 0, options, errorCode) {} - -// small optimization for localeID=="", a little slower otherwise -CaseMap::CaseMap(const char *localeID, uint32_t options, UErrorCode &errorCode) : - CaseMap(Locale::getRoot(), /* UCASE_LOC_ROOT = */ 1, options, errorCode) { - if (localeID == NULL || *localeID != 0) { - setLocale(localeID, errorCode); // not root - } -} - -CaseMap::~CaseMap() { -#if !UCONFIG_NO_BREAK_ITERATION - delete iter; -#endif -} - -void CaseMap::setCaseLocale(const char *localeID) { - U_ASSERT(localeID != NULL); - caseLocale = UCASE_LOC_UNKNOWN; - ucase_getCaseLocale(localeID, &caseLocale); -} - -void CaseMap::setLocale(const char *localeID, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return; } - if (localeID == NULL) { - locale = Locale::getDefault(); - localeID = locale.getBaseName(); + if (*locale == 0) { + return UCASE_LOC_ROOT; } else { - locale = Locale(localeID); - if (locale.isBogus()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - localeID = ""; - } + return ucase_getCaseLocale(locale, NULL); } - setCaseLocale(localeID); } -U_NAMESPACE_END - -U_NAMESPACE_USE - /* public API functions */ U_CAPI int32_t U_EXPORT2 @@ -90,9 +45,8 @@ u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode) { - CaseMap csm(locale, 0, *pErrorCode); return ustrcase_mapWithOverlap( - csm, UCASEMAP_BREAK_ITERATOR_NULL + ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, ustrcase_internalToLower, *pErrorCode); @@ -103,10 +57,37 @@ u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode) { - CaseMap csm(locale, 0, *pErrorCode); return ustrcase_mapWithOverlap( - csm, UCASEMAP_BREAK_ITERATOR_NULL + ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, ustrcase_internalToUpper, *pErrorCode); } + +U_NAMESPACE_BEGIN + +int32_t CaseMap::toLower( + const char *locale, uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ustrcase_map( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalToLower, edits, errorCode); +} + +int32_t CaseMap::toUpper( + const char *locale, uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ustrcase_map( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalToUpper, edits, errorCode); +} + +U_NAMESPACE_END