mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-12410 make CaseMap functions static; change the function parameter order to (config, input, output); remove Edits::setWriteUnchanged(), writeUnchanged(), omitUnchanged() and add UCASEMAP_OMIT_UNCHANGED_TEXT options bit; case mapping functions call edits->reset() at start
X-SVN-Rev: 39585
This commit is contained in:
parent
fbd1e089fd
commit
8a55e577e5
11 changed files with 309 additions and 406 deletions
|
@ -37,16 +37,28 @@
|
|||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
using icu::internal::CaseMapFriend;
|
||||
|
||||
/* UCaseMap service object -------------------------------------------------- */
|
||||
|
||||
UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) :
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
iter(NULL),
|
||||
#endif
|
||||
locCache(UCASE_LOC_UNKNOWN), options(opts) {
|
||||
ucasemap_setLocale(this, localeID, pErrorCode);
|
||||
}
|
||||
|
||||
UCaseMap::~UCaseMap() {
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
delete iter;
|
||||
#endif
|
||||
}
|
||||
|
||||
U_CAPI UCaseMap * U_EXPORT2
|
||||
ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
CaseMap *csm = new CaseMap(locale, options, *pErrorCode);
|
||||
UCaseMap *csm = new UCaseMap(locale, options, pErrorCode);
|
||||
if(csm==NULL) {
|
||||
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
|
@ -54,24 +66,22 @@ ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
|
|||
delete csm;
|
||||
return NULL;
|
||||
}
|
||||
return CaseMapFriend::toUCaseMap(*csm);
|
||||
return csm;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucasemap_close(UCaseMap *csm) {
|
||||
if(csm!=NULL) {
|
||||
delete CaseMapFriend::fromUCaseMap(csm);
|
||||
}
|
||||
delete csm;
|
||||
}
|
||||
|
||||
U_CAPI const char * U_EXPORT2
|
||||
ucasemap_getLocale(const UCaseMap *csm) {
|
||||
return CaseMapFriend::localeID(*CaseMapFriend::fromUCaseMap(csm));
|
||||
return csm->locale;
|
||||
}
|
||||
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ucasemap_getOptions(const UCaseMap *csm) {
|
||||
return CaseMapFriend::options(*CaseMapFriend::fromUCaseMap(csm));
|
||||
return csm->options;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
|
@ -79,7 +89,28 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
|
|||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
CaseMapFriend::setLocale(*CaseMapFriend::fromUCaseMap(csm), locale, *pErrorCode);
|
||||
if (locale != NULL && *locale == 0) {
|
||||
csm->locale[0] = 0;
|
||||
csm->locCache = UCASE_LOC_ROOT;
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
/* we only really need the language code for case mappings */
|
||||
length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
|
||||
}
|
||||
if(length==sizeof(csm->locale)) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
csm->locCache=UCASE_LOC_UNKNOWN;
|
||||
ucase_getCaseLocale(csm->locale, &csm->locCache);
|
||||
} else {
|
||||
csm->locale[0]=0;
|
||||
csm->locCache = UCASE_LOC_ROOT;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
|
@ -87,7 +118,7 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
|
|||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
CaseMapFriend::setOptions(*CaseMapFriend::fromUCaseMap(csm), options);
|
||||
csm->options=options;
|
||||
}
|
||||
|
||||
/* UTF-8 string case mappings ----------------------------------------------- */
|
||||
|
@ -233,7 +264,7 @@ utf8_caseContextIterator(void *context, int8_t dir) {
|
|||
* context [0..srcLength[ into account.
|
||||
*/
|
||||
static int32_t
|
||||
_caseMap(const CaseMap &csm, UCaseMapFull *map,
|
||||
_caseMap(int32_t caseLocale, uint32_t /* TODO: options */, UCaseMapFull *map,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, UCaseContext *csc,
|
||||
int32_t srcStart, int32_t srcLimit,
|
||||
|
@ -241,9 +272,6 @@ _caseMap(const CaseMap &csm, UCaseMapFull *map,
|
|||
const UChar *s = NULL;
|
||||
UChar32 c, c2 = 0;
|
||||
int32_t srcIndex, destIndex;
|
||||
int32_t locCache;
|
||||
|
||||
locCache = CaseMapFriend::caseLocale(csm);
|
||||
|
||||
/* case mapping loop */
|
||||
srcIndex=srcStart;
|
||||
|
@ -261,7 +289,7 @@ _caseMap(const CaseMap &csm, UCaseMapFull *map,
|
|||
}
|
||||
continue;
|
||||
}
|
||||
c=map(NULL, c, utf8_caseContextIterator, csc, &s, NULL, &locCache);
|
||||
c=map(NULL, c, utf8_caseContextIterator, csc, &s, NULL, &caseLocale);
|
||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
|
||||
/* fast path version of appendResult() for ASCII results */
|
||||
dest[destIndex++]=(uint8_t)c2;
|
||||
|
@ -283,10 +311,11 @@ _caseMap(const CaseMap &csm, UCaseMapFull *map,
|
|||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8ToTitle(const CaseMap &csm, BreakIterator *iter,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
ucasemap_internalUTF8ToTitle(
|
||||
int32_t caseLocale, uint32_t options, BreakIterator *iter,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar *s;
|
||||
UChar32 c;
|
||||
int32_t prev, titleStart, titleLimit, idx, destIndex;
|
||||
|
@ -297,7 +326,6 @@ ucasemap_internalUTF8ToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
}
|
||||
|
||||
/* set up local variables */
|
||||
int32_t locCache=CaseMapFriend::caseLocale(csm);
|
||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
|
@ -335,7 +363,7 @@ ucasemap_internalUTF8ToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
/* find and copy uncased characters [prev..titleStart[ */
|
||||
titleStart=titleLimit=prev;
|
||||
U8_NEXT(src, titleLimit, idx, c);
|
||||
if((CaseMapFriend::options(csm)&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) {
|
||||
if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) {
|
||||
/* Adjust the titlecasing index (titleStart) to the next cased character. */
|
||||
for(;;) {
|
||||
titleStart=titleLimit;
|
||||
|
@ -363,7 +391,7 @@ ucasemap_internalUTF8ToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
if(c>=0) {
|
||||
csc.cpStart=titleStart;
|
||||
csc.cpLimit=titleLimit;
|
||||
c=ucase_toFullTitle(NULL, c, utf8_caseContextIterator, &csc, &s, NULL, &locCache);
|
||||
c=ucase_toFullTitle(NULL, c, utf8_caseContextIterator, &csc, &s, NULL, &caseLocale);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
} else {
|
||||
// Malformed UTF-8.
|
||||
|
@ -376,7 +404,7 @@ ucasemap_internalUTF8ToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
|
||||
/* Special case Dutch IJ titlecasing */
|
||||
if (titleStart+1 < idx &&
|
||||
locCache == UCASE_LOC_DUTCH &&
|
||||
caseLocale == UCASE_LOC_DUTCH &&
|
||||
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
|
||||
(src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
|
||||
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
|
||||
|
@ -384,11 +412,11 @@ ucasemap_internalUTF8ToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
}
|
||||
/* lowercase [titleLimit..index[ */
|
||||
if(titleLimit<idx) {
|
||||
if((CaseMapFriend::options(csm)&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
/* Normal operation: Lowercase the rest of the word. */
|
||||
destIndex+=
|
||||
_caseMap(
|
||||
csm, ucase_toFullLower,
|
||||
caseLocale, options, ucase_toFullLower,
|
||||
dest+destIndex, destCapacity-destIndex,
|
||||
src, &csc,
|
||||
titleLimit, idx,
|
||||
|
@ -442,11 +470,10 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i
|
|||
}
|
||||
|
||||
// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
|
||||
int32_t toUpper(const CaseMap & /* unused csm */,
|
||||
int32_t toUpper(int32_t caseLocale, uint32_t /* TODO: options */,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
int32_t locCache = UCASE_LOC_GREEK;
|
||||
int32_t destIndex=0;
|
||||
uint32_t state = 0;
|
||||
for (int32_t i = 0; i < srcLength;) {
|
||||
|
@ -540,7 +567,7 @@ int32_t toUpper(const CaseMap & /* unused csm */,
|
|||
} else if(c>=0) {
|
||||
const UChar *s;
|
||||
UChar32 c2 = 0;
|
||||
c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &locCache);
|
||||
c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &caseLocale);
|
||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
|
||||
/* fast path version of appendResult() for ASCII results */
|
||||
dest[destIndex++]=(uint8_t)c2;
|
||||
|
@ -573,7 +600,7 @@ int32_t toUpper(const CaseMap & /* unused csm */,
|
|||
U_NAMESPACE_END
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8ToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
|
@ -581,33 +608,32 @@ ucasemap_internalUTF8ToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
|||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
return _caseMap(
|
||||
csm, ucase_toFullLower,
|
||||
caseLocale, options, ucase_toFullLower,
|
||||
dest, destCapacity,
|
||||
src, &csc, 0, srcLength,
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8ToUpper(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
int32_t locCache = CaseMapFriend::caseLocale(csm);
|
||||
if (locCache == UCASE_LOC_GREEK) {
|
||||
return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode);
|
||||
if (caseLocale == UCASE_LOC_GREEK) {
|
||||
return GreekUpper::toUpper(caseLocale, options, dest, destCapacity, src, srcLength, pErrorCode);
|
||||
}
|
||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
return _caseMap(
|
||||
csm, ucase_toFullUpper,
|
||||
caseLocale, options, ucase_toFullUpper,
|
||||
dest, destCapacity,
|
||||
src, &csc, 0, srcLength,
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8Fold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
|
@ -631,7 +657,7 @@ ucasemap_internalUTF8Fold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
|||
}
|
||||
continue;
|
||||
}
|
||||
c=ucase_toFullFolding(NULL, c, &s, CaseMapFriend::options(csm));
|
||||
c=ucase_toFullFolding(NULL, c, &s, options);
|
||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
|
||||
/* fast path version of appendResult() for ASCII results */
|
||||
dest[destIndex++]=(uint8_t)c2;
|
||||
|
@ -651,7 +677,7 @@ ucasemap_internalUTF8Fold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
|||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
ucasemap_mapUTF8(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UTF8CaseMapper *stringCaseMapper,
|
||||
|
@ -685,7 +711,7 @@ ucasemap_mapUTF8(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
return 0;
|
||||
}
|
||||
|
||||
destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
|
||||
destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
|
||||
dest, destCapacity, src, srcLength, pErrorCode);
|
||||
return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
|
||||
}
|
||||
|
@ -698,7 +724,7 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
|
|||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
return ucasemap_mapUTF8(
|
||||
*CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL
|
||||
csm->locCache, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8ToLower, pErrorCode);
|
||||
|
@ -710,7 +736,7 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
|
|||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
return ucasemap_mapUTF8(
|
||||
*CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL
|
||||
csm->locCache, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8ToUpper, pErrorCode);
|
||||
|
@ -722,7 +748,7 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
|
|||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
return ucasemap_mapUTF8(
|
||||
*CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL
|
||||
UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8Fold, pErrorCode);
|
||||
|
|
|
@ -30,12 +30,9 @@
|
|||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
using icu::internal::CaseMapFriend;
|
||||
|
||||
U_CAPI const UBreakIterator * U_EXPORT2
|
||||
ucasemap_getBreakIterator(const UCaseMap *csm) {
|
||||
return reinterpret_cast<const UBreakIterator *>(
|
||||
CaseMapFriend::iter(*CaseMapFriend::fromUCaseMap(csm)));
|
||||
return reinterpret_cast<UBreakIterator *>(csm->iter);
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
|
@ -43,31 +40,29 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode
|
|||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
CaseMapFriend::adoptIter(*CaseMapFriend::fromUCaseMap(csm),
|
||||
reinterpret_cast<BreakIterator *>(iterToAdopt));
|
||||
delete csm->iter;
|
||||
csm->iter=reinterpret_cast<BreakIterator *>(iterToAdopt);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_utf8ToTitle(UCaseMap *ucsm,
|
||||
ucasemap_utf8ToTitle(UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
CaseMap &csm = *CaseMapFriend::fromUCaseMap(ucsm);
|
||||
UText utext=UTEXT_INITIALIZER;
|
||||
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
|
||||
if (CaseMapFriend::iter(csm) == NULL) {
|
||||
CaseMapFriend::adoptIter(
|
||||
csm, BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode));
|
||||
if(csm->iter==NULL) {
|
||||
csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode);
|
||||
}
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
CaseMapFriend::mutableIter(csm)->setText(&utext, *pErrorCode);
|
||||
int32_t length=ucasemap_mapUTF8(csm,
|
||||
CaseMapFriend::mutableIter(csm),
|
||||
csm->iter->setText(&utext, *pErrorCode);
|
||||
int32_t length=ucasemap_mapUTF8(
|
||||
csm->locCache, csm->options, csm->iter,
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8ToTitle, pErrorCode);
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
#include "unicode/localpointer.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uobject.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
|
@ -111,8 +110,8 @@ U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
|
|||
* Supports replacements, insertions, deletions in linear progression.
|
||||
* Does not support moving/reordering of text.
|
||||
*
|
||||
* An Edits object tracks a separate UErrorCode, but ICU case mapping functions
|
||||
* merge any such errors into their API's UErrorCode.
|
||||
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
|
||||
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
|
||||
*
|
||||
* @draft ICU 59
|
||||
*/
|
||||
|
@ -124,7 +123,11 @@ public:
|
|||
*/
|
||||
Edits() :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
|
||||
omit(FALSE), errorCode(U_ZERO_ERROR) {}
|
||||
errorCode(U_ZERO_ERROR) {}
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
~Edits();
|
||||
|
||||
/**
|
||||
|
@ -133,48 +136,22 @@ public:
|
|||
*/
|
||||
void reset();
|
||||
|
||||
/**
|
||||
* Controls whether the case mapping function is to write or omit
|
||||
* characters that do not change.
|
||||
* The complete result can be computed by applying just the changes
|
||||
* to the original string.
|
||||
* @see omitUnchanged
|
||||
* @see writeUnchanged
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Edits &setWriteUnchanged(UBool write) {
|
||||
omit = !write;
|
||||
return *this;
|
||||
}
|
||||
/**
|
||||
* @return TRUE if the case mapping function is to omit characters that do not change.
|
||||
* @see setWriteUnchanged
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool omitUnchanged() const { return omit; }
|
||||
/**
|
||||
* @return TRUE if the case mapping function is to write characters that do not change.
|
||||
* @see setWriteUnchanged
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool writeUnchanged() const { return !omit; }
|
||||
|
||||
/**
|
||||
* Adds a record for an unchanged segment of text.
|
||||
* Normally called from inside ICU case mapping functions, not user code.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void addUnchanged(int32_t unchangedLength);
|
||||
/**
|
||||
* Adds a record for a text replacement/insertion/deletion.
|
||||
* Normally called from inside ICU case mapping functions, not user code.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void addReplace(int32_t oldLength, int32_t newLength);
|
||||
/**
|
||||
* Sets the UErrorCode if an error occurred while recording edits.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
* Normally called from inside ICU case mapping functions, not user code.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @return TRUE if U_FAILURE(outErrorCode)
|
||||
* @draft ICU 59
|
||||
*/
|
||||
|
@ -199,6 +176,17 @@ public:
|
|||
* @draft ICU 59
|
||||
*/
|
||||
struct Iterator final : public UMemory {
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator(const Iterator &other) = default;
|
||||
/**
|
||||
* Assignment operator.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator &operator=(const Iterator &other) = default;
|
||||
|
||||
/**
|
||||
* Advances to the next edit.
|
||||
* @return TRUE if there is another edit
|
||||
|
@ -332,59 +320,36 @@ private:
|
|||
int32_t capacity;
|
||||
int32_t length;
|
||||
int32_t delta;
|
||||
UBool omit;
|
||||
UErrorCode errorCode;
|
||||
uint16_t stackArray[STACK_CAPACITY];
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
/** @internal ICU implementation detail */
|
||||
class CaseMapFriend;
|
||||
} // namespace internal
|
||||
|
||||
/**
|
||||
* Low-level C++ case mapping functions.
|
||||
*
|
||||
* @draft ICU 59
|
||||
*/
|
||||
class U_COMMON_API CaseMap final : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructor for the root locale and options.
|
||||
* Explicitly construct with Locale::getDefault() for the default locale.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
inline CaseMap(uint32_t options, UErrorCode &errorCode);
|
||||
/**
|
||||
* Constructor for locale and options.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
CaseMap(const Locale &locale, uint32_t options, UErrorCode &errorCode);
|
||||
/**
|
||||
* Constructor for locale ID and options.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
CaseMap(const char *localeID, uint32_t options, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
~CaseMap();
|
||||
|
||||
// TODO: reverse src & dest? C vs. C++ conventions
|
||||
|
||||
/**
|
||||
* Lowercases the characters in a UTF-16 string and optionally records edits.
|
||||
* Lowercases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
|
@ -393,39 +358,44 @@ public:
|
|||
* @see u_strToLower
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t toLower(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) const;
|
||||
static int32_t toLower(
|
||||
const char *locale, uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Uppercases the characters in a UTF-16 string and optionally records edits.
|
||||
* Uppercases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToLower
|
||||
* @see u_strToUpper
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t toUpper(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) const;
|
||||
static int32_t toUpper(
|
||||
const char *locale, uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
|
@ -437,48 +407,46 @@ public:
|
|||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with ucasemap_setOptions().)
|
||||
*
|
||||
* The titlecase break iterator can be provided to customize for arbitrary
|
||||
* styles, using rules and dictionaries beyond the standard iterators.
|
||||
* The standard titlecase iterator for the root locale implements the
|
||||
* algorithm of Unicode TR 21.
|
||||
*
|
||||
* This function uses only the setText(), first() and next() methods of the
|
||||
* provided break iterator.
|
||||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
|
||||
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string and used one or more times for iteration.
|
||||
* It is set to the source string (setText())
|
||||
* and used one or more times for iteration (first() and next()).
|
||||
* If NULL, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToTitle
|
||||
* @see ucasemap_toTitle
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t toTitle(BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) const;
|
||||
static int32_t toTitle(
|
||||
const char *locale, uint32_t options, BreakIterator *iter,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif // UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Case-folds the characters in a UTF-16 string and optionally records edits.
|
||||
* Case-folds a UTF-16 string and optionally records edits.
|
||||
*
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
|
@ -487,57 +455,45 @@ public:
|
|||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
|
||||
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strFoldCase
|
||||
* @see ucasemap_setOptions
|
||||
* @see U_FOLD_CASE_DEFAULT
|
||||
* @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t foldCase(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) const;
|
||||
static int32_t foldCase(
|
||||
uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
friend class internal::CaseMapFriend;
|
||||
|
||||
CaseMap() = delete;
|
||||
CaseMap(const CaseMap &other) = delete;
|
||||
CaseMap &operator=(const CaseMap &other) = delete;
|
||||
|
||||
CaseMap(const Locale &loc, int32_t caseLoc, uint32_t opts, UErrorCode &errorCode);
|
||||
|
||||
void setCaseLocale(const char *localeID);
|
||||
void setLocale(const char *localeID, UErrorCode &errorCode);
|
||||
|
||||
int32_t caseLocale;
|
||||
uint32_t options;
|
||||
Locale locale;
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
BreakIterator *iter; // owned; only set by old C-style API
|
||||
#endif
|
||||
};
|
||||
|
||||
CaseMap::CaseMap(uint32_t opts, UErrorCode & /*errorCode*/) :
|
||||
caseLocale(/* UCASE_LOC_ROOT = */ 1), options(opts), locale(Locale::getRoot())
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
, iter(NULL)
|
||||
#endif
|
||||
{}
|
||||
/**
|
||||
* Omit unchanged text when case-mapping with Edits.
|
||||
*
|
||||
* @draft ICU 59
|
||||
*/
|
||||
#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
|
|
|
@ -59,7 +59,6 @@ U_NAMESPACE_BEGIN
|
|||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
class BreakIterator; // unicode/brkiter.h
|
||||
#endif
|
||||
class CaseMap;
|
||||
class Edits;
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -72,7 +71,7 @@ U_NAMESPACE_END
|
|||
* @internal
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UStringCaseMapper(const icu::CaseMap &csm,
|
||||
UStringCaseMapper(int32_t caseLocale, uint32_t options,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
icu::BreakIterator *iter,
|
||||
#endif
|
||||
|
@ -3596,7 +3595,7 @@ private:
|
|||
* as in ustr_imp.h for ustrcase_map().
|
||||
*/
|
||||
UnicodeString &
|
||||
caseMap(const CaseMap &csm,
|
||||
caseMap(int32_t caseLocale, uint32_t options,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
BreakIterator *iter,
|
||||
#endif
|
||||
|
|
|
@ -88,7 +88,7 @@ UnicodeString::doCaseCompare(int32_t start,
|
|||
//========================================
|
||||
|
||||
UnicodeString &
|
||||
UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UStringCaseMapper *stringCaseMapper) {
|
||||
if(isEmpty() || !isWritable()) {
|
||||
// nothing to do
|
||||
|
@ -121,7 +121,7 @@ UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
buffer = fUnion.fStackFields.fBuffer;
|
||||
capacity = US_STACKBUF_SIZE;
|
||||
}
|
||||
newLength = stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
|
||||
newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
|
||||
buffer, capacity,
|
||||
oldArray, oldLength, NULL, errorCode);
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
|
@ -140,9 +140,8 @@ UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
// and often does not change its length.
|
||||
oldArray = getArrayStart();
|
||||
Edits edits;
|
||||
edits.setWriteUnchanged(FALSE);
|
||||
UChar replacementChars[200];
|
||||
stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
|
||||
stringCaseMapper(caseLocale, options | UCASEMAP_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR
|
||||
replacementChars, UPRV_LENGTHOF(replacementChars),
|
||||
oldArray, oldLength, &edits, errorCode);
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
|
@ -178,7 +177,7 @@ UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
return *this;
|
||||
}
|
||||
errorCode = U_ZERO_ERROR;
|
||||
newLength = stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
|
||||
newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
|
||||
getArrayStart(), getCapacity(),
|
||||
oldArray, oldLength, NULL, errorCode);
|
||||
if (bufferToDelete) {
|
||||
|
@ -194,9 +193,7 @@ UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
|
||||
UnicodeString &
|
||||
UnicodeString::foldCase(uint32_t options) {
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
CaseMap csm(options, errorCode);
|
||||
return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
|
||||
return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -31,26 +31,26 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
UnicodeString &
|
||||
UnicodeString::toLower() {
|
||||
return toLower(Locale::getDefault());
|
||||
return caseMap(ustrcase_getCaseLocale(NULL), 0,
|
||||
UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
|
||||
}
|
||||
|
||||
UnicodeString &
|
||||
UnicodeString::toLower(const Locale &locale) {
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
CaseMap csm(locale, 0, errorCode);
|
||||
return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
|
||||
return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0,
|
||||
UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
|
||||
}
|
||||
|
||||
UnicodeString &
|
||||
UnicodeString::toUpper() {
|
||||
return toUpper(Locale::getDefault());
|
||||
return caseMap(ustrcase_getCaseLocale(NULL), 0,
|
||||
UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
|
||||
}
|
||||
|
||||
UnicodeString &
|
||||
UnicodeString::toUpper(const Locale &locale) {
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
CaseMap csm(locale, 0, errorCode);
|
||||
return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
|
||||
return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0,
|
||||
UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -41,10 +41,9 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
|
|||
|
||||
UnicodeString &
|
||||
UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
CaseMap csm(locale, options, errorCode);
|
||||
BreakIterator *bi=titleIter;
|
||||
if(bi==NULL) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
bi=BreakIterator::createWordInstance(locale, errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
setToBogus();
|
||||
|
@ -52,7 +51,7 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t
|
|||
}
|
||||
}
|
||||
bi->setText(*this);
|
||||
caseMap(csm, bi, ustrcase_internalToTitle);
|
||||
caseMap(ustrcase_getCaseLocale(locale.getBaseName()), options, bi, ustrcase_internalToTitle);
|
||||
if(titleIter==NULL) {
|
||||
delete bi;
|
||||
}
|
||||
|
|
|
@ -104,7 +104,7 @@ uprv_loadPropsData(UErrorCode *errorCode);*/
|
|||
|
||||
#ifdef __cplusplus
|
||||
// TODO: Consider moving these case mapping definitions
|
||||
// into a new internal header like casemap_imp.h.
|
||||
// into a new internal header like ucasemap_imp.h.
|
||||
|
||||
#include "unicode/unistr.h" // for UStringCaseMapper
|
||||
|
||||
|
@ -113,39 +113,25 @@ uprv_loadPropsData(UErrorCode *errorCode);*/
|
|||
* ustring.h/ustrcase.c and UnicodeString case mapping functions.
|
||||
*/
|
||||
|
||||
/** Avoid public @internal CaseMap methods. Define only one CaseMap friend. */
|
||||
class icu::internal::CaseMapFriend final /* all static */ {
|
||||
public:
|
||||
static UCaseMap *toUCaseMap(icu::CaseMap &csm) {
|
||||
return reinterpret_cast<UCaseMap *>(&csm);
|
||||
}
|
||||
|
||||
static const icu::CaseMap *fromUCaseMap(const UCaseMap *csm) {
|
||||
return reinterpret_cast<const icu::CaseMap *>(csm);
|
||||
}
|
||||
static icu::CaseMap *fromUCaseMap(UCaseMap *csm) {
|
||||
return reinterpret_cast<icu::CaseMap *>(csm);
|
||||
struct UCaseMap : public icu::UMemory {
|
||||
/** Implements most of ucasemap_open(). */
|
||||
UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
|
||||
/** Root locale. */
|
||||
UCaseMap(uint32_t opts) :
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
iter(NULL),
|
||||
#endif
|
||||
locCache(/* UCASE_LOC_ROOT= */ 1), options(opts) {
|
||||
locale[0] = 0;
|
||||
}
|
||||
~UCaseMap();
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
static const icu::BreakIterator *iter(const icu::CaseMap &csm) { return csm.iter; }
|
||||
static icu::BreakIterator *mutableIter(icu::CaseMap &csm) { return csm.iter; }
|
||||
static void adoptIter(icu::CaseMap &csm, icu::BreakIterator *iter);
|
||||
icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
|
||||
#endif
|
||||
|
||||
static const icu::Locale &locale(const icu::CaseMap &csm) { return csm.locale; }
|
||||
static const char *localeID(const icu::CaseMap &csm) { return csm.locale.getName(); }
|
||||
static void setLocale(icu::CaseMap &csm, const char *localeID, UErrorCode &errorCode) {
|
||||
csm.setLocale(localeID, errorCode);
|
||||
}
|
||||
|
||||
static int32_t caseLocale(const icu::CaseMap &csm) { return csm.caseLocale; }
|
||||
|
||||
static uint32_t options(const icu::CaseMap &csm) { return csm.options; }
|
||||
static void setOptions(icu::CaseMap &csm, uint32_t options) { csm.options = options; }
|
||||
|
||||
private:
|
||||
CaseMapFriend() = delete;
|
||||
char locale[32];
|
||||
int32_t locCache;
|
||||
uint32_t options;
|
||||
};
|
||||
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
|
@ -162,9 +148,13 @@ private:
|
|||
# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
|
||||
#endif
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustrcase_getCaseLocale(const char *locale);
|
||||
|
||||
// TODO: swap src / dest if approved for new public api
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToLower(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
|
@ -172,7 +162,7 @@ ustrcase_internalToLower(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToUpper(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
|
@ -182,7 +172,7 @@ ustrcase_internalToUpper(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToTitle(const icu::CaseMap &csm,
|
||||
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
|
||||
icu::BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
|
@ -193,7 +183,7 @@ ustrcase_internalToTitle(const icu::CaseMap &csm,
|
|||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalFold(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
|
@ -204,7 +194,7 @@ ustrcase_internalFold(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
* Implements argument checking.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ustrcase_map(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
|
@ -217,7 +207,7 @@ ustrcase_map(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
* Implements argument checking and internally works with an intermediate buffer if necessary.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ustrcase_mapWithOverlap(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
|
@ -231,7 +221,7 @@ ustrcase_mapWithOverlap(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
* src and dest must not overlap.
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UTF8CaseMapper(const icu::CaseMap &csm,
|
||||
UTF8CaseMapper(int32_t caseLocale, uint32_t options,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
icu::BreakIterator *iter,
|
||||
#endif
|
||||
|
@ -243,7 +233,7 @@ UTF8CaseMapper(const icu::CaseMap &csm,
|
|||
|
||||
/** Implements UTF8CaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8ToTitle(const icu::CaseMap &csm,
|
||||
ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
|
||||
icu::BreakIterator *iter,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
|
@ -256,7 +246,7 @@ ucasemap_internalUTF8ToTitle(const icu::CaseMap &csm,
|
|||
* for UTF-8 string case mapping as a common function.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ucasemap_mapUTF8(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UTF8CaseMapper *stringCaseMapper,
|
||||
|
|
|
@ -31,15 +31,8 @@
|
|||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
using icu::internal::CaseMapFriend;
|
||||
|
||||
// TODO: create casemap.cpp
|
||||
|
||||
void icu::internal::CaseMapFriend::adoptIter(CaseMap &csm, BreakIterator *iter) {
|
||||
delete csm.iter;
|
||||
csm.iter = iter;
|
||||
}
|
||||
|
||||
/* functions available in the common library (for unistr_case.cpp) */
|
||||
|
||||
/* public API functions */
|
||||
|
@ -50,13 +43,13 @@ u_strToTitle(UChar *dest, int32_t destCapacity,
|
|||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
CaseMap csm(locale, 0, *pErrorCode);
|
||||
LocalPointer<BreakIterator> ownedIter;
|
||||
BreakIterator *iter;
|
||||
if(titleIter!=NULL) {
|
||||
iter=reinterpret_cast<BreakIterator *>(titleIter);
|
||||
} else {
|
||||
iter=BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode);
|
||||
CaseMapFriend::adoptIter(csm, iter);
|
||||
iter=BreakIterator::createWordInstance(Locale(locale), *pErrorCode);
|
||||
ownedIter.adoptInstead(iter);
|
||||
}
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
|
@ -64,7 +57,7 @@ u_strToTitle(UChar *dest, int32_t destCapacity,
|
|||
UnicodeString s(srcLength<0, src, srcLength);
|
||||
iter->setText(s);
|
||||
return ustrcase_mapWithOverlap(
|
||||
csm, iter,
|
||||
ustrcase_getCaseLocale(locale), 0, iter,
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToTitle, *pErrorCode);
|
||||
|
@ -72,27 +65,23 @@ u_strToTitle(UChar *dest, int32_t destCapacity,
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
int32_t CaseMap::toTitle(BreakIterator *it,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) const {
|
||||
int32_t CaseMap::toTitle(
|
||||
const char *locale, uint32_t options, BreakIterator *iter,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
LocalPointer<BreakIterator> ownedIter;
|
||||
if(it==NULL) {
|
||||
if(iter!=NULL) {
|
||||
it=iter->clone();
|
||||
} else {
|
||||
it=BreakIterator::createWordInstance(locale, errorCode);
|
||||
}
|
||||
ownedIter.adoptInsteadAndCheckErrorCode(it, errorCode);
|
||||
if(iter==NULL) {
|
||||
iter=BreakIterator::createWordInstance(Locale(locale), errorCode);
|
||||
ownedIter.adoptInstead(iter);
|
||||
}
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
UnicodeString s(srcLength<0, src, srcLength);
|
||||
it->setText(s);
|
||||
iter->setText(s);
|
||||
return ustrcase_map(
|
||||
*this, it,
|
||||
ustrcase_getCaseLocale(locale), options, iter,
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToTitle, edits, errorCode);
|
||||
|
@ -101,25 +90,23 @@ int32_t CaseMap::toTitle(BreakIterator *it,
|
|||
U_NAMESPACE_END
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_toTitle(UCaseMap *ucsm,
|
||||
ucasemap_toTitle(UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
CaseMap &csm = *CaseMapFriend::fromUCaseMap(ucsm);
|
||||
if (CaseMapFriend::iter(csm) == NULL) {
|
||||
CaseMapFriend::adoptIter(
|
||||
csm, BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode));
|
||||
if (csm->iter == NULL) {
|
||||
csm->iter = BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode);
|
||||
}
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
UnicodeString s(srcLength<0, src, srcLength);
|
||||
CaseMapFriend::mutableIter(csm)->setText(s);
|
||||
csm->iter->setText(s);
|
||||
return ustrcase_map(
|
||||
csm, CaseMapFriend::mutableIter(csm),
|
||||
csm->locCache, csm->options, csm->iter,
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToTitle, NULL, *pErrorCode);
|
||||
|
|
|
@ -32,10 +32,6 @@
|
|||
#include "ustr_imp.h"
|
||||
#include "uassert.h"
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
using icu::internal::CaseMapFriend;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
|
@ -404,7 +400,7 @@ U_NAMESPACE_USE
|
|||
static inline int32_t
|
||||
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
||||
int32_t result, const UChar *s,
|
||||
int32_t cpLength, icu::Edits *edits) {
|
||||
int32_t cpLength, uint32_t options, icu::Edits *edits) {
|
||||
UChar32 c;
|
||||
int32_t length;
|
||||
|
||||
|
@ -413,7 +409,7 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
|||
/* (not) original code point */
|
||||
if(edits!=NULL) {
|
||||
edits->addUnchanged(cpLength);
|
||||
if(edits->omitUnchanged()) {
|
||||
if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
}
|
||||
|
@ -486,11 +482,11 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
|
|||
|
||||
static inline int32_t
|
||||
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
||||
const UChar *s, int32_t length, icu::Edits *edits) {
|
||||
const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
|
||||
if(length>0) {
|
||||
if(edits!=NULL) {
|
||||
edits->addUnchanged(length);
|
||||
if(edits->omitUnchanged()) {
|
||||
if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
}
|
||||
|
@ -542,14 +538,12 @@ utf16_caseContextIterator(void *context, int8_t dir) {
|
|||
* context [0..srcLength[ into account.
|
||||
*/
|
||||
static int32_t
|
||||
_caseMap(const CaseMap &csm, UCaseMapFull *map,
|
||||
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, UCaseContext *csc,
|
||||
int32_t srcStart, int32_t srcLimit,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
int32_t locCache = CaseMapFriend::caseLocale(csm);
|
||||
|
||||
/* case mapping loop */
|
||||
int32_t srcIndex=srcStart;
|
||||
int32_t destIndex=0;
|
||||
|
@ -560,9 +554,9 @@ _caseMap(const CaseMap &csm, UCaseMapFull *map,
|
|||
U16_NEXT(src, srcIndex, srcLimit, c);
|
||||
csc->cpLimit=srcIndex;
|
||||
const UChar *s;
|
||||
c=map(NULL, c, utf16_caseContextIterator, csc, &s, NULL, &locCache);
|
||||
c=map(NULL, c, utf16_caseContextIterator, csc, &s, NULL, &caseLocale);
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
srcIndex - cpStart, edits);
|
||||
srcIndex - cpStart, options, edits);
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
|
@ -575,7 +569,7 @@ _caseMap(const CaseMap &csm, UCaseMapFull *map,
|
|||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter,
|
||||
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
|
@ -585,7 +579,6 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
}
|
||||
|
||||
/* set up local variables */
|
||||
int32_t locCache=CaseMapFriend::caseLocale(csm);
|
||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
|
@ -626,7 +619,7 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
int32_t titleLimit=prev;
|
||||
UChar32 c;
|
||||
U16_NEXT(src, titleLimit, idx, c);
|
||||
if((CaseMapFriend::options(csm)&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) {
|
||||
if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) {
|
||||
/* Adjust the titlecasing index (titleStart) to the next cased character. */
|
||||
for(;;) {
|
||||
titleStart=titleLimit;
|
||||
|
@ -643,7 +636,7 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
}
|
||||
}
|
||||
destIndex=appendUnchanged(dest, destIndex, destCapacity,
|
||||
src+prev, titleStart-prev, edits);
|
||||
src+prev, titleStart-prev, options, edits);
|
||||
if(destIndex<0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
|
@ -656,9 +649,9 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
csc.cpLimit=titleLimit;
|
||||
const UChar *s;
|
||||
c=ucase_toFullTitle(NULL, c, utf16_caseContextIterator, &csc, &s,
|
||||
NULL, &locCache);
|
||||
NULL, &caseLocale);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s,
|
||||
titleLimit-titleStart, edits);
|
||||
titleLimit-titleStart, options, edits);
|
||||
if(destIndex<0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
|
@ -666,7 +659,7 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
|
||||
/* Special case Dutch IJ titlecasing */
|
||||
if (titleStart+1 < idx &&
|
||||
locCache == UCASE_LOC_DUTCH &&
|
||||
caseLocale == UCASE_LOC_DUTCH &&
|
||||
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
|
||||
(src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
|
||||
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
|
||||
|
@ -682,11 +675,11 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
|
||||
/* lowercase [titleLimit..index[ */
|
||||
if(titleLimit<idx) {
|
||||
if((CaseMapFriend::options(csm)&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
/* Normal operation: Lowercase the rest of the word. */
|
||||
destIndex+=
|
||||
_caseMap(
|
||||
csm, ucase_toFullLower,
|
||||
caseLocale, options, ucase_toFullLower,
|
||||
dest+destIndex, destCapacity-destIndex,
|
||||
src, &csc,
|
||||
titleLimit, idx,
|
||||
|
@ -700,7 +693,7 @@ ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter,
|
|||
} else {
|
||||
/* Optionally just copy the rest of the word unchanged. */
|
||||
destIndex=appendUnchanged(dest, destIndex, destCapacity,
|
||||
src+titleLimit, idx-titleLimit, edits);
|
||||
src+titleLimit, idx-titleLimit, options, edits);
|
||||
if(destIndex<0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
|
@ -1197,12 +1190,11 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const UChar *s, int32_t i,
|
|||
* for each character.
|
||||
* TODO: Try to re-consolidate one way or another with the non-Greek function.
|
||||
*/
|
||||
int32_t toUpper(const CaseMap & /* unused csm */,
|
||||
int32_t toUpper(int32_t caseLocale, uint32_t options,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
int32_t locCache = UCASE_LOC_GREEK;
|
||||
int32_t destIndex=0;
|
||||
uint32_t state = 0;
|
||||
for (int32_t i = 0; i < srcLength;) {
|
||||
|
@ -1303,7 +1295,7 @@ int32_t toUpper(const CaseMap & /* unused csm */,
|
|||
edits->addUnchanged(oldLength);
|
||||
}
|
||||
// Write unchanged text?
|
||||
change = edits->writeUnchanged();
|
||||
change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1326,9 +1318,9 @@ int32_t toUpper(const CaseMap & /* unused csm */,
|
|||
}
|
||||
} else {
|
||||
const UChar *s;
|
||||
c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &locCache);
|
||||
c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &caseLocale);
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
nextIndex - i, edits);
|
||||
nextIndex - i, options, edits);
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
|
@ -1347,7 +1339,7 @@ U_NAMESPACE_END
|
|||
/* functions available in the common library (for unistr_case.cpp) */
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
|
@ -1356,7 +1348,7 @@ ustrcase_internalToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
|||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
int32_t destIndex = _caseMap(
|
||||
csm, ucase_toFullLower,
|
||||
caseLocale, options, ucase_toFullLower,
|
||||
dest, destCapacity,
|
||||
src, &csc, 0, srcLength,
|
||||
edits, errorCode);
|
||||
|
@ -1364,20 +1356,19 @@ ustrcase_internalToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
|||
}
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToUpper(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
int32_t locCache = CaseMapFriend::caseLocale(csm);
|
||||
if (locCache == UCASE_LOC_GREEK) {
|
||||
return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, errorCode);
|
||||
if (caseLocale == UCASE_LOC_GREEK) {
|
||||
return GreekUpper::toUpper(caseLocale, options, dest, destCapacity, src, srcLength, edits, errorCode);
|
||||
}
|
||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
int32_t destIndex = _caseMap(
|
||||
csm, ucase_toFullUpper,
|
||||
caseLocale, options, ucase_toFullUpper,
|
||||
dest, destCapacity,
|
||||
src, &csc, 0, srcLength,
|
||||
edits, errorCode);
|
||||
|
@ -1385,7 +1376,7 @@ ustrcase_internalToUpper(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
|||
}
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalFold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
|
@ -1398,9 +1389,9 @@ ustrcase_internalFold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
|||
UChar32 c;
|
||||
U16_NEXT(src, srcIndex, srcLength, c);
|
||||
const UChar *s;
|
||||
c = ucase_toFullFolding(NULL, c, &s, CaseMapFriend::options(csm));
|
||||
c = ucase_toFullFolding(NULL, c, &s, options);
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
srcIndex - cpStart, edits);
|
||||
srcIndex - cpStart, options, edits);
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
|
@ -1411,7 +1402,7 @@ ustrcase_internalFold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
|||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustrcase_map(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
|
@ -1446,13 +1437,16 @@ ustrcase_map(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
return 0;
|
||||
}
|
||||
|
||||
destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
|
||||
if(edits!=NULL) {
|
||||
edits->reset();
|
||||
}
|
||||
destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
|
||||
dest, destCapacity, src, srcLength, edits, errorCode);
|
||||
return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustrcase_mapWithOverlap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
|
@ -1501,7 +1495,7 @@ ustrcase_mapWithOverlap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
|||
temp=dest;
|
||||
}
|
||||
|
||||
destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
|
||||
destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
|
||||
temp, destCapacity, src, srcLength, NULL, errorCode);
|
||||
if(temp!=dest) {
|
||||
/* copy the result string to the destination buffer */
|
||||
|
@ -1524,7 +1518,7 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
|
|||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
return ustrcase_mapWithOverlap(
|
||||
CaseMap(options, *pErrorCode), UCASEMAP_BREAK_ITERATOR_NULL
|
||||
UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalFold, *pErrorCode);
|
||||
|
@ -1532,34 +1526,13 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
int32_t CaseMap::toLower(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) const {
|
||||
int32_t CaseMap::foldCase(
|
||||
uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
return ustrcase_map(
|
||||
*this, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToLower, edits, errorCode);
|
||||
}
|
||||
|
||||
int32_t CaseMap::toUpper(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) const {
|
||||
return ustrcase_map(
|
||||
*this, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToUpper, edits, errorCode);
|
||||
}
|
||||
|
||||
int32_t CaseMap::foldCase(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) const {
|
||||
return ustrcase_map(
|
||||
*this, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalFold, edits, errorCode);
|
||||
|
|
|
@ -26,63 +26,18 @@
|
|||
#include "ucase.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// TODO: new casemap_locale.cpp
|
||||
|
||||
CaseMap::CaseMap(const Locale &loc, int32_t caseLoc, uint32_t opts, UErrorCode & /*errorCode*/) :
|
||||
caseLocale(caseLoc), options(opts), locale(loc)
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
, iter(NULL)
|
||||
#endif
|
||||
{
|
||||
if (caseLoc == 0) { // UCASE_LOC_UNKNOWN
|
||||
setCaseLocale(locale.getBaseName());
|
||||
U_CFUNC int32_t
|
||||
ustrcase_getCaseLocale(const char *locale) {
|
||||
if (locale == NULL) {
|
||||
locale = uloc_getDefault();
|
||||
}
|
||||
}
|
||||
|
||||
CaseMap::CaseMap(const Locale &locale, uint32_t options, UErrorCode &errorCode) :
|
||||
CaseMap(locale, /* UCASE_LOC_UNKNOWN = */ 0, options, errorCode) {}
|
||||
|
||||
// small optimization for localeID=="", a little slower otherwise
|
||||
CaseMap::CaseMap(const char *localeID, uint32_t options, UErrorCode &errorCode) :
|
||||
CaseMap(Locale::getRoot(), /* UCASE_LOC_ROOT = */ 1, options, errorCode) {
|
||||
if (localeID == NULL || *localeID != 0) {
|
||||
setLocale(localeID, errorCode); // not root
|
||||
}
|
||||
}
|
||||
|
||||
// Destructor: deletes the iter member. The delete is compiled only when
// UCONFIG_NO_BREAK_ITERATION is not set, matching the guard under which the
// constructor initializes iter.
CaseMap::~CaseMap() {
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
delete iter;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Recomputes the caseLocale member from a locale ID string.
//   localeID - must not be NULL (checked with U_ASSERT only).
// caseLocale is first reset to UCASE_LOC_UNKNOWN and then handed by address to
// ucase_getCaseLocale(), whose return value is ignored here
// (presumably the helper stores its result through the pointer -- TODO confirm).
void CaseMap::setCaseLocale(const char *localeID) {
|
||||
U_ASSERT(localeID != NULL);
|
||||
caseLocale = UCASE_LOC_UNKNOWN;
|
||||
ucase_getCaseLocale(localeID, &caseLocale);
|
||||
}
|
||||
|
||||
void CaseMap::setLocale(const char *localeID, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (localeID == NULL) {
|
||||
locale = Locale::getDefault();
|
||||
localeID = locale.getBaseName();
|
||||
if (*locale == 0) {
|
||||
return UCASE_LOC_ROOT;
|
||||
} else {
|
||||
locale = Locale(localeID);
|
||||
if (locale.isBogus()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
localeID = "";
|
||||
}
|
||||
return ucase_getCaseLocale(locale, NULL);
|
||||
}
|
||||
setCaseLocale(localeID);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/* public API functions */
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
|
@ -90,9 +45,8 @@ u_strToLower(UChar *dest, int32_t destCapacity,
|
|||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
CaseMap csm(locale, 0, *pErrorCode);
|
||||
return ustrcase_mapWithOverlap(
|
||||
csm, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToLower, *pErrorCode);
|
||||
|
@ -103,10 +57,37 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
|
|||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
CaseMap csm(locale, 0, *pErrorCode);
|
||||
return ustrcase_mapWithOverlap(
|
||||
csm, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToUpper, *pErrorCode);
|
||||
}
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// CaseMap::toLower taking the locale and options as explicit arguments
// (config, input, output parameter order). It is a thin wrapper around
// ustrcase_map().
//
//   locale             - locale ID string; converted to a case-locale value
//                        with ustrcase_getCaseLocale().
//   options            - options word, forwarded unchanged.
//   src, srcLength     - input text and its length, forwarded unchanged.
//   dest, destCapacity - output buffer and its capacity, forwarded unchanged.
//   edits              - forwarded unchanged to ustrcase_map().
//   errorCode          - forwarded by reference to ustrcase_map().
//
// Returns whatever ustrcase_map() returns
// (presumably the length of the mapped output -- TODO confirm).
int32_t CaseMap::toLower(
|
||||
const char *locale, uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
return ustrcase_map(
|
||||
// NOTE(review): no comma follows UCASEMAP_BREAK_ITERATOR_NULL; the macro
// presumably expands to its own argument plus separator -- confirm in its
// definition.
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
// The call passes dest before src, the reverse of this function's own
// parameter order.
dest, destCapacity,
|
||||
src, srcLength,
|
||||
// ustrcase_internalToLower is handed over as the mapping routine argument.
ustrcase_internalToLower, edits, errorCode);
|
||||
}
|
||||
|
||||
// CaseMap::toUpper taking the locale and options as explicit arguments
// (config, input, output parameter order). It is a thin wrapper around
// ustrcase_map() and differs from the toLower wrapper only in the mapping
// routine it passes.
//
//   locale             - locale ID string; converted to a case-locale value
//                        with ustrcase_getCaseLocale().
//   options            - options word, forwarded unchanged.
//   src, srcLength     - input text and its length, forwarded unchanged.
//   dest, destCapacity - output buffer and its capacity, forwarded unchanged.
//   edits              - forwarded unchanged to ustrcase_map().
//   errorCode          - forwarded by reference to ustrcase_map().
//
// Returns whatever ustrcase_map() returns
// (presumably the length of the mapped output -- TODO confirm).
int32_t CaseMap::toUpper(
|
||||
const char *locale, uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
return ustrcase_map(
|
||||
// NOTE(review): no comma follows UCASEMAP_BREAK_ITERATOR_NULL; the macro
// presumably expands to its own argument plus separator -- confirm in its
// definition.
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
// The call passes dest before src, the reverse of this function's own
// parameter order.
dest, destCapacity,
|
||||
src, srcLength,
|
||||
// ustrcase_internalToUpper is handed over as the mapping routine argument.
ustrcase_internalToUpper, edits, errorCode);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
Loading…
Add table
Reference in a new issue