mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 01:11:02 +00:00
ICU-4935 add U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT, ucasemap_get/setBreakIterator, ucasemap_toTitle, ucasemap_utf8ToTitle, ucasemap_utf8FoldCase, UnicodeString::toTitle(...options)
X-SVN-Rev: 22170
This commit is contained in:
parent
2531662042
commit
c12f6712f9
11 changed files with 1031 additions and 278 deletions
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2005, International Business Machines
|
||||
* Copyright (C) 2005-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -20,6 +20,10 @@
|
|||
#include "unicode/uloc.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/utext.h"
|
||||
#endif
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "ucase.h"
|
||||
|
@ -27,13 +31,6 @@
|
|||
|
||||
/* UCaseMap service object -------------------------------------------------- */
|
||||
|
||||
struct UCaseMap {
|
||||
const UCaseProps *csp;
|
||||
char locale[32];
|
||||
int32_t locCache;
|
||||
uint32_t options;
|
||||
};
|
||||
|
||||
U_DRAFT UCaseMap * U_EXPORT2
|
||||
ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
|
||||
UCaseMap *csm;
|
||||
|
@ -62,6 +59,9 @@ ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
|
|||
U_DRAFT void U_EXPORT2
|
||||
ucasemap_close(UCaseMap *csm) {
|
||||
if(csm!=NULL) {
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
ubrk_close(csm->iter);
|
||||
#endif
|
||||
uprv_free(csm);
|
||||
}
|
||||
}
|
||||
|
@ -106,8 +106,25 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
|
|||
csm->options=options;
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_DRAFT const UBreakIterator * U_EXPORT2
|
||||
ucasemap_getBreakIterator(const UCaseMap *csm) {
|
||||
return csm->iter;
|
||||
}
|
||||
|
||||
U_DRAFT void U_EXPORT2
|
||||
ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode) {
|
||||
ubrk_close(csm->iter);
|
||||
csm->iter=iterToAdopt;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* UTF-8 string case mappings ----------------------------------------------- */
|
||||
|
||||
/* TODO(markus): Move to a new, separate utf8case.c file. */
|
||||
|
||||
/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
|
||||
static U_INLINE int32_t
|
||||
appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
|
||||
|
@ -146,7 +163,7 @@ appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
|
|||
(char *)(dest+destIndex), destCapacity-destIndex, &destLength,
|
||||
s, length,
|
||||
&errorCode);
|
||||
destIndex+=length;
|
||||
destIndex+=destLength;
|
||||
/* we might have an overflow, but we know the actual length */
|
||||
}
|
||||
} else {
|
||||
|
@ -159,7 +176,7 @@ appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
|
|||
NULL, 0, &destLength,
|
||||
s, length,
|
||||
&errorCode);
|
||||
destIndex+=length;
|
||||
destIndex+=destLength;
|
||||
}
|
||||
}
|
||||
return destIndex;
|
||||
|
@ -197,12 +214,6 @@ utf8_caseContextIterator(void *context, int8_t dir) {
|
|||
return U_SENTINEL;
|
||||
}
|
||||
|
||||
typedef int32_t U_CALLCONV
|
||||
UCaseMapFull(const UCaseProps *csp, UChar32 c,
|
||||
UCaseContextIterator *iter, void *context,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache);
|
||||
|
||||
/*
|
||||
* Case-maps [srcStart..srcLimit[ but takes
|
||||
* context [0..srcLength[ into account.
|
||||
|
@ -214,7 +225,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
|||
int32_t srcStart, int32_t srcLimit,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar *s;
|
||||
UChar32 c;
|
||||
UChar32 c, c2;
|
||||
int32_t srcIndex, destIndex;
|
||||
int32_t locCache;
|
||||
|
||||
|
@ -227,8 +238,192 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
|||
csc->cpStart=srcIndex;
|
||||
U8_NEXT(src, srcIndex, srcLimit, c);
|
||||
csc->cpLimit=srcIndex;
|
||||
if(c<0) {
|
||||
int32_t i=csc->cpStart;
|
||||
while(destIndex<destCapacity && i<srcIndex) {
|
||||
dest[destIndex++]=src[i++];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
|
||||
/* fast path version of appendResult() for ASCII results */
|
||||
dest[destIndex++]=(uint8_t)c2;
|
||||
} else {
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
}
|
||||
}
|
||||
|
||||
if(destIndex>destCapacity) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/*
|
||||
* Internal titlecasing function.
|
||||
*/
|
||||
static int32_t
|
||||
_toTitle(UCaseMap *csm,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, UCaseContext *csc,
|
||||
int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
UText utext=UTEXT_INITIALIZER;
|
||||
const UChar *s;
|
||||
UChar32 c;
|
||||
int32_t prev, titleStart, titleLimit, index, destIndex, length;
|
||||
UBool isFirstIndex;
|
||||
|
||||
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
if(csm->iter==NULL) {
|
||||
csm->iter=ubrk_open(UBRK_WORD, csm->locale,
|
||||
NULL, 0,
|
||||
pErrorCode);
|
||||
}
|
||||
ubrk_setUText(csm->iter, &utext, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
utext_close(&utext);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* set up local variables */
|
||||
destIndex=0;
|
||||
prev=0;
|
||||
isFirstIndex=TRUE;
|
||||
|
||||
/* titlecasing loop */
|
||||
while(prev<srcLength) {
|
||||
/* find next index where to titlecase */
|
||||
if(isFirstIndex) {
|
||||
isFirstIndex=FALSE;
|
||||
index=ubrk_first(csm->iter);
|
||||
} else {
|
||||
index=ubrk_next(csm->iter);
|
||||
}
|
||||
if(index==UBRK_DONE || index>srcLength) {
|
||||
index=srcLength;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unicode 4 & 5 section 3.13 Default Case Operations:
|
||||
*
|
||||
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
|
||||
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
|
||||
* cased character F. If F exists, map F to default_title(F); then map each
|
||||
* subsequent character C to default_lower(C).
|
||||
*
|
||||
* In this implementation, segment [prev..index[ into 3 parts:
|
||||
* a) uncased characters (copy as-is) [prev..titleStart[
|
||||
* b) first case letter (titlecase) [titleStart..titleLimit[
|
||||
* c) subsequent characters (lowercase) [titleLimit..index[
|
||||
*/
|
||||
if(prev<index) {
|
||||
/* find and copy uncased characters [prev..titleStart[ */
|
||||
titleStart=titleLimit=prev;
|
||||
U8_NEXT(src, titleLimit, index, c);
|
||||
if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
|
||||
/* Adjust the titlecasing index (titleStart) to the next cased character. */
|
||||
for(;;) {
|
||||
titleStart=titleLimit;
|
||||
if(titleLimit==index) {
|
||||
/*
|
||||
* only uncased characters in [prev..index[
|
||||
* stop with titleStart==titleLimit==index
|
||||
*/
|
||||
break;
|
||||
}
|
||||
U8_NEXT(src, titleLimit, index, c);
|
||||
if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
|
||||
break; /* cased letter at [titleStart..titleLimit[ */
|
||||
}
|
||||
}
|
||||
length=titleStart-prev;
|
||||
if(length>0) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, src+prev, length);
|
||||
}
|
||||
destIndex+=length;
|
||||
}
|
||||
}
|
||||
|
||||
if(titleStart<titleLimit) {
|
||||
/* titlecase c which is from [titleStart..titleLimit[ */
|
||||
csc->cpStart=titleStart;
|
||||
csc->cpLimit=titleLimit;
|
||||
c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
|
||||
/* lowercase [titleLimit..index[ */
|
||||
if(titleLimit<index) {
|
||||
if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
/* Normal operation: Lowercase the rest of the word. */
|
||||
destIndex+=
|
||||
_caseMap(
|
||||
csm, ucase_toFullLower,
|
||||
dest+destIndex, destCapacity-destIndex,
|
||||
src, csc,
|
||||
titleLimit, index,
|
||||
pErrorCode);
|
||||
} else {
|
||||
/* Optionally just copy the rest of the word unchanged. */
|
||||
length=index-titleLimit;
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, src+titleLimit, length);
|
||||
}
|
||||
destIndex+=length;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
prev=index;
|
||||
}
|
||||
|
||||
if(destIndex>destCapacity) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
utext_close(&utext);
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
U_CFUNC int32_t
|
||||
utf8_foldCase(const UCaseProps *csp,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
int32_t srcIndex, destIndex;
|
||||
|
||||
const UChar *s;
|
||||
UChar32 c, c2;
|
||||
int32_t start;
|
||||
|
||||
/* case mapping loop */
|
||||
srcIndex=destIndex=0;
|
||||
while(srcIndex<srcLength) {
|
||||
start=srcIndex;
|
||||
U8_NEXT(src, srcIndex, srcLength, c);
|
||||
if(c<0) {
|
||||
while(destIndex<destCapacity && start<srcIndex) {
|
||||
dest[destIndex++]=src[start++];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
c=ucase_toFullFolding(csp, c, &s, options);
|
||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
|
||||
/* fast path version of appendResult() for ASCII results */
|
||||
dest[destIndex++]=(uint8_t)c2;
|
||||
} else {
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
}
|
||||
}
|
||||
|
||||
if(destIndex>destCapacity) {
|
||||
|
@ -241,12 +436,6 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
|||
* Implement argument checking and buffer handling
|
||||
* for string case mapping as a common function.
|
||||
*/
|
||||
enum {
|
||||
TO_LOWER,
|
||||
TO_UPPER,
|
||||
TO_TITLE,
|
||||
FOLD_CASE
|
||||
};
|
||||
|
||||
/* common internal function for public API functions */
|
||||
|
||||
|
@ -256,7 +445,6 @@ caseMap(const UCaseMap *csm,
|
|||
const uint8_t *src, int32_t srcLength,
|
||||
int32_t toWhichCase,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseContext csc={ NULL };
|
||||
int32_t destLength;
|
||||
|
||||
/* check argument values */
|
||||
|
@ -288,21 +476,37 @@ caseMap(const UCaseMap *csm,
|
|||
|
||||
destLength=0;
|
||||
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
if(toWhichCase==FOLD_CASE) {
|
||||
destLength=utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength,
|
||||
csm->options, pErrorCode);
|
||||
} else {
|
||||
UCaseContext csc={ NULL };
|
||||
|
||||
if(toWhichCase==TO_LOWER) {
|
||||
destLength=_caseMap(csm, ucase_toFullLower,
|
||||
dest, destCapacity,
|
||||
src, &csc,
|
||||
0, srcLength,
|
||||
pErrorCode);
|
||||
} else /* if(toWhichCase==TO_UPPER) */ {
|
||||
destLength=_caseMap(csm, ucase_toFullUpper,
|
||||
dest, destCapacity,
|
||||
src, &csc,
|
||||
0, srcLength,
|
||||
pErrorCode);
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
|
||||
if(toWhichCase==TO_LOWER) {
|
||||
destLength=_caseMap(csm, ucase_toFullLower,
|
||||
dest, destCapacity,
|
||||
src, &csc,
|
||||
0, srcLength,
|
||||
pErrorCode);
|
||||
} else if(toWhichCase==TO_UPPER) {
|
||||
destLength=_caseMap(csm, ucase_toFullUpper,
|
||||
dest, destCapacity,
|
||||
src, &csc,
|
||||
0, srcLength,
|
||||
pErrorCode);
|
||||
} else /* if(toWhichCase==TO_TITLE) */ {
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
*pErrorCode=U_UNSUPPORTED_ERROR;
|
||||
#else
|
||||
/* UCaseMap is actually non-const in toTitle() APIs. */
|
||||
destLength=_toTitle((UCaseMap *)csm, dest, destCapacity,
|
||||
src, &csc, srcLength,
|
||||
pErrorCode);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
|
||||
|
@ -331,3 +535,29 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
|
|||
(const uint8_t *)src, srcLength,
|
||||
TO_UPPER, pErrorCode);
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
ucasemap_utf8ToTitle(UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
return caseMap(csm,
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
TO_TITLE, pErrorCode);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
ucasemap_utf8FoldCase(const UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
return caseMap(csm,
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
FOLD_CASE, pErrorCode);
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2005, International Business Machines
|
||||
* Copyright (C) 2005-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -30,10 +30,9 @@
|
|||
* for the attributes, as usual.
|
||||
*
|
||||
* Currently, the functionality provided here does not overlap with uchar.h
|
||||
* and ustring.h.
|
||||
* and ustring.h, except for ucasemap_toTitle().
|
||||
*
|
||||
* ucasemap_utf8ToLower() and ucasemap_utf8ToUpper() operate directly on
|
||||
* UTF-8 strings.
|
||||
* ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
|
||||
*/
|
||||
|
||||
/**
|
||||
|
@ -60,6 +59,10 @@ typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @draft ICU
|
|||
* which must not indicate a failure before the function call.
|
||||
* @return Pointer to a UCaseMap service object, if successful.
|
||||
*
|
||||
* @see U_FOLD_CASE_DEFAULT
|
||||
* @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT UCaseMap * U_EXPORT2
|
||||
|
@ -119,6 +122,135 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
|
|||
U_DRAFT void U_EXPORT2
|
||||
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Do not lowercase non-initial parts of words when titlecasing.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will titlecase the first cased character
|
||||
* of a word and lowercase all other characters.
|
||||
* With this option, the other characters will not be modified.
|
||||
*
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @see UnicodeString::toTitle
|
||||
* @draft ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_LOWERCASE 0x100
|
||||
|
||||
/**
|
||||
* Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
|
||||
* titlecase exactly the characters at breaks from the iterator.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will take each break iterator index,
|
||||
* adjust it by looking for the next cased character, and titlecase that one.
|
||||
* Other characters are lowercased.
|
||||
*
|
||||
* This follows Unicode 4 & 5 section 3.13 Default Case Operations:
|
||||
*
|
||||
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
|
||||
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
|
||||
* cased character F. If F exists, map F to default_title(F); then map each
|
||||
* subsequent character C to default_lower(C).
|
||||
*
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @see UnicodeString::toTitle
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @draft ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
|
||||
|
||||
#endif
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Get the break iterator that is used for titlecasing.
|
||||
* Do not modify the returned break iterator.
|
||||
* @param csm UCaseMap service object.
|
||||
* @return titlecasing break iterator
|
||||
* @draft ICU 3.8
|
||||
*/
|
||||
U_DRAFT const UBreakIterator * U_EXPORT2
|
||||
ucasemap_getBreakIterator(const UCaseMap *csm);
|
||||
|
||||
/**
|
||||
* Set the break iterator that is used for titlecasing.
|
||||
* The UCaseMap service object releases a previously set break iterator
|
||||
* and "adopts" this new one, taking ownership of it.
|
||||
* It will be released in a subsequent call to ucasemap_setBreakIterator()
|
||||
* or ucasemap_close().
|
||||
*
|
||||
* Break iterator operations are not thread-safe. Therefore, titlecasing
|
||||
* functions use non-const UCaseMap objects. It is not possible to titlecase
|
||||
* strings concurrently using the same UCaseMap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param iterToAdopt Break iterator to be adopted for titlecasing.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @draft ICU 3.8
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
|
||||
* except that it takes ucasemap_setOptions() into account and has performance
|
||||
* advantages from being able to use a UCaseMap object for multiple case mapping
|
||||
* operations, saving setup time.
|
||||
*
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with ucasemap_setOptions().)
|
||||
*
|
||||
* The titlecase break iterator can be provided to customize for arbitrary
|
||||
* styles, using rules and dictionaries beyond the standard iterators.
|
||||
* It may be more efficient to always provide an iterator to avoid
|
||||
* opening and closing one for each string.
|
||||
* The standard titlecase iterator for the root locale implements the
|
||||
* algorithm of Unicode TR 21.
|
||||
*
|
||||
* This function uses only the setText(), first() and next() methods of the
|
||||
* provided break iterator.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToTitle
|
||||
* @draft ICU 3.8
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
ucasemap_toTitle(UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Lowercase the characters in a UTF-8 string.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
|
@ -132,7 +264,7 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
|
|||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
|
@ -161,7 +293,7 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
|
|||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
|
@ -177,4 +309,87 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
|
|||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Titlecase a UTF-8 string.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with ucasemap_setOptions().)
|
||||
*
|
||||
* The titlecase break iterator can be provided to customize for arbitrary
|
||||
* styles, using rules and dictionaries beyond the standard iterators.
|
||||
* It may be more efficient to always provide an iterator to avoid
|
||||
* opening and closing one for each string.
|
||||
* The standard titlecase iterator for the root locale implements the
|
||||
* algorithm of Unicode TR 21.
|
||||
*
|
||||
* This function uses only the setText(), first() and next() methods of the
|
||||
* provided break iterator.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToTitle
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
* @draft ICU 3.8
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
ucasemap_utf8ToTitle(UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Case-fold the characters in a UTF-8 string.
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'I' in CaseFolding.txt.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strFoldCase
|
||||
* @see ucasemap_setOptions
|
||||
* @see U_FOLD_CASE_DEFAULT
|
||||
* @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @draft ICU 3.8
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
ucasemap_utf8FoldCase(const UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2430,7 +2430,7 @@ public:
|
|||
* The standard titlecase iterator for the root locale implements the
|
||||
* algorithm of Unicode TR 21.
|
||||
*
|
||||
* This function uses only the first() and next() methods of the
|
||||
* This function uses only the setText(), first() and next() methods of the
|
||||
* provided break iterator.
|
||||
*
|
||||
* @param titleIter A break iterator to find the first characters of words
|
||||
|
@ -2458,7 +2458,7 @@ public:
|
|||
* The standard titlecase iterator for the root locale implements the
|
||||
* algorithm of Unicode TR 21.
|
||||
*
|
||||
* This function uses only the first() and next() methods of the
|
||||
* This function uses only the setText(), first() and next() methods of the
|
||||
* provided break iterator.
|
||||
*
|
||||
* @param titleIter A break iterator to find the first characters of words
|
||||
|
@ -2472,6 +2472,37 @@ public:
|
|||
*/
|
||||
UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
|
||||
|
||||
/**
|
||||
* Titlecase this string, with options.
|
||||
*
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with options.)
|
||||
*
|
||||
* The titlecase break iterator can be provided to customize for arbitrary
|
||||
* styles, using rules and dictionaries beyond the standard iterators.
|
||||
* It may be more efficient to always provide an iterator to avoid
|
||||
* opening and closing one for each string.
|
||||
* The standard titlecase iterator for the root locale implements the
|
||||
* algorithm of Unicode TR 21.
|
||||
*
|
||||
* This function uses only the setText(), first() and next() methods of the
|
||||
* provided break iterator.
|
||||
*
|
||||
* @param titleIter A break iterator to find the first characters of words
|
||||
* that are to be titlecased.
|
||||
* If none is provided (0), then a standard titlecase
|
||||
* break iterator is opened.
|
||||
* Otherwise the provided iterator is set to the string's text.
|
||||
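* @param locale The locale to consider.
* @param options Options bit set, see U_TITLECASE_NO_LOWERCASE and U_TITLECASE_NO_BREAK_ADJUSTMENT.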
|
||||
* @return A reference to this.
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
* @draft ICU 3.8
|
||||
*/
|
||||
UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
|
|
@ -1105,7 +1105,7 @@ u_strToLower(UChar *dest, int32_t destCapacity,
|
|||
* The standard titlecase iterator for the root locale implements the
|
||||
* algorithm of Unicode TR 21.
|
||||
*
|
||||
* This function uses only the first() and next() methods of the
|
||||
* This function uses only the setText(), first() and next() methods of the
|
||||
* provided break iterator.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Copyright (C) 1999-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -138,28 +138,6 @@ UnicodeString::caseMap(BreakIterator *titleIter,
|
|||
return *this;
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
// set up the titlecasing break iterator
|
||||
UBreakIterator *cTitleIter = 0;
|
||||
|
||||
if(toWhichCase == TO_TITLE) {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
if(titleIter != 0) {
|
||||
cTitleIter = (UBreakIterator *)titleIter;
|
||||
ubrk_setText(cTitleIter, oldArray, oldLength, &errorCode);
|
||||
} else {
|
||||
cTitleIter = ubrk_open(UBRK_WORD, locale,
|
||||
oldArray, oldLength,
|
||||
&errorCode);
|
||||
}
|
||||
if(U_FAILURE(errorCode)) {
|
||||
uprv_free(bufferToDelete);
|
||||
setToBogus();
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Case-map, and if the result is too long, then reallocate and repeat.
|
||||
do {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
|
@ -177,7 +155,7 @@ UnicodeString::caseMap(BreakIterator *titleIter,
|
|||
#else
|
||||
fLength = ustr_toTitle(csp, fArray, fCapacity,
|
||||
oldArray, oldLength,
|
||||
cTitleIter, locale, &errorCode);
|
||||
(UBreakIterator *)titleIter, locale, options, &errorCode);
|
||||
#endif
|
||||
} else {
|
||||
fLength = ustr_foldCase(csp, fArray, fCapacity,
|
||||
|
@ -187,12 +165,6 @@ UnicodeString::caseMap(BreakIterator *titleIter,
|
|||
}
|
||||
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
if(cTitleIter != 0 && titleIter == 0) {
|
||||
ubrk_close(cTitleIter);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (bufferToDelete) {
|
||||
uprv_free(bufferToDelete);
|
||||
}
|
||||
|
@ -234,6 +206,11 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
|
|||
return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
|
||||
}
|
||||
|
||||
// Titlecase this string with an options bit set: forwards to caseMap()
// with the locale's name, the caller's options and TO_TITLE.
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
    const char *localeID = locale.getName();
    return caseMap(titleIter, localeID, options, TO_TITLE);
}
|
||||
|
||||
#endif
|
||||
|
||||
UnicodeString &
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Copyright (C) 1999-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ustr_imp.h
|
||||
|
@ -98,6 +98,42 @@ u_growBufferFromStatic(void *context,
|
|||
* ustring.h/ustrcase.c and UnicodeString case mapping functions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
struct UCaseMap {
|
||||
const UCaseProps *csp;
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
UBreakIterator *iter; /* We adopt the iterator, so we own it. */
|
||||
#endif
|
||||
char locale[32];
|
||||
int32_t locCache;
|
||||
uint32_t options;
|
||||
};
|
||||
|
||||
#ifndef __UCASEMAP_H__
|
||||
typedef struct UCaseMap UCaseMap;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
enum {
|
||||
TO_LOWER,
|
||||
TO_UPPER,
|
||||
TO_TITLE,
|
||||
FOLD_CASE
|
||||
};
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UCaseMapFull(const UCaseProps *csp, UChar32 c,
|
||||
UCaseContextIterator *iter, void *context,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache);
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
|
@ -128,7 +164,7 @@ ustr_toTitle(const UCaseProps *csp,
|
|||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
const char *locale, uint32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2006, International Business Machines
|
||||
* Copyright (C) 2001-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -21,6 +21,7 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucase.h"
|
||||
|
@ -114,26 +115,22 @@ utf16_caseContextIterator(void *context, int8_t dir) {
|
|||
return U_SENTINEL;
|
||||
}
|
||||
|
||||
typedef int32_t U_CALLCONV
|
||||
UCaseMapFull(const UCaseProps *csp, UChar32 c,
|
||||
UCaseContextIterator *iter, void *context,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache);
|
||||
|
||||
/*
|
||||
* Case-maps [srcStart..srcLimit[ but takes
|
||||
* context [0..srcLength[ into account.
|
||||
*/
|
||||
static int32_t
|
||||
_caseMap(const UCaseProps *csp, UCaseMapFull *map,
|
||||
_caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, UCaseContext *csc,
|
||||
int32_t srcStart, int32_t srcLimit,
|
||||
const char *locale, int32_t *locCache,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar *s;
|
||||
UChar32 c, c2;
|
||||
int32_t srcIndex, destIndex;
|
||||
int32_t locCache;
|
||||
|
||||
locCache=csm->locCache;
|
||||
|
||||
/* case mapping loop */
|
||||
srcIndex=srcStart;
|
||||
|
@ -142,7 +139,7 @@ _caseMap(const UCaseProps *csp, UCaseMapFull *map,
|
|||
csc->cpStart=srcIndex;
|
||||
U16_NEXT(src, srcIndex, srcLimit, c);
|
||||
csc->cpLimit=srcIndex;
|
||||
c=map(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
|
||||
c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache);
|
||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
|
||||
/* fast path version of appendResult() for BMP results */
|
||||
dest[destIndex++]=(UChar)c2;
|
||||
|
@ -157,26 +154,83 @@ _caseMap(const UCaseProps *csp, UCaseMapFull *map,
|
|||
return destIndex;
|
||||
}
|
||||
|
||||
static void
|
||||
setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
|
||||
/*
|
||||
* We could call ucasemap_setLocale(), but here we really only care about
|
||||
* the initial language subtag, we need not return the real string via
|
||||
* ucasemap_getLocale(), and we don't care about only getting "x" from
|
||||
* "x-some-thing" etc.
|
||||
*
|
||||
* We ignore locales with a longer-than-3 initial subtag.
|
||||
*
|
||||
* We also do not fill in the locCache because it is rarely used,
|
||||
* and not worth setting unless we reuse it for many case mapping operations.
|
||||
* (That's why UCaseMap was created.)
|
||||
*/
|
||||
int i;
|
||||
char c;
|
||||
|
||||
/* the internal functions require locale!=NULL */
|
||||
if(locale==NULL) {
|
||||
locale=uloc_getDefault();
|
||||
}
|
||||
for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) {
|
||||
csm->locale[i]=c;
|
||||
}
|
||||
if(i<=3) {
|
||||
csm->locale[i]=0; /* Up to 3 non-separator characters. */
|
||||
} else {
|
||||
csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
|
||||
* Do this fast because it is called with every function call.
|
||||
*/
|
||||
static U_INLINE void
|
||||
setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
|
||||
if(csm->csp==NULL) {
|
||||
csm->csp=ucase_getSingleton(pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if(locale!=NULL && locale[0]==0) {
|
||||
csm->locale[0]=0;
|
||||
} else {
|
||||
setTempCaseMapLocale(csm, locale, pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/*
|
||||
* Internal titlecasing function.
|
||||
*
|
||||
* Must get titleIter!=NULL.
|
||||
*/
|
||||
static int32_t
|
||||
_toTitle(const UCaseProps *csp,
|
||||
_toTitle(UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, UCaseContext *csc,
|
||||
int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale, int32_t *locCache,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar *s;
|
||||
UChar32 c;
|
||||
int32_t prev, titleStart, titleLimit, index, destIndex, length;
|
||||
UBool isFirstIndex;
|
||||
|
||||
if(csm->iter!=NULL) {
|
||||
ubrk_setText(csm->iter, src, srcLength, pErrorCode);
|
||||
} else {
|
||||
csm->iter=ubrk_open(UBRK_WORD, csm->locale,
|
||||
src, srcLength,
|
||||
pErrorCode);
|
||||
}
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* set up local variables */
|
||||
destIndex=0;
|
||||
prev=0;
|
||||
|
@ -187,9 +241,9 @@ _toTitle(const UCaseProps *csp,
|
|||
/* find next index where to titlecase */
|
||||
if(isFirstIndex) {
|
||||
isFirstIndex=FALSE;
|
||||
index=ubrk_first(titleIter);
|
||||
index=ubrk_first(csm->iter);
|
||||
} else {
|
||||
index=ubrk_next(titleIter);
|
||||
index=ubrk_next(csm->iter);
|
||||
}
|
||||
if(index==UBRK_DONE || index>srcLength) {
|
||||
index=srcLength;
|
||||
|
@ -211,45 +265,58 @@ _toTitle(const UCaseProps *csp,
|
|||
if(prev<index) {
|
||||
/* find and copy uncased characters [prev..titleStart[ */
|
||||
titleStart=titleLimit=prev;
|
||||
for(;;) {
|
||||
U16_NEXT(src, titleLimit, srcLength, c);
|
||||
if(UCASE_NONE!=ucase_getType(csp, c)) {
|
||||
break; /* cased letter at [titleStart..titleLimit[ */
|
||||
U16_NEXT(src, titleLimit, index, c);
|
||||
if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
|
||||
/* Adjust the titlecasing index (titleStart) to the next cased character. */
|
||||
for(;;) {
|
||||
titleStart=titleLimit;
|
||||
if(titleLimit==index) {
|
||||
/*
|
||||
* only uncased characters in [prev..index[
|
||||
* stop with titleStart==titleLimit==index
|
||||
*/
|
||||
break;
|
||||
}
|
||||
U16_NEXT(src, titleLimit, index, c);
|
||||
if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
|
||||
break; /* cased letter at [titleStart..titleLimit[ */
|
||||
}
|
||||
}
|
||||
titleStart=titleLimit;
|
||||
if(titleLimit==index) {
|
||||
/*
|
||||
* only uncased characters in [prev..index[
|
||||
* stop with titleStart==titleLimit==index
|
||||
*/
|
||||
break;
|
||||
length=titleStart-prev;
|
||||
if(length>0) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);
|
||||
}
|
||||
destIndex+=length;
|
||||
}
|
||||
}
|
||||
length=titleStart-prev;
|
||||
if(length>0) {
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);
|
||||
}
|
||||
destIndex+=length;
|
||||
}
|
||||
|
||||
if(titleStart<titleLimit) {
|
||||
/* titlecase c which is from [titleStart..titleLimit[ */
|
||||
csc->cpStart=titleStart;
|
||||
csc->cpLimit=titleLimit;
|
||||
c=ucase_toFullTitle(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
|
||||
c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
|
||||
/* lowercase [titleLimit..index[ */
|
||||
if(titleLimit<index) {
|
||||
destIndex+=
|
||||
_caseMap(
|
||||
csp, ucase_toFullLower,
|
||||
dest+destIndex, destCapacity-destIndex,
|
||||
src, csc,
|
||||
titleLimit, index,
|
||||
locale, locCache,
|
||||
pErrorCode);
|
||||
if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
/* Normal operation: Lowercase the rest of the word. */
|
||||
destIndex+=
|
||||
_caseMap(
|
||||
csm, ucase_toFullLower,
|
||||
dest+destIndex, destCapacity-destIndex,
|
||||
src, csc,
|
||||
titleLimit, index,
|
||||
pErrorCode);
|
||||
} else {
|
||||
/* Optionally just copy the rest of the word unchanged. */
|
||||
length=index-titleLimit;
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR);
|
||||
}
|
||||
destIndex+=length;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -263,26 +330,6 @@ _toTitle(const UCaseProps *csp,
|
|||
return destIndex;
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustr_toTitle(const UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseContext csc={ NULL };
|
||||
int32_t locCache;
|
||||
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
locCache=0;
|
||||
|
||||
return _toTitle(csp,
|
||||
dest, destCapacity,
|
||||
src, &csc, srcLength,
|
||||
titleIter, locale, &locCache, pErrorCode);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* functions available in the common library (for unistr_case.cpp) */
|
||||
|
@ -293,17 +340,18 @@ ustr_toLower(const UCaseProps *csp,
|
|||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseMap csm={ NULL };
|
||||
UCaseContext csc={ NULL };
|
||||
int32_t locCache;
|
||||
|
||||
csm.csp=csp;
|
||||
setTempCaseMap(&csm, locale, pErrorCode);
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
locCache=0;
|
||||
|
||||
return _caseMap(csp, ucase_toFullLower,
|
||||
return _caseMap(&csm, ucase_toFullLower,
|
||||
dest, destCapacity,
|
||||
src, &csc, 0, srcLength,
|
||||
locale, &locCache, pErrorCode);
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
|
@ -312,19 +360,52 @@ ustr_toUpper(const UCaseProps *csp,
|
|||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseMap csm={ NULL };
|
||||
UCaseContext csc={ NULL };
|
||||
int32_t locCache;
|
||||
|
||||
csm.csp=csp;
|
||||
setTempCaseMap(&csm, locale, pErrorCode);
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
locCache=0;
|
||||
|
||||
return _caseMap(csp, ucase_toFullUpper,
|
||||
return _caseMap(&csm, ucase_toFullUpper,
|
||||
dest, destCapacity,
|
||||
src, &csc, 0, srcLength,
|
||||
locale, &locCache, pErrorCode);
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustr_toTitle(const UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale, uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseMap csm={ NULL };
|
||||
UCaseContext csc={ NULL };
|
||||
int32_t length;
|
||||
|
||||
csm.csp=csp;
|
||||
csm.iter=titleIter;
|
||||
csm.options=options;
|
||||
setTempCaseMap(&csm, locale, pErrorCode);
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
|
||||
length=_toTitle(&csm,
|
||||
dest, destCapacity,
|
||||
src, &csc, srcLength,
|
||||
pErrorCode);
|
||||
if(titleIter==NULL && csm.iter!=NULL) {
|
||||
ubrk_close(csm.iter);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustr_foldCase(const UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
|
@ -359,30 +440,19 @@ ustr_foldCase(const UCaseProps *csp,
|
|||
* Implement argument checking and buffer handling
|
||||
* for string case mapping as a common function.
|
||||
*/
|
||||
enum {
|
||||
TO_LOWER,
|
||||
TO_UPPER,
|
||||
TO_TITLE,
|
||||
FOLD_CASE
|
||||
};
|
||||
|
||||
/* common internal function for public API functions */
|
||||
|
||||
static int32_t
|
||||
caseMap(UChar *dest, int32_t destCapacity,
|
||||
caseMap(const UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
uint32_t options,
|
||||
int32_t toWhichCase,
|
||||
UErrorCode *pErrorCode) {
|
||||
UChar buffer[300];
|
||||
UChar *temp;
|
||||
|
||||
const UCaseProps *csp;
|
||||
|
||||
int32_t destLength;
|
||||
UBool ownTitleIter;
|
||||
|
||||
/* check argument values */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
|
@ -397,11 +467,6 @@ caseMap(UChar *dest, int32_t destCapacity,
|
|||
return 0;
|
||||
}
|
||||
|
||||
csp=ucase_getSingleton(pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get the string length */
|
||||
if(srcLength==-1) {
|
||||
srcLength=u_strlen(src);
|
||||
|
@ -428,53 +493,38 @@ caseMap(UChar *dest, int32_t destCapacity,
|
|||
temp=dest;
|
||||
}
|
||||
|
||||
ownTitleIter=FALSE;
|
||||
destLength=0;
|
||||
|
||||
if(toWhichCase==FOLD_CASE) {
|
||||
destLength=ustr_foldCase(csp, temp, destCapacity, src, srcLength,
|
||||
options, pErrorCode);
|
||||
destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength,
|
||||
csm->options, pErrorCode);
|
||||
} else {
|
||||
UCaseContext csc={ NULL };
|
||||
int32_t locCache;
|
||||
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
locCache=0;
|
||||
|
||||
/* the internal functions require locale!=NULL */
|
||||
if(locale==NULL) {
|
||||
locale=uloc_getDefault();
|
||||
}
|
||||
|
||||
if(toWhichCase==TO_LOWER) {
|
||||
destLength=_caseMap(csp, ucase_toFullLower,
|
||||
destLength=_caseMap(csm, ucase_toFullLower,
|
||||
temp, destCapacity,
|
||||
src, &csc,
|
||||
0, srcLength,
|
||||
locale, &locCache, pErrorCode);
|
||||
pErrorCode);
|
||||
} else if(toWhichCase==TO_UPPER) {
|
||||
destLength=_caseMap(csp, ucase_toFullUpper,
|
||||
destLength=_caseMap(csm, ucase_toFullUpper,
|
||||
temp, destCapacity,
|
||||
src, &csc,
|
||||
0, srcLength,
|
||||
locale, &locCache, pErrorCode);
|
||||
pErrorCode);
|
||||
} else /* if(toWhichCase==TO_TITLE) */ {
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
*pErrorCode=U_UNSUPPORTED_ERROR;
|
||||
#else
|
||||
if(titleIter==NULL) {
|
||||
titleIter=ubrk_open(UBRK_WORD, locale,
|
||||
src, srcLength,
|
||||
pErrorCode);
|
||||
ownTitleIter=(UBool)U_SUCCESS(*pErrorCode);
|
||||
}
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
destLength=_toTitle(csp, temp, destCapacity,
|
||||
src, &csc, srcLength,
|
||||
titleIter, locale, &locCache, pErrorCode);
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
/* UCaseMap is actually non-const in toTitle() APIs. */
|
||||
destLength=_toTitle((UCaseMap *)csm, temp, destCapacity,
|
||||
src, &csc, srcLength,
|
||||
pErrorCode);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if(temp!=dest) {
|
||||
|
@ -490,12 +540,6 @@ caseMap(UChar *dest, int32_t destCapacity,
|
|||
}
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
if(ownTitleIter) {
|
||||
ubrk_close(titleIter);
|
||||
}
|
||||
#endif
|
||||
|
||||
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
|
||||
}
|
||||
|
||||
|
@ -506,9 +550,11 @@ u_strToLower(UChar *dest, int32_t destCapacity,
|
|||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
return caseMap(dest, destCapacity,
|
||||
UCaseMap csm={ NULL };
|
||||
setTempCaseMap(&csm, locale, pErrorCode);
|
||||
return caseMap(&csm,
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
NULL, locale, 0,
|
||||
TO_LOWER, pErrorCode);
|
||||
}
|
||||
|
||||
|
@ -517,9 +563,11 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
|
|||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
return caseMap(dest, destCapacity,
|
||||
UCaseMap csm={ NULL };
|
||||
setTempCaseMap(&csm, locale, pErrorCode);
|
||||
return caseMap(&csm,
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
NULL, locale, 0,
|
||||
TO_UPPER, pErrorCode);
|
||||
}
|
||||
|
||||
|
@ -531,9 +579,29 @@ u_strToTitle(UChar *dest, int32_t destCapacity,
|
|||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
return caseMap(dest, destCapacity,
|
||||
UCaseMap csm={ NULL };
|
||||
int32_t length;
|
||||
|
||||
csm.iter=titleIter;
|
||||
setTempCaseMap(&csm, locale, pErrorCode);
|
||||
length=caseMap(&csm,
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
TO_TITLE, pErrorCode);
|
||||
if(titleIter==NULL && csm.iter!=NULL) {
|
||||
ubrk_close(csm.iter);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
ucasemap_toTitle(UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
return caseMap(csm,
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
titleIter, locale, 0,
|
||||
TO_TITLE, pErrorCode);
|
||||
}
|
||||
|
||||
|
@ -544,9 +612,12 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
|
|||
const UChar *src, int32_t srcLength,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
return caseMap(dest, destCapacity,
|
||||
UCaseMap csm={ NULL };
|
||||
csm.csp=ucase_getSingleton(pErrorCode);
|
||||
csm.options=options;
|
||||
return caseMap(&csm,
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
NULL, NULL, options,
|
||||
FOLD_CASE, pErrorCode);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2006, International Business Machines
|
||||
* Copyright (C) 2002-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -26,6 +26,8 @@
|
|||
#include "cmemory.h"
|
||||
#include "cintltst.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
/* test string case mapping functions --------------------------------------- */
|
||||
|
||||
static void
|
||||
|
@ -776,10 +778,153 @@ TestUCaseMap(void) {
|
|||
log_err("ucasemap_utf8ToUpper(overflow) failed\n");
|
||||
}
|
||||
|
||||
/* C API coverage for case folding. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
|
||||
errorCode=U_ZERO_ERROR;
|
||||
utf8Out[0]=0;
|
||||
length=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, 3, &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) {
|
||||
log_err("ucasemap_utf8FoldCase(aBc) failed\n");
|
||||
}
|
||||
|
||||
ucasemap_close(csm);
|
||||
}
|
||||
|
||||
void addCaseTest(TestNode** root);
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/* Try titlecasing with options. */
|
||||
static void
|
||||
TestUCaseMapToTitle(void) {
|
||||
/* "a 'CaT. A 'dOg! 'eTc." where '=U+02BB */
|
||||
/*
|
||||
* Note: The sentence BreakIterator does not recognize a '.'
|
||||
* as a sentence terminator if it is followed by lowercase.
|
||||
* That is why the example has the '!'.
|
||||
*/
|
||||
static const UChar
|
||||
|
||||
beforeTitle[]= { 0x61, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x54, 0x63, 0x2e },
|
||||
titleWord[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x44, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x74, 0x63, 0x2e },
|
||||
titleWordNoAdjust[]={ 0x41, 0x20, 0x2bb, 0x63, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x74, 0x63, 0x2e },
|
||||
titleSentNoLower[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x54, 0x63, 0x2e };
|
||||
|
||||
UChar buffer[32];
|
||||
UCaseMap *csm;
|
||||
UBreakIterator *sentenceIter;
|
||||
const UBreakIterator *iter;
|
||||
int32_t length;
|
||||
UErrorCode errorCode;
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
csm=ucasemap_open("", 0, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("ucasemap_open(\"\") failed - %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
|
||||
iter=ucasemap_getBreakIterator(csm);
|
||||
if(iter!=NULL) {
|
||||
log_err("ucasemap_getBreakIterator() returns %p!=NULL before setting any iterator or titlecasing\n", iter);
|
||||
}
|
||||
|
||||
/* Use default UBreakIterator: Word breaks. */
|
||||
length=ucasemap_toTitle(csm, buffer, LENGTHOF(buffer), beforeTitle, LENGTHOF(beforeTitle), &errorCode);
|
||||
if( U_FAILURE(errorCode) ||
|
||||
length!=LENGTHOF(titleWord) ||
|
||||
0!=u_memcmp(buffer, titleWord, length) ||
|
||||
buffer[length]!=0
|
||||
) {
|
||||
log_err("ucasemap_toTitle(default iterator)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
iter=ucasemap_getBreakIterator(csm);
|
||||
if(iter==NULL) {
|
||||
log_err("ucasemap_getBreakIterator() returns NULL after titlecasing\n");
|
||||
}
|
||||
|
||||
/* Try U_TITLECASE_NO_BREAK_ADJUSTMENT. */
|
||||
ucasemap_setOptions(csm, U_TITLECASE_NO_BREAK_ADJUSTMENT, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("error: ucasemap_setOptions(U_TITLECASE_NO_BREAK_ADJUSTMENT) failed - %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
|
||||
length=ucasemap_toTitle(csm, buffer, LENGTHOF(buffer), beforeTitle, LENGTHOF(beforeTitle), &errorCode);
|
||||
if( U_FAILURE(errorCode) ||
|
||||
length!=LENGTHOF(titleWordNoAdjust) ||
|
||||
0!=u_memcmp(buffer, titleWordNoAdjust, length) ||
|
||||
buffer[length]!=0
|
||||
) {
|
||||
log_err("ucasemap_toTitle(default iterator, no break adjustment)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
/* Set a sentence break iterator. */
|
||||
errorCode=U_ZERO_ERROR;
|
||||
sentenceIter=ubrk_open(UBRK_SENTENCE, "", NULL, 0, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("error: ubrk_open(UBRK_SENTENCE) failed - %s\n", u_errorName(errorCode));
|
||||
ucasemap_close(csm);
|
||||
return;
|
||||
}
|
||||
ucasemap_setBreakIterator(csm, sentenceIter, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("error: ucasemap_setBreakIterator(sentence iterator) failed - %s\n", u_errorName(errorCode));
|
||||
ubrk_close(sentenceIter);
|
||||
ucasemap_close(csm);
|
||||
return;
|
||||
}
|
||||
iter=ucasemap_getBreakIterator(csm);
|
||||
if(iter!=sentenceIter) {
|
||||
log_err("ucasemap_getBreakIterator() returns %p!=%p after setting the iterator\n", iter, sentenceIter);
|
||||
}
|
||||
|
||||
ucasemap_setOptions(csm, U_TITLECASE_NO_LOWERCASE, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("error: ucasemap_setOptions(U_TITLECASE_NO_LOWERCASE) failed - %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Use the sentence break iterator with the option. Preflight first. */
|
||||
length=ucasemap_toTitle(csm, NULL, 0, beforeTitle, LENGTHOF(beforeTitle), &errorCode);
|
||||
if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
|
||||
length!=LENGTHOF(titleSentNoLower)
|
||||
) {
|
||||
log_err("ucasemap_toTitle(preflight sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
buffer[0]=0;
|
||||
length=ucasemap_toTitle(csm, buffer, LENGTHOF(buffer), beforeTitle, LENGTHOF(beforeTitle), &errorCode);
|
||||
if( U_FAILURE(errorCode) ||
|
||||
length!=LENGTHOF(titleSentNoLower) ||
|
||||
0!=u_memcmp(buffer, titleSentNoLower, length) ||
|
||||
buffer[length]!=0
|
||||
) {
|
||||
log_err("ucasemap_toTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
/* UTF-8 C API coverage. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
|
||||
{
|
||||
char utf8BeforeTitle[64], utf8TitleSentNoLower[64], utf8[64];
|
||||
int32_t utf8BeforeTitleLength, utf8TitleSentNoLowerLength;
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
u_strToUTF8(utf8BeforeTitle, (int32_t)sizeof(utf8BeforeTitle), &utf8BeforeTitleLength, beforeTitle, LENGTHOF(beforeTitle), &errorCode);
|
||||
u_strToUTF8(utf8TitleSentNoLower, (int32_t)sizeof(utf8TitleSentNoLower), &utf8TitleSentNoLowerLength, titleSentNoLower, LENGTHOF(titleSentNoLower), &errorCode);
|
||||
|
||||
length=ucasemap_utf8ToTitle(csm, utf8, (int32_t)sizeof(utf8), utf8BeforeTitle, utf8BeforeTitleLength, &errorCode);
|
||||
if( U_FAILURE(errorCode) ||
|
||||
length!=utf8TitleSentNoLowerLength ||
|
||||
0!=uprv_memcmp(utf8, utf8TitleSentNoLower, length) ||
|
||||
utf8[length]!=0
|
||||
) {
|
||||
log_err("ucasemap_utf8ToTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
|
||||
}
|
||||
}
|
||||
|
||||
ucasemap_close(csm);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void addCaseTest(TestNode** root) {
|
||||
/* cstrcase.c functions, declared in cucdtst.h */
|
||||
|
@ -791,4 +936,7 @@ void addCaseTest(TestNode** root) {
|
|||
addTest(root, &TestCaseFolding, "tsutil/cstrcase/TestCaseFolding");
|
||||
addTest(root, &TestCaseCompare, "tsutil/cstrcase/TestCaseCompare");
|
||||
addTest(root, &TestUCaseMap, "tsutil/cstrcase/TestUCaseMap");
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
addTest(root, &TestUCaseMapToTitle, "tsutil/cstrcase/TestUCaseMapToTitle");
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2005, International Business Machines
|
||||
* Copyright (C) 2002-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -26,6 +26,8 @@
|
|||
#include "ustrtest.h"
|
||||
#include "unicode/tstdtmod.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
StringCaseTest::~StringCaseTest() {}
|
||||
|
||||
void
|
||||
|
@ -325,9 +327,8 @@ StringCaseTest::TestCaseConversion()
|
|||
enum {
|
||||
TEST_LOWER,
|
||||
TEST_UPPER,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
TEST_TITLE,
|
||||
#endif
|
||||
TEST_FOLD,
|
||||
TEST_COUNT
|
||||
};
|
||||
|
||||
|
@ -335,9 +336,8 @@ enum {
|
|||
static const char *const dataNames[TEST_COUNT+1]={
|
||||
"lowercasing",
|
||||
"uppercasing",
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
"titlecasing",
|
||||
#endif
|
||||
"casefolding",
|
||||
""
|
||||
};
|
||||
|
||||
|
@ -345,20 +345,31 @@ void
|
|||
StringCaseTest::TestCasingImpl(const UnicodeString &input,
|
||||
const UnicodeString &output,
|
||||
int32_t whichCase,
|
||||
const char *localeID, uint32_t options) {
|
||||
void *iter, const char *localeID, uint32_t options) {
|
||||
// UnicodeString
|
||||
UnicodeString result;
|
||||
const char *name;
|
||||
Locale locale(localeID);
|
||||
|
||||
result=input;
|
||||
switch(whichCase) {
|
||||
case TEST_LOWER:
|
||||
name="toLower";
|
||||
result.toLower(Locale(localeID));
|
||||
result.toLower(locale);
|
||||
break;
|
||||
case TEST_UPPER:
|
||||
name="toUpper";
|
||||
result.toUpper(Locale(localeID));
|
||||
result.toUpper(locale);
|
||||
break;
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
case TEST_TITLE:
|
||||
name="toTitle";
|
||||
result.toTitle((BreakIterator *)iter, locale, options);
|
||||
break;
|
||||
#endif
|
||||
case TEST_FOLD:
|
||||
name="foldCase";
|
||||
result.foldCase(options);
|
||||
break;
|
||||
default:
|
||||
name="";
|
||||
|
@ -367,6 +378,15 @@ StringCaseTest::TestCasingImpl(const UnicodeString &input,
|
|||
if(result!=output) {
|
||||
errln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
|
||||
}
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
if(whichCase==TEST_TITLE && options==0) {
|
||||
result=input;
|
||||
result.toTitle((BreakIterator *)iter, locale);
|
||||
if(result!=output) {
|
||||
errln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// UTF-8
|
||||
char utf8In[100], utf8Out[100];
|
||||
|
@ -378,6 +398,14 @@ StringCaseTest::TestCasingImpl(const UnicodeString &input,
|
|||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
csm=ucasemap_open(localeID, options, &errorCode);
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
if(iter!=NULL) {
|
||||
// Clone the break iterator so that the UCaseMap can safely adopt it.
|
||||
int32_t size=1; // Not 0 because that only gives preflighting.
|
||||
UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, &size, &errorCode);
|
||||
ucasemap_setBreakIterator(csm, clone, &errorCode);
|
||||
}
|
||||
#endif
|
||||
|
||||
u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), &errorCode);
|
||||
switch(whichCase) {
|
||||
|
@ -393,6 +421,18 @@ StringCaseTest::TestCasingImpl(const UnicodeString &input,
|
|||
utf8Out, (int32_t)sizeof(utf8Out),
|
||||
utf8In, utf8InLength, &errorCode);
|
||||
break;
|
||||
case TEST_TITLE:
|
||||
name="ucasemap_utf8ToTitle";
|
||||
utf8OutLength=ucasemap_utf8ToTitle(csm,
|
||||
utf8Out, (int32_t)sizeof(utf8Out),
|
||||
utf8In, utf8InLength, &errorCode);
|
||||
break;
|
||||
case TEST_FOLD:
|
||||
name="ucasemap_utf8FoldCase";
|
||||
utf8OutLength=ucasemap_utf8FoldCase(csm,
|
||||
utf8Out, (int32_t)sizeof(utf8Out),
|
||||
utf8In, utf8InLength, &errorCode);
|
||||
break;
|
||||
default:
|
||||
name="";
|
||||
utf8OutLength=0;
|
||||
|
@ -410,36 +450,22 @@ StringCaseTest::TestCasingImpl(const UnicodeString &input,
|
|||
ucasemap_close(csm);
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
void
|
||||
StringCaseTest::TestTitleCasing(const UnicodeString &input,
|
||||
const UnicodeString &output,
|
||||
const char *localeID,
|
||||
UBreakIterator *iter) {
|
||||
UnicodeString result;
|
||||
|
||||
result=input;
|
||||
result.toTitle((BreakIterator *)iter, Locale(localeID));
|
||||
if(result!=output) {
|
||||
errln("error: UnicodeString.toTitle() got a wrong result for a test case from casing.res");
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void
|
||||
StringCaseTest::TestCasing() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
UBreakIterator *iter;
|
||||
#endif
|
||||
void *iter;
|
||||
char cLocaleID[100];
|
||||
UnicodeString locale, input, output, result;
|
||||
UnicodeString locale, input, output, optionsString, result;
|
||||
uint32_t options;
|
||||
int32_t whichCase, type;
|
||||
TestDataModule *driver = TestDataModule::getTestDataModule("casing", *this, status);
|
||||
if(U_SUCCESS(status)) {
|
||||
for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) {
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
if(whichCase==TEST_TITLE) {
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
TestData *casingTest = driver->createTestData(dataNames[whichCase], status);
|
||||
if(U_FAILURE(status)) {
|
||||
errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status));
|
||||
|
@ -447,39 +473,48 @@ StringCaseTest::TestCasing() {
|
|||
}
|
||||
const DataMap *myCase = NULL;
|
||||
while(casingTest->nextCase(myCase, status)) {
|
||||
locale = myCase->getString("Locale", status);
|
||||
locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
|
||||
|
||||
input = myCase->getString("Input", status);
|
||||
output = myCase->getString("Output", status);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
if(whichCase!=TEST_FOLD) {
|
||||
locale = myCase->getString("Locale", status);
|
||||
}
|
||||
locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
|
||||
|
||||
iter=NULL;
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
if(whichCase==TEST_TITLE) {
|
||||
type = myCase->getInt("Type", status);
|
||||
if(type>=0) {
|
||||
iter=ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status);
|
||||
} else if(type==-2) {
|
||||
// Open a trivial break iterator that only delivers { 0, length }
|
||||
// or even just { 0 } as boundaries.
|
||||
static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;"
|
||||
UParseError parseError;
|
||||
iter=ubrk_openRules(rules, LENGTHOF(rules), NULL, 0, &parseError, &status);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) {
|
||||
optionsString = myCase->getString("Options", status);
|
||||
options = 0;
|
||||
if(optionsString.indexOf((UChar)0x54)>=0) { // T
|
||||
options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I;
|
||||
}
|
||||
if(optionsString.indexOf((UChar)0x4c)>=0) { // L
|
||||
options|=U_TITLECASE_NO_LOWERCASE;
|
||||
}
|
||||
if(optionsString.indexOf((UChar)0x41)>=0) { // A
|
||||
options|=U_TITLECASE_NO_BREAK_ADJUSTMENT;
|
||||
}
|
||||
}
|
||||
|
||||
if(U_FAILURE(status)) {
|
||||
errln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status));
|
||||
status = U_ZERO_ERROR;
|
||||
} else {
|
||||
switch(whichCase) {
|
||||
case TEST_LOWER:
|
||||
case TEST_UPPER:
|
||||
TestCasingImpl(input, output, whichCase, cLocaleID, 0);
|
||||
break;
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
case TEST_TITLE:
|
||||
TestTitleCasing(input, output, cLocaleID, iter);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
break; // won't happen
|
||||
}
|
||||
TestCasingImpl(input, output, whichCase, iter, cLocaleID, options);
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2006, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2007, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -8,7 +8,6 @@
|
|||
#define UNICODESTRINGTEST_H
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "intltest.h"
|
||||
|
||||
/**
|
||||
|
@ -90,13 +89,7 @@ public:
|
|||
void TestCasingImpl(const UnicodeString &input,
|
||||
const UnicodeString &output,
|
||||
int32_t whichCase,
|
||||
const char *localeID, uint32_t options);
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
void TestTitleCasing(const UnicodeString &input,
|
||||
const UnicodeString &output,
|
||||
const char *localeID,
|
||||
UBreakIterator *iter);
|
||||
#endif
|
||||
void *iter, const char *localeID, uint32_t options);
|
||||
void TestCasing();
|
||||
};
|
||||
|
||||
|
|
33
icu4c/source/test/testdata/casing.txt
vendored
33
icu4c/source/test/testdata/casing.txt
vendored
|
@ -1,6 +1,6 @@
|
|||
//*******************************************************************************
|
||||
//*
|
||||
//* Copyright (C) 2002-2006, International Business Machines
|
||||
//* Copyright (C) 2002-2007, International Business Machines
|
||||
//* Corporation and others. All Rights Reserved.
|
||||
//*
|
||||
//*******************************************************************************
|
||||
|
@ -13,7 +13,8 @@ casing:table(nofallback) {
|
|||
"each item is an array with\n"
|
||||
"input string, result string, locale ID[, break iterator]\n"
|
||||
"the break iterator (only for titlecasing) is specified as an int, same as in UBreakIteratorType:\n"
|
||||
"0=UBRK_CHARACTER 1=UBRK_WORD 2=UBRK_LINE 3=UBRK_SENTENCE 4=UBRK_TITLE -1=default\n"
|
||||
"0=UBRK_CHARACTER 1=UBRK_WORD 2=UBRK_LINE 3=UBRK_SENTENCE 4=UBRK_TITLE -1=default (NULL=words) -2=no breaks (.*)\n"
|
||||
"options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I L=U_TITLECASE_NO_LOWERCASE A=U_TITLECASE_NO_BREAK_ADJUSTMENT\n"
|
||||
}
|
||||
}
|
||||
TestData {
|
||||
|
@ -34,13 +35,29 @@ casing:table(nofallback) {
|
|||
}
|
||||
}
|
||||
titlecasing {
|
||||
Headers { "Input", "Output", "Locale", "Type" }
|
||||
Headers { "Input", "Output", "Locale", "Type", "Options" }
|
||||
Cases {
|
||||
{ "ʻaMeLikA huI Pū ʻʻʻiA", "ʻAmelika Hui Pū ʻʻʻIa", "", "-1" }, // titlecase first _cased_ letter, j4933
|
||||
{ " tHe QUIcK bRoWn", " The Quick Brown", "", "4" },
|
||||
{ "DŽDždžLJLjljNJNjnj", "DžDžDžLjLjLjNjNjNj", "", "0" }, // UBRK_CHARACTER
|
||||
{ "ljubav ljubav", "Ljubav Ljubav", "", "-1" }, // Lj vs. L+j
|
||||
{ "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'", "'Oh Don't Titlecase After Letter+'", "", "-1" }
|
||||
{ "ʻaMeLikA huI Pū ʻʻʻiA", "ʻAmelika Hui Pū ʻʻʻIa", "", "-1", "" }, // titlecase first _cased_ letter, j4933
|
||||
{ " tHe QUIcK bRoWn", " The Quick Brown", "", "4", "" },
|
||||
{ "DŽDždžLJLjljNJNjnj", "DžDžDžLjLjLjNjNjNj", "", "0", "" }, // UBRK_CHARACTER
|
||||
{ "ljubav ljubav", "Ljubav Ljubav", "", "-1", "" }, // Lj vs. L+j
|
||||
{ "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'", "'Oh Don't Titlecase After Letter+'", "", "-1", "" },
|
||||
|
||||
{ "a ʻCaT. A ʻdOg! ʻeTc.", "A ʻCat. A ʻDog! ʻEtc.", "", "-1", "" }, // default
|
||||
{ "a ʻCaT. A ʻdOg! ʻeTc.", "A ʻcat. A ʻdog! ʻetc.", "", "-1", "A" }, // U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
{ "a ʻCaT. A ʻdOg! ʻeTc.", "A ʻCaT. A ʻdOg! ʻETc.", "", "3", "L" }, // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE
|
||||
|
||||
{ "ʻcAt! ʻeTc.", "ʻCat! ʻetc.", "", "-2", "" }, // -2=Trivial break iterator
|
||||
{ "ʻcAt! ʻeTc.", "ʻcat! ʻetc.", "", "-2", "A" }, // U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
{ "ʻcAt! ʻeTc.", "ʻCAt! ʻeTc.", "", "-2", "L" }, // U_TITLECASE_NO_LOWERCASE
|
||||
{ "ʻcAt! ʻeTc.", "ʻcAt! ʻeTc.", "", "-2", "AL" } // Both options
|
||||
}
|
||||
}
|
||||
casefolding {
|
||||
Headers { "Input", "Output", "Options" }
|
||||
Cases {
|
||||
{ "aBİIıϐßffi", "abi̇iıβssffi", "" },
|
||||
{ "aBİIıϐßffi", "abiııβssffi", "T" } // U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue