ICU-3969 use new case mapping code in ucase.c, with ucase.icu data

X-SVN-Rev: 16257
This commit is contained in:
Markus Scherer 2004-09-06 15:57:11 +00:00
parent 9f84b31d3a
commit 29038e96b7
20 changed files with 965 additions and 1740 deletions

File diff suppressed because it is too large Load diff

View file

@ -87,6 +87,7 @@ u_cleanup(void)
uset_cleanup();
unames_cleanup();
pname_cleanup();
ucase_cleanup();
uchar_cleanup();
#if !UCONFIG_NO_CONVERSION
ucnv_cleanup();

View file

@ -144,6 +144,14 @@ UnicodeString::caseMap(BreakIterator *titleIter,
return *this;
}
// Start from a defined "no error" state: errorCode is handed to
// ucase_getSingleton() and then tested with U_FAILURE(), so leaving it
// uninitialized could make this function go bogus (or not) at random.
UErrorCode errorCode = U_ZERO_ERROR;
UCaseProps *csp=ucase_getSingleton(&errorCode);
if(U_FAILURE(errorCode)) {
    // no case mapping data available: mark the string as invalid
    setToBogus();
    return *this;
}
// We need to allocate a new buffer for the internal string case mapping function.
// This is very similar to how doReplace() below keeps the old array pointer
// and deletes the old array itself after it is done.
@ -167,8 +175,6 @@ UnicodeString::caseMap(BreakIterator *titleIter,
return *this;
}
UErrorCode errorCode;
#if !UCONFIG_NO_BREAK_ITERATION
// set up the titlecasing break iterator
UBreakIterator *cTitleIter = 0;
@ -195,28 +201,26 @@ UnicodeString::caseMap(BreakIterator *titleIter,
do {
errorCode = U_ZERO_ERROR;
if(toWhichCase==TO_LOWER) {
fLength = u_internalStrToLower(fArray, fCapacity,
oldArray, oldLength,
0, oldLength,
locale.getName(),
&errorCode);
fLength = ustr_toLower(csp, fArray, fCapacity,
oldArray, oldLength,
locale.getName(), &errorCode);
} else if(toWhichCase==TO_UPPER) {
fLength = u_internalStrToUpper(fArray, fCapacity,
oldArray, oldLength,
locale.getName(),
&errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
fLength = ustr_toUpper(csp, fArray, fCapacity,
oldArray, oldLength,
locale.getName(), &errorCode);
} else if(toWhichCase==TO_TITLE) {
fLength = u_internalStrToTitle(fArray, fCapacity,
oldArray, oldLength,
cTitleIter, locale.getName(),
&errorCode);
#if UCONFIG_NO_BREAK_ITERATION
errorCode=U_UNSUPPORTED_ERROR;
#else
fLength = ustr_toTitle(csp, fArray, fCapacity,
oldArray, oldLength,
cTitleIter, locale.getName(), &errorCode);
#endif
} else {
fLength = u_internalStrFoldCase(fArray, fCapacity,
oldArray, oldLength,
options,
&errorCode);
fLength = ustr_foldCase(csp, fArray, fCapacity,
oldArray, oldLength,
options,
&errorCode);
}
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));

View file

@ -27,7 +27,7 @@
// moved up to make unorm_cmpEquivFold work without normalization
#include "unicode/ustring.h"
#include "unormimp.h"
#include "ustr_imp.h"
#include "ucase.h"
#if !UCONFIG_NO_NORMALIZATION
@ -4109,6 +4109,8 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
UErrorCode *pErrorCode) {
UCaseProps *csp;
// current-level start/limit - s1/s2 as current
const UChar *start1, *start2, *limit1, *limit2;
@ -4123,7 +4125,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
UChar decomp1[4], decomp2[4];
// case folding buffers, only use current-level start/limit
UChar fold1[32], fold2[32];
UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
// track which is the current level per string
int32_t level1, level2;
@ -4139,11 +4141,18 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
// normalization/properties data loaded?
if( ((options&_COMPARE_EQUIV)!=0 && !_haveData(*pErrorCode)) ||
((options&U_COMPARE_IGNORE_CASE)!=0 && !uprv_haveProperties(pErrorCode)) ||
U_FAILURE(*pErrorCode)
) {
return 0;
}
if((options&U_COMPARE_IGNORE_CASE)!=0) {
csp=ucase_getSingleton(pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0;
}
} else {
csp=NULL;
}
// initialize
start1=s1;
@ -4266,9 +4275,9 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
// continue with the main loop as soon as there is a real change
if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
(length=u_internalFoldCase((UChar32)cp1, fold1, 32, options))>=0
(length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
) {
// cp1 case-folds to fold1[length]
// cp1 case-folds to the code point "length" or to p[length]
if(UTF_IS_SURROGATE(c1)) {
if(UTF_IS_SURROGATE_FIRST(c1)) {
// advance beyond source surrogate pair if it case-folds
@ -4290,6 +4299,15 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
stack1[0].limit=limit1;
++level1;
// copy the folding result to fold1[]
if(length<=UCASE_MAX_STRING_LENGTH) {
u_memcpy(fold1, p, length);
} else {
int32_t i=0;
U16_APPEND_UNSAFE(fold1, i, length);
length=i;
}
// set next level pointers to case folding
start1=s1=fold1;
limit1=fold1+length;
@ -4300,9 +4318,9 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
}
if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
(length=u_internalFoldCase((UChar32)cp2, fold2, 32, options))>=0
(length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
) {
// cp2 case-folds to fold2[length]
// cp2 case-folds to the code point "length" or to p[length]
if(UTF_IS_SURROGATE(c2)) {
if(UTF_IS_SURROGATE_FIRST(c2)) {
// advance beyond source surrogate pair if it case-folds
@ -4324,6 +4342,15 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
stack2[0].limit=limit2;
++level2;
// copy the folding result to fold2[]
if(length<=UCASE_MAX_STRING_LENGTH) {
u_memcpy(fold2, p, length);
} else {
int32_t i=0;
U16_APPEND_UNSAFE(fold2, i, length);
length=i;
}
// set next level pointers to case folding
start2=s2=fold2;
limit2=fold2+length;

View file

@ -22,6 +22,7 @@
#include "unicode/uscript.h"
#include "cstring.h"
#include "unormimp.h"
#include "ucase.h"
#include "uprops.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
@ -183,6 +184,9 @@ static const struct {
* column and mask values for binary properties from u_getUnicodeProperties().
* Must be in order of corresponding UProperty,
* and there must be exactly one entry per binary UProperty.
*
* Properties with mask 0 are handled in code.
* Pseudo-column -2 indicates case mapping properties.
*/
{ 1, U_MASK(UPROPS_ALPHABETIC) },
{ 1, U_MASK(UPROPS_ASCII_HEX_DIGIT) },
@ -206,19 +210,19 @@ static const struct {
{ 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR) },
{ 1, U_MASK(UPROPS_JOIN_CONTROL) },
{ 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION) },
{ 1, U_MASK(UPROPS_LOWERCASE) },
{ -2, 0 }, /* UCHAR_LOWERCASE */
{ 1, U_MASK(UPROPS_MATH) },
{ 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT) },
{ 1, U_MASK(UPROPS_QUOTATION_MARK) },
{ 1, U_MASK(UPROPS_RADICAL) },
{ 1, U_MASK(UPROPS_SOFT_DOTTED) },
{ -2, 0 }, /* UCHAR_SOFT_DOTTED */
{ 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION) },
{ 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH) },
{ 1, U_MASK(UPROPS_UPPERCASE) },
{ -2, 0 }, /* UCHAR_UPPERCASE */
{ 1, U_MASK(UPROPS_WHITE_SPACE) },
{ 1, U_MASK(UPROPS_XID_CONTINUE) },
{ 1, U_MASK(UPROPS_XID_START) },
{ -1, U_MASK(UPROPS_CASE_SENSITIVE_SHIFT) },
{ -2, 0 }, /* UCHAR_CASE_SENSITIVE */
{ 2, U_MASK(UPROPS_V2_S_TERM) },
{ 2, U_MASK(UPROPS_V2_VARIATION_SELECTOR) },
{ 0, 0 }, /* UCHAR_NFD_INERT */
@ -238,6 +242,25 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
if(mask!=0) {
/* systematic, directly stored properties */
return (u_getUnicodeProperties(c, binProps[which].column)&mask)!=0;
} else if(binProps[which].column==-2) {
/* case mapping properties */
UErrorCode errorCode=U_ZERO_ERROR;
UCaseProps *csp=uchar_getCaseProps(&errorCode);
if(U_FAILURE(errorCode)) {
return FALSE;
}
switch(which) {
case UCHAR_LOWERCASE:
return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
case UCHAR_UPPERCASE:
return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
case UCHAR_SOFT_DOTTED:
return ucase_isSoftDotted(csp, c);
case UCHAR_CASE_SENSITIVE:
return ucase_isCaseSensitive(csp, c);
default:
break;
}
} else {
#if !UCONFIG_NO_NORMALIZATION
/* normalization properties from unorm.icu */
@ -572,6 +595,7 @@ uprv_getInclusions(USet* set, UErrorCode *pErrorCode) {
unorm_addPropertyStarts(set, pErrorCode);
#endif
uchar_addPropertyStarts(set, pErrorCode);
ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), set, pErrorCode);
#ifdef DEBUG
{

View file

@ -22,6 +22,7 @@
#include "unicode/utypes.h"
#include "unicode/uset.h"
#include "ucase.h"
#include "udataswp.h"
/* indexes[] entries */
@ -208,6 +209,13 @@ u_getUnicodeProperties(UChar32 c, int32_t column);
U_CFUNC int32_t
uprv_getMaxValues(int32_t column);
/**
* Get internal UCaseProps pointer from uchar.c for uprops.c.
* Other code should use ucase_getSingleton().
*/
U_CFUNC UCaseProps *
uchar_getCaseProps(UErrorCode *pErrorCode);
/**
* \var uprv_comparePropertyNames
* Unicode property names and property value names are compared "loosely".

View file

@ -17,6 +17,7 @@
#include "unicode/utypes.h"
#include "unicode/uiter.h"
#include "ucase.h"
/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. */
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
@ -24,22 +25,6 @@
typedef void UBreakIterator;
#endif
/**
* Bit mask for getting just the options from a string compare options word
* that are relevant for case-insensitive string comparison.
* See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
* @internal
*/
#define _STRCASECMP_OPTIONS_MASK 0xffff
/**
* Bit mask for getting just the options from a string compare options word
* that are relevant for case folding (of a single string or code point).
* See uchar.h.
* @internal
*/
#define _FOLD_CASE_OPTIONS_MASK 0xff
/**
* Compare two strings in code point order or code unit order.
* Works in strcmp style (both lengths -1),
@ -99,26 +84,27 @@ u_growBufferFromStatic(void *context,
/*
* Internal string casing functions implementing
* ustring.h/ustrcase.c and UnicodeString case mapping functions.
*
* Lowercases [srcStart..srcLimit[ but takes
* context [0..srcLength[ into account.
* @internal
*/
U_CFUNC int32_t
u_internalStrToLower(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
int32_t srcStart, int32_t srcLimit,
const char *locale,
UErrorCode *pErrorCode);
/**
* @internal
*/
U_CFUNC int32_t
u_internalStrToUpper(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
ustr_toLower(UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
/**
* @internal
*/
U_CFUNC int32_t
ustr_toUpper(UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
@ -126,11 +112,12 @@ u_internalStrToUpper(UChar *dest, int32_t destCapacity,
* @internal
*/
U_CFUNC int32_t
u_internalStrToTitle(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBreakIterator *titleIter,
const char *locale,
UErrorCode *pErrorCode);
ustr_toTitle(UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBreakIterator *titleIter,
const char *locale,
UErrorCode *pErrorCode);
#endif
@ -139,62 +126,11 @@ u_internalStrToTitle(UChar *dest, int32_t destCapacity,
* @internal
*/
U_CFUNC int32_t
u_internalStrFoldCase(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
uint32_t options,
UErrorCode *pErrorCode);
/**
* Get the full lowercase mapping for c.
* @param iter Character iterator to check for context for SpecialCasing.
* The current index must be on the character after c.
* This function may or may not change the iterator index.
* If iter==NULL then a context-independent result is returned.
* @return the length of the output, negative if same as c
* @internal
*/
U_CAPI int32_t U_EXPORT2
u_internalToLower(UChar32 c, UCharIterator *iter,
UChar *dest, int32_t destCapacity,
const char *locale);
/**
* Get the full uppercase mapping for c.
* @param iter Character iterator to check for context for SpecialCasing.
* The current index must be on the character after c.
* This function may or may not change the iterator index.
* If iter==NULL then a context-independent result is returned.
* @return the length of the output, negative if same as c
* @internal
*/
U_CAPI int32_t U_EXPORT2
u_internalToUpper(UChar32 c, UCharIterator *iter,
UChar *dest, int32_t destCapacity,
const char *locale);
/**
* Get the full titlecase mapping for c.
* @param iter Character iterator to check for context for SpecialCasing.
* The current index must be on the character after c.
* This function may or may not change the iterator index.
* If iter==NULL then a context-independent result is returned.
* @return the length of the output, negative if same as c
* @internal
*/
U_CAPI int32_t U_EXPORT2
u_internalToTitle(UChar32 c, UCharIterator *iter,
UChar *dest, int32_t destCapacity,
const char *locale);
/**
* Get the full case folding mapping for c.
* @return the length of the output, negative if same as c
* @internal
*/
U_CAPI int32_t U_EXPORT2
u_internalFoldCase(UChar32 c,
UChar *dest, int32_t destCapacity,
uint32_t options);
ustr_foldCase(UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
uint32_t options,
UErrorCode *pErrorCode);
/**
* NUL-terminate a UChar * string if possible.
@ -233,6 +169,4 @@ u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorC
U_CAPI int32_t U_EXPORT2
u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
#define u_getMaxCaseExpansion() 10
#endif

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2001-2003, International Business Machines
* Copyright (C) 2001-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -22,32 +22,156 @@
#include "unicode/ustring.h"
#include "unicode/ubrk.h"
#include "cmemory.h"
#include "ucase.h"
#include "unormimp.h"
#include "ustr_imp.h"
/* string casing ------------------------------------------------------------ */
/*
 * Append one full case mapping result to dest[], see UCASE_MAX_STRING_LENGTH.
 *
 * Encoding of "result":
 *   result<0                           the code point ~result
 *   0<=result<=UCASE_MAX_STRING_LENGTH the string s[0..result[
 *   result>UCASE_MAX_STRING_LENGTH     the code point result
 *
 * Returns the new destIndex. When the output does not fit (or destIndex is
 * already at/after destCapacity), nothing is written and destIndex is only
 * advanced by the number of UChars that would have been needed.
 */
static U_INLINE int32_t
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
             int32_t result, const UChar *s) {
    UChar32 cp=U_SENTINEL;
    int32_t count=-1;
    UBool isCodePoint;

    /* decode the result */
    if(result<0) {
        /* (not) original code point */
        cp=~result;
    } else if(result>UCASE_MAX_STRING_LENGTH) {
        /* single mapped code point */
        cp=result;
    } else {
        /* string of "result" UChars at s */
        count=result;
    }
    isCodePoint=(UBool)(count<0);

    if(destIndex>=destCapacity) {
        /* preflight: only count */
        if(isCodePoint) {
            return destIndex+U16_LENGTH(cp);
        }
        return destIndex+count;
    }

    if(isCodePoint) {
        UBool failed=FALSE;
        U16_APPEND(dest, destIndex, destCapacity, cp, failed);
        if(failed) {
            /* overflow, nothing written */
            destIndex+=U16_LENGTH(cp);
        }
        return destIndex;
    }

    if((destIndex+count)>destCapacity) {
        /* overflow, nothing written */
        return destIndex+count;
    }

    /* the whole string fits: copy it */
    for(; count>0; --count) {
        dest[destIndex++]=*s++;
    }
    return destIndex;
}
/*
 * Case context iterator over a UTF-16 string; context is a UCaseContext
 * whose p field points to the UChar array.
 *
 * dir<0  restart, iterating backward from cpStart
 * dir>0  restart, iterating forward from cpLimit
 * dir==0 continue in the direction stored by the last restart
 *
 * Returns the next code point in that direction, or U_SENTINEL once
 * start (backward) or limit (forward) has been reached.
 */
static UChar32 U_CALLCONV
utf16_caseContextIterator(void *context, int8_t dir) {
    UCaseContext *ctx=(UCaseContext *)context;
    const UChar *text;
    UChar32 cp;

    if(dir==0) {
        /* continue current iteration direction */
        dir=ctx->dir;
    } else {
        /* reset for backward or forward iteration */
        if(dir<0) {
            ctx->index=ctx->cpStart;
        } else {
            ctx->index=ctx->cpLimit;
        }
        ctx->dir=dir;
    }

    text=(const UChar *)ctx->p;
    if(dir<0) {
        if(ctx->index<=ctx->start) {
            return U_SENTINEL;
        }
        U16_PREV(text, ctx->start, ctx->index, cp);
    } else {
        if(ctx->index>=ctx->limit) {
            return U_SENTINEL;
        }
        U16_NEXT(text, ctx->index, ctx->limit, cp);
    }
    return cp;
}
/*
 * Function type of a full case mapping function as passed to _caseMap()
 * (this file passes ucase_toFullLower and ucase_toFullUpper).
 * The return value is decoded by appendResult():
 * negative = ~(code point), 0..UCASE_MAX_STRING_LENGTH = length of the
 * string returned via pString, larger = a single code point.
 */
typedef int32_t U_CALLCONV
UCaseMapFull(const UCaseProps *csp, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
const char *locale, int32_t *locCache);
/*
 * Case-maps src[srcStart..srcLimit[ with the given full mapping function
 * and appends the results to dest[0..destCapacity[.
 * The mapping function gets csc (with cpStart/cpLimit set around the current
 * code point) and utf16_caseContextIterator for looking at context.
 *
 * Returns the output length; sets U_BUFFER_OVERFLOW_ERROR if that length
 * exceeds destCapacity.
 */
static int32_t
_caseMap(UCaseProps *csp, UCaseMapFull *map,
         UChar *dest, int32_t destCapacity,
         const UChar *src, UCaseContext *csc,
         int32_t srcStart, int32_t srcLimit,
         const char *locale, int32_t *locCache,
         UErrorCode *pErrorCode) {
    const UChar *mapped;
    UChar32 cp;
    int32_t result;
    int32_t srcPos=srcStart;
    int32_t destLength=0;

    /* case mapping loop */
    for(; srcPos<srcLimit;) {
        /* remember the boundaries of the current code point for the context iterator */
        csc->cpStart=srcPos;
        U16_NEXT(src, srcPos, srcLimit, cp);
        csc->cpLimit=srcPos;

        result=map(csp, cp, utf16_caseContextIterator, csc, &mapped, locale, locCache);
        destLength=appendResult(dest, destLength, destCapacity, result, mapped);
    }

    if(destCapacity<destLength) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }
    return destLength;
}
#if !UCONFIG_NO_BREAK_ITERATION
/*
* Internal titlecasing function,
* using u_internalStrToLower() and u_internalToTitle().
* Internal titlecasing function.
*
* Must get titleIter!=NULL.
*/
U_CFUNC int32_t
u_internalStrToTitle(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBreakIterator *titleIter,
const char *locale,
UErrorCode *pErrorCode) {
UCharIterator iter;
static int32_t
_toTitle(UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc,
int32_t srcLength,
UBreakIterator *titleIter,
const char *locale, int32_t *locCache,
UErrorCode *pErrorCode) {
const UChar *s;
UChar32 c;
int32_t prev, index, destIndex, length;
int32_t prev, index, destIndex;
UBool isFirstIndex;
/* set up local variables */
uiter_setString(&iter, src, srcLength);
destIndex=0;
prev=0;
isFirstIndex=TRUE;
@ -67,20 +191,14 @@ u_internalStrToTitle(UChar *dest, int32_t destCapacity,
/* lowercase [prev..index[ */
if(prev<index) {
if(destIndex<destCapacity) {
length=u_internalStrToLower(dest+destIndex, destCapacity-destIndex,
src, srcLength,
prev, index,
locale,
pErrorCode);
} else {
length=u_internalStrToLower(NULL, 0,
src, srcLength,
prev, index,
locale,
pErrorCode);
}
destIndex+=length;
destIndex+=
_caseMap(
csp, ucase_toFullLower,
dest+destIndex, destCapacity-destIndex,
src, csc,
prev, index,
locale, locCache,
pErrorCode);
}
if(index>=srcLength) {
@ -88,28 +206,108 @@ u_internalStrToTitle(UChar *dest, int32_t destCapacity,
}
/* titlecase the character at the found index */
UTF_NEXT_CHAR(src, index, srcLength, c);
iter.move(&iter, index, UITER_ZERO);
if(destIndex<destCapacity) {
length=u_internalToTitle(c, &iter,
dest+destIndex, destCapacity-destIndex,
locale);
} else {
length=u_internalToTitle(c, &iter, NULL, 0, locale);
}
if(length<0) {
length=-length;
}
destIndex+=length;
csc->cpStart=index;
U16_NEXT(src, index, srcLength, c);
csc->cpLimit=index;
c=ucase_toFullTitle(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
prev=index;
}
if(destIndex>destCapacity) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
return destIndex;
}
#endif
/* functions available in the common library (for unistr_case.cpp) */
/*
 * Full lowercasing of src[0..srcLength[ into dest[0..destCapacity[
 * via _caseMap() with ucase_toFullLower.
 * Returns whatever _caseMap() returns (the output length).
 */
U_CFUNC int32_t
ustr_toLower(UCaseProps *csp,
             UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             const char *locale,
             UErrorCode *pErrorCode) {
    UCaseContext context={ NULL };
    int32_t localeCache=0;

    /* the context iterator may look at the whole source string */
    context.p=(void *)src;
    context.limit=srcLength;

    return _caseMap(csp, ucase_toFullLower,
                    dest, destCapacity,
                    src, &context, 0, srcLength,
                    locale, &localeCache, pErrorCode);
}
/*
 * Full uppercasing of src[0..srcLength[ into dest[0..destCapacity[
 * via _caseMap() with ucase_toFullUpper.
 * Returns whatever _caseMap() returns (the output length).
 */
U_CFUNC int32_t
ustr_toUpper(UCaseProps *csp,
             UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             const char *locale,
             UErrorCode *pErrorCode) {
    UCaseContext context={ NULL };
    int32_t localeCache=0;

    /* the context iterator may look at the whole source string */
    context.p=(void *)src;
    context.limit=srcLength;

    return _caseMap(csp, ucase_toFullUpper,
                    dest, destCapacity,
                    src, &context, 0, srcLength,
                    locale, &localeCache, pErrorCode);
}
/*
 * _toTitle() is only compiled when break iteration is available, so this
 * wrapper must be configured out together with it; otherwise the file does
 * not build with UCONFIG_NO_BREAK_ITERATION.
 */
#if !UCONFIG_NO_BREAK_ITERATION

/*
 * Titlecasing of src[0..srcLength[ into dest[0..destCapacity[
 * via _toTitle(), which must get titleIter!=NULL.
 * Returns whatever _toTitle() returns (the output length).
 */
U_CFUNC int32_t
ustr_toTitle(UCaseProps *csp,
             UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             UBreakIterator *titleIter,
             const char *locale,
             UErrorCode *pErrorCode) {
    UCaseContext csc={ NULL };
    int32_t locCache;

    /* the context iterator may look at the whole source string */
    csc.p=(void *)src;
    csc.limit=srcLength;
    locCache=0;

    return _toTitle(csp,
                    dest, destCapacity,
                    src, &csc, srcLength,
                    titleIter, locale, &locCache, pErrorCode);
}

#endif
/*
 * Full case folding of src[0..srcLength[ into dest[0..destCapacity[,
 * one code point at a time via ucase_toFullFolding().
 *
 * Returns the output length; sets U_BUFFER_OVERFLOW_ERROR if that length
 * exceeds destCapacity.
 */
U_CFUNC int32_t
ustr_foldCase(UCaseProps *csp,
              UChar *dest, int32_t destCapacity,
              const UChar *src, int32_t srcLength,
              uint32_t options,
              UErrorCode *pErrorCode) {
    const UChar *mapped;
    UChar32 cp;
    int32_t result;
    int32_t srcPos=0;
    int32_t destLength=0;

    /* case mapping loop */
    for(; srcPos<srcLength;) {
        U16_NEXT(src, srcPos, srcLength, cp);
        result=ucase_toFullFolding(csp, cp, &mapped, options);
        destLength=appendResult(dest, destLength, destCapacity, result, mapped);
    }

    if(destCapacity<destLength) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }
    return destLength;
}
/*
* Implement argument checking and buffer handling
* for string case mapping as a common function.
@ -121,16 +319,21 @@ enum {
FOLD_CASE
};
/* common internal function for public API functions */
static int32_t
u_strCaseMap(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBreakIterator *titleIter,
const char *locale,
uint32_t options,
int32_t toWhichCase,
UErrorCode *pErrorCode) {
caseMap(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBreakIterator *titleIter,
const char *locale,
uint32_t options,
int32_t toWhichCase,
UErrorCode *pErrorCode) {
UChar buffer[300];
UChar *temp;
UCaseProps *csp;
int32_t destLength;
UBool ownTitleIter;
@ -147,6 +350,11 @@ u_strCaseMap(UChar *dest, int32_t destCapacity,
return 0;
}
csp=ucase_getSingleton(pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0;
}
/* get the string length */
if(srcLength==-1) {
srcLength=u_strlen(src);
@ -176,30 +384,46 @@ u_strCaseMap(UChar *dest, int32_t destCapacity,
ownTitleIter=FALSE;
destLength=0;
if(toWhichCase==TO_LOWER) {
destLength=u_internalStrToLower(temp, destCapacity,
src, srcLength,
0, srcLength,
locale, pErrorCode);
} else if(toWhichCase==TO_UPPER) {
destLength=u_internalStrToUpper(temp, destCapacity, src, srcLength,
locale, pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
} else if(toWhichCase==TO_TITLE) {
if(titleIter==NULL) {
titleIter=ubrk_open(UBRK_WORD, locale,
src, srcLength,
pErrorCode);
ownTitleIter=(UBool)U_SUCCESS(*pErrorCode);
}
if(U_SUCCESS(*pErrorCode)) {
destLength=u_internalStrToTitle(temp, destCapacity, src, srcLength,
titleIter, locale, pErrorCode);
}
#endif
if(toWhichCase==FOLD_CASE) {
destLength=ustr_foldCase(csp, temp, destCapacity, src, srcLength,
options, pErrorCode);
} else {
destLength=u_internalStrFoldCase(temp, destCapacity, src, srcLength,
options, pErrorCode);
UCaseContext csc={ NULL };
int32_t locCache;
csc.p=(void *)src;
csc.limit=srcLength;
locCache=0;
if(toWhichCase==TO_LOWER) {
destLength=_caseMap(csp, ucase_toFullLower,
temp, destCapacity,
src, &csc,
0, srcLength,
locale, &locCache, pErrorCode);
} else if(toWhichCase==TO_UPPER) {
destLength=_caseMap(csp, ucase_toFullUpper,
temp, destCapacity,
src, &csc,
0, srcLength,
locale, &locCache, pErrorCode);
} else /* if(toWhichCase==TO_TITLE) */ {
#if UCONFIG_NO_BREAK_ITERATION
*pErrorCode=U_UNSUPPORTED_ERROR;
#else
if(titleIter==NULL) {
titleIter=ubrk_open(UBRK_WORD, locale,
src, srcLength,
pErrorCode);
ownTitleIter=(UBool)U_SUCCESS(*pErrorCode);
}
if(U_SUCCESS(*pErrorCode)) {
destLength=_toTitle(csp, temp, destCapacity,
src, &csc, srcLength,
titleIter, locale, &locCache, pErrorCode);
}
#endif
}
}
if(temp!=dest) {
/* copy the result string to the destination buffer */
@ -223,15 +447,17 @@ u_strCaseMap(UChar *dest, int32_t destCapacity,
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
/* public API functions */
U_CAPI int32_t U_EXPORT2
u_strToLower(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
return u_strCaseMap(dest, destCapacity,
src, srcLength,
NULL, locale, 0,
TO_LOWER, pErrorCode);
return caseMap(dest, destCapacity,
src, srcLength,
NULL, locale, 0,
TO_LOWER, pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@ -239,33 +465,37 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
return u_strCaseMap(dest, destCapacity,
src, srcLength,
NULL, locale, 0,
TO_UPPER, pErrorCode);
return caseMap(dest, destCapacity,
src, srcLength,
NULL, locale, 0,
TO_UPPER, pErrorCode);
}
#if !UCONFIG_NO_BREAK_ITERATION
U_CAPI int32_t U_EXPORT2
u_strToTitle(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBreakIterator *titleIter,
const char *locale,
UErrorCode *pErrorCode) {
return u_strCaseMap(dest, destCapacity,
src, srcLength,
titleIter, locale, 0,
TO_TITLE, pErrorCode);
return caseMap(dest, destCapacity,
src, srcLength,
titleIter, locale, 0,
TO_TITLE, pErrorCode);
}
#endif
U_CAPI int32_t U_EXPORT2
u_strFoldCase(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
uint32_t options,
UErrorCode *pErrorCode) {
return u_strCaseMap(dest, destCapacity,
src, srcLength,
NULL, NULL, options,
FOLD_CASE, pErrorCode);
return caseMap(dest, destCapacity,
src, srcLength,
NULL, NULL, options,
FOLD_CASE, pErrorCode);
}
/* case-insensitive string comparisons */

View file

@ -70,7 +70,7 @@ strmatch.o usearch.o search.o stsearch.o \
translit.o utrans.o esctrn.o unesctrn.o \
funcrepl.o strrepl.o tridpars.o \
cpdtrans.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o \
nultrans.o remtrans.o titletrn.o tolowtrn.o toupptrn.o anytrans.o \
nultrans.o remtrans.o casetrn.o titletrn.o tolowtrn.o toupptrn.o anytrans.o \
name2uni.o uni2name.o nortrans.o quant.o transreg.o \
regexcmp.o rematch.o repattrn.o regexst.o uregex.o ulocdata.o \
measfmt.o currfmt.o curramt.o currunit.o measure.o

View file

@ -0,0 +1,193 @@
/*
*******************************************************************************
*
* Copyright (C) 2001-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: casetrn.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2004sep03
* created by: Markus W. Scherer
*
* Implementation class for lower-/upper-/title-casing transliterators.
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_TRANSLITERATION
#include "unicode/uchar.h"
#include "unicode/ustring.h"
#include "tolowtrn.h"
#include "ucase.h"
#include "cpputils.h"
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CaseMapTransliterator)
/**
 * Constructs a case mapping transliterator for the given locale and ID
 * that applies the full case mapping function map.
 */
CaseMapTransliterator::CaseMapTransliterator(const Locale &loc, const UnicodeString &id, UCaseMapFull *map) :
    Transliterator(id, 0),
    fLoc(loc),
    fLocName(NULL),
    fCsp(NULL),
    fMap(map)
{
    // point at the name owned by our own Locale copy, not at the argument's
    fLocName=fLoc.getName();

    UErrorCode status = U_ZERO_ERROR;
    fCsp = ucase_getSingleton(&status); // expect to get NULL if failure

    // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
    // TODO need to call setMaximumContextLength()?!
}
/**
* Destructor.
* The body is empty: this class does not release fCsp or fLocName here.
*/
CaseMapTransliterator::~CaseMapTransliterator() {
}
/**
 * Copy constructor.
 * Copies the locale, the case properties pointer and the mapping function;
 * the locale name pointer is taken from this object's own Locale copy.
 */
CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
    Transliterator(o),
    fLoc(o.fLoc),
    fLocName(NULL),
    fCsp(o.fCsp),
    fMap(o.fMap)
{
    fLocName = fLoc.getName();
}
/**
 * Assignment operator.
 * Assigns the base class part and all members; the locale name pointer
 * is re-fetched from this object's own Locale after it has been assigned.
 */
CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
    Transliterator::operator=(o);

    fCsp = o.fCsp;
    fMap = o.fMap;

    fLoc = o.fLoc;
    fLocName = fLoc.getName();

    return *this;
}
/**
 * Transliterator API.
 * @return a new copy of this object made with the copy constructor
 */
Transliterator* CaseMapTransliterator::clone(void) const {
    CaseMapTransliterator *copy = new CaseMapTransliterator(*this);
    return copy;
}
/*
 * Case context iterator using a Replaceable; context is a UCaseContext
 * whose p field points to the Replaceable.
 *
 * dir<0  restart, iterating backward from cpStart
 * dir>0  restart, iterating forward from cpLimit
 * dir==0 continue in the direction stored by the last restart
 *
 * Returns the next code point in that direction or U_SENTINEL at the end.
 * Sets b1 when forward iteration hits the limit.
 */
UChar32 U_CALLCONV
CaseMapTransliterator::rep_caseContextIterator(void *context, int8_t dir) {
    UCaseContext *ctx=(UCaseContext *)context;
    Replaceable *text=(Replaceable *)ctx->p;
    UChar32 cp;

    if(dir==0) {
        /* continue current iteration direction */
        dir=ctx->dir;
    } else {
        /* reset for backward or forward iteration */
        if(dir<0) {
            ctx->index=ctx->cpStart;
        } else {
            ctx->index=ctx->cpLimit;
        }
        ctx->dir=dir;
    }

    // automatically adjust start and limit if the Replaceable disagrees
    // with the original values
    if(dir<0) {
        if(ctx->start>=ctx->index) {
            return U_SENTINEL;
        }
        cp=text->char32At(ctx->index-1);
        if(cp>=0) {
            ctx->index-=U16_LENGTH(cp);
            return cp;
        }
        // the Replaceable has nothing before index: pull start up
        ctx->start=ctx->index;
        return U_SENTINEL;
    }

    // forward: detect, and store in ctx->b1, if we hit the limit
    if(ctx->index>=ctx->limit) {
        ctx->b1=TRUE;
        return U_SENTINEL;
    }
    cp=text->char32At(ctx->index);
    if(cp>=0) {
        ctx->index+=U16_LENGTH(cp);
        return cp;
    }
    // the Replaceable has nothing at index: pull limit down
    ctx->limit=ctx->index;
    ctx->b1=TRUE;
    return U_SENTINEL;
}
/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Case-maps each code point in [offsets.start, offsets.limit[ with fMap and
 * replaces it in text when the mapping differs, adjusting offsets.limit and
 * offsets.contextLimit by the change in length.
 * On return, offsets.start is the index of the first code point that has not
 * been processed.
 *
 * @param text          the text to be modified in place
 * @param offsets       context and transliteration boundaries; updated
 * @param isIncremental if TRUE and fMap tried to look beyond the context
 *                      limit, stop in front of the current code point so
 *                      that it is retried when more input is available
 */
void CaseMapTransliterator::handleTransliterate(Replaceable& text,
                                 UTransPosition& offsets,
                                 UBool isIncremental) const
{
    if (offsets.start >= offsets.limit) {
        return;
    }

    UCaseContext csc={ &text };
    csc.start = offsets.contextStart;
    csc.limit = offsets.contextLimit;

    UnicodeString tmp;
    const UChar *s;
    UChar32 c;
    int32_t textPos, delta, result, locCache=0;

    for(textPos=offsets.start; textPos<offsets.limit;) {
        csc.cpStart=textPos;
        c=text.char32At(textPos);
        csc.cpLimit=textPos+=U16_LENGTH(c);

        result=fMap(fCsp, c, rep_caseContextIterator, &csc, &s, fLocName, &locCache);

        if(csc.b1 && isIncremental) {
            // fMap() tried to look beyond the context limit
            // wait for more input
            // textPos is already past c; report c itself as unprocessed
            // so that it is not skipped on the next call
            offsets.start=csc.cpStart;
            return;
        }

        if(result>=0) {
            // replace the current code point with its full case mapping result
            // see UCASE_MAX_STRING_LENGTH
            if(result<=UCASE_MAX_STRING_LENGTH) {
                // string s[result]
                tmp.setTo(FALSE, s, result);
                delta=result-U16_LENGTH(c);
            } else {
                // single code point
                tmp.setTo(result);
                delta=tmp.length()-U16_LENGTH(c);
            }
            text.handleReplaceBetween(csc.cpStart, textPos, tmp);
            if(delta!=0) {
                // keep all positions behind the replacement in sync
                textPos+=delta;
                csc.limit=offsets.contextLimit+=delta;
                offsets.limit+=delta;
            }
        }
    }
    offsets.start=textPos;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_TRANSLITERATION */

114
icu4c/source/i18n/casetrn.h Normal file
View file

@ -0,0 +1,114 @@
/*
*******************************************************************************
*
* Copyright (C) 2001-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: casetrn.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2004sep03
* created by: Markus W. Scherer
*
* Implementation class for lower-/upper-/title-casing transliterators.
*/
#ifndef __CASETRN_H__
#define __CASETRN_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_TRANSLITERATION
#include "unicode/translit.h"
#include "unicode/locid.h"
#include "ucase.h"
U_CDECL_BEGIN
/**
 * Function type of the full case mapping function that a
 * CaseMapTransliterator stores and calls for each code point.
 * A negative return value leaves the code point unchanged;
 * 0..UCASE_MAX_STRING_LENGTH is the length of the string returned via
 * pString; a larger value is a single replacement code point.
 */
typedef int32_t U_CALLCONV
UCaseMapFull(const UCaseProps *csp, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
const char *locale, int32_t *locCache);
U_CDECL_END
U_NAMESPACE_BEGIN
/**
* A transliterator that performs locale-sensitive
* case mapping.
* The mapping itself is done by the UCaseMapFull function
* passed to the constructor.
*/
class U_I18N_API CaseMapTransliterator : public Transliterator {
public:
/**
* Constructs a transliterator.
* @param loc the given locale.
* @param id the transliterator ID.
* @param map the full case mapping function (see ucase.h)
*/
CaseMapTransliterator(const Locale &loc, const UnicodeString &id, UCaseMapFull *map);
/**
* Destructor.
*/
virtual ~CaseMapTransliterator();
/**
* Copy constructor.
*/
CaseMapTransliterator(const CaseMapTransliterator&);
/**
* Assignment operator.
*/
CaseMapTransliterator& operator=(const CaseMapTransliterator&);
/**
* Transliterator API.
* @return a copy of the object.
*/
virtual Transliterator* clone(void) const;
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*/
virtual UClassID getDynamicClassID() const;
/**
* ICU "poor man's RTTI", returns a UClassID for this class.
*/
static UClassID U_EXPORT2 getStaticClassID();
protected:
/**
* Implements {@link Transliterator#handleTransliterate}.
* @param text the buffer holding transliterated and
* untransliterated text
* @param offsets the start and limit of the text, the position
* of the cursor, and the start and limit of transliteration.
* @param isIncremental if true, assume more text may be coming after
* offsets.contextLimit. Otherwise, assume the text is complete.
*/
virtual void handleTransliterate(Replaceable& text,
UTransPosition& offsets,
UBool isIncremental) const;
/**
* Case context iterator using a Replaceable.
* @param context a UCaseContext whose p field points to the Replaceable
* @param dir negative/positive to restart backward/forward iteration,
* 0 to continue in the current direction
*/
static UChar32 U_CALLCONV rep_caseContextIterator(void *context, int8_t dir);
/** The locale used for case mapping. */
Locale fLoc;
/** Result of fLoc.getName(), passed to the mapping function. */
const char *fLocName;
/** Case properties from ucase_getSingleton(). */
UCaseProps *fCsp;
/** The full case mapping function called for each code point. */
UCaseMapFull *fMap;
};
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
#endif

View file

@ -2225,6 +2225,14 @@ SOURCE=.\anytrans.h
# End Source File
# Begin Source File
SOURCE=.\casetrn.cpp
# End Source File
# Begin Source File
SOURCE=.\casetrn.h
# End Source File
# Begin Source File
SOURCE=.\cpdtrans.cpp
# End Source File
# Begin Source File

View file

@ -1092,6 +1092,12 @@
<File
RelativePath=".\anytrans.h">
</File>
<File
RelativePath=".\casetrn.cpp">
</File>
<File
RelativePath=".\casetrn.h">
</File>
<File
RelativePath=".\cpdtrans.cpp">
</File>

View file

@ -17,84 +17,32 @@
#include "unicode/ustring.h"
#include "titletrn.h"
#include "umutex.h"
#include "ucln_in.h"
#include "ustr_imp.h"
#include "ucase.h"
#include "cpputils.h"
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator)
/**
* ID for this transliterator.
*/
static const char CURR_ID[] = "Any-Title";
/**
* The set of characters we skip. These are neither cased nor
* non-cased, to us; we copy them verbatim. INVARIANT: Either SKIP
* and CASED are both NULL, or neither is NULL.
*/
static UnicodeSet* SKIP = NULL;
/**
* The set of characters that cause the next non-SKIP character to be
* lowercased. INVARIANT: Either SKIP and CASED are both NULL, or
* neither is NULL.
*/
static UnicodeSet* CASED = NULL;
TitlecaseTransliterator::TitlecaseTransliterator(const Locale& theLoc) :
Transliterator(UnicodeString(CURR_ID, ""), 0),
loc(theLoc),
buffer(0)
CaseMapTransliterator(theLoc, UNICODE_STRING("Any-Title", 9), NULL)
{
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
// Need to look back 2 characters in the case of "can't"
setMaximumContextLength(2);
umtx_lock(NULL);
UBool f = (SKIP == NULL);
umtx_unlock(NULL);
if (f) {
UErrorCode ec = U_ZERO_ERROR;
UnicodeSet* skip =
new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u00AD \\u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:] [:Sk:]]"), ec);
UnicodeSet* cased =
new UnicodeSet(UNICODE_STRING_SIMPLE("[[:Lu:] [:Ll:] [:Lt:]]"), ec);
if (skip != NULL && cased != NULL && U_SUCCESS(ec)) {
umtx_lock(NULL);
if (SKIP == NULL) {
SKIP = skip;
CASED = cased;
skip = cased = NULL;
}
umtx_unlock(NULL);
}
delete skip;
delete cased;
ucln_i18n_registerCleanup();
}
}
/**
* Destructor.
*/
TitlecaseTransliterator::~TitlecaseTransliterator() {
uprv_free(buffer);
}
/**
* Copy constructor.
*/
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
Transliterator(o),
loc(o.loc),
buffer(0)
CaseMapTransliterator(o)
{
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
}
/**
@ -102,9 +50,7 @@ TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator&
*/
TitlecaseTransliterator& TitlecaseTransliterator::operator=(
const TitlecaseTransliterator& o) {
Transliterator::operator=(o);
loc = o.loc;
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
CaseMapTransliterator::operator=(o);
return *this;
}
@ -120,91 +66,97 @@ Transliterator* TitlecaseTransliterator::clone(void) const {
*/
void TitlecaseTransliterator::handleTransliterate(
Replaceable& text, UTransPosition& offsets,
UBool /*isIncremental*/) const
UBool isIncremental) const
{
/* TODO: Verify that isIncremental can be ignored */
if (SKIP == NULL) {
// TODO reimplement, see ustrcase.c
// using a real word break iterator
// instead of just looking for a transition between cased and uncased characters
// call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
// needs to take isIncremental into account because case mappings are context-sensitive
// also detect when lowercasing function did not finish because of context
if (offsets.start >= offsets.limit) {
return;
}
// case type: >0 cased (UCASE_LOWER etc.) ==0 uncased <0 case-ignorable
int32_t type;
// Our mode; we are either converting letter toTitle or
// toLower.
UBool doTitle = TRUE;
// Determine if there is a preceding context of CASED SKIP*,
// Determine if there is a preceding context of cased case-ignorable*,
// in which case we want to start in toLower mode. If the
// prior context is anything else (including empty) then start
// in toTitle mode.
UChar32 c;
int32_t start;
for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF_CHAR_LENGTH(c)) {
for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
c = text.char32At(start);
if (SKIP->contains(c)) {
continue;
type=ucase_getTypeOrIgnorable(fCsp, c);
if(type>0) { // cased
doTitle=FALSE;
break;
} else if(type==0) { // uncased but not ignorable
break;
}
doTitle = !CASED->contains(c);
break;
// else (type<0) case-ignorable: continue
}
// Convert things after a CASED character toLower; things
// after a non-CASED, non-SKIP character toTitle. SKIP
// Convert things after a cased character toLower; things
// after an uncased, non-case-ignorable character toTitle. Case-ignorable
// characters are copied directly and do not change the mode.
int32_t textPos = offsets.start;
if (textPos >= offsets.limit) return;
UCaseContext csc={ &text };
csc.start = offsets.contextStart;
csc.limit = offsets.contextLimit;
UnicodeString original;
text.extractBetween(offsets.contextStart, offsets.contextLimit, original);
UnicodeString tmp;
const UChar *s;
int32_t textPos, delta, result, locCache=0;
UCharIterator iter;
uiter_setReplaceable(&iter, &text);
iter.start = offsets.contextStart;
iter.limit = offsets.contextLimit;
for(textPos=offsets.start; textPos<offsets.limit;) {
csc.cpStart=textPos;
c=text.char32At(textPos);
csc.cpLimit=textPos+=U16_LENGTH(c);
// Walk through original string
// If there is a case change, modify corresponding position in replaceable
int32_t i = textPos - offsets.contextStart;
int32_t limit = offsets.limit - offsets.contextStart;
UChar32 cp;
int32_t oldLen;
int32_t newLen;
for (; i < limit; ) {
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
oldLen = UTF_CHAR_LENGTH(cp);
i += oldLen;
iter.index = i; // Point _past_ current char
if (!SKIP->contains(cp)) {
if (doTitle) {
newLen = u_internalToTitle(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
type=ucase_getTypeOrIgnorable(fCsp, c);
if(type>=0) { // not case-ignorable
if(doTitle) {
result=ucase_toFullTitle(fCsp, c, rep_caseContextIterator, &csc, &s, fLocName, &locCache);
} else {
newLen = u_internalToLower(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
result=ucase_toFullLower(fCsp, c, rep_caseContextIterator, &csc, &s, fLocName, &locCache);
}
doTitle = !CASED->contains(cp);
if (newLen >= 0) {
UnicodeString temp(buffer, newLen);
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
if (newLen != oldLen) {
textPos += newLen;
offsets.limit += newLen - oldLen;
offsets.contextLimit += newLen - oldLen;
continue;
doTitle = (UBool)(type==0); // doTitle=isUncased
if(csc.b1 && isIncremental) {
// fMap() tried to look beyond the context limit
// wait for more input
break;
}
if(result>=0) {
// replace the current code point with its full case mapping result
// see UCASE_MAX_STRING_LENGTH
if(result<=UCASE_MAX_STRING_LENGTH) {
// string s[result]
tmp.setTo(FALSE, s, result);
delta=result-U16_LENGTH(c);
} else {
// single code point
tmp.setTo(result);
delta=tmp.length()-U16_LENGTH(c);
}
text.handleReplaceBetween(csc.cpStart, textPos, tmp);
if(delta!=0) {
textPos+=delta;
csc.limit=offsets.contextLimit+=delta;
offsets.limit+=delta;
}
}
}
textPos += oldLen;
}
offsets.start = offsets.limit;
}
/**
* Static memory cleanup function.
*/
void TitlecaseTransliterator::cleanup() {
if (SKIP != NULL) {
delete SKIP; SKIP = NULL;
delete CASED; CASED = NULL;
}
offsets.start=textPos;
}
U_NAMESPACE_END

View file

@ -16,6 +16,8 @@
#include "unicode/translit.h"
#include "unicode/locid.h"
#include "ucase.h"
#include "casetrn.h"
U_NAMESPACE_BEGIN
@ -26,7 +28,7 @@ U_NAMESPACE_BEGIN
* case using <code>u_totitle()</code>.
* @author Alan Liu
*/
class U_I18N_API TitlecaseTransliterator : public Transliterator {
class U_I18N_API TitlecaseTransliterator : public CaseMapTransliterator {
public:
/**
@ -83,17 +85,6 @@ class U_I18N_API TitlecaseTransliterator : public Transliterator {
*/
virtual void handleTransliterate(Replaceable& text, UTransPosition& offset,
UBool isIncremental) const;
public:
/**
* Static memory cleanup function. FOR INTERNAL USE ONLY; DO NOT
* CALL.
*/
static void cleanup();
private:
Locale loc;
UChar* buffer;
};
U_NAMESPACE_END

View file

@ -26,27 +26,22 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LowercaseTransliterator)
* Constructs a transliterator.
*/
LowercaseTransliterator::LowercaseTransliterator(const Locale& theLoc) :
Transliterator(UNICODE_STRING("Any-Lower", 9), 0),
loc(theLoc) , buffer(0)
CaseMapTransliterator(theLoc, UNICODE_STRING("Any-Lower", 9), ucase_toFullLower)
{
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
}
/**
* Destructor.
*/
LowercaseTransliterator::~LowercaseTransliterator() {
uprv_free(buffer);
}
/**
* Copy constructor.
*/
LowercaseTransliterator::LowercaseTransliterator(const LowercaseTransliterator& o) :
Transliterator(o),
loc(o.loc), buffer(0)
CaseMapTransliterator(o)
{
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
}
/**
@ -54,9 +49,7 @@ LowercaseTransliterator::LowercaseTransliterator(const LowercaseTransliterator&
*/
LowercaseTransliterator& LowercaseTransliterator::operator=(
const LowercaseTransliterator& o) {
Transliterator::operator=(o);
loc = o.loc;
uprv_arrayCopy((const UChar*)o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
CaseMapTransliterator::operator=(o);
return *this;
}
@ -67,55 +60,6 @@ Transliterator* LowercaseTransliterator::clone(void) const {
return new LowercaseTransliterator(*this);
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
void LowercaseTransliterator::handleTransliterate(Replaceable& text,
UTransPosition& offsets,
UBool /*isIncremental*/) const
{
/* TODO: Verify that isIncremental can be ignored */
int32_t textPos = offsets.start;
if (textPos >= offsets.limit) return;
// get string for context
UnicodeString original;
text.extractBetween(offsets.contextStart, offsets.contextLimit, original);
UCharIterator iter;
uiter_setReplaceable(&iter, &text);
iter.start = offsets.contextStart;
iter.limit = offsets.contextLimit;
// Walk through original string
// If there is a case change, modify corresponding position in replaceable
int32_t i = textPos - offsets.contextStart;
int32_t limit = offsets.limit - offsets.contextStart;
UChar32 cp;
int32_t oldLen;
for (; i < limit; ) {
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
oldLen = UTF_CHAR_LENGTH(cp);
i += oldLen;
iter.index = i; // Point _past_ current char
int32_t newLen = u_internalToLower(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
if (newLen >= 0) {
UnicodeString temp(buffer, newLen);
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
if (newLen != oldLen) {
textPos += newLen;
offsets.limit += newLen - oldLen;
offsets.contextLimit += newLen - oldLen;
continue;
}
}
textPos += oldLen;
}
offsets.start = offsets.limit;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_TRANSLITERATION */

View file

@ -16,6 +16,7 @@
#include "unicode/translit.h"
#include "unicode/locid.h"
#include "casetrn.h"
U_NAMESPACE_BEGIN
@ -24,7 +25,7 @@ U_NAMESPACE_BEGIN
* case mapping.
* @author Alan Liu
*/
class U_I18N_API LowercaseTransliterator : public Transliterator {
class U_I18N_API LowercaseTransliterator : public CaseMapTransliterator {
public:
@ -68,27 +69,6 @@ class U_I18N_API LowercaseTransliterator : public Transliterator {
* @draft ICU 2.2
*/
static UClassID U_EXPORT2 getStaticClassID();
protected:
/**
* Implements {@link Transliterator#handleTransliterate}.
* @param text the buffer holding transliterated and
* untransliterated text
* @param offset the start and limit of the text, the position
* of the cursor, and the start and limit of transliteration.
* @param incremental if true, assume more text may be coming after
* pos.contextLimit. Otherwise, assume the text is complete.
*/
virtual void handleTransliterate(Replaceable& text,
UTransPosition& offsets,
UBool isIncremental) const;
private:
Locale loc;
UChar* buffer;
};
U_NAMESPACE_END

View file

@ -22,36 +22,26 @@ U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UppercaseTransliterator)
static const char CURR_ID[] = "Any-Upper";
/**
* Constructs a transliterator.
*/
UppercaseTransliterator::UppercaseTransliterator(const Locale& theLoc) :
Transliterator(UnicodeString(CURR_ID, ""), 0),
loc(theLoc),
buffer(0)
CaseMapTransliterator(theLoc, UNICODE_STRING("Any-Upper", 9), ucase_toFullUpper)
{
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
}
/**
* Destructor.
*/
UppercaseTransliterator::~UppercaseTransliterator() {
uprv_free(buffer);
}
/**
* Copy constructor.
*/
UppercaseTransliterator::UppercaseTransliterator(const UppercaseTransliterator& o) :
Transliterator(o),
loc(o.loc),
buffer(0)
CaseMapTransliterator(o)
{
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
}
/**
@ -59,9 +49,7 @@ UppercaseTransliterator::UppercaseTransliterator(const UppercaseTransliterator&
*/
UppercaseTransliterator& UppercaseTransliterator::operator=(
const UppercaseTransliterator& o) {
Transliterator::operator=(o);
loc = o.loc;
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
CaseMapTransliterator::operator=(o);
return *this;
}
@ -72,57 +60,6 @@ Transliterator* UppercaseTransliterator::clone(void) const {
return new UppercaseTransliterator(*this);
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
void UppercaseTransliterator::handleTransliterate(Replaceable& text,
UTransPosition& offsets,
UBool /*isIncremental*/) const
{
/* TODO: Verify that isIncremental can be ignored */
int32_t textPos = offsets.start;
if (textPos >= offsets.limit)
return;
// get string for context
UnicodeString original;
text.extractBetween(offsets.contextStart, offsets.contextLimit, original);
UCharIterator iter;
uiter_setReplaceable(&iter, &text);
iter.start = offsets.contextStart;
iter.limit = offsets.contextLimit;
// Walk through original string
// If there is a case change, modify corresponding position in replaceable
int32_t i = textPos - offsets.contextStart;
int32_t limit = offsets.limit - offsets.contextStart;
UChar32 cp;
int32_t oldLen;
for (; i < limit; ) {
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
oldLen = UTF_CHAR_LENGTH(cp);
i += oldLen;
iter.index = i; // Point _past_ current char
int32_t newLen = u_internalToUpper(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
if (newLen >= 0) {
UnicodeString temp(buffer, newLen);
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
if (newLen != oldLen) {
textPos += newLen;
offsets.limit += newLen - oldLen;
offsets.contextLimit += newLen - oldLen;
continue;
}
}
textPos += oldLen;
}
offsets.start = offsets.limit;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_TRANSLITERATION */

View file

@ -16,6 +16,7 @@
#include "unicode/translit.h"
#include "unicode/locid.h"
#include "casetrn.h"
U_NAMESPACE_BEGIN
@ -24,7 +25,7 @@ U_NAMESPACE_BEGIN
* case mapping.
* @author Alan Liu
*/
class U_I18N_API UppercaseTransliterator : public Transliterator {
class U_I18N_API UppercaseTransliterator : public CaseMapTransliterator {
public:
@ -68,28 +69,6 @@ class U_I18N_API UppercaseTransliterator : public Transliterator {
* @draft ICU 2.2
*/
static UClassID U_EXPORT2 getStaticClassID();
protected:
/**
* Implements {@link Transliterator#handleTransliterate}.
* @param text the buffer holding transliterated and
* untransliterated text
* @param offset the start and limit of the text, the position
* of the cursor, and the start and limit of transliteration.
* @param incremental if true, assume more text may be coming after
* pos.contextLimit. Otherwise, assume the text is complete.
*/
virtual void handleTransliterate(Replaceable& text,
UTransPosition& offsets,
UBool isIncremental) const;
private:
Locale loc;
UChar* buffer;
};
U_NAMESPACE_END

View file

@ -1537,7 +1537,6 @@ U_NAMESPACE_END
* user, because RBTs hold pointers to common data objects.
*/
U_CFUNC UBool transliterator_cleanup(void) {
TitlecaseTransliterator::cleanup();
TransliteratorIDParser::cleanup();
if (registry) {
delete registry;