mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-3969 use new case mapping code in ucase.c, with ucase.icu data
X-SVN-Rev: 16257
This commit is contained in:
parent
9f84b31d3a
commit
29038e96b7
20 changed files with 965 additions and 1740 deletions
File diff suppressed because it is too large
Load diff
|
@ -87,6 +87,7 @@ u_cleanup(void)
|
|||
uset_cleanup();
|
||||
unames_cleanup();
|
||||
pname_cleanup();
|
||||
ucase_cleanup();
|
||||
uchar_cleanup();
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
ucnv_cleanup();
|
||||
|
|
|
@ -144,6 +144,14 @@ UnicodeString::caseMap(BreakIterator *titleIter,
|
|||
return *this;
|
||||
}
|
||||
|
||||
UErrorCode errorCode;
|
||||
|
||||
UCaseProps *csp=ucase_getSingleton(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
setToBogus();
|
||||
return *this;
|
||||
}
|
||||
|
||||
// We need to allocate a new buffer for the internal string case mapping function.
|
||||
// This is very similar to how doReplace() below keeps the old array pointer
|
||||
// and deletes the old array itself after it is done.
|
||||
|
@ -167,8 +175,6 @@ UnicodeString::caseMap(BreakIterator *titleIter,
|
|||
return *this;
|
||||
}
|
||||
|
||||
UErrorCode errorCode;
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
// set up the titlecasing break iterator
|
||||
UBreakIterator *cTitleIter = 0;
|
||||
|
@ -195,28 +201,26 @@ UnicodeString::caseMap(BreakIterator *titleIter,
|
|||
do {
|
||||
errorCode = U_ZERO_ERROR;
|
||||
if(toWhichCase==TO_LOWER) {
|
||||
fLength = u_internalStrToLower(fArray, fCapacity,
|
||||
oldArray, oldLength,
|
||||
0, oldLength,
|
||||
locale.getName(),
|
||||
&errorCode);
|
||||
fLength = ustr_toLower(csp, fArray, fCapacity,
|
||||
oldArray, oldLength,
|
||||
locale.getName(), &errorCode);
|
||||
} else if(toWhichCase==TO_UPPER) {
|
||||
fLength = u_internalStrToUpper(fArray, fCapacity,
|
||||
oldArray, oldLength,
|
||||
locale.getName(),
|
||||
&errorCode);
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
fLength = ustr_toUpper(csp, fArray, fCapacity,
|
||||
oldArray, oldLength,
|
||||
locale.getName(), &errorCode);
|
||||
} else if(toWhichCase==TO_TITLE) {
|
||||
fLength = u_internalStrToTitle(fArray, fCapacity,
|
||||
oldArray, oldLength,
|
||||
cTitleIter, locale.getName(),
|
||||
&errorCode);
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
errorCode=U_UNSUPPORTED_ERROR;
|
||||
#else
|
||||
fLength = ustr_toTitle(csp, fArray, fCapacity,
|
||||
oldArray, oldLength,
|
||||
cTitleIter, locale.getName(), &errorCode);
|
||||
#endif
|
||||
} else {
|
||||
fLength = u_internalStrFoldCase(fArray, fCapacity,
|
||||
oldArray, oldLength,
|
||||
options,
|
||||
&errorCode);
|
||||
fLength = ustr_foldCase(csp, fArray, fCapacity,
|
||||
oldArray, oldLength,
|
||||
options,
|
||||
&errorCode);
|
||||
}
|
||||
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
// moved up to make unorm_cmpEquivFold work without normalization
|
||||
#include "unicode/ustring.h"
|
||||
#include "unormimp.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "ucase.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
|
@ -4109,6 +4109,8 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
|
|||
const UChar *s2, int32_t length2,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseProps *csp;
|
||||
|
||||
// current-level start/limit - s1/s2 as current
|
||||
const UChar *start1, *start2, *limit1, *limit2;
|
||||
|
||||
|
@ -4123,7 +4125,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
|
|||
UChar decomp1[4], decomp2[4];
|
||||
|
||||
// case folding buffers, only use current-level start/limit
|
||||
UChar fold1[32], fold2[32];
|
||||
UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
|
||||
|
||||
// track which is the current level per string
|
||||
int32_t level1, level2;
|
||||
|
@ -4139,11 +4141,18 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
|
|||
|
||||
// normalization/properties data loaded?
|
||||
if( ((options&_COMPARE_EQUIV)!=0 && !_haveData(*pErrorCode)) ||
|
||||
((options&U_COMPARE_IGNORE_CASE)!=0 && !uprv_haveProperties(pErrorCode)) ||
|
||||
U_FAILURE(*pErrorCode)
|
||||
) {
|
||||
return 0;
|
||||
}
|
||||
if((options&U_COMPARE_IGNORE_CASE)!=0) {
|
||||
csp=ucase_getSingleton(pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
csp=NULL;
|
||||
}
|
||||
|
||||
// initialize
|
||||
start1=s1;
|
||||
|
@ -4266,9 +4275,9 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
|
|||
// continue with the main loop as soon as there is a real change
|
||||
|
||||
if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
|
||||
(length=u_internalFoldCase((UChar32)cp1, fold1, 32, options))>=0
|
||||
(length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
|
||||
) {
|
||||
// cp1 case-folds to fold1[length]
|
||||
// cp1 case-folds to the code point "length" or to p[length]
|
||||
if(UTF_IS_SURROGATE(c1)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(c1)) {
|
||||
// advance beyond source surrogate pair if it case-folds
|
||||
|
@ -4290,6 +4299,15 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
|
|||
stack1[0].limit=limit1;
|
||||
++level1;
|
||||
|
||||
// copy the folding result to fold1[]
|
||||
if(length<=UCASE_MAX_STRING_LENGTH) {
|
||||
u_memcpy(fold1, p, length);
|
||||
} else {
|
||||
int32_t i=0;
|
||||
U16_APPEND_UNSAFE(fold1, i, length);
|
||||
length=i;
|
||||
}
|
||||
|
||||
// set next level pointers to case folding
|
||||
start1=s1=fold1;
|
||||
limit1=fold1+length;
|
||||
|
@ -4300,9 +4318,9 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
|
|||
}
|
||||
|
||||
if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
|
||||
(length=u_internalFoldCase((UChar32)cp2, fold2, 32, options))>=0
|
||||
(length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
|
||||
) {
|
||||
// cp2 case-folds to fold2[length]
|
||||
// cp2 case-folds to the code point "length" or to p[length]
|
||||
if(UTF_IS_SURROGATE(c2)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(c2)) {
|
||||
// advance beyond source surrogate pair if it case-folds
|
||||
|
@ -4324,6 +4342,15 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
|
|||
stack2[0].limit=limit2;
|
||||
++level2;
|
||||
|
||||
// copy the folding result to fold2[]
|
||||
if(length<=UCASE_MAX_STRING_LENGTH) {
|
||||
u_memcpy(fold2, p, length);
|
||||
} else {
|
||||
int32_t i=0;
|
||||
U16_APPEND_UNSAFE(fold2, i, length);
|
||||
length=i;
|
||||
}
|
||||
|
||||
// set next level pointers to case folding
|
||||
start2=s2=fold2;
|
||||
limit2=fold2+length;
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "unicode/uscript.h"
|
||||
#include "cstring.h"
|
||||
#include "unormimp.h"
|
||||
#include "ucase.h"
|
||||
#include "uprops.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
@ -183,6 +184,9 @@ static const struct {
|
|||
* column and mask values for binary properties from u_getUnicodeProperties().
|
||||
* Must be in order of corresponding UProperty,
|
||||
* and there must be exactly one entry per binary UProperty.
|
||||
*
|
||||
* Properties with mask 0 are handled in code.
|
||||
* Pseudo-column -2 indicates case mapping properties.
|
||||
*/
|
||||
{ 1, U_MASK(UPROPS_ALPHABETIC) },
|
||||
{ 1, U_MASK(UPROPS_ASCII_HEX_DIGIT) },
|
||||
|
@ -206,19 +210,19 @@ static const struct {
|
|||
{ 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR) },
|
||||
{ 1, U_MASK(UPROPS_JOIN_CONTROL) },
|
||||
{ 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION) },
|
||||
{ 1, U_MASK(UPROPS_LOWERCASE) },
|
||||
{ -2, 0 }, /* UCHAR_LOWERCASE */
|
||||
{ 1, U_MASK(UPROPS_MATH) },
|
||||
{ 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT) },
|
||||
{ 1, U_MASK(UPROPS_QUOTATION_MARK) },
|
||||
{ 1, U_MASK(UPROPS_RADICAL) },
|
||||
{ 1, U_MASK(UPROPS_SOFT_DOTTED) },
|
||||
{ -2, 0 }, /* UCHAR_SOFT_DOTTED */
|
||||
{ 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION) },
|
||||
{ 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH) },
|
||||
{ 1, U_MASK(UPROPS_UPPERCASE) },
|
||||
{ -2, 0 }, /* UCHAR_UPPERCASE */
|
||||
{ 1, U_MASK(UPROPS_WHITE_SPACE) },
|
||||
{ 1, U_MASK(UPROPS_XID_CONTINUE) },
|
||||
{ 1, U_MASK(UPROPS_XID_START) },
|
||||
{ -1, U_MASK(UPROPS_CASE_SENSITIVE_SHIFT) },
|
||||
{ -2, 0 }, /* UCHAR_CASE_SENSITIVE */
|
||||
{ 2, U_MASK(UPROPS_V2_S_TERM) },
|
||||
{ 2, U_MASK(UPROPS_V2_VARIATION_SELECTOR) },
|
||||
{ 0, 0 }, /* UCHAR_NFD_INERT */
|
||||
|
@ -238,6 +242,25 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
|
|||
if(mask!=0) {
|
||||
/* systematic, directly stored properties */
|
||||
return (u_getUnicodeProperties(c, binProps[which].column)&mask)!=0;
|
||||
} else if(binProps[which].column==-2) {
|
||||
/* case mapping properties */
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UCaseProps *csp=uchar_getCaseProps(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
switch(which) {
|
||||
case UCHAR_LOWERCASE:
|
||||
return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
|
||||
case UCHAR_UPPERCASE:
|
||||
return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
|
||||
case UCHAR_SOFT_DOTTED:
|
||||
return ucase_isSoftDotted(csp, c);
|
||||
case UCHAR_CASE_SENSITIVE:
|
||||
return ucase_isCaseSensitive(csp, c);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
/* normalization properties from unorm.icu */
|
||||
|
@ -572,6 +595,7 @@ uprv_getInclusions(USet* set, UErrorCode *pErrorCode) {
|
|||
unorm_addPropertyStarts(set, pErrorCode);
|
||||
#endif
|
||||
uchar_addPropertyStarts(set, pErrorCode);
|
||||
ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), set, pErrorCode);
|
||||
|
||||
#ifdef DEBUG
|
||||
{
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "ucase.h"
|
||||
#include "udataswp.h"
|
||||
|
||||
/* indexes[] entries */
|
||||
|
@ -208,6 +209,13 @@ u_getUnicodeProperties(UChar32 c, int32_t column);
|
|||
U_CFUNC int32_t
|
||||
uprv_getMaxValues(int32_t column);
|
||||
|
||||
/**
|
||||
* Get internal UCaseProps pointer from uchar.c for uprops.c.
|
||||
* Other code should use ucase_getSingleton().
|
||||
*/
|
||||
U_CFUNC UCaseProps *
|
||||
uchar_getCaseProps(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* \var uprv_comparePropertyNames
|
||||
* Unicode property names and property value names are compared "loosely".
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uiter.h"
|
||||
#include "ucase.h"
|
||||
|
||||
/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. */
|
||||
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
|
||||
|
@ -24,22 +25,6 @@
|
|||
typedef void UBreakIterator;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Bit mask for getting just the options from a string compare options word
|
||||
* that are relevant for case-insensitive string comparison.
|
||||
* See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
|
||||
* @internal
|
||||
*/
|
||||
#define _STRCASECMP_OPTIONS_MASK 0xffff
|
||||
|
||||
/**
|
||||
* Bit mask for getting just the options from a string compare options word
|
||||
* that are relevant for case folding (of a single string or code point).
|
||||
* See uchar.h.
|
||||
* @internal
|
||||
*/
|
||||
#define _FOLD_CASE_OPTIONS_MASK 0xff
|
||||
|
||||
/**
|
||||
* Compare two strings in code point order or code unit order.
|
||||
* Works in strcmp style (both lengths -1),
|
||||
|
@ -99,26 +84,27 @@ u_growBufferFromStatic(void *context,
|
|||
/*
|
||||
* Internal string casing functions implementing
|
||||
* ustring.h/ustrcase.c and UnicodeString case mapping functions.
|
||||
*
|
||||
* Lowercases [srcStart..srcLimit[ but takes
|
||||
* context [0..srcLength[ into account.
|
||||
* @internal
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_internalStrToLower(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
int32_t srcStart, int32_t srcLimit,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_internalStrToUpper(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode);
|
||||
ustr_toLower(UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ustr_toUpper(UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
|
@ -126,11 +112,12 @@ u_internalStrToUpper(UChar *dest, int32_t destCapacity,
|
|||
* @internal
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_internalStrToTitle(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode);
|
||||
ustr_toTitle(UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -139,62 +126,11 @@ u_internalStrToTitle(UChar *dest, int32_t destCapacity,
|
|||
* @internal
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_internalStrFoldCase(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get the full lowercase mapping for c.
|
||||
* @param iter Character iterator to check for context for SpecialCasing.
|
||||
* The current index must be on the character after c.
|
||||
* This function may or may not change the iterator index.
|
||||
* If iter==NULL then a context-independent result is returned.
|
||||
* @return the length of the output, negative if same as c
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_internalToLower(UChar32 c, UCharIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const char *locale);
|
||||
|
||||
/**
|
||||
* Get the full uppercase mapping for c.
|
||||
* @param iter Character iterator to check for context for SpecialCasing.
|
||||
* The current index must be on the character after c.
|
||||
* This function may or may not change the iterator index.
|
||||
* If iter==NULL then a context-independent result is returned.
|
||||
* @return the length of the output, negative if same as c
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_internalToUpper(UChar32 c, UCharIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const char *locale);
|
||||
|
||||
/**
|
||||
* Get the full titlecase mapping for c.
|
||||
* @param iter Character iterator to check for context for SpecialCasing.
|
||||
* The current index must be on the character after c.
|
||||
* This function may or may not change the iterator index.
|
||||
* If iter==NULL then a context-independent result is returned.
|
||||
* @return the length of the output, negative if same as c
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_internalToTitle(UChar32 c, UCharIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const char *locale);
|
||||
|
||||
/**
|
||||
* Get the full case folding mapping for c.
|
||||
* @return the length of the output, negative if same as c
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_internalFoldCase(UChar32 c,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
uint32_t options);
|
||||
ustr_foldCase(UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* NUL-terminate a UChar * string if possible.
|
||||
|
@ -233,6 +169,4 @@ u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorC
|
|||
U_CAPI int32_t U_EXPORT2
|
||||
u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
|
||||
|
||||
#define u_getMaxCaseExpansion() 10
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2003, International Business Machines
|
||||
* Copyright (C) 2001-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -22,32 +22,156 @@
|
|||
#include "unicode/ustring.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucase.h"
|
||||
#include "unormimp.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
/* string casing ------------------------------------------------------------ */
|
||||
|
||||
/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
|
||||
static U_INLINE int32_t
|
||||
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
||||
int32_t result, const UChar *s) {
|
||||
UChar32 c;
|
||||
int32_t length;
|
||||
|
||||
/* decode the result */
|
||||
if(result<0) {
|
||||
/* (not) original code point */
|
||||
c=~result;
|
||||
length=-1;
|
||||
} else if(result<=UCASE_MAX_STRING_LENGTH) {
|
||||
c=U_SENTINEL;
|
||||
length=result;
|
||||
} else {
|
||||
c=result;
|
||||
length=-1;
|
||||
}
|
||||
|
||||
if(destIndex<destCapacity) {
|
||||
/* append the result */
|
||||
if(length<0) {
|
||||
/* code point */
|
||||
UBool isError=FALSE;
|
||||
U16_APPEND(dest, destIndex, destCapacity, c, isError);
|
||||
if(isError) {
|
||||
/* overflow, nothing written */
|
||||
destIndex+=U16_LENGTH(c);
|
||||
}
|
||||
} else {
|
||||
/* string */
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
while(length>0) {
|
||||
dest[destIndex++]=*s++;
|
||||
--length;
|
||||
}
|
||||
} else {
|
||||
/* overflow */
|
||||
destIndex+=length;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* preflight */
|
||||
if(length<0) {
|
||||
destIndex+=U16_LENGTH(c);
|
||||
} else {
|
||||
destIndex+=length;
|
||||
}
|
||||
}
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
static UChar32 U_CALLCONV
|
||||
utf16_caseContextIterator(void *context, int8_t dir) {
|
||||
UCaseContext *csc=(UCaseContext *)context;
|
||||
UChar32 c;
|
||||
|
||||
if(dir<0) {
|
||||
/* reset for backward iteration */
|
||||
csc->index=csc->cpStart;
|
||||
csc->dir=dir;
|
||||
} else if(dir>0) {
|
||||
/* reset for forward iteration */
|
||||
csc->index=csc->cpLimit;
|
||||
csc->dir=dir;
|
||||
} else {
|
||||
/* continue current iteration direction */
|
||||
dir=csc->dir;
|
||||
}
|
||||
|
||||
if(dir<0) {
|
||||
if(csc->start<csc->index) {
|
||||
U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
|
||||
return c;
|
||||
}
|
||||
} else {
|
||||
if(csc->index<csc->limit) {
|
||||
U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
|
||||
return c;
|
||||
}
|
||||
}
|
||||
return U_SENTINEL;
|
||||
}
|
||||
|
||||
/*
 * Function type of a full case mapping function as it is called by _caseMap():
 * it receives the case properties, the code point c, a context iterator
 * callback together with its context pointer, an output pointer for a
 * result string, and the locale ID with a caller-owned locale cache.
 * The int32_t return value and *pString are handed to appendResult() for decoding.
 */
typedef int32_t U_CALLCONV
|
||||
UCaseMapFull(const UCaseProps *csp, UChar32 c,
|
||||
UCaseContextIterator *iter, void *context,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache);
|
||||
|
||||
/*
|
||||
* Lowercases [srcStart..srcLimit[ but takes
|
||||
* context [0..srcLength[ into account.
|
||||
*/
|
||||
static int32_t
|
||||
_caseMap(UCaseProps *csp, UCaseMapFull *map,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, UCaseContext *csc,
|
||||
int32_t srcStart, int32_t srcLimit,
|
||||
const char *locale, int32_t *locCache,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar *s;
|
||||
UChar32 c;
|
||||
int32_t srcIndex, destIndex;
|
||||
|
||||
/* case mapping loop */
|
||||
srcIndex=srcStart;
|
||||
destIndex=0;
|
||||
while(srcIndex<srcLimit) {
|
||||
csc->cpStart=srcIndex;
|
||||
U16_NEXT(src, srcIndex, srcLimit, c);
|
||||
csc->cpLimit=srcIndex;
|
||||
c=map(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
}
|
||||
|
||||
if(destIndex>destCapacity) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/*
|
||||
* Internal titlecasing function,
|
||||
* using u_internalStrToLower() and u_internalToTitle().
|
||||
* Internal titlecasing function.
|
||||
*
|
||||
* Must get titleIter!=NULL.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_internalStrToTitle(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCharIterator iter;
|
||||
static int32_t
|
||||
_toTitle(UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, UCaseContext *csc,
|
||||
int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale, int32_t *locCache,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar *s;
|
||||
UChar32 c;
|
||||
int32_t prev, index, destIndex, length;
|
||||
int32_t prev, index, destIndex;
|
||||
UBool isFirstIndex;
|
||||
|
||||
/* set up local variables */
|
||||
uiter_setString(&iter, src, srcLength);
|
||||
destIndex=0;
|
||||
prev=0;
|
||||
isFirstIndex=TRUE;
|
||||
|
@ -67,20 +191,14 @@ u_internalStrToTitle(UChar *dest, int32_t destCapacity,
|
|||
|
||||
/* lowercase [prev..index[ */
|
||||
if(prev<index) {
|
||||
if(destIndex<destCapacity) {
|
||||
length=u_internalStrToLower(dest+destIndex, destCapacity-destIndex,
|
||||
src, srcLength,
|
||||
prev, index,
|
||||
locale,
|
||||
pErrorCode);
|
||||
} else {
|
||||
length=u_internalStrToLower(NULL, 0,
|
||||
src, srcLength,
|
||||
prev, index,
|
||||
locale,
|
||||
pErrorCode);
|
||||
}
|
||||
destIndex+=length;
|
||||
destIndex+=
|
||||
_caseMap(
|
||||
csp, ucase_toFullLower,
|
||||
dest+destIndex, destCapacity-destIndex,
|
||||
src, csc,
|
||||
prev, index,
|
||||
locale, locCache,
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
if(index>=srcLength) {
|
||||
|
@ -88,28 +206,108 @@ u_internalStrToTitle(UChar *dest, int32_t destCapacity,
|
|||
}
|
||||
|
||||
/* titlecase the character at the found index */
|
||||
UTF_NEXT_CHAR(src, index, srcLength, c);
|
||||
iter.move(&iter, index, UITER_ZERO);
|
||||
if(destIndex<destCapacity) {
|
||||
length=u_internalToTitle(c, &iter,
|
||||
dest+destIndex, destCapacity-destIndex,
|
||||
locale);
|
||||
} else {
|
||||
length=u_internalToTitle(c, &iter, NULL, 0, locale);
|
||||
}
|
||||
if(length<0) {
|
||||
length=-length;
|
||||
}
|
||||
destIndex+=length;
|
||||
csc->cpStart=index;
|
||||
U16_NEXT(src, index, srcLength, c);
|
||||
csc->cpLimit=index;
|
||||
c=ucase_toFullTitle(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
|
||||
prev=index;
|
||||
}
|
||||
|
||||
if(destIndex>destCapacity) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* functions available in the common library (for unistr_case.cpp) */
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustr_toLower(UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseContext csc={ NULL };
|
||||
int32_t locCache;
|
||||
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
locCache=0;
|
||||
|
||||
return _caseMap(csp, ucase_toFullLower,
|
||||
dest, destCapacity,
|
||||
src, &csc, 0, srcLength,
|
||||
locale, &locCache, pErrorCode);
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustr_toUpper(UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseContext csc={ NULL };
|
||||
int32_t locCache;
|
||||
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
locCache=0;
|
||||
|
||||
return _caseMap(csp, ucase_toFullUpper,
|
||||
dest, destCapacity,
|
||||
src, &csc, 0, srcLength,
|
||||
locale, &locCache, pErrorCode);
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustr_toTitle(UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseContext csc={ NULL };
|
||||
int32_t locCache;
|
||||
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
locCache=0;
|
||||
|
||||
return _toTitle(csp,
|
||||
dest, destCapacity,
|
||||
src, &csc, srcLength,
|
||||
titleIter, locale, &locCache, pErrorCode);
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustr_foldCase(UCaseProps *csp,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
int32_t srcIndex, destIndex;
|
||||
|
||||
const UChar *s;
|
||||
UChar32 c;
|
||||
|
||||
/* case mapping loop */
|
||||
srcIndex=destIndex=0;
|
||||
while(srcIndex<srcLength) {
|
||||
U16_NEXT(src, srcIndex, srcLength, c);
|
||||
c=ucase_toFullFolding(csp, c, &s, options);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
}
|
||||
|
||||
if(destIndex>destCapacity) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
/*
|
||||
* Implement argument checking and buffer handling
|
||||
* for string case mapping as a common function.
|
||||
|
@ -121,16 +319,21 @@ enum {
|
|||
FOLD_CASE
|
||||
};
|
||||
|
||||
/* common internal function for public API functions */
|
||||
|
||||
static int32_t
|
||||
u_strCaseMap(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
uint32_t options,
|
||||
int32_t toWhichCase,
|
||||
UErrorCode *pErrorCode) {
|
||||
caseMap(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
uint32_t options,
|
||||
int32_t toWhichCase,
|
||||
UErrorCode *pErrorCode) {
|
||||
UChar buffer[300];
|
||||
UChar *temp;
|
||||
|
||||
UCaseProps *csp;
|
||||
|
||||
int32_t destLength;
|
||||
UBool ownTitleIter;
|
||||
|
||||
|
@ -147,6 +350,11 @@ u_strCaseMap(UChar *dest, int32_t destCapacity,
|
|||
return 0;
|
||||
}
|
||||
|
||||
csp=ucase_getSingleton(pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get the string length */
|
||||
if(srcLength==-1) {
|
||||
srcLength=u_strlen(src);
|
||||
|
@ -176,30 +384,46 @@ u_strCaseMap(UChar *dest, int32_t destCapacity,
|
|||
ownTitleIter=FALSE;
|
||||
destLength=0;
|
||||
|
||||
if(toWhichCase==TO_LOWER) {
|
||||
destLength=u_internalStrToLower(temp, destCapacity,
|
||||
src, srcLength,
|
||||
0, srcLength,
|
||||
locale, pErrorCode);
|
||||
} else if(toWhichCase==TO_UPPER) {
|
||||
destLength=u_internalStrToUpper(temp, destCapacity, src, srcLength,
|
||||
locale, pErrorCode);
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
} else if(toWhichCase==TO_TITLE) {
|
||||
if(titleIter==NULL) {
|
||||
titleIter=ubrk_open(UBRK_WORD, locale,
|
||||
src, srcLength,
|
||||
pErrorCode);
|
||||
ownTitleIter=(UBool)U_SUCCESS(*pErrorCode);
|
||||
}
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
destLength=u_internalStrToTitle(temp, destCapacity, src, srcLength,
|
||||
titleIter, locale, pErrorCode);
|
||||
}
|
||||
#endif
|
||||
if(toWhichCase==FOLD_CASE) {
|
||||
destLength=ustr_foldCase(csp, temp, destCapacity, src, srcLength,
|
||||
options, pErrorCode);
|
||||
} else {
|
||||
destLength=u_internalStrFoldCase(temp, destCapacity, src, srcLength,
|
||||
options, pErrorCode);
|
||||
UCaseContext csc={ NULL };
|
||||
int32_t locCache;
|
||||
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
locCache=0;
|
||||
|
||||
if(toWhichCase==TO_LOWER) {
|
||||
destLength=_caseMap(csp, ucase_toFullLower,
|
||||
temp, destCapacity,
|
||||
src, &csc,
|
||||
0, srcLength,
|
||||
locale, &locCache, pErrorCode);
|
||||
} else if(toWhichCase==TO_UPPER) {
|
||||
destLength=_caseMap(csp, ucase_toFullUpper,
|
||||
temp, destCapacity,
|
||||
src, &csc,
|
||||
0, srcLength,
|
||||
locale, &locCache, pErrorCode);
|
||||
} else /* if(toWhichCase==TO_TITLE) */ {
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
*pErrorCode=U_UNSUPPORTED_ERROR;
|
||||
#else
|
||||
if(titleIter==NULL) {
|
||||
titleIter=ubrk_open(UBRK_WORD, locale,
|
||||
src, srcLength,
|
||||
pErrorCode);
|
||||
ownTitleIter=(UBool)U_SUCCESS(*pErrorCode);
|
||||
}
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
destLength=_toTitle(csp, temp, destCapacity,
|
||||
src, &csc, srcLength,
|
||||
titleIter, locale, &locCache, pErrorCode);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if(temp!=dest) {
|
||||
/* copy the result string to the destination buffer */
|
||||
|
@ -223,15 +447,17 @@ u_strCaseMap(UChar *dest, int32_t destCapacity,
|
|||
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
|
||||
}
|
||||
|
||||
/* public API functions */
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strToLower(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
return u_strCaseMap(dest, destCapacity,
|
||||
src, srcLength,
|
||||
NULL, locale, 0,
|
||||
TO_LOWER, pErrorCode);
|
||||
return caseMap(dest, destCapacity,
|
||||
src, srcLength,
|
||||
NULL, locale, 0,
|
||||
TO_LOWER, pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
|
@ -239,33 +465,37 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
|
|||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
return u_strCaseMap(dest, destCapacity,
|
||||
src, srcLength,
|
||||
NULL, locale, 0,
|
||||
TO_UPPER, pErrorCode);
|
||||
return caseMap(dest, destCapacity,
|
||||
src, srcLength,
|
||||
NULL, locale, 0,
|
||||
TO_UPPER, pErrorCode);
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strToTitle(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
return u_strCaseMap(dest, destCapacity,
|
||||
src, srcLength,
|
||||
titleIter, locale, 0,
|
||||
TO_TITLE, pErrorCode);
|
||||
return caseMap(dest, destCapacity,
|
||||
src, srcLength,
|
||||
titleIter, locale, 0,
|
||||
TO_TITLE, pErrorCode);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strFoldCase(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
return u_strCaseMap(dest, destCapacity,
|
||||
src, srcLength,
|
||||
NULL, NULL, options,
|
||||
FOLD_CASE, pErrorCode);
|
||||
return caseMap(dest, destCapacity,
|
||||
src, srcLength,
|
||||
NULL, NULL, options,
|
||||
FOLD_CASE, pErrorCode);
|
||||
}
|
||||
|
||||
/* case-insensitive string comparisons */
|
||||
|
|
|
@ -70,7 +70,7 @@ strmatch.o usearch.o search.o stsearch.o \
|
|||
translit.o utrans.o esctrn.o unesctrn.o \
|
||||
funcrepl.o strrepl.o tridpars.o \
|
||||
cpdtrans.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o \
|
||||
nultrans.o remtrans.o titletrn.o tolowtrn.o toupptrn.o anytrans.o \
|
||||
nultrans.o remtrans.o casetrn.o titletrn.o tolowtrn.o toupptrn.o anytrans.o \
|
||||
name2uni.o uni2name.o nortrans.o quant.o transreg.o \
|
||||
regexcmp.o rematch.o repattrn.o regexst.o uregex.o ulocdata.o \
|
||||
measfmt.o currfmt.o curramt.o currunit.o measure.o
|
||||
|
|
193
icu4c/source/i18n/casetrn.cpp
Normal file
193
icu4c/source/i18n/casetrn.cpp
Normal file
|
@ -0,0 +1,193 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: casetrn.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004sep03
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Implementation class for lower-/upper-/title-casing transliterators.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "tolowtrn.h"
|
||||
#include "ucase.h"
|
||||
#include "cpputils.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CaseMapTransliterator)
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
CaseMapTransliterator::CaseMapTransliterator(const Locale &loc, const UnicodeString &id, UCaseMapFull *map) :
|
||||
Transliterator(id, 0),
|
||||
fLoc(loc), fLocName(NULL),
|
||||
fCsp(NULL),
|
||||
fMap(map)
|
||||
{
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
fCsp = ucase_getSingleton(&errorCode); // expect to get NULL if failure
|
||||
fLocName=fLoc.getName();
|
||||
|
||||
// TODO test incremental mode with context-sensitive text (e.g. greek sigma)
|
||||
// TODO need to call setMaximumContextLength()?!
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
CaseMapTransliterator::~CaseMapTransliterator() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
|
||||
Transliterator(o),
|
||||
fLoc(o.fLoc), fLocName(NULL), fCsp(o.fCsp), fMap(o.fMap)
|
||||
{
|
||||
fLocName=fLoc.getName();
|
||||
}
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*/
|
||||
CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
|
||||
Transliterator::operator=(o);
|
||||
fLoc = o.fLoc;
|
||||
fLocName = fLoc.getName();
|
||||
fCsp = o.fCsp;
|
||||
fMap = o.fMap;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transliterator API.
|
||||
*/
|
||||
Transliterator* CaseMapTransliterator::clone(void) const {
|
||||
return new CaseMapTransliterator(*this);
|
||||
}
|
||||
|
||||
/* case context iterator using a Replaceable */
|
||||
UChar32 U_CALLCONV
|
||||
CaseMapTransliterator::rep_caseContextIterator(void *context, int8_t dir) {
|
||||
UCaseContext *csc=(UCaseContext *)context;
|
||||
Replaceable *rep=(Replaceable *)csc->p;
|
||||
UChar32 c;
|
||||
|
||||
if(dir<0) {
|
||||
/* reset for backward iteration */
|
||||
csc->index=csc->cpStart;
|
||||
csc->dir=dir;
|
||||
} else if(dir>0) {
|
||||
/* reset for forward iteration */
|
||||
csc->index=csc->cpLimit;
|
||||
csc->dir=dir;
|
||||
} else {
|
||||
/* continue current iteration direction */
|
||||
dir=csc->dir;
|
||||
}
|
||||
|
||||
// automatically adjust start and limit if the Replaceable disagrees
|
||||
// with the original values
|
||||
if(dir<0) {
|
||||
if(csc->start<csc->index) {
|
||||
c=rep->char32At(csc->index-1);
|
||||
if(c<0) {
|
||||
csc->start=csc->index;
|
||||
} else {
|
||||
csc->index-=U16_LENGTH(c);
|
||||
return c;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// detect, and store in csc->b1, if we hit the limit
|
||||
if(csc->index<csc->limit) {
|
||||
c=rep->char32At(csc->index);
|
||||
if(c<0) {
|
||||
csc->limit=csc->index;
|
||||
csc->b1=TRUE;
|
||||
} else {
|
||||
csc->index+=U16_LENGTH(c);
|
||||
return c;
|
||||
}
|
||||
} else {
|
||||
csc->b1=TRUE;
|
||||
}
|
||||
}
|
||||
return U_SENTINEL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
void CaseMapTransliterator::handleTransliterate(Replaceable& text,
|
||||
UTransPosition& offsets,
|
||||
UBool isIncremental) const
|
||||
{
|
||||
if (offsets.start >= offsets.limit) {
|
||||
return;
|
||||
}
|
||||
|
||||
UCaseContext csc={ &text };
|
||||
csc.start = offsets.contextStart;
|
||||
csc.limit = offsets.contextLimit;
|
||||
|
||||
UnicodeString tmp;
|
||||
const UChar *s;
|
||||
UChar32 c;
|
||||
int32_t textPos, delta, result, locCache=0;
|
||||
|
||||
for(textPos=offsets.start; textPos<offsets.limit;) {
|
||||
csc.cpStart=textPos;
|
||||
c=text.char32At(textPos);
|
||||
csc.cpLimit=textPos+=U16_LENGTH(c);
|
||||
|
||||
result=fMap(fCsp, c, rep_caseContextIterator, &csc, &s, fLocName, &locCache);
|
||||
|
||||
if(csc.b1 && isIncremental) {
|
||||
// fMap() tried to look beyond the context limit
|
||||
// wait for more input
|
||||
break;
|
||||
}
|
||||
|
||||
if(result>=0) {
|
||||
// replace the current code point with its full case mapping result
|
||||
// see UCASE_MAX_STRING_LENGTH
|
||||
if(result<=UCASE_MAX_STRING_LENGTH) {
|
||||
// string s[result]
|
||||
tmp.setTo(FALSE, s, result);
|
||||
delta=result-U16_LENGTH(c);
|
||||
} else {
|
||||
// single code point
|
||||
tmp.setTo(result);
|
||||
delta=tmp.length()-U16_LENGTH(c);
|
||||
}
|
||||
text.handleReplaceBetween(csc.cpStart, textPos, tmp);
|
||||
if(delta!=0) {
|
||||
textPos+=delta;
|
||||
csc.limit=offsets.contextLimit+=delta;
|
||||
offsets.limit+=delta;
|
||||
}
|
||||
}
|
||||
}
|
||||
offsets.start=textPos;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
114
icu4c/source/i18n/casetrn.h
Normal file
114
icu4c/source/i18n/casetrn.h
Normal file
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: casetrn.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004sep03
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Implementation class for lower-/upper-/title-casing transliterators.
|
||||
*/
|
||||
|
||||
#ifndef __CASETRN_H__
|
||||
#define __CASETRN_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "unicode/translit.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "ucase.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
 * Function type of a full case mapping function, as accepted by the
 * CaseMapTransliterator constructor's map argument.
 *
 * @param csp       the case properties to use
 * @param c         the code point to map
 * @param iter      callback for reading the text around c
 * @param context   opaque pointer that is handed to iter
 * @param pString   output: receives a pointer to a result string
 * @param locale    locale name
 * @param locCache  in/out cache value owned by the caller
 * @return the mapping result; NOTE(review): the exact encoding of the
 *         return value is defined by the ucase.h functions - confirm there
 */
typedef int32_t U_CALLCONV
|
||||
UCaseMapFull(const UCaseProps *csp, UChar32 c,
|
||||
UCaseContextIterator *iter, void *context,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A transliterator that performs locale-sensitive
|
||||
* case mapping.
|
||||
*/
|
||||
class U_I18N_API CaseMapTransliterator : public Transliterator {
|
||||
public:
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
* @param loc the given locale.
|
||||
* @param id the transliterator ID.
|
||||
* @param map the full case mapping function (see ucase.h)
|
||||
*/
|
||||
CaseMapTransliterator(const Locale &loc, const UnicodeString &id, UCaseMapFull *map);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~CaseMapTransliterator();
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
CaseMapTransliterator(const CaseMapTransliterator&);
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*/
|
||||
CaseMapTransliterator& operator=(const CaseMapTransliterator&);
|
||||
|
||||
/**
|
||||
* Transliterator API.
|
||||
* @return a copy of the object.
|
||||
*/
|
||||
virtual Transliterator* clone(void) const;
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
* @param text the buffer holding transliterated and
|
||||
* untransliterated text
|
||||
* @param offset the start and limit of the text, the position
|
||||
* of the cursor, and the start and limit of transliteration.
|
||||
* @param incremental if true, assume more text may be coming after
|
||||
* pos.contextLimit. Otherwise, assume the text is complete.
|
||||
*/
|
||||
virtual void handleTransliterate(Replaceable& text,
|
||||
UTransPosition& offsets,
|
||||
UBool isIncremental) const;
|
||||
|
||||
/** case context iterator using a Replaceable */
|
||||
static UChar32 U_CALLCONV rep_caseContextIterator(void *context, int8_t dir);
|
||||
|
||||
Locale fLoc;
|
||||
const char *fLocName;
|
||||
UCaseProps *fCsp;
|
||||
UCaseMapFull *fMap;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
||||
|
||||
#endif
|
|
@ -2225,6 +2225,14 @@ SOURCE=.\anytrans.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\casetrn.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\casetrn.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\cpdtrans.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
|
|
@ -1092,6 +1092,12 @@
|
|||
<File
|
||||
RelativePath=".\anytrans.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\casetrn.cpp">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\casetrn.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\cpdtrans.cpp">
|
||||
</File>
|
||||
|
|
|
@ -17,84 +17,32 @@
|
|||
#include "unicode/ustring.h"
|
||||
#include "titletrn.h"
|
||||
#include "umutex.h"
|
||||
#include "ucln_in.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "ucase.h"
|
||||
#include "cpputils.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator)
|
||||
|
||||
/**
|
||||
* ID for this transliterator.
|
||||
*/
|
||||
static const char CURR_ID[] = "Any-Title";
|
||||
|
||||
/**
|
||||
* The set of characters we skip. These are neither cased nor
|
||||
* non-cased, to us; we copy them verbatim. INVARIANT: Either SKIP
|
||||
* and CASED are both NULL, or neither is NULL.
|
||||
*/
|
||||
static UnicodeSet* SKIP = NULL;
|
||||
|
||||
/**
|
||||
* The set of characters that cause the next non-SKIP character to be
|
||||
* lowercased. INVARIANT: Either SKIP and CASED are both NULL, or
|
||||
* neither is NULL.
|
||||
*/
|
||||
static UnicodeSet* CASED = NULL;
|
||||
|
||||
TitlecaseTransliterator::TitlecaseTransliterator(const Locale& theLoc) :
|
||||
Transliterator(UnicodeString(CURR_ID, ""), 0),
|
||||
loc(theLoc),
|
||||
buffer(0)
|
||||
CaseMapTransliterator(theLoc, UNICODE_STRING("Any-Title", 9), NULL)
|
||||
{
|
||||
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
|
||||
// Need to look back 2 characters in the case of "can't"
|
||||
setMaximumContextLength(2);
|
||||
|
||||
umtx_lock(NULL);
|
||||
UBool f = (SKIP == NULL);
|
||||
umtx_unlock(NULL);
|
||||
|
||||
if (f) {
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
UnicodeSet* skip =
|
||||
new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u00AD \\u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:] [:Sk:]]"), ec);
|
||||
UnicodeSet* cased =
|
||||
new UnicodeSet(UNICODE_STRING_SIMPLE("[[:Lu:] [:Ll:] [:Lt:]]"), ec);
|
||||
if (skip != NULL && cased != NULL && U_SUCCESS(ec)) {
|
||||
umtx_lock(NULL);
|
||||
if (SKIP == NULL) {
|
||||
SKIP = skip;
|
||||
CASED = cased;
|
||||
skip = cased = NULL;
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
}
|
||||
delete skip;
|
||||
delete cased;
|
||||
ucln_i18n_registerCleanup();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
TitlecaseTransliterator::~TitlecaseTransliterator() {
|
||||
uprv_free(buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
|
||||
Transliterator(o),
|
||||
loc(o.loc),
|
||||
buffer(0)
|
||||
CaseMapTransliterator(o)
|
||||
{
|
||||
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
|
||||
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -102,9 +50,7 @@ TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator&
|
|||
*/
|
||||
TitlecaseTransliterator& TitlecaseTransliterator::operator=(
|
||||
const TitlecaseTransliterator& o) {
|
||||
Transliterator::operator=(o);
|
||||
loc = o.loc;
|
||||
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
|
||||
CaseMapTransliterator::operator=(o);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -120,91 +66,97 @@ Transliterator* TitlecaseTransliterator::clone(void) const {
|
|||
*/
|
||||
void TitlecaseTransliterator::handleTransliterate(
|
||||
Replaceable& text, UTransPosition& offsets,
|
||||
UBool /*isIncremental*/) const
|
||||
UBool isIncremental) const
|
||||
{
|
||||
/* TODO: Verify that isIncremental can be ignored */
|
||||
if (SKIP == NULL) {
|
||||
// TODO reimplement, see ustrcase.c
|
||||
// using a real word break iterator
|
||||
// instead of just looking for a transition between cased and uncased characters
|
||||
// call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
|
||||
// needs to take isIncremental into account because case mappings are context-sensitive
|
||||
// also detect when lowercasing function did not finish because of context
|
||||
|
||||
if (offsets.start >= offsets.limit) {
|
||||
return;
|
||||
}
|
||||
|
||||
// case type: >0 cased (UCASE_LOWER etc.) ==0 uncased <0 case-ignorable
|
||||
int32_t type;
|
||||
|
||||
// Our mode; we are either converting letter toTitle or
|
||||
// toLower.
|
||||
UBool doTitle = TRUE;
|
||||
|
||||
// Determine if there is a preceding context of CASED SKIP*,
|
||||
// Determine if there is a preceding context of cased case-ignorable*,
|
||||
// in which case we want to start in toLower mode. If the
|
||||
// prior context is anything else (including empty) then start
|
||||
// in toTitle mode.
|
||||
UChar32 c;
|
||||
int32_t start;
|
||||
for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF_CHAR_LENGTH(c)) {
|
||||
for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
|
||||
c = text.char32At(start);
|
||||
if (SKIP->contains(c)) {
|
||||
continue;
|
||||
type=ucase_getTypeOrIgnorable(fCsp, c);
|
||||
if(type>0) { // cased
|
||||
doTitle=FALSE;
|
||||
break;
|
||||
} else if(type==0) { // uncased but not ignorable
|
||||
break;
|
||||
}
|
||||
doTitle = !CASED->contains(c);
|
||||
break;
|
||||
// else (type<0) case-ignorable: continue
|
||||
}
|
||||
|
||||
// Convert things after a CASED character toLower; things
|
||||
// after a non-CASED, non-SKIP character toTitle. SKIP
|
||||
// Convert things after a cased character toLower; things
|
||||
// after an uncased, non-case-ignorable character toTitle. Case-ignorable
|
||||
// characters are copied directly and do not change the mode.
|
||||
int32_t textPos = offsets.start;
|
||||
if (textPos >= offsets.limit) return;
|
||||
UCaseContext csc={ &text };
|
||||
csc.start = offsets.contextStart;
|
||||
csc.limit = offsets.contextLimit;
|
||||
|
||||
UnicodeString original;
|
||||
text.extractBetween(offsets.contextStart, offsets.contextLimit, original);
|
||||
UnicodeString tmp;
|
||||
const UChar *s;
|
||||
int32_t textPos, delta, result, locCache=0;
|
||||
|
||||
UCharIterator iter;
|
||||
uiter_setReplaceable(&iter, &text);
|
||||
iter.start = offsets.contextStart;
|
||||
iter.limit = offsets.contextLimit;
|
||||
for(textPos=offsets.start; textPos<offsets.limit;) {
|
||||
csc.cpStart=textPos;
|
||||
c=text.char32At(textPos);
|
||||
csc.cpLimit=textPos+=U16_LENGTH(c);
|
||||
|
||||
// Walk through original string
|
||||
// If there is a case change, modify corresponding position in replaceable
|
||||
|
||||
int32_t i = textPos - offsets.contextStart;
|
||||
int32_t limit = offsets.limit - offsets.contextStart;
|
||||
UChar32 cp;
|
||||
int32_t oldLen;
|
||||
int32_t newLen;
|
||||
|
||||
for (; i < limit; ) {
|
||||
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
|
||||
oldLen = UTF_CHAR_LENGTH(cp);
|
||||
i += oldLen;
|
||||
iter.index = i; // Point _past_ current char
|
||||
if (!SKIP->contains(cp)) {
|
||||
if (doTitle) {
|
||||
newLen = u_internalToTitle(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
|
||||
type=ucase_getTypeOrIgnorable(fCsp, c);
|
||||
if(type>=0) { // not case-ignorable
|
||||
if(doTitle) {
|
||||
result=ucase_toFullTitle(fCsp, c, rep_caseContextIterator, &csc, &s, fLocName, &locCache);
|
||||
} else {
|
||||
newLen = u_internalToLower(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
|
||||
result=ucase_toFullLower(fCsp, c, rep_caseContextIterator, &csc, &s, fLocName, &locCache);
|
||||
}
|
||||
doTitle = !CASED->contains(cp);
|
||||
if (newLen >= 0) {
|
||||
UnicodeString temp(buffer, newLen);
|
||||
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
|
||||
if (newLen != oldLen) {
|
||||
textPos += newLen;
|
||||
offsets.limit += newLen - oldLen;
|
||||
offsets.contextLimit += newLen - oldLen;
|
||||
continue;
|
||||
doTitle = (UBool)(type==0); // doTitle=isUncased
|
||||
|
||||
if(csc.b1 && isIncremental) {
|
||||
// fMap() tried to look beyond the context limit
|
||||
// wait for more input
|
||||
break;
|
||||
}
|
||||
|
||||
if(result>=0) {
|
||||
// replace the current code point with its full case mapping result
|
||||
// see UCASE_MAX_STRING_LENGTH
|
||||
if(result<=UCASE_MAX_STRING_LENGTH) {
|
||||
// string s[result]
|
||||
tmp.setTo(FALSE, s, result);
|
||||
delta=result-U16_LENGTH(c);
|
||||
} else {
|
||||
// single code point
|
||||
tmp.setTo(result);
|
||||
delta=tmp.length()-U16_LENGTH(c);
|
||||
}
|
||||
text.handleReplaceBetween(csc.cpStart, textPos, tmp);
|
||||
if(delta!=0) {
|
||||
textPos+=delta;
|
||||
csc.limit=offsets.contextLimit+=delta;
|
||||
offsets.limit+=delta;
|
||||
}
|
||||
}
|
||||
}
|
||||
textPos += oldLen;
|
||||
}
|
||||
offsets.start = offsets.limit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Static memory cleanup function.
|
||||
*/
|
||||
void TitlecaseTransliterator::cleanup() {
|
||||
if (SKIP != NULL) {
|
||||
delete SKIP; SKIP = NULL;
|
||||
delete CASED; CASED = NULL;
|
||||
}
|
||||
offsets.start=textPos;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
#include "unicode/translit.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "ucase.h"
|
||||
#include "casetrn.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -26,7 +28,7 @@ U_NAMESPACE_BEGIN
|
|||
* case using <code>u_totitle()</code>.
|
||||
* @author Alan Liu
|
||||
*/
|
||||
class U_I18N_API TitlecaseTransliterator : public Transliterator {
|
||||
class U_I18N_API TitlecaseTransliterator : public CaseMapTransliterator {
|
||||
public:
|
||||
|
||||
/**
|
||||
|
@ -83,17 +85,6 @@ class U_I18N_API TitlecaseTransliterator : public Transliterator {
|
|||
*/
|
||||
virtual void handleTransliterate(Replaceable& text, UTransPosition& offset,
|
||||
UBool isIncremental) const;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Static memory cleanup function. FOR INTERNAL USE ONLY; DO NOT
|
||||
* CALL.
|
||||
*/
|
||||
static void cleanup();
|
||||
private:
|
||||
Locale loc;
|
||||
UChar* buffer;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -26,27 +26,22 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LowercaseTransliterator)
|
|||
* Constructs a transliterator.
|
||||
*/
|
||||
LowercaseTransliterator::LowercaseTransliterator(const Locale& theLoc) :
|
||||
Transliterator(UNICODE_STRING("Any-Lower", 9), 0),
|
||||
loc(theLoc) , buffer(0)
|
||||
CaseMapTransliterator(theLoc, UNICODE_STRING("Any-Lower", 9), ucase_toFullLower)
|
||||
{
|
||||
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
LowercaseTransliterator::~LowercaseTransliterator() {
|
||||
uprv_free(buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
LowercaseTransliterator::LowercaseTransliterator(const LowercaseTransliterator& o) :
|
||||
Transliterator(o),
|
||||
loc(o.loc), buffer(0)
|
||||
CaseMapTransliterator(o)
|
||||
{
|
||||
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -54,9 +49,7 @@ LowercaseTransliterator::LowercaseTransliterator(const LowercaseTransliterator&
|
|||
*/
|
||||
LowercaseTransliterator& LowercaseTransliterator::operator=(
|
||||
const LowercaseTransliterator& o) {
|
||||
Transliterator::operator=(o);
|
||||
loc = o.loc;
|
||||
uprv_arrayCopy((const UChar*)o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
|
||||
CaseMapTransliterator::operator=(o);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -67,55 +60,6 @@ Transliterator* LowercaseTransliterator::clone(void) const {
|
|||
return new LowercaseTransliterator(*this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
void LowercaseTransliterator::handleTransliterate(Replaceable& text,
|
||||
UTransPosition& offsets,
|
||||
UBool /*isIncremental*/) const
|
||||
{
|
||||
/* TODO: Verify that isIncremental can be ignored */
|
||||
int32_t textPos = offsets.start;
|
||||
if (textPos >= offsets.limit) return;
|
||||
|
||||
// get string for context
|
||||
|
||||
UnicodeString original;
|
||||
text.extractBetween(offsets.contextStart, offsets.contextLimit, original);
|
||||
|
||||
UCharIterator iter;
|
||||
uiter_setReplaceable(&iter, &text);
|
||||
iter.start = offsets.contextStart;
|
||||
iter.limit = offsets.contextLimit;
|
||||
|
||||
// Walk through original string
|
||||
// If there is a case change, modify corresponding position in replaceable
|
||||
|
||||
int32_t i = textPos - offsets.contextStart;
|
||||
int32_t limit = offsets.limit - offsets.contextStart;
|
||||
UChar32 cp;
|
||||
int32_t oldLen;
|
||||
|
||||
for (; i < limit; ) {
|
||||
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
|
||||
oldLen = UTF_CHAR_LENGTH(cp);
|
||||
i += oldLen;
|
||||
iter.index = i; // Point _past_ current char
|
||||
int32_t newLen = u_internalToLower(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
|
||||
if (newLen >= 0) {
|
||||
UnicodeString temp(buffer, newLen);
|
||||
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
|
||||
if (newLen != oldLen) {
|
||||
textPos += newLen;
|
||||
offsets.limit += newLen - oldLen;
|
||||
offsets.contextLimit += newLen - oldLen;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
textPos += oldLen;
|
||||
}
|
||||
offsets.start = offsets.limit;
|
||||
}
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "unicode/translit.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "casetrn.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -24,7 +25,7 @@ U_NAMESPACE_BEGIN
|
|||
* case mapping.
|
||||
* @author Alan Liu
|
||||
*/
|
||||
class U_I18N_API LowercaseTransliterator : public Transliterator {
|
||||
class U_I18N_API LowercaseTransliterator : public CaseMapTransliterator {
|
||||
|
||||
public:
|
||||
|
||||
|
@ -68,27 +69,6 @@ class U_I18N_API LowercaseTransliterator : public Transliterator {
|
|||
* @draft ICU 2.2
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
* @param text the buffer holding transliterated and
|
||||
* untransliterated text
|
||||
* @param offset the start and limit of the text, the position
|
||||
* of the cursor, and the start and limit of transliteration.
|
||||
* @param incremental if true, assume more text may be coming after
|
||||
* pos.contextLimit. Otherwise, assume the text is complete.
|
||||
*/
|
||||
virtual void handleTransliterate(Replaceable& text,
|
||||
UTransPosition& offsets,
|
||||
UBool isIncremental) const;
|
||||
|
||||
private:
|
||||
|
||||
Locale loc;
|
||||
UChar* buffer;
|
||||
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -22,36 +22,26 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UppercaseTransliterator)
|
||||
|
||||
static const char CURR_ID[] = "Any-Upper";
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
UppercaseTransliterator::UppercaseTransliterator(const Locale& theLoc) :
|
||||
Transliterator(UnicodeString(CURR_ID, ""), 0),
|
||||
loc(theLoc),
|
||||
buffer(0)
|
||||
CaseMapTransliterator(theLoc, UNICODE_STRING("Any-Upper", 9), ucase_toFullUpper)
|
||||
{
|
||||
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
UppercaseTransliterator::~UppercaseTransliterator() {
|
||||
uprv_free(buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
UppercaseTransliterator::UppercaseTransliterator(const UppercaseTransliterator& o) :
|
||||
Transliterator(o),
|
||||
loc(o.loc),
|
||||
buffer(0)
|
||||
CaseMapTransliterator(o)
|
||||
{
|
||||
buffer = (UChar *)uprv_malloc(u_getMaxCaseExpansion()*sizeof(buffer[0]));
|
||||
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -59,9 +49,7 @@ UppercaseTransliterator::UppercaseTransliterator(const UppercaseTransliterator&
|
|||
*/
|
||||
UppercaseTransliterator& UppercaseTransliterator::operator=(
|
||||
const UppercaseTransliterator& o) {
|
||||
Transliterator::operator=(o);
|
||||
loc = o.loc;
|
||||
uprv_arrayCopy(o.buffer, 0, this->buffer, 0, u_getMaxCaseExpansion());
|
||||
CaseMapTransliterator::operator=(o);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -72,57 +60,6 @@ Transliterator* UppercaseTransliterator::clone(void) const {
|
|||
return new UppercaseTransliterator(*this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
void UppercaseTransliterator::handleTransliterate(Replaceable& text,
|
||||
UTransPosition& offsets,
|
||||
UBool /*isIncremental*/) const
|
||||
{
|
||||
/* TODO: Verify that isIncremental can be ignored */
|
||||
int32_t textPos = offsets.start;
|
||||
if (textPos >= offsets.limit)
|
||||
return;
|
||||
|
||||
// get string for context
|
||||
|
||||
UnicodeString original;
|
||||
text.extractBetween(offsets.contextStart, offsets.contextLimit, original);
|
||||
|
||||
UCharIterator iter;
|
||||
uiter_setReplaceable(&iter, &text);
|
||||
iter.start = offsets.contextStart;
|
||||
iter.limit = offsets.contextLimit;
|
||||
|
||||
// Walk through original string
|
||||
// If there is a case change, modify corresponding position in replaceable
|
||||
|
||||
int32_t i = textPos - offsets.contextStart;
|
||||
int32_t limit = offsets.limit - offsets.contextStart;
|
||||
UChar32 cp;
|
||||
int32_t oldLen;
|
||||
|
||||
for (; i < limit; ) {
|
||||
UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
|
||||
oldLen = UTF_CHAR_LENGTH(cp);
|
||||
i += oldLen;
|
||||
iter.index = i; // Point _past_ current char
|
||||
int32_t newLen = u_internalToUpper(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
|
||||
if (newLen >= 0) {
|
||||
UnicodeString temp(buffer, newLen);
|
||||
text.handleReplaceBetween(textPos, textPos + oldLen, temp);
|
||||
if (newLen != oldLen) {
|
||||
textPos += newLen;
|
||||
offsets.limit += newLen - oldLen;
|
||||
offsets.contextLimit += newLen - oldLen;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
textPos += oldLen;
|
||||
}
|
||||
offsets.start = offsets.limit;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "unicode/translit.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "casetrn.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -24,7 +25,7 @@ U_NAMESPACE_BEGIN
|
|||
* case mapping.
|
||||
* @author Alan Liu
|
||||
*/
|
||||
class U_I18N_API UppercaseTransliterator : public Transliterator {
|
||||
class U_I18N_API UppercaseTransliterator : public CaseMapTransliterator {
|
||||
|
||||
public:
|
||||
|
||||
|
@ -68,28 +69,6 @@ class U_I18N_API UppercaseTransliterator : public Transliterator {
|
|||
* @draft ICU 2.2
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
protected:
|
||||
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
* @param text the buffer holding transliterated and
|
||||
* untransliterated text
|
||||
* @param offset the start and limit of the text, the position
|
||||
* of the cursor, and the start and limit of transliteration.
|
||||
* @param incremental if true, assume more text may be coming after
|
||||
* pos.contextLimit. Otherwise, assume the text is complete.
|
||||
*/
|
||||
virtual void handleTransliterate(Replaceable& text,
|
||||
UTransPosition& offsets,
|
||||
UBool isIncremental) const;
|
||||
|
||||
private:
|
||||
|
||||
Locale loc;
|
||||
UChar* buffer;
|
||||
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -1537,7 +1537,6 @@ U_NAMESPACE_END
|
|||
* user, because RBTs hold pointers to common data objects.
|
||||
*/
|
||||
U_CFUNC UBool transliterator_cleanup(void) {
|
||||
TitlecaseTransliterator::cleanup();
|
||||
TransliteratorIDParser::cleanup();
|
||||
if (registry) {
|
||||
delete registry;
|
||||
|
|
Loading…
Add table
Reference in a new issue