diff --git a/icu4c/source/common/ucase.c b/icu4c/source/common/ucase.c index 266faabdfc3..491b02a353b 100644 --- a/icu4c/source/common/ucase.c +++ b/icu4c/source/common/ucase.c @@ -805,17 +805,12 @@ ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) { * zero or more case-ignorable characters. */ -enum { - LOC_UNKNOWN, - LOC_ROOT, - LOC_TURKISH, - LOC_LITHUANIAN -}; - #define is_a(c) ((c)=='a' || (c)=='A') +#define is_d(c) ((c)=='d' || (c)=='D') #define is_e(c) ((c)=='e' || (c)=='E') #define is_i(c) ((c)=='i' || (c)=='I') #define is_l(c) ((c)=='l' || (c)=='L') +#define is_n(c) ((c)=='n' || (c)=='N') #define is_r(c) ((c)=='r' || (c)=='R') #define is_t(c) ((c)=='t' || (c)=='T') #define is_u(c) ((c)=='u' || (c)=='U') @@ -834,11 +829,11 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) { int32_t result; char c; - if(locCache!=NULL && (result=*locCache)!=LOC_UNKNOWN) { + if(locCache!=NULL && (result=*locCache)!=UCASE_LOC_UNKNOWN) { return result; } - result=LOC_ROOT; + result=UCASE_LOC_ROOT; /* * This function used to use uloc_getLanguage(), but the current code @@ -859,7 +854,7 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) { if(is_r(c)) { c=*locale; if(is_sep(c)) { - result=LOC_TURKISH; + result=UCASE_LOC_TURKISH; } } } else if(is_a(c)) { @@ -871,7 +866,7 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) { c=*locale; } if(is_sep(c)) { - result=LOC_TURKISH; + result=UCASE_LOC_TURKISH; } } } else if(is_l(c)) { @@ -883,7 +878,19 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) { if(is_t(c)) { c=*locale; if(is_sep(c)) { - result=LOC_LITHUANIAN; + result=UCASE_LOC_LITHUANIAN; + } + } + } else if(is_n(c)) { + /* nl or nld? 
*/ + c=*locale++; + if(is_l(c)) { + c=*locale++; + if(is_d(c)) { + c=*locale; + } + if(is_sep(c)) { + result=UCASE_LOC_DUTCH; } } } @@ -1078,7 +1085,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, * then test for characters that have unconditional mappings in SpecialCasing.txt, * then get the UnicodeData.txt mappings. */ - if( loc==LOC_LITHUANIAN && + if( loc==UCASE_LOC_LITHUANIAN && /* base characters, find accents above */ (((c==0x49 || c==0x4a || c==0x12e) && isFollowedByMoreAbove(csp, iter, context)) || @@ -1124,7 +1131,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, return 0; /* will not occur */ } /* # Turkish and Azeri */ - } else if(loc==LOC_TURKISH && c==0x130) { + } else if(loc==UCASE_LOC_TURKISH && c==0x130) { /* # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri # The following rules handle those cases. @@ -1133,7 +1140,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE */ return 0x69; - } else if(loc==LOC_TURKISH && c==0x307 && isPrecededBy_I(csp, iter, context)) { + } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(csp, iter, context)) { /* # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. # This matches the behavior of the canonically equivalent I-dot_above @@ -1142,7 +1149,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE */ return 0; /* remove the dot (continue without output) */ - } else if(loc==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(csp, iter, context)) { + } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(csp, iter, context)) { /* # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. 
@@ -1219,7 +1226,7 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c, /* use hardcoded conditions and mappings */ int32_t loc=ucase_getCaseLocale(locale, locCache); - if(loc==LOC_TURKISH && c==0x69) { + if(loc==UCASE_LOC_TURKISH && c==0x69) { /* # Turkish and Azeri @@ -1232,7 +1239,7 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c, 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I */ return 0x130; - } else if(loc==LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(csp, iter, context)) { + } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(csp, iter, context)) { /* # Lithuanian diff --git a/icu4c/source/common/ucase.h b/icu4c/source/common/ucase.h index 714d0d30228..538adcef735 100644 --- a/icu4c/source/common/ucase.h +++ b/icu4c/source/common/ucase.h @@ -73,6 +73,15 @@ ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode * U_CFUNC int32_t ucase_getCaseLocale(const char *locale, int32_t *locCache); +/* Casing locale types for ucase_getCaseLocale */ +enum { + UCASE_LOC_UNKNOWN, + UCASE_LOC_ROOT, + UCASE_LOC_TURKISH, + UCASE_LOC_LITHUANIAN, + UCASE_LOC_DUTCH +}; + /** * Bit mask for getting just the options from a string compare options word * that are relevant for case-insensitive string comparison. diff --git a/icu4c/source/common/ucasemap.c b/icu4c/source/common/ucasemap.c index ac20ba313da..6433233aec0 100644 --- a/icu4c/source/common/ucasemap.c +++ b/icu4c/source/common/ucasemap.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2005-2007, International Business Machines +* Copyright (C) 2005-2008, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -359,6 +359,16 @@ _toTitle(UCaseMap *csm, c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &csm->locCache); destIndex=appendResult(dest, destIndex, destCapacity, c, s); + + /* Special case Dutch IJ titlecasing */ + if ( titleStart+1 < index && + ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH && + ( src[titleStart] == 0x0049 || src[titleStart] == 0x0069 ) && + ( src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A )) { + c=0x004A; + destIndex=appendResult(dest, destIndex, destCapacity, c, s); + titleLimit++; + } /* lowercase [titleLimit..index[ */ if(titleLimit<index) { if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { diff --git a/icu4c/source/common/ustrcase.c b/icu4c/source/common/ustrcase.c index 402201d8dc0..821e0ffc99f 100644 --- a/icu4c/source/common/ustrcase.c +++ b/icu4c/source/common/ustrcase.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2007, International Business Machines +* Copyright (C) 2001-2008, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -217,7 +217,7 @@ _toTitle(UCaseMap *csm, UErrorCode *pErrorCode) { const UChar *s; UChar32 c; - int32_t prev, titleStart, titleLimit, index, destIndex, length; + int32_t prev, titleStart, titleLimit, titleLimitSave, index, indexSave, destIndex, length; UBool isFirstIndex; if(csm->iter!=NULL) { @@ -296,7 +296,17 @@ _toTitle(UCaseMap *csm, csc->cpStart=titleStart; csc->cpLimit=titleLimit; c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache); - destIndex=appendResult(dest, destIndex, destCapacity, c, s); + destIndex=appendResult(dest, destIndex, destCapacity, c, s); + + /* Special case Dutch IJ titlecasing */ + if ( titleStart+1 < index && + ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH && + ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) && + ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { + c=(UChar32) 0x004A; + destIndex=appendResult(dest, destIndex, destCapacity, c, s); + titleLimit++; + } /* lowercase [titleLimit..index[ */ if(titleLimit