diff --git a/icu4c/source/common/ustr_imp.h b/icu4c/source/common/ustr_imp.h index ee54d332ef0..a746016d2c2 100644 --- a/icu4c/source/common/ustr_imp.h +++ b/icu4c/source/common/ustr_imp.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1999-2011, International Business Machines +* Copyright (C) 1999-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ustr_imp.h @@ -62,6 +62,25 @@ u_strcmpFold(const UChar *s1, int32_t length1, uint32_t options, UErrorCode *pErrorCode); +/** + * Internal API, used for detecting length of + * shared prefix case-insensitively. + * @param s1 input string 1 + * @param length1 length of string 1, or -1 (NULL terminated) + * @param s2 input string 2 + * @param length2 length of string 2, or -1 (NULL terminated) + * @param options compare options + * @param matchLen1 (output) length of partial prefix match in s1 + * @param matchLen2 (output) length of partial prefix match in s2 + * @param pErrorCode receives error status + */ +U_CAPI void +u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + int32_t *matchLen1, int32_t *matchLen2, + UErrorCode *pErrorCode); + /** * Are the Unicode properties loaded? * This must be used before internal functions are called that do diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index e687267df86..4697160e810 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2014, International Business Machines +* Copyright (C) 2001-2015, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -28,6 +28,7 @@ #include "cmemory.h" #include "ucase.h" #include "ustr_imp.h" +#include "uassert.h" U_NAMESPACE_USE @@ -463,17 +464,39 @@ struct CmpEquivLevel { }; typedef struct CmpEquivLevel CmpEquivLevel; -/* internal function */ -U_CFUNC int32_t -u_strcmpFold(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode) { +/** + * Internal implementation code comparing string with case fold. + * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch(). + * + * @param s1 input string 1 + * @param length1 length of string 1, or -1 (NULL terminated) + * @param s2 input string 2 + * @param length2 length of string 2, or -1 (NULL terminated) + * @param options compare options + * @param matchLen1 (output) length of partial prefix match in s1 + * @param matchLen2 (output) length of partial prefix match in s2 + * @param pErrorCode receives error status + * @return The result of comparison + */ +static int32_t _cmpFold( + const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + int32_t *matchLen1, int32_t *matchLen2, + UErrorCode *pErrorCode) { + int32_t cmpRes = 0; + const UCaseProps *csp; /* current-level start/limit - s1/s2 as current */ const UChar *start1, *start2, *limit1, *limit2; + /* points to the original start address */ + const UChar *org1, *org2; + + /* points to the end of match + 1 */ + const UChar *m1, *m2; + /* case folding variables */ const UChar *p; int32_t length; @@ -502,14 +525,20 @@ u_strcmpFold(const UChar *s1, int32_t length1, } /* initialize */ - start1=s1; + if(matchLen1) { + U_ASSERT(matchLen2 !=NULL); + *matchLen1=0; + *matchLen2=0; + } + + start1=m1=org1=s1; if(length1==-1) { limit1=NULL; } else { limit1=s1+length1; } - start2=s2; + start2=m2=org2=s2; if(length2==-1) { limit2=NULL; } else { @@ -577,15 +606,59 @@ u_strcmpFold(const UChar *s1, 
int32_t length1, * either variable c1, c2 is -1 only if the corresponding string is finished */ if(c1==c2) { + const UChar *next1, *next2; + if(c1<0) { - return 0; /* c1==c2==-1 indicating end of strings */ + cmpRes=0; /* c1==c2==-1 indicating end of strings */ + break; + } + + /* + * Note: Move the match positions in both strings at the same time + * only when corresponding code point(s) in the original strings + * are fully consumed. For example, when comparing s1="Fust" and + * s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches + * the first code point in the case-folded data. But the second "s" + * has no matching code point in s1, so this implementation returns + * 2 as the prefix match length ("Fu"). + */ + next1=next2=NULL; + if(level1==0) { + next1=s1; + } else if(s1==limit1) { + /* Note: This implementation only uses a single level of stack. + * If this code needs to be changed to use multiple levels + * of stacks, the code above should check if the current + * code is at the end of all stacks. + */ + U_ASSERT(level1==1); + + /* is s1 at the end of the current stack? */ + next1=stack1[0].s; + } + + if (next1!=NULL) { + if(level2==0) { + next2=s2; + } else if(s2==limit2) { + U_ASSERT(level2==1); + + /* is s2 at the end of the current stack? 
*/ + next2=stack2[0].s; + } + if(next2!=NULL) { + m1=next1; + m2=next2; + } } c1=c2=-1; /* make us fetch new code units */ continue; } else if(c1<0) { - return -1; /* string 1 ends before string 2 */ + cmpRes=-1; /* string 1 ends before string 2 */ + break; } else if(c2<0) { - return 1; /* string 2 ends before string 1 */ + cmpRes=1; /* string 2 ends before string 1 */ + break; } /* c1!=c2 && c1>=0 && c2>=0 */ @@ -644,6 +717,7 @@ u_strcmpFold(const UChar *s1, int32_t length1, * the decomposition would replace the entire code point */ --s2; + --m2; c2=*(s2-1); } } @@ -689,6 +763,7 @@ u_strcmpFold(const UChar *s1, int32_t length1, * the decomposition would replace the entire code point */ --s1; + --m2; /* NOTE(review): s1 is rewound on the previous line, yet m2 (not m1) is decremented, exactly as in the s2 hunk above - confirm this is intended */ c1=*(s1-1); } } @@ -757,8 +832,24 @@ u_strcmpFold(const UChar *s1, int32_t length1, } } - return c1-c2; + cmpRes=c1-c2; + break; } + + if(matchLen1) { + *matchLen1=m1-org1; + *matchLen2=m2-org2; + } + return cmpRes; +} + +/* internal function: forwards to _cmpFold() with NULL match-length outputs */ +U_CFUNC int32_t +u_strcmpFold(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode) { + return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode); } /* public API functions */ @@ -804,3 +895,14 @@ u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), &errorCode); } + +/* internal API - detect length of shared prefix; forwards to _cmpFold() and ignores its comparison result */ +U_CAPI void +u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + int32_t *matchLen1, int32_t *matchLen2, + UErrorCode *pErrorCode) { + _cmpFold(s1, length1, s2, length2, options, + matchLen1, matchLen2, pErrorCode); +} diff --git 
a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index eb4376b3c24..c8744ad474a 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -61,6 +61,7 @@ #include #include "smpdtfst.h" #include "sharednumberformat.h" +#include 
"ustr_imp.h" #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL) #include @@ -1645,7 +1646,8 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, case UDAT_TIMEZONE_ISO_FIELD: // 'X' case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x' { - UnicodeString zoneString; + UChar zsbuf[64]; + UnicodeString zoneString(zsbuf, 0, UPRV_LENGTHOF(zsbuf)); const TimeZone& tz = cal.getTimeZone(); UDate date = cal.getTime(status); if (U_SUCCESS(status)) { @@ -2294,11 +2296,10 @@ ExitParse: //---------------------------------------------------------------------- -static UBool -newBestMatchWithOptionalDot(const UnicodeString &lcaseText, - const UnicodeString &data, - UnicodeString &bestMatchName, - int32_t &bestMatchLength); +static int32_t +matchStringWithOptionalDot(const UnicodeString &text, + int32_t index, + const UnicodeString &data); int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text, int32_t start, @@ -2317,54 +2318,17 @@ int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text, int32_t bestMatchLength = 0, bestMatch = -1; UnicodeString bestMatchName; - // {sfb} kludge to support case-insensitive comparison - // {markus 2002oct11} do not just use caseCompareBetween because we do not know - // the length of the match after case folding - // {alan 20040607} don't case change the whole string, since the length - // can change - // TODO we need a case-insensitive startsWith function - UnicodeString lcaseText; - text.extract(start, INT32_MAX, lcaseText); - lcaseText.foldCase(); - - for (; i < count; ++i) - { - // Always compare if we have no match yet; otherwise only compare - // against potentially better matches (longer strings). 
- - if (newBestMatchWithOptionalDot(lcaseText, data[i], bestMatchName, bestMatchLength)) { + for (; i < count; ++i) { + int32_t matchLength = 0; + if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) { + bestMatchLength = matchLength; bestMatch = i; } } - if (bestMatch >= 0) - { + + if (bestMatch >= 0) { cal.set(field, bestMatch * 3); - - // Once we have a match, we have to determine the length of the - // original source string. This will usually be == the length of - // the case folded string, but it may differ (e.g. sharp s). - - // Most of the time, the length will be the same as the length - // of the string from the locale data. Sometimes it will be - // different, in which case we will have to figure it out by - // adding a character at a time, until we have a match. We do - // this all in one loop, where we try 'len' first (at index - // i==0). - int32_t len = bestMatchName.length(); // 99+% of the time - int32_t n = text.length() - start; - for (i=0; i<=n; ++i) { - int32_t j=i; - if (i == 0) { - j = len; - } else if (i == len) { - continue; // already tried this when i was 0 - } - text.extract(start, j, lcaseText); - lcaseText.foldCase(); - if (bestMatchName == lcaseText) { - return start + j; - } - } + return start + bestMatchLength; } return -start; @@ -2382,7 +2346,7 @@ UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern, UBool inQuote = FALSE; UnicodeString literal; int32_t i = patternOffset; - + // scan pattern looking for contiguous literal characters for ( ; i < pattern.length(); i += 1) { UChar ch = pattern.charAt(i); @@ -2538,24 +2502,11 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text, UnicodeString bestMatchName; int32_t isLeapMonth = 0; - // {sfb} kludge to support case-insensitive comparison - // {markus 2002oct11} do not just use caseCompareBetween because we do not know - // the length of the match after case folding - // {alan 20040607} don't case change the whole string, 
since the length - // can change - // TODO we need a case-insensitive startsWith function - UnicodeString lcaseText; - text.extract(start, INT32_MAX, lcaseText); - lcaseText.foldCase(); - - for (; i < count; ++i) - { - // Always compare if we have no match yet; otherwise only compare - // against potentially better matches (longer strings). - - if (newBestMatchWithOptionalDot(lcaseText, data[i], bestMatchName, bestMatchLength)) { + for (; i < count; ++i) { + int32_t matchLen = 0; + if ((matchLen = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) { bestMatch = i; - isLeapMonth = 0; + bestMatchLength = matchLen; } if (monthPattern != NULL) { @@ -2564,20 +2515,20 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text, Formattable monthName((const UnicodeString&)(data[i])); MessageFormat::format(*monthPattern, &monthName, 1, leapMonthName, status); if (U_SUCCESS(status)) { - if (newBestMatchWithOptionalDot(lcaseText, leapMonthName, bestMatchName, bestMatchLength)) { + if ((matchLen = matchStringWithOptionalDot(text, start, leapMonthName)) > bestMatchLength) { bestMatch = i; + bestMatchLength = matchLen; isLeapMonth = 1; } } } } - if (bestMatch >= 0) - { + + if (bestMatch >= 0) { // Adjustment for Hebrew Calendar month Adar II if (!strcmp(cal.getType(),"hebrew") && field==UCAL_MONTH && bestMatch==13) { cal.set(field,6); - } - else { + } else { if (field == UCAL_YEAR) { bestMatch++; // only get here for cyclic year names, which match 1-based years 1-60 } @@ -2587,65 +2538,34 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text, cal.set(UCAL_IS_LEAP_MONTH, isLeapMonth); } - // Once we have a match, we have to determine the length of the - // original source string. This will usually be == the length of - // the case folded string, but it may differ (e.g. sharp s). - - // Most of the time, the length will be the same as the length - // of the string from the locale data. 
Sometimes it will be - // different, in which case we will have to figure it out by - // adding a character at a time, until we have a match. We do - // this all in one loop, where we try 'len' first (at index - // i==0). - int32_t len = bestMatchName.length(); // 99+% of the time - int32_t n = text.length() - start; - for (i=0; i<=n; ++i) { - int32_t j=i; - if (i == 0) { - j = len; - } else if (i == len) { - continue; // already tried this when i was 0 - } - text.extract(start, j, lcaseText); - lcaseText.foldCase(); - if (bestMatchName == lcaseText) { - return start + j; - } - } + return start + bestMatchLength; } return -start; } -static UBool -newBestMatchWithOptionalDot(const UnicodeString &lcaseText, - const UnicodeString &data, - UnicodeString &bestMatchName, - int32_t &bestMatchLength) { - UnicodeString lcase; - lcase.fastCopyFrom(data).foldCase(); - int32_t length = lcase.length(); - if (length <= bestMatchLength) { - // data cannot provide a better match. - return FALSE; +static int32_t +matchStringWithOptionalDot(const UnicodeString &text, + int32_t index, + const UnicodeString &data) { + UErrorCode sts = U_ZERO_ERROR; + int32_t matchLenText = 0; + int32_t matchLenData = 0; + + u_caseInsensitivePrefixMatch(text.getBuffer() + index, text.length() - index, + data.getBuffer(), data.length(), + 0 /* default case option */, + &matchLenText, &matchLenData, + &sts); + U_ASSERT (U_SUCCESS(sts)); + + if (matchLenData == data.length() /* normal match */ + || data.charAt(data.length() - 1) == 0x2e + && matchLenData == data.length() - 1 /* match without trailing dot */) { + return matchLenText; } - if (lcaseText.compareBetween(0, length, lcase, 0, length) == 0) { - // normal match - bestMatchName = lcase; - bestMatchLength = length; - return TRUE; - } - if (lcase.charAt(--length) == 0x2e) { - if (lcaseText.compareBetween(0, length, lcase, 0, length) == 0) { - // The input text matches the data except for data's trailing dot. 
- bestMatchName = lcase; - bestMatchName.truncate(length); - bestMatchLength = length; - return TRUE; - } - } - return FALSE; + return 0; } //---------------------------------------------------------------------- diff --git a/icu4c/source/i18n/tzfmt.cpp b/icu4c/source/i18n/tzfmt.cpp index cb0421bac85..767fd79135a 100644 --- a/icu4c/source/i18n/tzfmt.cpp +++ b/icu4c/source/i18n/tzfmt.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2011-2014, International Business Machines Corporation and +* Copyright (C) 2011-2015, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ @@ -366,7 +366,7 @@ TimeZoneFormat::TimeZoneFormat(const Locale& locale, UErrorCode& status) if (gmtPattern == NULL) { gmtPattern = DEFAULT_GMT_PATTERN; } - initGMTPattern(UnicodeString(gmtPattern, -1), status); + initGMTPattern(UnicodeString(TRUE, gmtPattern, -1), status); UBool useDefaultOffsetPatterns = TRUE; if (hourFormats) { @@ -784,7 +784,8 @@ TimeZoneFormat::format(const Formattable& obj, UnicodeString& appendTo, if (tz != NULL) { int32_t rawOffset, dstOffset; tz->getOffset(date, FALSE, rawOffset, dstOffset, status); - UnicodeString result; + UChar buf[32]; + UnicodeString result(buf, 0, UPRV_LENGTHOF(buf)); formatOffsetLocalizedGMT(rawOffset + dstOffset, result, status); if (U_SUCCESS(status)) { appendTo.append(result); @@ -850,7 +851,8 @@ TimeZoneFormat::parse(UTimeZoneFormatStyle style, const UnicodeString& text, Par } UErrorCode status = U_ZERO_ERROR; - UnicodeString tzID; + UChar tzIDBuf[32]; + UnicodeString tzID(tzIDBuf, 0, UPRV_LENGTHOF(tzIDBuf)); UBool parseTZDBAbbrev = ((parseOptions & UTZFMT_PARSE_OPTION_TZ_DATABASE_ABBREVIATIONS) != 0); @@ -1099,7 +1101,8 @@ TimeZoneFormat::parse(UTimeZoneFormatStyle style, const UnicodeString& text, Par // Failed to parse the input text as the time zone format in the 
specified style. // Check the longest match among other styles below. - UnicodeString parsedID; + UChar parsedIDBuf[32]; + UnicodeString parsedID(parsedIDBuf, 0, UPRV_LENGTHOF(parsedIDBuf)); UTimeZoneFormatTimeType parsedTimeType = UTZFMT_TIME_TYPE_UNKNOWN; U_ASSERT(parsedPos < 0); @@ -1334,7 +1337,7 @@ TimeZoneFormat::formatGeneric(const TimeZone& tz, int32_t genType, UDate date, U name.setToBogus(); return name; } - return gnames->getGenericLocationName(UnicodeString(canonicalID), name); + return gnames->getGenericLocationName(UnicodeString(TRUE, canonicalID, -1), name); } return gnames->getDisplayName(tz, (UTimeZoneGenericNameType)genType, date, name); } @@ -1357,9 +1360,9 @@ TimeZoneFormat::formatSpecific(const TimeZone& tz, UTimeZoneNameType stdType, UT } if (isDaylight) { - fTimeZoneNames->getDisplayName(UnicodeString(canonicalID), dstType, date, name); + fTimeZoneNames->getDisplayName(UnicodeString(TRUE, canonicalID, -1), dstType, date, name); } else { - fTimeZoneNames->getDisplayName(UnicodeString(canonicalID), stdType, date, name); + fTimeZoneNames->getDisplayName(UnicodeString(TRUE, canonicalID, -1), stdType, date, name); } if (timeType && !name.isEmpty()) { @@ -1407,17 +1410,18 @@ TimeZoneFormat::getTZDBTimeZoneNames(UErrorCode& status) const { UnicodeString& TimeZoneFormat::formatExemplarLocation(const TimeZone& tz, UnicodeString& name) const { - UnicodeString location; + UChar locationBuf[64]; + UnicodeString location(locationBuf, 0, UPRV_LENGTHOF(locationBuf)); const UChar* canonicalID = ZoneMeta::getCanonicalCLDRID(tz); if (canonicalID) { - fTimeZoneNames->getExemplarLocationName(UnicodeString(canonicalID), location); + fTimeZoneNames->getExemplarLocationName(UnicodeString(TRUE, canonicalID, -1), location); } if (location.length() > 0) { name.setTo(location); } else { // Use "unknown" location - fTimeZoneNames->getExemplarLocationName(UnicodeString(UNKNOWN_ZONE_ID), location); + fTimeZoneNames->getExemplarLocationName(UnicodeString(TRUE, 
UNKNOWN_ZONE_ID, -1), location); if (location.length() > 0) { name.setTo(location); } else { @@ -2405,7 +2409,8 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re int32_t checkBits = 0; UBool isPrevQuote = FALSE; UBool inQuote = FALSE; - UnicodeString text; + UChar textBuf[32]; + UnicodeString text(textBuf, 0, UPRV_LENGTHOF(textBuf)); GMTOffsetField::FieldType itemType = GMTOffsetField::TEXT; int32_t itemLength = 1; @@ -2651,7 +2656,7 @@ TimeZone* TimeZoneFormat::createTimeZoneForOffset(int32_t offset) const { if (offset == 0) { // when offset is 0, we should use "Etc/GMT" - return TimeZone::createTimeZone(UnicodeString(TZID_GMT)); + return TimeZone::createTimeZone(UnicodeString(TRUE, TZID_GMT, -1)); } return ZoneMeta::createCustomTimeZone(offset); } @@ -2675,7 +2680,8 @@ TimeZoneFormat::getTimeType(UTimeZoneNameType nameType) { UnicodeString& TimeZoneFormat::getTimeZoneID(const TimeZoneNames::MatchInfoCollection* matches, int32_t idx, UnicodeString& tzID) const { if (!matches->getTimeZoneIDAt(idx, tzID)) { - UnicodeString mzID; + UChar mzIDBuf[32]; + UnicodeString mzID(mzIDBuf, 0, UPRV_LENGTHOF(mzIDBuf)); if (matches->getMetaZoneIDAt(idx, mzID)) { fTimeZoneNames->getReferenceZoneID(mzID, fTargetRegion, tzID); } diff --git a/icu4c/source/i18n/tzgnames.cpp b/icu4c/source/i18n/tzgnames.cpp index 6741607b6e6..6389c862d6b 100644 --- a/icu4c/source/i18n/tzgnames.cpp +++ b/icu4c/source/i18n/tzgnames.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2011-2014, International Business Machines Corporation and +* Copyright (C) 2011-2015, International Business Machines Corporation and * others. All Rights Reserved. 
******************************************************************************* */ @@ -434,7 +434,7 @@ TZGNCore::initialize(const Locale& locale, UErrorCode& status) { TimeZone *tz = TimeZone::createDefault(); const UChar *tzID = ZoneMeta::getCanonicalCLDRID(*tz); if (tzID != NULL) { - loadStrings(UnicodeString(tzID)); + loadStrings(UnicodeString(TRUE, tzID, -1)); } delete tz; } @@ -467,7 +467,7 @@ TZGNCore::getDisplayName(const TimeZone& tz, UTimeZoneGenericNameType type, UDat { const UChar* tzCanonicalID = ZoneMeta::getCanonicalCLDRID(tz); if (tzCanonicalID != NULL) { - getGenericLocationName(UnicodeString(tzCanonicalID), name); + getGenericLocationName(UnicodeString(TRUE, tzCanonicalID, -1), name); } } break; @@ -477,7 +477,7 @@ TZGNCore::getDisplayName(const TimeZone& tz, UTimeZoneGenericNameType type, UDat if (name.isEmpty()) { const UChar* tzCanonicalID = ZoneMeta::getCanonicalCLDRID(tz); if (tzCanonicalID != NULL) { - getGenericLocationName(UnicodeString(tzCanonicalID), name); + getGenericLocationName(UnicodeString(TRUE, tzCanonicalID, -1), name); } } break; @@ -620,7 +620,7 @@ TZGNCore::formatGenericNonLocationName(const TimeZone& tz, UTimeZoneGenericNameT return name; } - UnicodeString tzID(uID); + UnicodeString tzID(TRUE, uID, -1); // Try to get a name from time zone first UTimeZoneNameType nameType = (type == UTZGNM_LONG) ? 
UTZNM_LONG_GENERIC : UTZNM_SHORT_GENERIC; @@ -631,12 +631,14 @@ TZGNCore::formatGenericNonLocationName(const TimeZone& tz, UTimeZoneGenericNameT } // Try meta zone - UnicodeString mzID; + UChar mzIDBuf[32]; + UnicodeString mzID(mzIDBuf, 0, UPRV_LENGTHOF(mzIDBuf)); fTimeZoneNames->getMetaZoneID(tzID, date, mzID); if (!mzID.isEmpty()) { UErrorCode status = U_ZERO_ERROR; UBool useStandard = FALSE; int32_t raw, sav; + UChar tmpNameBuf[64]; tz.getOffset(date, FALSE, raw, sav, status); if (U_FAILURE(status)) { @@ -694,7 +696,7 @@ TZGNCore::formatGenericNonLocationName(const TimeZone& tz, UTimeZoneGenericNameT if (useStandard) { UTimeZoneNameType stdNameType = (nameType == UTZNM_LONG_GENERIC) ? UTZNM_LONG_STANDARD : UTZNM_SHORT_STANDARD; - UnicodeString stdName; + UnicodeString stdName(tmpNameBuf, 0, UPRV_LENGTHOF(tmpNameBuf)); fTimeZoneNames->getDisplayName(tzID, stdNameType, date, stdName); if (!stdName.isEmpty()) { name.setTo(stdName); @@ -704,7 +706,8 @@ TZGNCore::formatGenericNonLocationName(const TimeZone& tz, UTimeZoneGenericNameT // for some meta zones in some locales. This looks like a data bugs. // For now, we check if the standard name is different from its generic // name below. - UnicodeString mzGenericName; + UChar genNameBuf[64]; + UnicodeString mzGenericName(genNameBuf, 0, UPRV_LENGTHOF(genNameBuf)); fTimeZoneNames->getMetaZoneDisplayName(mzID, nameType, mzGenericName); if (stdName.caseCompare(mzGenericName, 0) == 0) { name.setToBogus(); @@ -713,13 +716,14 @@ TZGNCore::formatGenericNonLocationName(const TimeZone& tz, UTimeZoneGenericNameT } if (name.isEmpty()) { // Get a name from meta zone - UnicodeString mzName; + UnicodeString mzName(tmpNameBuf, 0, UPRV_LENGTHOF(tmpNameBuf)); fTimeZoneNames->getMetaZoneDisplayName(mzID, nameType, mzName); if (!mzName.isEmpty()) { // Check if we need to use a partial location format. // This check is done by comparing offset with the meta zone's // golden zone at the given date. 
- UnicodeString goldenID; + UChar idBuf[32]; + UnicodeString goldenID(idBuf, 0, UPRV_LENGTHOF(idBuf)); fTimeZoneNames->getReferenceZoneID(mzID, fTargetRegion, goldenID); if (!goldenID.isEmpty() && goldenID != tzID) { TimeZone *goldenZone = TimeZone::createTimeZone(goldenID); diff --git a/icu4c/source/i18n/tznames.cpp b/icu4c/source/i18n/tznames.cpp index 13f8c4677ae..0ec4a6adfd9 100644 --- a/icu4c/source/i18n/tznames.cpp +++ b/icu4c/source/i18n/tznames.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2011-2014, International Business Machines Corporation and * +* Copyright (C) 2011-2015, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ @@ -324,7 +324,8 @@ UnicodeString& TimeZoneNames::getDisplayName(const UnicodeString& tzID, UTimeZoneNameType type, UDate date, UnicodeString& name) const { getTimeZoneDisplayName(tzID, type, name); if (name.isEmpty()) { - UnicodeString mzID; + UChar mzIDBuf[32]; + UnicodeString mzID(mzIDBuf, 0, UPRV_LENGTHOF(mzIDBuf)); getMetaZoneID(tzID, date, mzID); getMetaZoneDisplayName(mzID, type, name); } diff --git a/icu4c/source/test/cintltst/cstrcase.c b/icu4c/source/test/cintltst/cstrcase.c index 4f6463012a3..5f7a6e3114f 100644 --- a/icu4c/source/test/cintltst/cstrcase.c +++ b/icu4c/source/test/cintltst/cstrcase.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2002-2014, International Business Machines +* Copyright (C) 2002-2015, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -25,6 +25,7 @@ #include "unicode/ucasemap.h" #include "cmemory.h" #include "cintltst.h" +#include "ustr_imp.h" /* test string case mapping functions --------------------------------------- */ @@ -989,6 +990,49 @@ TestUCaseMapToTitle(void) { #endif +/* Test case for internal API u_caseInsensitivePrefixMatch: each row holds two escaped input strings (s1, s2) and the expected match lengths (r1 for s1, r2 for s2); the table ends at the all-zero row. */ +static void +TestUCaseInsensitivePrefixMatch(void) { + struct { + const char *s1; + const char *s2; + int32_t r1; + int32_t r2; + } testCases[] = { + {"ABC", "ab", 2, 2}, + {"ABCD", "abcx", 3, 3}, + {"ABC", "xyz", 0, 0}, + /* U+00DF LATIN SMALL LETTER SHARP S */ + {"A\\u00dfBC", "Ass", 2, 3}, + {"Fust", "Fu\\u00dfball", 2, 2}, + {"\\u00dfsA", "s\\u00dfB", 2, 2}, + {"\\u00dfs", "s\\u00df", 2, 2}, + /* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE */ + {"XYZ\\u0130i\\u0307xxx", "xyzi\\u0307\\u0130yyy", 6, 6}, + {0, 0, 0, 0} + }; + int32_t i; + + for (i = 0; testCases[i].s1 != 0; i++) { + UErrorCode sts = U_ZERO_ERROR; + UChar u1[64], u2[64]; + int32_t matchLen1, matchLen2; + + u_unescape(testCases[i].s1, u1, 64); + u_unescape(testCases[i].s2, u2, 64); + + u_caseInsensitivePrefixMatch(u1, -1, u2, -1, 0, &matchLen1, &matchLen2, &sts); + if (U_FAILURE(sts)) { + log_err("error: %s, s1=%s, s2=%s", u_errorName(sts), testCases[i].s1, testCases[i].s2); + } else if (matchLen1 != testCases[i].r1 || matchLen2 != testCases[i].r2) { + log_err("s1=%s, s2=%s / match len1=%d, len2=%d / expected len1=%d, len2=%d", + testCases[i].s1, testCases[i].s2, + matchLen1, matchLen2, + testCases[i].r1, testCases[i].r2); + } + } +} + void addCaseTest(TestNode** root); void addCaseTest(TestNode** root) { @@ -1005,4 +1049,5 @@ void addCaseTest(TestNode** root) { #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO addTest(root, &TestUCaseMapToTitle, 
"tsutil/cstrcase/TestUCaseMapToTitle"); #endif + addTest(root, &TestUCaseInsensitivePrefixMatch, "tsutil/cstrcase/TestUCaseInsensitivePrefixMatch"); }