From 9f29c3ed251fcafd8fe2cf31f92f232c456908c8 Mon Sep 17 00:00:00 2001 From: Peter Edberg Date: Mon, 9 Dec 2013 06:20:23 +0000 Subject: [PATCH] ICU-10553 Implement context-sensitive capitalization for relative dates (C), add en contextTransforms for test X-SVN-Rev: 34718 --- icu4c/source/data/locales/en.txt | 14 +++ icu4c/source/i18n/reldtfmt.cpp | 100 ++++++++++++++- icu4c/source/i18n/reldtfmt.h | 29 ++++- icu4c/source/i18n/udat.cpp | 19 ++- icu4c/source/test/cintltst/cdattst.c | 129 ++++++++++++++++---- icu4c/source/test/testdata/structLocale.txt | 10 +- 6 files changed, 267 insertions(+), 34 deletions(-) diff --git a/icu4c/source/data/locales/en.txt b/icu4c/source/data/locales/en.txt index 74ce046981e..8505b673d50 100644 --- a/icu4c/source/data/locales/en.txt +++ b/icu4c/source/data/locales/en.txt @@ -858,6 +858,20 @@ en{ } } } + contextTransforms{ + calendar-field:intvector{ + 1, + 1, + } + tense:intvector{ + 1, + 1, + } + type:intvector{ + 1, + 1, + } + } delimiters{ alternateQuotationEnd{"’"} alternateQuotationStart{"‘"} diff --git a/icu4c/source/i18n/reldtfmt.cpp b/icu4c/source/i18n/reldtfmt.cpp index 723b0c9a0e8..630534600e6 100644 --- a/icu4c/source/i18n/reldtfmt.cpp +++ b/icu4c/source/i18n/reldtfmt.cpp @@ -15,6 +15,8 @@ #include "unicode/datefmt.h" #include "unicode/smpdtfmt.h" #include "unicode/msgfmt.h" +#include "unicode/udisplaycontext.h" +#include "unicode/uchar.h" #include "gregoimp.h" // for CalendarData #include "cmemory.h" @@ -59,7 +61,8 @@ RelativeDateFormat::RelativeDateFormat(const RelativeDateFormat& other) : RelativeDateFormat::RelativeDateFormat( UDateFormatStyle timeStyle, UDateFormatStyle dateStyle, const Locale& locale, UErrorCode& status) : DateFormat(), fDateTimeFormatter(NULL), fDatePattern(), fTimePattern(), fCombinedFormat(NULL), - fDateStyle(dateStyle), fLocale(locale), fDatesLen(0), fDates(NULL) + fDateStyle(dateStyle), fLocale(locale), fDatesLen(0), fDates(NULL), + fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE), 
fCombinedHasDateAtStart(FALSE) { if(U_FAILURE(status) ) { return; @@ -124,7 +127,8 @@ UBool RelativeDateFormat::operator==(const Format& other) const { return (fDateStyle==that->fDateStyle && fDatePattern==that->fDatePattern && fTimePattern==that->fTimePattern && - fLocale==that->fLocale); + fLocale==that->fLocale && + fCapitalizationContext==that->fCapitalizationContext); } return FALSE; } @@ -148,7 +152,43 @@ UnicodeString& RelativeDateFormat::format( Calendar& cal, // found a relative string relativeDayString.setTo(theString, len); } - + + if ( relativeDayString.length() > 0 && !fDatePattern.isEmpty() && + (fTimePattern.isEmpty() || fCombinedFormat == NULL || fCombinedHasDateAtStart)) { + // capitalize relativeDayString according to context for tense, set formatter no context + if ( fCapitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || + (fCapitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && fCapitalizationForRelativeUnits[0]) || + (fCapitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && fCapitalizationForRelativeUnits[1]) ) { + // titlecase first word of relativeDayString, do like LocaleDisplayNamesImpl::adjustForUsageAndContext + int32_t stopPos, stopPosLimit = 8; + if ( stopPosLimit > len ) { + stopPosLimit = len; + } + for ( stopPos = 0; stopPos < stopPosLimit; stopPos++ ) { + UChar32 ch = relativeDayString.char32At(stopPos); + int32_t wb = u_getIntPropertyValue(ch, UCHAR_WORD_BREAK); + if (!(u_islower(ch) || wb==U_WB_EXTEND || wb==U_WB_SINGLE_QUOTE || wb==U_WB_MIDNUMLET || wb==U_WB_MIDLETTER)) { + break; + } + if (ch >= 0x10000) { + stopPos++; + } + } + if ( stopPos > 0 && stopPos < len ) { + UnicodeString firstWord(relativeDayString, 0, stopPos); + firstWord.toTitle(NULL, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); + relativeDayString.replaceBetween(0, stopPos, firstWord); + } else { + // no stopPos, titlecase the whole text + relativeDayString.toTitle(NULL, fLocale, 
U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); + } + } + fDateTimeFormatter->setContext(UDISPCTX_CAPITALIZATION_NONE, status); + } else { + // set our context for the formatter + fDateTimeFormatter->setContext(fCapitalizationContext, status); + } + if (fDatePattern.isEmpty()) { fDateTimeFormatter->applyPattern(fTimePattern); fDateTimeFormatter->format(cal,appendTo,pos); @@ -384,6 +424,9 @@ RelativeDateFormat::getDateFormatSymbols() const return fDateTimeFormatter->getDateFormatSymbols(); } +static const UChar patItem1[] = {0x7B,0x31,0x7D}; // "{1}" +static const int32_t patItem1Len = 3; + void RelativeDateFormat::loadDates(UErrorCode &status) { CalendarData calData(fLocale, "gregorian", status); @@ -420,15 +463,31 @@ void RelativeDateFormat::loadDates(UErrorCode &status) { } const UChar *resStr = ures_getStringByIndex(dateTimePatterns, glueIndex, &resStrLen, &tempStatus); + if (U_SUCCESS(tempStatus) && resStrLen >= patItem1Len && u_strncmp(resStr,patItem1,patItem1Len)==0) { + fCombinedHasDateAtStart = TRUE; + } fCombinedFormat = new MessageFormat(UnicodeString(TRUE, resStr, resStrLen), fLocale, tempStatus); } } - UResourceBundle *rb = ures_open(NULL, fLocale.getBaseName(), &status); - UResourceBundle *sb = ures_getByKeyWithFallback(rb, "fields", NULL, &status); + fCapitalizationForRelativeUnits[0] = fCapitalizationForRelativeUnits[1] = FALSE; + UResourceBundle *lb = ures_open(NULL, fLocale.getBaseName(), &status); + tempStatus = status; + UResourceBundle *rb = ures_getByKeyWithFallback(lb, "contextTransforms", NULL, &tempStatus); + UResourceBundle *sb = ures_getByKeyWithFallback(rb, "tense", NULL, &tempStatus); + if (U_SUCCESS(tempStatus) && sb != NULL) { + int32_t len = 0; + const int32_t * intVector = ures_getIntVector(sb, &len, &tempStatus); + if (U_SUCCESS(tempStatus) && intVector != NULL && len >= 2) { + fCapitalizationForRelativeUnits[0] = intVector[0]; + fCapitalizationForRelativeUnits[1] = intVector[1]; + } + } + sb = 
ures_getByKeyWithFallback(lb, "fields", sb, &status); rb = ures_getByKeyWithFallback(sb, "day", rb, &status); sb = ures_getByKeyWithFallback(rb, "relative", sb, &status); ures_close(rb); + ures_close(lb); // set up min/max fDayMin=-1; fDayMax=1; @@ -485,6 +544,37 @@ void RelativeDateFormat::loadDates(UErrorCode &status) { // the fDates[] array could be sorted here, for direct access. } +//---------------------------------------------------------------------- + + +void RelativeDateFormat::setContext(UDisplayContext value, UErrorCode& status) +{ + if (U_FAILURE(status)) + return; + if ( (UDisplayContextType)((uint32_t)value >> 8) == UDISPCTX_TYPE_CAPITALIZATION ) { + fCapitalizationContext = value; + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; + } +} + + +//---------------------------------------------------------------------- + + +UDisplayContext RelativeDateFormat::getContext(UDisplayContextType type, UErrorCode& status) const +{ + if (U_FAILURE(status)) + return (UDisplayContext)0; + if (type != UDISPCTX_TYPE_CAPITALIZATION) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return (UDisplayContext)0; + } + return fCapitalizationContext; +} + + +//---------------------------------------------------------------------- // this should to be in DateFormat, instead it was copied from SimpleDateFormat. diff --git a/icu4c/source/i18n/reldtfmt.h b/icu4c/source/i18n/reldtfmt.h index 1206ea3add0..b129a6e2ff0 100644 --- a/icu4c/source/i18n/reldtfmt.h +++ b/icu4c/source/i18n/reldtfmt.h @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2007-2012, International Business Machines Corporation and * +* Copyright (C) 2007-2013, International Business Machines Corporation and * * others. All Rights Reserved. 
* ******************************************************************************* */ @@ -19,6 +19,7 @@ #include "unicode/datefmt.h" #include "unicode/smpdtfmt.h" +#include "unicode/udisplaycontext.h" U_NAMESPACE_BEGIN @@ -232,6 +233,29 @@ public: */ virtual const DateFormatSymbols* getDateFormatSymbols(void) const; + /** + * Set a particular UDisplayContext value in the formatter, such as + * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. + * @param value The UDisplayContext value to set. + * @param status Input/output status. If at entry this indicates a failure + * status, the function will do nothing; otherwise this will be + * updated with any new status from the function. + * @internal + */ + virtual void setContext(UDisplayContext value, UErrorCode& status); + + /** + * Get the formatter's UDisplayContext value for the specified UDisplayContextType, + * such as UDISPCTX_TYPE_CAPITALIZATION. + * @param type The UDisplayContextType whose value to return + * @param status Input/output status. If at entry this indicates a failure + * status, the function will do nothing; otherwise this will be + * updated with any new status from the function. + * @return The UDisplayContextValue for the specified type. + * @internal + */ + virtual UDisplayContext getContext(UDisplayContextType type, UErrorCode& status) const; + private: SimpleDateFormat *fDateTimeFormatter; @@ -247,6 +271,9 @@ private: int32_t fDatesLen; // Length of array URelativeString *fDates; // array of strings + UDisplayContext fCapitalizationContext; + UBool fCapitalizationForRelativeUnits[2]; + UBool fCombinedHasDateAtStart; /** * Get the string at a specific offset. 
diff --git a/icu4c/source/i18n/udat.cpp b/icu4c/source/i18n/udat.cpp index 1af14507de4..dcbed7e0994 100644 --- a/icu4c/source/i18n/udat.cpp +++ b/icu4c/source/i18n/udat.cpp @@ -984,21 +984,32 @@ udat_getLocaleByType(const UDateFormat *fmt, U_CAPI void U_EXPORT2 udat_setContext(UDateFormat* fmt, UDisplayContext value, UErrorCode* status) { - verifyIsSimpleDateFormat(fmt, status); if (U_FAILURE(*status)) { return; } - ((SimpleDateFormat*)fmt)->setContext(value, *status); + if (dynamic_cast<const SimpleDateFormat*>(reinterpret_cast<const DateFormat*>(fmt))!=NULL) { + ((SimpleDateFormat*)fmt)->setContext(value, *status); + } else if (dynamic_cast<const RelativeDateFormat*>(reinterpret_cast<const DateFormat*>(fmt))!=NULL) { + ((RelativeDateFormat*)fmt)->setContext(value, *status); + } else { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return; } U_CAPI UDisplayContext U_EXPORT2 udat_getContext(UDateFormat* fmt, UDisplayContextType type, UErrorCode* status) { - verifyIsSimpleDateFormat(fmt, status); if (U_FAILURE(*status)) { return (UDisplayContext)0; } - return ((SimpleDateFormat*)fmt)->getContext(type, *status); + if (dynamic_cast<const SimpleDateFormat*>(reinterpret_cast<const DateFormat*>(fmt))!=NULL) { + return ((SimpleDateFormat*)fmt)->getContext(type, *status); + } else if (dynamic_cast<const RelativeDateFormat*>(reinterpret_cast<const DateFormat*>(fmt))!=NULL) { + return ((RelativeDateFormat*)fmt)->getContext(type, *status); + } + *status = U_ILLEGAL_ARGUMENT_ERROR; + return (UDisplayContext)0; } diff --git a/icu4c/source/test/cintltst/cdattst.c b/icu4c/source/test/cintltst/cdattst.c index 0b10f5e063d..baa1597c32c 100644 --- a/icu4c/source/test/cintltst/cdattst.c +++ b/icu4c/source/test/cintltst/cdattst.c @@ -1390,32 +1390,58 @@ static const TestContextItem textContextItems[] = { { NULL, NULL, (UDisplayContext)0, NULL } }; -static const UDate july022008 = 1215000001979.0; +static const UChar today_enDefault[] = { 0x74,0x6F,0x64,0x61,0x79,0 }; /* "today" */ +static const UChar today_enTitle[] = { 0x54,0x6F,0x64,0x61,0x79,0 }; /* "Today" sentence-begin, uiListOrMenu, standalone */ +static const UChar yesterday_enDefault[] = { 
0x79,0x65,0x73,0x74,0x65,0x72,0x64,0x61,0x79,0 }; /* "yesterday" */ +static const UChar yesterday_enTitle[] = { 0x59,0x65,0x73,0x74,0x65,0x72,0x64,0x61,0x79,0 }; /* "Yesterday" sentence-begin, uiListOrMenu, standalone */ +static const UChar today_nbDefault[] = { 0x69,0x20,0x64,0x61,0x67,0 }; /* "i dag" */ +static const UChar today_nbTitle[] = { 0x49,0x20,0x64,0x61,0x67,0 }; /* "I dag" sentence-begin, standalone */ +static const UChar yesterday_nbDefault[] = { 0x69,0x20,0x67,0xE5,0x72,0 }; /* "i går" */ +static const UChar yesterday_nbTitle[] = { 0x49,0x20,0x67,0xE5,0x72,0 }; /* "I går" sentence-begin, standalone */ + +typedef struct { + const char * locale; + UDisplayContext capitalizationContext; + const UChar * expectedFormatToday; + const UChar * expectedFormatYesterday; +} TestRelativeContextItem; + +static const TestRelativeContextItem textContextRelativeItems[] = { + { "en", UDISPCTX_CAPITALIZATION_NONE, today_enDefault, yesterday_enDefault }, +#if !UCONFIG_NO_BREAK_ITERATION + { "en", UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, today_enDefault, yesterday_enDefault }, + { "en", UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, today_enTitle, yesterday_enTitle }, + { "en", UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, today_enTitle, yesterday_enTitle }, + { "en", UDISPCTX_CAPITALIZATION_FOR_STANDALONE, today_enTitle, yesterday_enTitle }, +#endif + { "nb", UDISPCTX_CAPITALIZATION_NONE, today_nbDefault, yesterday_nbDefault }, +#if !UCONFIG_NO_BREAK_ITERATION + { "nb", UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, today_nbDefault, yesterday_nbDefault }, + { "nb", UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, today_nbTitle, yesterday_nbTitle }, + { "nb", UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, today_nbDefault, yesterday_nbDefault }, + { "nb", UDISPCTX_CAPITALIZATION_FOR_STANDALONE, today_nbTitle, yesterday_nbTitle }, +#endif + { NULL, (UDisplayContext)0, NULL, NULL } +}; + +static const UChar zoneGMT[] = { 0x47,0x4D,0x54,0 }; /* "GMT" */ +static 
const UDate july022008 = 1215000000000.0; enum { kUbufMax = 64, kBbufMax = 3*kUbufMax }; static void TestContext(void) { - const TestContextItem* textContextItemPtr = textContextItems; - for (; textContextItemPtr->locale != NULL; ++textContextItemPtr) { + const TestContextItem* textContextItemPtr; + const TestRelativeContextItem* textRelContextItemPtr; + for (textContextItemPtr = textContextItems; textContextItemPtr->locale != NULL; ++textContextItemPtr) { UErrorCode status = U_ZERO_ERROR; - UDateFormat* udfmt = udat_open(UDAT_NONE, UDAT_MEDIUM, textContextItemPtr->locale, NULL, 0, NULL, 0, &status); - if ( U_FAILURE(status) ) { - log_data_err("FAIL: udat_open for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) ); - } else { - UDateTimePatternGenerator* udtpg = udatpg_open(textContextItemPtr->locale, &status); - if ( U_FAILURE(status) ) { - log_err("FAIL: udatpg_open for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) ); - } else { - UChar ubuf[kUbufMax]; - int32_t len = udatpg_getBestPattern(udtpg, textContextItemPtr->skeleton, -1, ubuf, kUbufMax, &status); - if ( U_FAILURE(status) ) { - log_err("FAIL: udatpg_getBestPattern for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) ); - } else { - udat_applyPattern(udfmt, FALSE, ubuf, len); + UDateTimePatternGenerator* udtpg = udatpg_open(textContextItemPtr->locale, &status); + if ( U_SUCCESS(status) ) { + UChar ubuf[kUbufMax]; + int32_t len = udatpg_getBestPattern(udtpg, textContextItemPtr->skeleton, -1, ubuf, kUbufMax, &status); + if ( U_SUCCESS(status) ) { + UDateFormat* udfmt = udat_open(UDAT_PATTERN, UDAT_PATTERN, textContextItemPtr->locale, zoneGMT, -1, ubuf, len, &status); + if ( U_SUCCESS(status) ) { udat_setContext(udfmt, textContextItemPtr->capitalizationContext, &status); - if ( U_FAILURE(status) ) { - log_err("FAIL: udat_setContext for locale %s, capitalizationContext %d, status %s\n", - textContextItemPtr->locale, 
(int)textContextItemPtr->capitalizationContext, u_errorName(status) ); - } else { + if ( U_SUCCESS(status) ) { UDisplayContext getContext; len = udat_format(udfmt, july022008, ubuf, kUbufMax, NULL, &status); if ( U_FAILURE(status) ) { @@ -1437,11 +1463,68 @@ static void TestContext(void) { log_err("FAIL: udat_getContext for locale %s, capitalizationContext %d, got context %d\n", textContextItemPtr->locale, (int)textContextItemPtr->capitalizationContext, (int)getContext ); } + } else { + log_err("FAIL: udat_setContext for locale %s, capitalizationContext %d, status %s\n", + textContextItemPtr->locale, (int)textContextItemPtr->capitalizationContext, u_errorName(status) ); } + udat_close(udfmt); + } else { + log_data_err("FAIL: udat_open for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) ); } - udatpg_close(udtpg); + } else { + log_err("FAIL: udatpg_getBestPattern for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) ); } - udat_close(udfmt); + udatpg_close(udtpg); + } else { + log_err("FAIL: udatpg_open for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) ); + } + } + for (textRelContextItemPtr = textContextRelativeItems; textRelContextItemPtr->locale != NULL; ++textRelContextItemPtr) { + UErrorCode status = U_ZERO_ERROR; + UCalendar* ucal = ucal_open(zoneGMT, -1, "root", UCAL_GREGORIAN, &status); + if ( U_SUCCESS(status) ) { + UDateFormat* udfmt = udat_open(UDAT_NONE, UDAT_LONG_RELATIVE, textRelContextItemPtr->locale, zoneGMT, -1, NULL, 0, &status); + if ( U_SUCCESS(status) ) { + udat_setContext(udfmt, textRelContextItemPtr->capitalizationContext, &status); + if ( U_SUCCESS(status) ) { + UDate yesterday, today = ucal_getNow(); + UChar ubuf[kUbufMax]; + char bbuf1[kBbufMax]; + char bbuf2[kBbufMax]; + int32_t len = udat_format(udfmt, today, ubuf, kUbufMax, NULL, &status); + if ( U_FAILURE(status) ) { + log_err("FAIL: udat_format today for locale %s, capitalizationContext %d, status %s\n", + 
textRelContextItemPtr->locale, (int)textRelContextItemPtr->capitalizationContext, u_errorName(status) ); + } else if (u_strncmp(ubuf, textRelContextItemPtr->expectedFormatToday, kUbufMax) != 0) { + log_err("FAIL: udat_format today for locale %s, capitalizationContext %d, expected %s, got %s\n", + textRelContextItemPtr->locale, (int)textRelContextItemPtr->capitalizationContext, + u_austrncpy(bbuf1,textRelContextItemPtr->expectedFormatToday,kUbufMax), u_austrncpy(bbuf2,ubuf,kUbufMax) ); + } + status = U_ZERO_ERROR; + ucal_setMillis(ucal, today, &status); + ucal_add(ucal, UCAL_DATE, -1, &status); + yesterday = ucal_getMillis(ucal, &status); + if ( U_SUCCESS(status) ) { + len = udat_format(udfmt, yesterday, ubuf, kUbufMax, NULL, &status); + if ( U_FAILURE(status) ) { + log_err("FAIL: udat_format yesterday for locale %s, capitalizationContext %d, status %s\n", + textRelContextItemPtr->locale, (int)textRelContextItemPtr->capitalizationContext, u_errorName(status) ); + } else if (u_strncmp(ubuf, textRelContextItemPtr->expectedFormatYesterday, kUbufMax) != 0) { + log_err("FAIL: udat_format yesterday for locale %s, capitalizationContext %d, expected %s, got %s\n", + textRelContextItemPtr->locale, (int)textRelContextItemPtr->capitalizationContext, + u_austrncpy(bbuf1,textRelContextItemPtr->expectedFormatYesterday,kUbufMax), u_austrncpy(bbuf2,ubuf,kUbufMax) ); + } + } + } else { + log_err("FAIL: udat_setContext relative for locale %s, capitalizationContext %d, status %s\n", + textRelContextItemPtr->locale, (int)textRelContextItemPtr->capitalizationContext, u_errorName(status) ); + } + } else { + log_data_err("FAIL: udat_open relative for locale %s, status %s\n", textRelContextItemPtr->locale, u_errorName(status) ); + } + ucal_close(ucal); + } else { + log_data_err("FAIL: ucal_open for locale root, status %s\n", u_errorName(status) ); } } } diff --git a/icu4c/source/test/testdata/structLocale.txt b/icu4c/source/test/testdata/structLocale.txt index 50c572d731e..c0eb1eead2e 
100644 --- a/icu4c/source/test/testdata/structLocale.txt +++ b/icu4c/source/test/testdata/structLocale.txt @@ -3876,6 +3876,10 @@ structLocale:table(nofallback){ } defaultNumberingSystem{""} contextTransforms{ + calendar-field:intvector{ + 2, + 2, + } day-format-except-narrow:intvector{ 2, 2, @@ -3920,6 +3924,10 @@ structLocale:table(nofallback){ 2, 2, } + unit-pattern:intvector{ + 2, + 2, + } } delimiters{ quotationStart{""} @@ -13279,7 +13287,7 @@ structLocale:table(nofallback){ "", } } - intervalFormats{ + intervalFormats{ H{ H{""} }