ICU-10553 Implement context-sensitive capitalization for relative dates (C), add en contextTransforms for test

X-SVN-Rev: 34718
This commit is contained in:
Peter Edberg 2013-12-09 06:20:23 +00:00
parent 65ecb0b5fa
commit 9f29c3ed25
6 changed files with 267 additions and 34 deletions

View file

@ -858,6 +858,20 @@ en{
}
}
}
contextTransforms{
calendar-field:intvector{
1,
1,
}
tense:intvector{
1,
1,
}
type:intvector{
1,
1,
}
}
delimiters{
alternateQuotationEnd{""}
alternateQuotationStart{""}

View file

@ -15,6 +15,8 @@
#include "unicode/datefmt.h"
#include "unicode/smpdtfmt.h"
#include "unicode/msgfmt.h"
#include "unicode/udisplaycontext.h"
#include "unicode/uchar.h"
#include "gregoimp.h" // for CalendarData
#include "cmemory.h"
@ -59,7 +61,8 @@ RelativeDateFormat::RelativeDateFormat(const RelativeDateFormat& other) :
RelativeDateFormat::RelativeDateFormat( UDateFormatStyle timeStyle, UDateFormatStyle dateStyle,
const Locale& locale, UErrorCode& status) :
DateFormat(), fDateTimeFormatter(NULL), fDatePattern(), fTimePattern(), fCombinedFormat(NULL),
fDateStyle(dateStyle), fLocale(locale), fDatesLen(0), fDates(NULL)
fDateStyle(dateStyle), fLocale(locale), fDatesLen(0), fDates(NULL),
fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE), fCombinedHasDateAtStart(FALSE)
{
if(U_FAILURE(status) ) {
return;
@ -124,7 +127,8 @@ UBool RelativeDateFormat::operator==(const Format& other) const {
return (fDateStyle==that->fDateStyle &&
fDatePattern==that->fDatePattern &&
fTimePattern==that->fTimePattern &&
fLocale==that->fLocale);
fLocale==that->fLocale &&
fCapitalizationContext==that->fCapitalizationContext);
}
return FALSE;
}
@ -148,7 +152,43 @@ UnicodeString& RelativeDateFormat::format( Calendar& cal,
// found a relative string
relativeDayString.setTo(theString, len);
}
if ( relativeDayString.length() > 0 && !fDatePattern.isEmpty() &&
(fTimePattern.isEmpty() || fCombinedFormat == NULL || fCombinedHasDateAtStart)) {
// capitalize relativeDayString according to context for tense, set formatter no context
if ( fCapitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
(fCapitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && fCapitalizationForRelativeUnits[0]) ||
(fCapitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && fCapitalizationForRelativeUnits[1]) ) {
// titlecase first word of relativeDayString, do like LocaleDisplayNamesImpl::adjustForUsageAndContext
int32_t stopPos, stopPosLimit = 8;
if ( stopPosLimit > len ) {
stopPosLimit = len;
}
for ( stopPos = 0; stopPos < stopPosLimit; stopPos++ ) {
UChar32 ch = relativeDayString.char32At(stopPos);
int32_t wb = u_getIntPropertyValue(ch, UCHAR_WORD_BREAK);
if (!(u_islower(ch) || wb==U_WB_EXTEND || wb==U_WB_SINGLE_QUOTE || wb==U_WB_MIDNUMLET || wb==U_WB_MIDLETTER)) {
break;
}
if (ch >= 0x10000) {
stopPos++;
}
}
if ( stopPos > 0 && stopPos < len ) {
UnicodeString firstWord(relativeDayString, 0, stopPos);
firstWord.toTitle(NULL, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
relativeDayString.replaceBetween(0, stopPos, firstWord);
} else {
// no stopPos, titlecase the whole text
relativeDayString.toTitle(NULL, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
}
}
fDateTimeFormatter->setContext(UDISPCTX_CAPITALIZATION_NONE, status);
} else {
// set our context for the formatter
fDateTimeFormatter->setContext(fCapitalizationContext, status);
}
if (fDatePattern.isEmpty()) {
fDateTimeFormatter->applyPattern(fTimePattern);
fDateTimeFormatter->format(cal,appendTo,pos);
@ -384,6 +424,9 @@ RelativeDateFormat::getDateFormatSymbols() const
return fDateTimeFormatter->getDateFormatSymbols();
}
static const UChar patItem1[] = {0x7B,0x31,0x7D}; // "{1}"
static const int32_t patItem1Len = 3;
void RelativeDateFormat::loadDates(UErrorCode &status) {
CalendarData calData(fLocale, "gregorian", status);
@ -420,15 +463,31 @@ void RelativeDateFormat::loadDates(UErrorCode &status) {
}
const UChar *resStr = ures_getStringByIndex(dateTimePatterns, glueIndex, &resStrLen, &tempStatus);
if (U_SUCCESS(tempStatus) && resStrLen >= patItem1Len && u_strncmp(resStr,patItem1,patItem1Len)==0) {
fCombinedHasDateAtStart = TRUE;
}
fCombinedFormat = new MessageFormat(UnicodeString(TRUE, resStr, resStrLen), fLocale, tempStatus);
}
}
UResourceBundle *rb = ures_open(NULL, fLocale.getBaseName(), &status);
UResourceBundle *sb = ures_getByKeyWithFallback(rb, "fields", NULL, &status);
fCapitalizationForRelativeUnits[0] = fCapitalizationForRelativeUnits[1] = FALSE;
UResourceBundle *lb = ures_open(NULL, fLocale.getBaseName(), &status);
tempStatus = status;
UResourceBundle *rb = ures_getByKeyWithFallback(lb, "contextTransforms", NULL, &tempStatus);
UResourceBundle *sb = ures_getByKeyWithFallback(rb, "tense", NULL, &tempStatus);
if (U_SUCCESS(tempStatus) && sb != NULL) {
int32_t len = 0;
const int32_t * intVector = ures_getIntVector(sb, &len, &tempStatus);
if (U_SUCCESS(tempStatus) && intVector != NULL && len >= 2) {
fCapitalizationForRelativeUnits[0] = intVector[0];
fCapitalizationForRelativeUnits[1] = intVector[1];
}
}
sb = ures_getByKeyWithFallback(lb, "fields", sb, &status);
rb = ures_getByKeyWithFallback(sb, "day", rb, &status);
sb = ures_getByKeyWithFallback(rb, "relative", sb, &status);
ures_close(rb);
ures_close(lb);
// set up min/max
fDayMin=-1;
fDayMax=1;
@ -485,6 +544,37 @@ void RelativeDateFormat::loadDates(UErrorCode &status) {
// the fDates[] array could be sorted here, for direct access.
}
//----------------------------------------------------------------------
// Record a capitalization display context on this formatter.
// Does nothing if status already indicates a failure on entry.
// Any value whose context type is not UDISPCTX_TYPE_CAPITALIZATION is
// rejected with U_ILLEGAL_ARGUMENT_ERROR and the stored context is unchanged.
void RelativeDateFormat::setContext(UDisplayContext value, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return;
    }
    // Shifting the value right by 8 bits yields the context type it belongs to.
    const UDisplayContextType valueType = (UDisplayContextType)((uint32_t)value >> 8);
    if (valueType != UDISPCTX_TYPE_CAPITALIZATION) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    fCapitalizationContext = value;
}
//----------------------------------------------------------------------
// Return the stored display context for the requested context type.
// Only UDISPCTX_TYPE_CAPITALIZATION is supported; any other type sets status
// to U_ILLEGAL_ARGUMENT_ERROR. Returns (UDisplayContext)0 when status is
// already a failure on entry or when the type is not supported.
UDisplayContext RelativeDateFormat::getContext(UDisplayContextType type, UErrorCode& status) const
{
    UDisplayContext result = (UDisplayContext)0;
    if (!U_FAILURE(status)) {
        if (type == UDISPCTX_TYPE_CAPITALIZATION) {
            result = fCapitalizationContext;
        } else {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
    }
    return result;
}
//----------------------------------------------------------------------
// this should be in DateFormat; instead it was copied from SimpleDateFormat.

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2007-2012, International Business Machines Corporation and *
* Copyright (C) 2007-2013, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -19,6 +19,7 @@
#include "unicode/datefmt.h"
#include "unicode/smpdtfmt.h"
#include "unicode/udisplaycontext.h"
U_NAMESPACE_BEGIN
@ -232,6 +233,29 @@ public:
*/
virtual const DateFormatSymbols* getDateFormatSymbols(void) const;
/**
* Set a particular UDisplayContext value in the formatter, such as
* UDISPCTX_CAPITALIZATION_FOR_STANDALONE.
* @param value The UDisplayContext value to set.
* @param status Input/output status. If at entry this indicates a failure
* status, the function will do nothing; otherwise this will be
* updated with any new status from the function.
* @internal
*/
virtual void setContext(UDisplayContext value, UErrorCode& status);
/**
* Get the formatter's UDisplayContext value for the specified UDisplayContextType,
* such as UDISPCTX_TYPE_CAPITALIZATION.
* @param type The UDisplayContextType whose value to return
* @param status Input/output status. If at entry this indicates a failure
* status, the function will do nothing; otherwise this will be
* updated with any new status from the function.
* @return The UDisplayContextValue for the specified type.
* @internal
*/
virtual UDisplayContext getContext(UDisplayContextType type, UErrorCode& status) const;
private:
SimpleDateFormat *fDateTimeFormatter;
@ -247,6 +271,9 @@ private:
int32_t fDatesLen; // Length of array
URelativeString *fDates; // array of strings
UDisplayContext fCapitalizationContext;
UBool fCapitalizationForRelativeUnits[2];
UBool fCombinedHasDateAtStart;
/**
* Get the string at a specific offset.

View file

@ -984,21 +984,32 @@ udat_getLocaleByType(const UDateFormat *fmt,
/**
 * Set a UDisplayContext value on the formatter behind a UDateFormat handle.
 * Both SimpleDateFormat and RelativeDateFormat instances are supported; for
 * any other formatter *status is set to U_ILLEGAL_ARGUMENT_ERROR.
 *
 * @param fmt    The formatter handle.
 * @param value  The UDisplayContext value to set.
 * @param status Input/output status. If this indicates a failure on entry the
 *               function does nothing; otherwise it receives any error from
 *               the underlying setContext call.
 */
U_CAPI void U_EXPORT2
udat_setContext(UDateFormat* fmt, UDisplayContext value, UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return;
    }
    // Dispatch on the dynamic type and call through the checked pointer.
    // No SimpleDateFormat-only guard here: it would reject relative formatters
    // before they could be dispatched.
    DateFormat* df = reinterpret_cast<DateFormat*>(fmt);
    SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(df);
    if (sdtfmt != NULL) {
        sdtfmt->setContext(value, *status);
        return;
    }
    RelativeDateFormat* reldtfmt = dynamic_cast<RelativeDateFormat*>(df);
    if (reldtfmt != NULL) {
        reldtfmt->setContext(value, *status);
        return;
    }
    *status = U_ILLEGAL_ARGUMENT_ERROR;
}
/**
 * Get the UDisplayContext value of the given type from the formatter behind a
 * UDateFormat handle. Both SimpleDateFormat and RelativeDateFormat instances
 * are supported; for any other formatter *status is set to
 * U_ILLEGAL_ARGUMENT_ERROR.
 *
 * @param fmt    The formatter handle.
 * @param type   The UDisplayContextType whose value to return.
 * @param status Input/output status. If this indicates a failure on entry the
 *               function does nothing; otherwise it receives any error from
 *               the underlying getContext call.
 * @return The formatter's value for the requested type, or (UDisplayContext)0
 *         on failure.
 */
U_CAPI UDisplayContext U_EXPORT2
udat_getContext(UDateFormat* fmt, UDisplayContextType type, UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return (UDisplayContext)0;
    }
    // Dispatch on the dynamic type and call through the checked pointer.
    // No SimpleDateFormat-only guard or early return here: either would keep
    // relative formatters from ever being served.
    const DateFormat* df = reinterpret_cast<const DateFormat*>(fmt);
    const SimpleDateFormat* sdtfmt = dynamic_cast<const SimpleDateFormat*>(df);
    if (sdtfmt != NULL) {
        return sdtfmt->getContext(type, *status);
    }
    const RelativeDateFormat* reldtfmt = dynamic_cast<const RelativeDateFormat*>(df);
    if (reldtfmt != NULL) {
        return reldtfmt->getContext(type, *status);
    }
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return (UDisplayContext)0;
}

View file

@ -1390,32 +1390,58 @@ static const TestContextItem textContextItems[] = {
{ NULL, NULL, (UDisplayContext)0, NULL }
};
static const UDate july022008 = 1215000001979.0;
static const UChar today_enDefault[] = { 0x74,0x6F,0x64,0x61,0x79,0 }; /* "today" */
static const UChar today_enTitle[] = { 0x54,0x6F,0x64,0x61,0x79,0 }; /* "Today" sentence-begin, uiListOrMenu, standalone */
static const UChar yesterday_enDefault[] = { 0x79,0x65,0x73,0x74,0x65,0x72,0x64,0x61,0x79,0 }; /* "yesterday" */
static const UChar yesterday_enTitle[] = { 0x59,0x65,0x73,0x74,0x65,0x72,0x64,0x61,0x79,0 }; /* "Yesterday" sentence-begin, uiListOrMenu, standalone */
static const UChar today_nbDefault[] = { 0x69,0x20,0x64,0x61,0x67,0 }; /* "i dag" */
static const UChar today_nbTitle[] = { 0x49,0x20,0x64,0x61,0x67,0 }; /* "I dag" sentence-begin, standalone */
static const UChar yesterday_nbDefault[] = { 0x69,0x20,0x67,0xE5,0x72,0 }; /* "i går" */
static const UChar yesterday_nbTitle[] = { 0x49,0x20,0x67,0xE5,0x72,0 }; /* "I går" sentence-begin, standalone */
/* One expectation row for the relative-date portion of TestContext. */
typedef struct {
const char * locale; /* locale ID passed to udat_open */
UDisplayContext capitalizationContext; /* context applied with udat_setContext */
const UChar * expectedFormatToday; /* expected udat_format result for the current time */
const UChar * expectedFormatYesterday; /* expected udat_format result for one day before the current time */
} TestRelativeContextItem;
/*
 * Expected relative-date strings for each tested locale and capitalization
 * context. The list is terminated by an entry whose locale is NULL.
 * NOTE(review): every context other than UDISPCTX_CAPITALIZATION_NONE is
 * compiled only when break iteration is available, presumably because the
 * titlecasing behind those contexts depends on it -- confirm.
 */
static const TestRelativeContextItem textContextRelativeItems[] = {
{ "en", UDISPCTX_CAPITALIZATION_NONE, today_enDefault, yesterday_enDefault },
#if !UCONFIG_NO_BREAK_ITERATION
{ "en", UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, today_enDefault, yesterday_enDefault },
{ "en", UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, today_enTitle, yesterday_enTitle },
{ "en", UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, today_enTitle, yesterday_enTitle },
{ "en", UDISPCTX_CAPITALIZATION_FOR_STANDALONE, today_enTitle, yesterday_enTitle },
#endif
{ "nb", UDISPCTX_CAPITALIZATION_NONE, today_nbDefault, yesterday_nbDefault },
#if !UCONFIG_NO_BREAK_ITERATION
{ "nb", UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, today_nbDefault, yesterday_nbDefault },
{ "nb", UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, today_nbTitle, yesterday_nbTitle },
/* unlike en, nb expects the uncapitalized form in the UI list/menu context */
{ "nb", UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, today_nbDefault, yesterday_nbDefault },
{ "nb", UDISPCTX_CAPITALIZATION_FOR_STANDALONE, today_nbTitle, yesterday_nbTitle },
#endif
/* terminator: a NULL locale ends the list */
{ NULL, (UDisplayContext)0, NULL, NULL }
};
static const UChar zoneGMT[] = { 0x47,0x4D,0x54,0 }; /* "GMT" */
static const UDate july022008 = 1215000000000.0;
enum { kUbufMax = 64, kBbufMax = 3*kUbufMax };
static void TestContext(void) {
const TestContextItem* textContextItemPtr = textContextItems;
for (; textContextItemPtr->locale != NULL; ++textContextItemPtr) {
const TestContextItem* textContextItemPtr;
const TestRelativeContextItem* textRelContextItemPtr;
for (textContextItemPtr = textContextItems; textContextItemPtr->locale != NULL; ++textContextItemPtr) {
UErrorCode status = U_ZERO_ERROR;
UDateFormat* udfmt = udat_open(UDAT_NONE, UDAT_MEDIUM, textContextItemPtr->locale, NULL, 0, NULL, 0, &status);
if ( U_FAILURE(status) ) {
log_data_err("FAIL: udat_open for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) );
} else {
UDateTimePatternGenerator* udtpg = udatpg_open(textContextItemPtr->locale, &status);
if ( U_FAILURE(status) ) {
log_err("FAIL: udatpg_open for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) );
} else {
UChar ubuf[kUbufMax];
int32_t len = udatpg_getBestPattern(udtpg, textContextItemPtr->skeleton, -1, ubuf, kUbufMax, &status);
if ( U_FAILURE(status) ) {
log_err("FAIL: udatpg_getBestPattern for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) );
} else {
udat_applyPattern(udfmt, FALSE, ubuf, len);
UDateTimePatternGenerator* udtpg = udatpg_open(textContextItemPtr->locale, &status);
if ( U_SUCCESS(status) ) {
UChar ubuf[kUbufMax];
int32_t len = udatpg_getBestPattern(udtpg, textContextItemPtr->skeleton, -1, ubuf, kUbufMax, &status);
if ( U_SUCCESS(status) ) {
UDateFormat* udfmt = udat_open(UDAT_PATTERN, UDAT_PATTERN, textContextItemPtr->locale, zoneGMT, -1, ubuf, len, &status);
if ( U_SUCCESS(status) ) {
udat_setContext(udfmt, textContextItemPtr->capitalizationContext, &status);
if ( U_FAILURE(status) ) {
log_err("FAIL: udat_setContext for locale %s, capitalizationContext %d, status %s\n",
textContextItemPtr->locale, (int)textContextItemPtr->capitalizationContext, u_errorName(status) );
} else {
if ( U_SUCCESS(status) ) {
UDisplayContext getContext;
len = udat_format(udfmt, july022008, ubuf, kUbufMax, NULL, &status);
if ( U_FAILURE(status) ) {
@ -1437,11 +1463,68 @@ static void TestContext(void) {
log_err("FAIL: udat_getContext for locale %s, capitalizationContext %d, got context %d\n",
textContextItemPtr->locale, (int)textContextItemPtr->capitalizationContext, (int)getContext );
}
} else {
log_err("FAIL: udat_setContext for locale %s, capitalizationContext %d, status %s\n",
textContextItemPtr->locale, (int)textContextItemPtr->capitalizationContext, u_errorName(status) );
}
udat_close(udfmt);
} else {
log_data_err("FAIL: udat_open for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) );
}
udatpg_close(udtpg);
} else {
log_err("FAIL: udatpg_getBestPattern for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) );
}
udat_close(udfmt);
udatpg_close(udtpg);
} else {
log_err("FAIL: udatpg_open for locale %s, status %s\n", textContextItemPtr->locale, u_errorName(status) );
}
}
for (textRelContextItemPtr = textContextRelativeItems; textRelContextItemPtr->locale != NULL; ++textRelContextItemPtr) {
UErrorCode status = U_ZERO_ERROR;
UCalendar* ucal = ucal_open(zoneGMT, -1, "root", UCAL_GREGORIAN, &status);
if ( U_SUCCESS(status) ) {
UDateFormat* udfmt = udat_open(UDAT_NONE, UDAT_LONG_RELATIVE, textRelContextItemPtr->locale, zoneGMT, -1, NULL, 0, &status);
if ( U_SUCCESS(status) ) {
udat_setContext(udfmt, textRelContextItemPtr->capitalizationContext, &status);
if ( U_SUCCESS(status) ) {
UDate yesterday, today = ucal_getNow();
UChar ubuf[kUbufMax];
char bbuf1[kBbufMax];
char bbuf2[kBbufMax];
int32_t len = udat_format(udfmt, today, ubuf, kUbufMax, NULL, &status);
if ( U_FAILURE(status) ) {
log_err("FAIL: udat_format today for locale %s, capitalizationContext %d, status %s\n",
textRelContextItemPtr->locale, (int)textRelContextItemPtr->capitalizationContext, u_errorName(status) );
} else if (u_strncmp(ubuf, textRelContextItemPtr->expectedFormatToday, kUbufMax) != 0) {
log_err("FAIL: udat_format today for locale %s, capitalizationContext %d, expected %s, got %s\n",
textRelContextItemPtr->locale, (int)textRelContextItemPtr->capitalizationContext,
u_austrncpy(bbuf1,textRelContextItemPtr->expectedFormatToday,kUbufMax), u_austrncpy(bbuf2,ubuf,kUbufMax) );
}
status = U_ZERO_ERROR;
ucal_setMillis(ucal, today, &status);
ucal_add(ucal, UCAL_DATE, -1, &status);
yesterday = ucal_getMillis(ucal, &status);
if ( U_SUCCESS(status) ) {
len = udat_format(udfmt, yesterday, ubuf, kUbufMax, NULL, &status);
if ( U_FAILURE(status) ) {
log_err("FAIL: udat_format yesterday for locale %s, capitalizationContext %d, status %s\n",
textRelContextItemPtr->locale, (int)textRelContextItemPtr->capitalizationContext, u_errorName(status) );
} else if (u_strncmp(ubuf, textRelContextItemPtr->expectedFormatYesterday, kUbufMax) != 0) {
log_err("FAIL: udat_format yesterday for locale %s, capitalizationContext %d, expected %s, got %s\n",
textRelContextItemPtr->locale, (int)textRelContextItemPtr->capitalizationContext,
u_austrncpy(bbuf1,textRelContextItemPtr->expectedFormatYesterday,kUbufMax), u_austrncpy(bbuf2,ubuf,kUbufMax) );
}
}
} else {
log_err("FAIL: udat_setContext relative for locale %s, capitalizationContext %d, status %s\n",
textRelContextItemPtr->locale, (int)textRelContextItemPtr->capitalizationContext, u_errorName(status) );
}
} else {
log_data_err("FAIL: udat_open relative for locale %s, status %s\n", textRelContextItemPtr->locale, u_errorName(status) );
}
ucal_close(ucal);
} else {
log_data_err("FAIL: ucal_open for locale root, status %s\n", u_errorName(status) );
}
}
}

View file

@ -3876,6 +3876,10 @@ structLocale:table(nofallback){
}
defaultNumberingSystem{""}
contextTransforms{
calendar-field:intvector{
2,
2,
}
day-format-except-narrow:intvector{
2,
2,
@ -3920,6 +3924,10 @@ structLocale:table(nofallback){
2,
2,
}
unit-pattern:intvector{
2,
2,
}
}
delimiters{
quotationStart{""}
@ -13279,7 +13287,7 @@ structLocale:table(nofallback){
"",
}
}
intervalFormats{
intervalFormats{
H{
H{""}
}