ICU-10633 Implement context-sensitive number formatting (currently just for RBNF)

X-SVN-Rev: 35095
This commit is contained in:
Peter Edberg 2014-02-06 09:41:17 +00:00
parent 1eea3529b2
commit 401daae867
4 changed files with 212 additions and 5 deletions

View file

@ -260,6 +260,7 @@ NumberFormat::operator=(const NumberFormat& rhs)
fParseIntegerOnly = rhs.fParseIntegerOnly;
u_strncpy(fCurrency, rhs.fCurrency, 4);
fLenient = rhs.fLenient;
fCapitalizationContext = rhs.fCapitalizationContext;
}
return *this;
}
@ -306,6 +307,10 @@ NumberFormat::operator==(const Format& that) const
if (first) { printf("[ "); first = FALSE; } else { printf(", "); }
debug("fLenient != ");
}
if (!(fCapitalizationContext == other->fCapitalizationContext)) {
if (first) { printf("[ "); first = FALSE; } else { printf(", "); }
debug("fCapitalizationContext != ");
}
if (!first) { printf(" ]"); }
#endif
@ -318,7 +323,8 @@ NumberFormat::operator==(const Format& that) const
fGroupingUsed == other->fGroupingUsed &&
fParseIntegerOnly == other->fParseIntegerOnly &&
u_strcmp(fCurrency, other->fCurrency) == 0 &&
fLenient == other->fLenient)));
fLenient == other->fLenient &&
fCapitalizationContext == other->fCapitalizationContext)));
}
// -------------------------------------

View file

@ -5,6 +5,7 @@
*******************************************************************************
*/
#include "unicode/utypes.h"
#include "utypeinfo.h" // for 'typeid' to work
#include "unicode/rbnf.h"
@ -21,6 +22,8 @@
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "unicode/udata.h"
#include "unicode/udisplaycontext.h"
#include "unicode/brkiter.h"
#include "nfrs.h"
#include "cmemory.h"
@ -660,6 +663,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
, lenient(FALSE)
, lenientParseRules(NULL)
, localizations(NULL)
, capitalizationInfoSet(FALSE)
, capitalizationForUIListMenu(FALSE)
, capitalizationForStandAlone(FALSE)
, capitalizationBrkIter(NULL)
{
LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
init(description, locinfo, perror, status);
@ -678,6 +685,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
, lenient(FALSE)
, lenientParseRules(NULL)
, localizations(NULL)
, capitalizationInfoSet(FALSE)
, capitalizationForUIListMenu(FALSE)
, capitalizationForStandAlone(FALSE)
, capitalizationBrkIter(NULL)
{
LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
init(description, locinfo, perror, status);
@ -696,6 +707,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
, lenient(FALSE)
, lenientParseRules(NULL)
, localizations(NULL)
, capitalizationInfoSet(FALSE)
, capitalizationForUIListMenu(FALSE)
, capitalizationForStandAlone(FALSE)
, capitalizationBrkIter(NULL)
{
init(description, info, perror, status);
}
@ -713,6 +728,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
, lenient(FALSE)
, lenientParseRules(NULL)
, localizations(NULL)
, capitalizationInfoSet(FALSE)
, capitalizationForUIListMenu(FALSE)
, capitalizationForStandAlone(FALSE)
, capitalizationBrkIter(NULL)
{
init(description, NULL, perror, status);
}
@ -731,6 +750,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
, lenient(FALSE)
, lenientParseRules(NULL)
, localizations(NULL)
, capitalizationInfoSet(FALSE)
, capitalizationForUIListMenu(FALSE)
, capitalizationForStandAlone(FALSE)
, capitalizationBrkIter(NULL)
{
init(description, NULL, perror, status);
}
@ -746,6 +769,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale&
, lenient(FALSE)
, lenientParseRules(NULL)
, localizations(NULL)
, capitalizationInfoSet(FALSE)
, capitalizationForUIListMenu(FALSE)
, capitalizationForStandAlone(FALSE)
, capitalizationBrkIter(NULL)
{
if (U_FAILURE(status)) {
return;
@ -806,6 +833,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
, lenient(FALSE)
, lenientParseRules(NULL)
, localizations(NULL)
, capitalizationInfoSet(FALSE)
, capitalizationForUIListMenu(FALSE)
, capitalizationForStandAlone(FALSE)
, capitalizationBrkIter(NULL)
{
this->operator=(rhs);
}
@ -828,6 +859,12 @@ RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
capitalizationInfoSet = rhs.capitalizationInfoSet;
capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
capitalizationForStandAlone = rhs.capitalizationForStandAlone;
capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
return *this;
}
@ -851,6 +888,9 @@ RuleBasedNumberFormat::operator==(const Format& other) const
if (typeid(*this) == typeid(other)) {
const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
// test for capitalization info equality is adequately handled
// by the NumberFormat test for fCapitalizationContext equality;
// the info here is just derived from that.
if (locale == rhs.locale &&
lenient == rhs.lenient &&
(localizations == NULL
@ -1022,7 +1062,11 @@ RuleBasedNumberFormat::format(int32_t number,
UnicodeString& toAppendTo,
FieldPosition& /* pos */) const
{
if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
if (defaultRuleSet) {
int32_t startPos = toAppendTo.length();
defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
adjustForCapitalizationContext(startPos, toAppendTo);
}
return toAppendTo;
}
@ -1032,7 +1076,11 @@ RuleBasedNumberFormat::format(int64_t number,
UnicodeString& toAppendTo,
FieldPosition& /* pos */) const
{
if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
if (defaultRuleSet) {
int32_t startPos = toAppendTo.length();
defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
adjustForCapitalizationContext(startPos, toAppendTo);
}
return toAppendTo;
}
@ -1042,6 +1090,7 @@ RuleBasedNumberFormat::format(double number,
UnicodeString& toAppendTo,
FieldPosition& /* pos */) const
{
int32_t startPos = toAppendTo.length();
// Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
if (uprv_isNaN(number)) {
DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
@ -1051,7 +1100,7 @@ RuleBasedNumberFormat::format(double number,
} else if (defaultRuleSet) {
defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
}
return toAppendTo;
return adjustForCapitalizationContext(startPos, toAppendTo);
}
@ -1070,7 +1119,9 @@ RuleBasedNumberFormat::format(int32_t number,
} else {
NFRuleSet *rs = findRuleSet(ruleSetName, status);
if (rs) {
int32_t startPos = toAppendTo.length();
rs->format((int64_t)number, toAppendTo, toAppendTo.length());
adjustForCapitalizationContext(startPos, toAppendTo);
}
}
}
@ -1092,7 +1143,9 @@ RuleBasedNumberFormat::format(int64_t number,
} else {
NFRuleSet *rs = findRuleSet(ruleSetName, status);
if (rs) {
int32_t startPos = toAppendTo.length();
rs->format(number, toAppendTo, toAppendTo.length());
adjustForCapitalizationContext(startPos, toAppendTo);
}
}
}
@ -1114,13 +1167,39 @@ RuleBasedNumberFormat::format(double number,
} else {
NFRuleSet *rs = findRuleSet(ruleSetName, status);
if (rs) {
int32_t startPos = toAppendTo.length();
rs->format(number, toAppendTo, toAppendTo.length());
adjustForCapitalizationContext(startPos, toAppendTo);
}
}
}
return toAppendTo;
}
// Applies the formatter's capitalization display context to freshly
// formatted output.
//
// Only acts when the formatted text starts at offset 0 of a non-empty
// currentResult, the first code point is lowercase, a sentence break iterator
// has been created, and the current capitalization context calls for it:
// always for "beginning of sentence", and for "UI list or menu" /
// "standalone" only when the corresponding capitalization flag is set.
//
// @param startPos       Offset in currentResult at which the formatted
//                       number begins.
// @param currentResult  The string being built; titlecased in place.
// @return               currentResult, to allow call chaining.
UnicodeString&
RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
                                                      UnicodeString& currentResult) const
{
#if !UCONFIG_NO_BREAK_ITERATION
    // Nothing to do unless the number is the very start of a non-empty result.
    if (startPos != 0 || currentResult.length() <= 0) {
        return currentResult;
    }

    const UChar32 firstCodePoint = currentResult.char32At(0);
    UErrorCode contextStatus = U_ZERO_ERROR;
    const UDisplayContext context = getContext(UDISPCTX_TYPE_CAPITALIZATION, contextStatus);
    if (!u_islower(firstCodePoint) || U_FAILURE(contextStatus) || capitalizationBrkIter == NULL) {
        return currentResult;
    }

    // Decide whether the active context asks for an initial capital.
    UBool titlecaseWanted = FALSE;
    switch (context) {
        case UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE:
            titlecaseWanted = TRUE;
            break;
        case UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU:
            titlecaseWanted = capitalizationForUIListMenu;
            break;
        case UDISPCTX_CAPITALIZATION_FOR_STANDALONE:
            titlecaseWanted = capitalizationForStandAlone;
            break;
        default:
            break;
    }

    if (titlecaseWanted) {
        // Titlecase using the sentence iterator (unlike the implementations in
        // LocaleDisplayNamesImpl::adjustForUsageAndContext and
        // RelativeDateFormat::format); the options leave the remaining text's
        // case and the break positions untouched.
        currentResult.toTitle(capitalizationBrkIter, locale,
                              U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
    }
#endif
    return currentResult;
}
void
RuleBasedNumberFormat::parse(const UnicodeString& text,
Formattable& result,
@ -1422,6 +1501,52 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali
originalDescription = rules;
}
// Overrides the NumberFormat implementation in order to lazily initialize the
// items needed for capitalization: the per-locale capitalization flags and
// the sentence break iterator are only created once a context that needs
// them is set.
//
// @param value   The UDisplayContext value to set.
// @param status  Input/output status. After delegating to
//                NumberFormat::setContext nothing further is done if this
//                indicates failure. A failure to create the sentence break
//                iterator is reported through this parameter too.
void
RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
{
    NumberFormat::setContext(value, status);
    if (U_FAILURE(status)) {
        return;
    }

    // The UI-list/menu and standalone contexts depend on locale data; load it
    // once, the first time either of them is requested.
    if (!capitalizationInfoSet &&
            (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
        initCapitalizationContextInfo(locale);
        capitalizationInfoSet = TRUE;
    }
#if !UCONFIG_NO_BREAK_ITERATION
    if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
            (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
            (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
        // Report through the caller's status (known to be a success code
        // here) rather than a shadowing local, so that a creation failure is
        // not silently dropped.
        capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
        if (U_FAILURE(status)) {
            // Never keep an iterator that was returned alongside a failure.
            delete capitalizationBrkIter;
            capitalizationBrkIter = NULL;
        }
    }
#endif
}
// Loads the capitalization flags for the UI-list/menu and standalone display
// contexts from the locale's "contextTransforms" resource data.
//
// On success the first two entries of the int vector are stored in
// capitalizationForUIListMenu and capitalizationForStandAlone. If the resource
// cannot be opened or read, or holds fewer than two entries, both flags keep
// their current values; errors are tracked in a local status and not reported
// to the caller.
//
// @param thelocale  Locale whose base name selects the resource bundle.
void
RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
{
#if !UCONFIG_NO_BREAK_ITERATION
    // thelocale is a reference and so always refers to an object; use its base
    // name directly instead of comparing the reference against NULL.
    const char * localeID = thelocale.getBaseName();
    UErrorCode status = U_ZERO_ERROR;
    UResourceBundle *rb = ures_open(NULL, localeID, &status);
    // Each lookup reuses rb as its fill-in, so a single ures_close() suffices.
    rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
    // Haven't got a good contextTransforms type for RBNF number spellout,
    // fix that with CLDR #6857. In the meantime use "symbol".
    rb = ures_getByKeyWithFallback(rb, "symbol", rb, &status);
    if (U_SUCCESS(status) && rb != NULL) {
        int32_t len = 0;
        const int32_t * intVector = ures_getIntVector(rb, &len, &status);
        if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
            capitalizationForUIListMenu = intVector[0];
            capitalizationForStandAlone = intVector[1];
        }
    }
    ures_close(rb);
#endif
}
void
RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
{
@ -1489,6 +1614,9 @@ RuleBasedNumberFormat::dispose()
delete lenientParseRules;
lenientParseRules = NULL;
delete capitalizationBrkIter;
capitalizationBrkIter = NULL;
if (localizations) localizations = localizations->unref();
}

View file

@ -34,6 +34,7 @@
#include "unicode/numfmt.h"
#include "unicode/unistr.h"
#include "unicode/strenum.h"
#include "unicode/brkiter.h"
U_NAMESPACE_BEGIN
@ -894,6 +895,19 @@ public:
*/
virtual UnicodeString getDefaultRuleSetName() const;
/* Cannot use #ifndef U_HIDE_DRAFT_API for the following draft method since it is virtual */
/**
* Set a particular UDisplayContext value in the formatter, such as
* UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see
* NumberFormat.
* @param value The UDisplayContext value to set.
* @param status Input/output status. If at entry this indicates a failure
* status, the function will do nothing; otherwise this will be
* updated with any new status from the function.
* @draft ICU 53
*/
virtual void setContext(UDisplayContext value, UErrorCode& status);
public:
/**
* ICU "poor man's RTTI", returns a UClassID for this class.
@ -939,6 +953,7 @@ private:
const Locale& locale, UParseError& perror, UErrorCode& status);
void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status);
void initCapitalizationContextInfo(const Locale& thelocale);
void dispose();
void stripWhitespace(UnicodeString& src);
void initDefaultRuleSet();
@ -953,6 +968,7 @@ private:
inline NFRuleSet * getDefaultRuleSet() const;
Collator * getCollator() const;
DecimalFormatSymbols * getDecimalFormatSymbols() const;
UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult) const;
private:
NFRuleSet **ruleSets;
@ -966,6 +982,14 @@ private:
UnicodeString* lenientParseRules;
LocalizationInfo* localizations;
UnicodeString originalDescription;
UBool capitalizationInfoSet;
UBool capitalizationForUIListMenu;
UBool capitalizationForStandAlone;
#if !UCONFIG_NO_BREAK_ITERATION
BreakIterator* capitalizationBrkIter;
#else
void* capitalizationBrkIter;
#endif
};
// ---------------

View file

@ -28,6 +28,7 @@
#include "unicode/unum.h"
#include "unicode/unumsys.h"
#include "unicode/ustring.h"
#include "unicode/udisplaycontext.h"
#include "cintltst.h"
#include "cnumtst.h"
@ -2450,13 +2451,31 @@ static void TestCurrencyIsoPluralFormat(void) {
localeString, currencyISOCode, DATA[i][3 + sIndex]);
}
}
unum_close(unumFmt);
}
}
}
/* One table-driven case for the capitalization-context checks in TestContext. */
typedef struct {
const char * locale;            /* locale ID passed to unum_open */
UNumberFormatStyle style;       /* number format style passed to unum_open */
UDisplayContext context;        /* display context applied with unum_setContext */
const char * expectedResult;    /* expected output; run through u_unescape before comparing */
} TestContextItem;
/*
 * Expected spellout results per capitalization context; the list ends with
 * an entry whose locale is NULL.
 * Currently no locales have contextTransforms data for "symbol" type, so only
 * the beginning-of-sentence row expects an initial capital; the UI-list/menu
 * and standalone rows expect the same lowercase text as middle-of-sentence.
 */
static const TestContextItem tcItems[] = { /* results for 123.45 */
{ "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, "ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" },
{ "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, "Ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" },
{ "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, "ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" },
{ "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_STANDALONE, "ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" },
{ NULL, (UNumberFormatStyle)0, (UDisplayContext)0, NULL }
};
static void TestContext(void) {
/* just a minimal sanity check for now */
UErrorCode status = U_ZERO_ERROR;
const TestContextItem* itemPtr;
UNumberFormat *unum = unum_open(UNUM_SPELLOUT, NULL, 0, "en", NULL, &status);
if ( U_SUCCESS(status) ) {
UDisplayContext context = unum_getContext(unum, UDISPCTX_TYPE_CAPITALIZATION, &status);
@ -2473,6 +2492,36 @@ static void TestContext(void) {
} else {
log_data_err("unum_open UNUM_SPELLOUT for en fails with status %s\n", myErrorName(status));
}
for (itemPtr = tcItems; itemPtr->locale != NULL; itemPtr++) {
UChar ubufResult[kUBufMax];
int32_t ulenRes;
status = U_ZERO_ERROR;
unum = unum_open(itemPtr->style, NULL, 0, itemPtr->locale, NULL, &status);
if (U_FAILURE(status)) {
log_data_err("FAIL: unum_open, locale %s, style %d - %s\n",
itemPtr->locale, (int)itemPtr->style, myErrorName(status));
continue;
}
unum_setContext(unum, itemPtr->context, &status);
ulenRes = unum_formatDouble(unum, 123.45, ubufResult, kUBufMax, NULL, &status);
if (U_FAILURE(status)) {
log_err("FAIL: unum_formatDouble, locale %s, style %d, context %d - %s\n",
itemPtr->locale, (int)itemPtr->style, (int)itemPtr->context, myErrorName(status));
} else {
UChar ubufExpected[kUBufMax];
int32_t ulenExp = u_unescape(itemPtr->expectedResult, ubufExpected, kUBufMax);
if (ulenRes != ulenExp || u_strncmp(ubufResult, ubufExpected, ulenExp) != 0) {
char bbuf[kUBufMax*2];
u_austrncpy(bbuf, ubufResult, sizeof(bbuf));
log_err("FAIL: unum_formatDouble, locale %s, style %d, context %d, expected %d:\"%s\", got %d:\"%s\"\n",
itemPtr->locale, (int)itemPtr->style, (int)itemPtr->context, ulenExp,
itemPtr->expectedResult, ulenRes, bbuf);
}
}
unum_close(unum);
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */