From 147ca53163dcf8bb5a26de8ee6b442bd1e8a4fe0 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Thu, 12 Nov 2015 22:09:04 +0000 Subject: [PATCH] ICU-11986 enumerate most of the measurement unit formatting data, rather than lookup-with-fallback for each piece; adds necessary fallback on leaf level X-SVN-Rev: 38082 --- icu4c/source/common/Makefile.in | 2 +- icu4c/source/common/common.vcxproj | 4 +- icu4c/source/common/common.vcxproj.filters | 8 +- icu4c/source/common/uresbund.cpp | 115 ++++++ .../common/{uresdata.c => uresdata.cpp} | 321 ++++++++++++++- icu4c/source/common/uresdata.h | 60 ++- icu4c/source/common/uresimp.h | 14 +- icu4c/source/common/uresource.cpp | 56 +++ icu4c/source/common/uresource.h | 231 +++++++++++ icu4c/source/i18n/measfmt.cpp | 375 +++++++++++++----- icu4c/source/test/intltest/itformat.cpp | 12 +- icu4c/source/test/intltest/tufmtts.cpp | 80 +++- icu4c/source/test/intltest/tufmtts.h | 63 --- 13 files changed, 1145 insertions(+), 196 deletions(-) rename icu4c/source/common/{uresdata.c => uresdata.cpp} (79%) create mode 100644 icu4c/source/common/uresource.cpp create mode 100644 icu4c/source/common/uresource.h delete mode 100644 icu4c/source/test/intltest/tufmtts.h diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index 30b4c6c89d6..0d159731736 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -84,7 +84,7 @@ uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o uvectr64.o \ ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \ ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \ -uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \ +uresource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \ messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \ bytestream.o stringpiece.o \ stringtriebuilder.o bytestriebuilder.o \ diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj index 0517cae1767..2acb43f024e 100644 --- a/icu4c/source/common/common.vcxproj +++ b/icu4c/source/common/common.vcxproj @@ -379,7 +379,8 @@ - + + @@ -1136,6 +1137,7 @@ + copy "%(FullPath)" ..\..\include\unicode diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters index 2d4810b9000..fae609141ef 100644 --- a/icu4c/source/common/common.vcxproj.filters +++ b/icu4c/source/common/common.vcxproj.filters @@ -343,7 +343,10 @@ locales & resources - + + locales & resources + + locales & resources @@ -772,6 +775,9 @@ locales & resources + + locales & resources + locales & resources diff --git a/icu4c/source/common/uresbund.cpp b/icu4c/source/common/uresbund.cpp index e74afb8a483..784869dbc18 100644 --- a/icu4c/source/common/uresbund.cpp +++ b/icu4c/source/common/uresbund.cpp @@ -1882,6 +1882,121 @@ ures_getByKeyWithFallback(const UResourceBundle *resB, return fillIn; } +namespace { + +void getAllContainerItemsWithFallback( + const UResourceBundle *bundle, ResourceDataValue &value, + UResourceArraySink *arraySink, UResourceTableSink *tableSink, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + // We recursively enumerate child-first, + // only storing parent items in the absence of child items. + // We store a placeholder value for the no-fallback/no-inheritance marker + // to prevent a parent item from being stored. + // + // It would be possible to recursively enumerate parent-first, + // overriding parent items with child items. + // When we see the no-fallback/no-inheritance marker, + // then we would remove the parent's item. + // We would deserialize parent values even though they are overridden in a child bundle. + UResType expectedType = arraySink != NULL ? URES_ARRAY : URES_TABLE; + if (ures_getType(bundle) == expectedType) { + value.pResData = &bundle->fResData; + if (arraySink != NULL) { + ures_getAllArrayItems(&bundle->fResData, bundle->fRes, value, *arraySink, errorCode); + } else /* tableSink != NULL */ { + ures_getAllTableItems(&bundle->fResData, bundle->fRes, value, *tableSink, errorCode); + } + } + UResourceDataEntry *entry = bundle->fData->fParent; + if (entry != NULL && U_SUCCESS(entry->fBogus)) { + // We might try to query the sink whether + // any fallback from the parent bundle is still possible. + + // Turn the parent UResourceDataEntry into a UResourceBundle, + // much like in ures_openWithType(). + // TODO: See if we can refactor ures_getByKeyWithFallback() + // and pull out an inner function that takes and returns a UResourceDataEntry + // so that we need not create UResourceBundle objects. + UResourceBundle parentBundle; + ures_initStackObject(&parentBundle); + parentBundle.fTopLevelData = parentBundle.fData = entry; + // TODO: What is the difference between bundle fData and fTopLevelData? + uprv_memcpy(&parentBundle.fResData, &entry->fData, sizeof(ResourceData)); + // TODO: Try to replace bundle.fResData with just using bundle.fData->fData. + parentBundle.fHasFallback = !parentBundle.fResData.noFallback; + parentBundle.fIsTopLevel = TRUE; + parentBundle.fRes = parentBundle.fResData.rootRes; + parentBundle.fSize = res_countArrayItems(&(parentBundle.fResData), parentBundle.fRes); + parentBundle.fIndex = -1; + entryIncrease(entry); + + // Look up the container item in the parent bundle. + UResourceBundle containerBundle; + ures_initStackObject(&containerBundle); + const UResourceBundle *rb; + if (bundle->fResPath == NULL || *bundle->fResPath == 0) { + rb = &parentBundle; + } else { + rb = ures_getByKeyWithFallback(&parentBundle, bundle->fResPath, + &containerBundle, &errorCode); + } + if (U_SUCCESS(errorCode) && ures_getType(rb) == expectedType) { + getAllContainerItemsWithFallback(rb, value, + arraySink, tableSink, errorCode); + } + ures_close(&containerBundle); + ures_close(&parentBundle); + } +} + +void getAllContainerItemsWithFallback( + const UResourceBundle *bundle, const char *path, + UResourceArraySink *arraySink, UResourceTableSink *tableSink, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + if (path == NULL) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + UResourceBundle stackBundle; + ures_initStackObject(&stackBundle); + const UResourceBundle *rb; + if (*path == 0) { + // empty path + rb = bundle; + } else { + rb = ures_getByKeyWithFallback(bundle, path, &stackBundle, &errorCode); + if (U_FAILURE(errorCode)) { + ures_close(&stackBundle); + return; + } + } + UResType expectedType = arraySink != NULL ? URES_ARRAY : URES_TABLE; + if (ures_getType(rb) != expectedType) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + ures_close(&stackBundle); + return; + } + // Get all table items with fallback. + ResourceDataValue value; + getAllContainerItemsWithFallback(rb, value, arraySink, tableSink, errorCode); + ures_close(&stackBundle); +} + +} // namespace + +U_CAPI void U_EXPORT2 +ures_getAllArrayItemsWithFallback(const UResourceBundle *bundle, const char *path, + UResourceArraySink &sink, UErrorCode &errorCode) { + getAllContainerItemsWithFallback(bundle, path, &sink, NULL, errorCode); +} + +U_CAPI void U_EXPORT2 +ures_getAllTableItemsWithFallback(const UResourceBundle *bundle, const char *path, + UResourceTableSink &sink, UErrorCode &errorCode) { + getAllContainerItemsWithFallback(bundle, path, NULL, &sink, errorCode); +} U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, const char* inKey, UResourceBundle *fillIn, UErrorCode *status) { Resource res = RES_BOGUS; diff --git a/icu4c/source/common/uresdata.c b/icu4c/source/common/uresdata.cpp similarity index 79% rename from icu4c/source/common/uresdata.c rename to icu4c/source/common/uresdata.cpp index 53887fbfdc3..3c555b624f7 100644 --- a/icu4c/source/common/uresdata.c +++ b/icu4c/source/common/uresdata.cpp @@ -3,7 +3,7 @@ * Copyright (C) 1999-2015, International Business Machines Corporation * and others. All Rights Reserved. ******************************************************************************* -* file name: uresdata.c +* file name: uresdata.cpp * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -24,12 +24,13 @@ #include "cmemory.h" #include "cstring.h" #include "uarrsort.h" -#include "udataswp.h" +#include "uassert.h" #include "ucol_swp.h" +#include "udataswp.h" #include "uinvchar.h" #include "uresdata.h" #include "uresimp.h" -#include "uassert.h" +#include "uresource.h" /* * Resource access helpers @@ -133,7 +134,7 @@ _res_findTable32Item(const ResourceData *pResData, const int32_t *keyOffsets, in static UBool U_CALLCONV isAcceptable(void *context, - const char *type, const char *name, + const char * /*type*/, const char * /*name*/, const UDataInfo *pInfo) { uprv_memcpy(context, pInfo->formatVersion, 4); return (UBool)( @@ -310,7 +311,7 @@ res_getString(const ResourceData *pResData, Resource res, int32_t *pLength) { int32_t length; if(RES_GET_TYPE(res)==URES_STRING_V2) { int32_t first; - if(offsetpoolStringIndexLimit) { + if((int32_t)offsetpoolStringIndexLimit) { p=(const UChar *)pResData->poolBundleStrings+offset; } else { p=(const UChar *)pResData->p16BitUnits+(offset-pResData->poolStringIndexLimit); @@ -342,6 +343,45 @@ res_getString(const ResourceData *pResData, Resource res, int32_t *pLength) { return p; } +namespace { + +/** + * CLDR string value (three empty-set symbols)=={2205, 2205, 2205} + * prevents fallback to the parent bundle. + * TODO: combine with other code that handles this marker, use EMPTY_SET constant. + * TODO: maybe move to uresbund.cpp? + */ +UBool isNoInheritanceMarker(const ResourceData *pResData, Resource res) { + uint32_t offset=RES_GET_OFFSET(res); + if (offset == 0) { + // empty string + } else if (res == offset) { + const int32_t *p32=pResData->pRoot+res; + int32_t length=*p32; + const UChar *p=(const UChar *)p32; + return length == 3 && p[2] == 0x2205 && p[3] == 0x2205 && p[4] == 0x2205; + } else if (RES_GET_TYPE(res) == URES_STRING_V2) { + const UChar *p; + if((int32_t)offsetpoolStringIndexLimit) { + p=(const UChar *)pResData->poolBundleStrings+offset; + } else { + p=(const UChar *)pResData->p16BitUnits+(offset-pResData->poolStringIndexLimit); + } + int32_t first=*p; + if (first == 0x2205) { // implicit length + return p[1] == 0x2205 && p[2] == 0x2205 && p[3] == 0; + } else if (first == 0xdc03) { // explicit length 3 (should not occur) + return p[1] == 0x2205 && p[2] == 0x2205 && p[3] == 0x2205; + } else { + // Assume that the string has not been stored with more length units than necessary. + return FALSE; + } + } + return FALSE; +} + +} // namespace + U_CAPI const UChar * U_EXPORT2 res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength) { const UChar *p; @@ -423,6 +463,116 @@ res_countArrayItems(const ResourceData *pResData, Resource res) { } } +namespace { + +int32_t getArrayLength(const ResourceData *pResData, Resource res) { + uint32_t offset=RES_GET_OFFSET(res); + if(offset == 0) { + return 0; + } + int32_t type = RES_GET_TYPE(res); + if(type == URES_ARRAY) { + return *(pResData->pRoot+offset); + } else if(type == URES_ARRAY16) { + return pResData->p16BitUnits[offset]; + } else { + return 0; + } +} + +int32_t getTableLength(const ResourceData *pResData, Resource res) { + uint32_t offset=RES_GET_OFFSET(res); + if(offset == 0) { + return 0; + } + int32_t type = RES_GET_TYPE(res); + if(type == URES_TABLE) { + return *((const uint16_t *)(pResData->pRoot+offset)); + } else if(type == URES_TABLE16) { + return pResData->p16BitUnits[offset]; + } else if(type == URES_TABLE32) { + return *(pResData->pRoot+offset); + } else { + return 0; + } +} + +} // namespace + +U_NAMESPACE_BEGIN + +ResourceDataValue::~ResourceDataValue() {} + +UResType ResourceDataValue::getType() const { + return res_getPublicType(res); +} + +const UChar *ResourceDataValue::getString(int32_t &length, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return NULL; + } + const UChar *s = res_getString(pResData, res, &length); + if(s == NULL) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return s; +} + +const UChar *ResourceDataValue::getAliasString(int32_t &length, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return NULL; + } + const UChar *s = res_getAlias(pResData, res, &length); + if(s == NULL) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return s; +} + +int32_t ResourceDataValue::getInt(UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } + if(RES_GET_TYPE(res) != URES_INT) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return RES_GET_INT(res); +} + +uint32_t ResourceDataValue::getUInt(UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } + if(RES_GET_TYPE(res) != URES_INT) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return RES_GET_UINT(res); +} + +const int32_t *ResourceDataValue::getIntVector(int32_t &length, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return NULL; + } + const int32_t *iv = res_getIntVector(pResData, res, &length); + if(iv == NULL) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return iv; +} + +const uint8_t *ResourceDataValue::getBinary(int32_t &length, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return NULL; + } + const uint8_t *b = res_getBinary(pResData, res, &length); + if(b == NULL) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return b; +} + +U_NAMESPACE_END + static Resource makeResourceFrom16(const ResourceData *pResData, int32_t res16) { if(res16poolStringIndex16Limit) { @@ -541,6 +691,92 @@ res_getResource(const ResourceData *pResData, const char *key) { return res_getTableItemByKey(pResData, pResData->rootRes, &idx, &realKey); } +// TODO: Ported from Java, but enumerating at this low level may prevent us +// from doing necessary things, like resolving aliases, +// which need access to higher-level UResourceBundle code. +// Consider porting the low-level Container/Array/Table classes from Java, +// with getters for keys and values, +// and doing the enumeration in the higher-level code on top of those accessors. +U_CFUNC void +ures_getAllTableItems(const ResourceData *pResData, Resource table, + icu::ResourceDataValue &value, icu::UResourceTableSink &sink, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return; } + const uint16_t *keys16 = NULL; + const int32_t *keys32 = NULL; + const uint16_t *items16 = NULL; + const Resource *items32 = NULL; + uint32_t offset = RES_GET_OFFSET(table); + int32_t length = 0; + switch(RES_GET_TYPE(table)) { + case URES_TABLE: { + if (offset != 0) { /* empty if offset==0 */ + keys16 = (const uint16_t *)(pResData->pRoot+offset); + length = *keys16++; + items32 = (const Resource *)(keys16+length+(~length&1)); + } + break; + } + case URES_TABLE16: { + keys16 = pResData->p16BitUnits+offset; + length = *keys16++; + items16 = keys16 + length; + break; + } + case URES_TABLE32: { + if (offset != 0) { /* empty if offset==0 */ + keys32 = pResData->pRoot+offset; + length = *keys32++; + items32 = (const Resource *)keys32 + length; + } + break; + } + default: + errorCode = U_RESOURCE_TYPE_MISMATCH; + return; + } + + for (int32_t i = 0; i < length; ++i) { + const char *key; + if (keys16 != NULL) { + key=RES_GET_KEY16(pResData, keys16[i]); + } else { + key=RES_GET_KEY32(pResData, keys32[i]); + } + Resource res; + if (items16 != NULL) { + res = makeResourceFrom16(pResData, items16[i]); + } else { + res = items32[i]; + } + int32_t type = RES_GET_TYPE(res); + if (URES_IS_ARRAY(type)) { + int32_t numItems = getArrayLength(pResData, res); + icu::UResourceArraySink *subSink = sink.getOrCreateArraySink(key, numItems, errorCode); + if (subSink != NULL) { + ures_getAllArrayItems(pResData, res, value, *subSink, errorCode); + } + } else if (URES_IS_TABLE(type)) { + int32_t numItems = getTableLength(pResData, res); + icu::UResourceTableSink *subSink = sink.getOrCreateTableSink(key, numItems, errorCode); + if (subSink != NULL) { + ures_getAllTableItems(pResData, res, value, *subSink, errorCode); + } + /* TODO: settle on how to deal with aliases, port to Java + } else if (type == URES_ALIAS) { + // aliases not handled in resource enumeration + errorCode = U_UNSUPPORTED_ERROR; + return; */ + } else if (isNoInheritanceMarker(pResData, res)) { + sink.putNoFallback(key, errorCode); + } else { + value.setResource(res); + sink.put(key, value, errorCode); + } + if(U_FAILURE(errorCode)) { return; } + } +} + U_CAPI Resource U_EXPORT2 res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) { uint32_t offset=RES_GET_OFFSET(array); @@ -568,13 +804,68 @@ res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) { return RES_BOGUS; } +U_CFUNC void +ures_getAllArrayItems(const ResourceData *pResData, Resource array, + icu::ResourceDataValue &value, icu::UResourceArraySink &sink, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return; } + const uint16_t *items16 = NULL; + const Resource *items32 = NULL; + uint32_t offset=RES_GET_OFFSET(array); + int32_t length = 0; + switch(RES_GET_TYPE(array)) { + case URES_ARRAY: { + if (offset!=0) { /* empty if offset==0 */ + items32 = (const Resource *)pResData->pRoot+offset; + length = *items32++; + } + break; + } + case URES_ARRAY16: { + items16 = pResData->p16BitUnits+offset; + length = *items16++; + break; + } + default: + errorCode = U_RESOURCE_TYPE_MISMATCH; + return; + } + + for (int32_t i = 0; i < length; ++i) { + Resource res; + if (items16 != NULL) { + res = makeResourceFrom16(pResData, items16[i]); + } else { + res = items32[i]; + } + int32_t type = RES_GET_TYPE(res); + if (URES_IS_ARRAY(type)) { + int32_t numItems = getArrayLength(pResData, res); + icu::UResourceArraySink *subSink = sink.getOrCreateArraySink(i, numItems, errorCode); + if (subSink != NULL) { + ures_getAllArrayItems(pResData, res, value, *subSink, errorCode); + } + } else if (URES_IS_TABLE(type)) { + int32_t numItems = getTableLength(pResData, res); + icu::UResourceTableSink *subSink = sink.getOrCreateTableSink(i, numItems, errorCode); + if (subSink != NULL) { + ures_getAllTableItems(pResData, res, value, *subSink, errorCode); + } + /* TODO: settle on how to deal with aliases, port to Java + } else if (type == URES_ALIAS) { + // aliases not handled in resource enumeration + errorCode = U_UNSUPPORTED_ERROR; + return; */ + } else { + value.setResource(res); + sink.put(i, value, errorCode); + } + if(U_FAILURE(errorCode)) { return; } + } +} + U_CFUNC Resource res_findResource(const ResourceData *pResData, Resource r, char** path, const char** key) { - /* we pass in a path. CollationElements/Sequence or zoneStrings/3/2 etc. - * iterates over a path and stops when a scalar resource is found. This - * CAN be an alias. Path gets set to the part that has not yet been processed. - */ - char *pathP = *path, *nextSepP = *path; char *closeIndex = NULL; Resource t1 = r; @@ -601,6 +892,10 @@ res_findResource(const ResourceData *pResData, Resource r, char** path, const ch * and set path to the remaining part of the string */ if(nextSepP != NULL) { + if(nextSepP == pathP) { + // Empty key string. + return RES_BOGUS; + } *nextSepP = 0; /* overwrite the separator with a NUL to terminate the key */ *path = nextSepP+1; } else { @@ -615,14 +910,14 @@ res_findResource(const ResourceData *pResData, Resource r, char** path, const ch if(t2 == RES_BOGUS) { /* if we fail to get the resource by key, maybe we got an index */ indexR = uprv_strtol(pathP, &closeIndex, 10); - if(closeIndex != pathP) { + if(*closeIndex == 0) { /* if we indeed have an index, try to get the item by index */ t2 = res_getTableItemByIndex(pResData, t1, indexR, key); } } } else if(URES_IS_ARRAY(type)) { indexR = uprv_strtol(pathP, &closeIndex, 10); - if(closeIndex != pathP) { + if(*closeIndex == 0) { t2 = res_getArrayItem(pResData, t1, indexR); } else { t2 = RES_BOGUS; /* have an array, but don't have a valid index */ @@ -1094,7 +1389,7 @@ ures_swap(const UDataSwapper *ds, */ resFlagsLength=(length+31)>>5; /* number of bytes needed */ resFlagsLength=(resFlagsLength+3)&~3; /* multiple of 4 bytes for uint32_t */ - if(resFlagsLength<=sizeof(stackResFlags)) { + if(resFlagsLength<=(int32_t)sizeof(stackResFlags)) { tempTable.resFlags=stackResFlags; } else { tempTable.resFlags=(uint32_t *)uprv_malloc(resFlagsLength); diff --git a/icu4c/source/common/uresdata.h b/icu4c/source/common/uresdata.h index ff69dd48374..2ebf4dabc46 100644 --- a/icu4c/source/common/uresdata.h +++ b/icu4c/source/common/uresdata.h @@ -453,11 +453,67 @@ res_getTableItemByIndex(const ResourceData *pResData, Resource table, int32_t in U_INTERNAL Resource U_EXPORT2 res_getTableItemByKey(const ResourceData *pResData, Resource table, int32_t *indexS, const char* * key); -/* +/** + * Iterates over the path and stops when a scalar resource is found. + * Follows aliases. * Modifies the contents of *path (replacing separators with NULs), * and also moves *path forward while it finds items. + * + * @param path input: "CollationElements/Sequence" or "zoneStrings/3/2" etc.; + * output: points to the part that has not yet been processed */ -U_CFUNC Resource res_findResource(const ResourceData *pResData, Resource r, char** path, const char** key); +U_CFUNC Resource res_findResource(const ResourceData *pResData, Resource r, + char** path, const char** key); + +#ifdef __cplusplus + +#include "uresource.h" + +U_NAMESPACE_BEGIN + +class ResourceDataValue : public UResourceValue { +public: + ResourceDataValue() : pResData(NULL), res(URES_NONE) {} + virtual ~ResourceDataValue(); + + void setData(const ResourceData *data) { pResData = data; } + void setResource(Resource r) { res = r; } + + virtual UResType getType() const; + virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const; + virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const; + virtual int32_t getInt(UErrorCode &errorCode) const; + virtual uint32_t getUInt(UErrorCode &errorCode) const; + virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const; + virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const; + + const ResourceData *pResData; + +private: + Resource res; +}; + +U_NAMESPACE_END + +/** + * @param value will be set during enumeration; input contents is ignored + * @param sink receives all table item key-value pairs + */ +U_CFUNC void +ures_getAllTableItems(const ResourceData *pResData, Resource table, + icu::ResourceDataValue &value, icu::UResourceTableSink &sink, + UErrorCode &errorCode); + +/** + * @param value will be set during enumeration; input contents is ignored + * @param sink receives all array item values + */ +U_CFUNC void +ures_getAllArrayItems(const ResourceData *pResData, Resource array, + icu::ResourceDataValue &value, icu::UResourceArraySink &sink, + UErrorCode &errorCode); + +#endif /* __cplusplus */ /** * Swap an ICU resource bundle. See udataswp.h. diff --git a/icu4c/source/common/uresimp.h b/icu4c/source/common/uresimp.h index b8ec5a61cac..51d91b7a724 100644 --- a/icu4c/source/common/uresimp.h +++ b/icu4c/source/common/uresimp.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2000-2014, International Business Machines +* Copyright (C) 2000-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -222,6 +222,18 @@ ures_getStringByKeyWithFallback(const UResourceBundle *resB, int32_t* len, UErrorCode *status); +#ifdef __cplusplus + +U_CAPI void U_EXPORT2 +ures_getAllArrayItemsWithFallback(const UResourceBundle *bundle, const char *path, + icu::UResourceArraySink &sink, UErrorCode &errorCode); + +U_CAPI void U_EXPORT2 +ures_getAllTableItemsWithFallback(const UResourceBundle *bundle, const char *path, + icu::UResourceTableSink &sink, UErrorCode &errorCode); + +#endif /* __cplusplus */ + /** * Get a version number by key * @param resB bundle containing version number diff --git a/icu4c/source/common/uresource.cpp b/icu4c/source/common/uresource.cpp new file mode 100644 index 00000000000..8bcfe2b5615 --- /dev/null +++ b/icu4c/source/common/uresource.cpp @@ -0,0 +1,56 @@ +/* +******************************************************************************* +* Copyright (C) 2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* uresource.cpp +* +* created on: 2015nov04 +* created by: Markus W. Scherer +*/ + +#include "uresource.h" + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/ures.h" + +U_NAMESPACE_BEGIN + +UResourceValue::~UResourceValue() {} + + +UResourceArraySink::~UResourceArraySink() {} + +void UResourceArraySink::put( + int32_t /*index*/, UResourceValue & /*value*/, UErrorCode & /*errorCode*/) {} + +UResourceArraySink *UResourceArraySink::getOrCreateArraySink( + int32_t /*index*/, int32_t /*size*/, UErrorCode & /*errorCode*/) { + return NULL; +} + +UResourceTableSink *UResourceArraySink::getOrCreateTableSink( + int32_t /*index*/, int32_t /*initialSize*/, UErrorCode & /*errorCode*/) { + return NULL; +} + + +UResourceTableSink::~UResourceTableSink() {} + +void UResourceTableSink::put( + const char * /*key*/, UResourceValue & /*value*/, UErrorCode & /*errorCode*/) {} + +void UResourceTableSink::putNoFallback(const char * /*key*/, UErrorCode & /*errorCode*/) {} + +UResourceArraySink *UResourceTableSink::getOrCreateArraySink( + const char * /*key*/, int32_t /*size*/, UErrorCode & /*errorCode*/) { + return NULL; +} + +UResourceTableSink *UResourceTableSink::getOrCreateTableSink( + const char * /*key*/, int32_t /*initialSize*/, UErrorCode & /*errorCode*/) { + return NULL; +} + +U_NAMESPACE_END diff --git a/icu4c/source/common/uresource.h b/icu4c/source/common/uresource.h new file mode 100644 index 00000000000..c94ae7e4aea --- /dev/null +++ b/icu4c/source/common/uresource.h @@ -0,0 +1,231 @@ +/* +******************************************************************************* +* Copyright (C) 2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* uresource.h +* +* created on: 2015nov04 +* created by: Markus W. Scherer +*/ + +#ifndef __URESOURCE_H__ +#define __URESOURCE_H__ + +/** + * \file + * \brief ICU resource bundle key and value types. + */ + +// Note: Ported from ICU4J class ICUResource and its nested classes, +// but the C++ classes are separate, not nested. + +#include "unicode/utypes.h" +#include "unicode/unistr.h" +#include "unicode/ures.h" + +U_NAMESPACE_BEGIN + +class UResourceTableSink; + +// Note: In C++, we use const char * pointers for keys, +// rather than an abstraction like Java ICUResource.Key. + +/** + * Represents a resource bundle item's value. + * Avoids object creations as much as possible. + * Mutable, not thread-safe. + */ +class U_COMMON_API UResourceValue : public UObject { +public: + virtual ~UResourceValue(); + + /** + * @return ICU resource type, for example, URES_STRING + */ + virtual UResType getType() const = 0; + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not a string resource. + * + * @see ures_getString() + */ + virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const = 0; + + inline UnicodeString getUnicodeString(UErrorCode &errorCode) const { + int32_t len = 0; + const UChar *r = getString(len, errorCode); + return UnicodeString(TRUE, r, len); + } + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not an alias resource. + */ + virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const = 0; + + inline UnicodeString getAliasUnicodeString(UErrorCode &errorCode) const { + int32_t len = 0; + const UChar *r = getAliasString(len, errorCode); + return UnicodeString(TRUE, r, len); + } + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource. + * + * @see ures_getInt() + */ + virtual int32_t getInt(UErrorCode &errorCode) const = 0; + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource. + * + * @see ures_getUInt() + */ + virtual uint32_t getUInt(UErrorCode &errorCode) const = 0; + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not an intvector resource. + * + * @see ures_getIntVector() + */ + virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const = 0; + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not a binary-blob resource. + * + * @see ures_getBinary() + */ + virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const = 0; + +protected: + UResourceValue() {} + +private: + UResourceValue(const UResourceValue &); // no copy constructor + UResourceValue &operator=(const UResourceValue &); // no assignment operator +}; + +/** + * Sink for ICU resource array contents. + * The base class does nothing. + * + * Nested arrays and tables are stored as nested sinks, + * never put() as UResourceValue items. + */ +class U_COMMON_API UResourceArraySink : public UObject { +public: + UResourceArraySink() {} + virtual ~UResourceArraySink(); + + /** + * Adds a value from a resource array. + * + * @param index of the resource array item + * @param value resource value + */ + virtual void put(int32_t index, UResourceValue &value, UErrorCode &errorCode); + + /** + * Returns a nested resource array at the array index as another sink. + * Creates the sink if none exists for the key. + * Returns NULL if nested arrays are not supported. + * The default implementation always returns NULL. + * + * This sink (not the caller) owns the nested sink. + * + * @param index of the resource array item + * @param size number of array items + * @return nested-array sink, or NULL + */ + virtual UResourceArraySink *getOrCreateArraySink( + int32_t index, int32_t size, UErrorCode &errorCode); + + /** + * Returns a nested resource table at the array index as another sink. + * Creates the sink if none exists for the key. + * Returns NULL if nested tables are not supported. + * The default implementation always returns NULL. + * + * This sink (not the caller) owns the nested sink. + * + * @param index of the resource array item + * @param initialSize size hint for creating the sink if necessary + * @return nested-table sink, or NULL + */ + virtual UResourceTableSink *getOrCreateTableSink( + int32_t index, int32_t initialSize, UErrorCode &errorCode); + +private: + UResourceArraySink(const UResourceArraySink &); // no copy constructor + UResourceArraySink &operator=(const UResourceArraySink &); // no assignment operator +}; + +/** + * Sink for ICU resource table contents. + * The base class does nothing. + * + * Nested arrays and tables are stored as nested sinks, + * never put() as UResourceValue items. + */ +class U_COMMON_API UResourceTableSink : public UObject { +public: + UResourceTableSink() {} + virtual ~UResourceTableSink(); + + /** + * Adds a key-value pair from a resource table. + * + * @param key resource key string + * @param value resource value + */ + virtual void put(const char *key, UResourceValue &value, UErrorCode &errorCode); + + /** + * Adds a no-fallback/no-inheritance marker for this key. + * Used for CLDR no-fallback data values of (three empty-set symbols)=={2205, 2205, 2205} + * when enumerating tables with fallback from the specific resource bundle to root. + * + * The default implementation does nothing. + * + * @param key to be removed + */ + virtual void putNoFallback(const char *key, UErrorCode &errorCode); + + /** + * Returns a nested resource array for the key as another sink. + * Creates the sink if none exists for the key. + * Returns NULL if nested arrays are not supported. + * The default implementation always returns NULL. + * + * This sink (not the caller) owns the nested sink. + * + * @param key resource key string + * @param size number of array items + * @return nested-array sink, or NULL + */ + virtual UResourceArraySink *getOrCreateArraySink( + const char *key, int32_t size, UErrorCode &errorCode); + + /** + * Returns a nested resource table for the key as another sink. + * Creates the sink if none exists for the key. + * Returns NULL if nested tables are not supported. + * The default implementation always returns NULL. + * + * This sink (not the caller) owns the nested sink. + * + * @param key resource key string + * @param initialSize size hint for creating the sink if necessary + * @return nested-table sink, or NULL + */ + virtual UResourceTableSink *getOrCreateTableSink( + const char *key, int32_t initialSize, UErrorCode &errorCode); + +private: + UResourceTableSink(const UResourceTableSink &); // no copy constructor + UResourceTableSink &operator=(const UResourceTableSink &); // no assignment operator +}; + +U_NAMESPACE_END + +#endif diff --git a/icu4c/source/i18n/measfmt.cpp b/icu4c/source/i18n/measfmt.cpp index 6f8851c8067..1a6da82fb31 100644 --- a/icu4c/source/i18n/measfmt.cpp +++ b/icu4c/source/i18n/measfmt.cpp @@ -32,6 +32,7 @@ #include "unicode/putil.h" #include "unicode/smpdtfmt.h" #include "uassert.h" +#include "uresource.h" #include "sharednumberformat.h" #include "sharedpluralrules.h" @@ -81,9 +82,17 @@ private: class MeasureFormatCacheData : public SharedObject { public: QuantityFormatter formatters[MEAS_UNIT_COUNT][WIDTH_INDEX_COUNT]; + SimplePatternFormatter *perUnitFormatters[MEAS_UNIT_COUNT][WIDTH_INDEX_COUNT]; SimplePatternFormatter perFormatters[WIDTH_INDEX_COUNT]; MeasureFormatCacheData(); + + UBool hasPerFormatter(int32_t width) const { + // TODO: Create a more obvious way to test if the per-formatter has been set? + // Use pointers, check for NULL? Or add an isValid() method? + return perFormatters[width].getPlaceholderCount() == 2; + } + void adoptCurrencyFormat(int32_t widthIndex, NumberFormat *nfToAdopt) { delete currencyFormats[widthIndex]; currencyFormats[widthIndex] = nfToAdopt; @@ -105,13 +114,6 @@ public: const NumericDateFormatters *getNumericDateFormatters() const { return numericDateFormatters; } - void adoptPerUnitFormatter( - int32_t index, - int32_t widthIndex, - SimplePatternFormatter *formatterToAdopt) { - delete perUnitFormatters[index][widthIndex]; - perUnitFormatters[index][widthIndex] = formatterToAdopt; - } const SimplePatternFormatter * const * getPerUnitFormattersByIndex( int32_t index) const { return perUnitFormatters[index]; @@ -121,7 +123,6 @@ private: NumberFormat *currencyFormats[WIDTH_INDEX_COUNT]; NumberFormat *integerFormat; NumericDateFormatters *numericDateFormatters; - SimplePatternFormatter *perUnitFormatters[MEAS_UNIT_COUNT][WIDTH_INDEX_COUNT]; MeasureFormatCacheData(const MeasureFormatCacheData &other); MeasureFormatCacheData &operator=(const MeasureFormatCacheData &other); }; @@ -176,6 +177,228 @@ static UBool getString( return TRUE; } +namespace { + +class UnitDataSink; + +class UnitPatternSink : public UResourceTableSink { +public: + UnitPatternSink(UnitDataSink &sink) : dataSink(sink) {} + ~UnitPatternSink(); + virtual void put(const char *key, UResourceValue &value, UErrorCode &errorCode); + + UnitDataSink &dataSink; +}; + +class UnitSubtypeSink : public UResourceTableSink { +public: + UnitSubtypeSink(UnitDataSink &sink) : dataSink(sink) {} + ~UnitSubtypeSink(); + virtual UResourceTableSink *getOrCreateTableSink( + const char *key, int32_t initialSize, UErrorCode &errorCode); + + UnitDataSink &dataSink; +}; + +class UnitCompoundSink : public UResourceTableSink { +public: + UnitCompoundSink(UnitDataSink &sink) : dataSink(sink) {} + ~UnitCompoundSink(); + virtual void put(const char *key, UResourceValue &value, UErrorCode &errorCode); + + UnitDataSink &dataSink; +}; + +class UnitTypeSink : public UResourceTableSink { +public: + UnitTypeSink(UnitDataSink &sink) : dataSink(sink) {} + ~UnitTypeSink(); + virtual UResourceTableSink *getOrCreateTableSink( + const char *key, int32_t initialSize, UErrorCode &errorCode); + + UnitDataSink &dataSink; +}; + +static const UChar g_LOCALE_units[] = { + 0x2F, 0x4C, 0x4F, 0x43, 0x41, 0x4C, 0x45, 0x2F, + 0x75, 0x6E, 0x69, 0x74, 0x73 +}; +static const UChar gShort[] = { 0x53, 0x68, 0x6F, 0x72, 0x74 }; +static const UChar gNarrow[] = { 0x4E, 0x61, 0x72, 0x72, 0x6F, 0x77 }; + +class UnitDataSink : public UResourceTableSink { +public: + UnitDataSink(const MeasureUnit *u, int32_t len, MeasureFormatCacheData &outputData); + ~UnitDataSink(); + virtual void put(const char *key, UResourceValue &value, UErrorCode &errorCode); + virtual UResourceTableSink *getOrCreateTableSink( + const char *key, int32_t initialSize, UErrorCode &errorCode); + + static UMeasureFormatWidth widthFromKey(const char *key) { + if (uprv_strncmp(key, "units", 5) == 0) { + key += 5; + if (*key == 0) { + return UMEASFMT_WIDTH_WIDE; + } else if (uprv_strcmp(key, "Short") == 0) { + return UMEASFMT_WIDTH_SHORT; + } else if (uprv_strcmp(key, "Narrow") == 0) { + return UMEASFMT_WIDTH_NARROW; + } + } + return UMEASFMT_WIDTH_COUNT; + } + + static UMeasureFormatWidth widthFromAlias(const UResourceValue &value, UErrorCode &errorCode) { + int32_t length; + const UChar *s = value.getAliasString(length, errorCode); + // For example: "/LOCALE/unitsShort" + if (U_SUCCESS(errorCode) && length >= 13 && u_memcmp(s, g_LOCALE_units, 13) == 0) { + s += 13; + length -= 13; + if (*s == 0) { + return UMEASFMT_WIDTH_WIDE; + } else if (u_strCompare(s, length, gShort, 5, FALSE) == 0) { + return UMEASFMT_WIDTH_SHORT; + } else if (u_strCompare(s, length, gNarrow, 6, FALSE) == 0) { + return UMEASFMT_WIDTH_NARROW; + } + } + return UMEASFMT_WIDTH_COUNT; + } + + // All known units, for mapping from type & subtype to unitIndex. + const MeasureUnit *units; + int32_t unitsLength; + + // Output data. + MeasureFormatCacheData &cacheData; + /** + * Redirection data from root-bundle, top-level sideways aliases. + * - UMEASFMT_WIDTH_COUNT: initial value, just fall back to root + * - UMEASFMT_WIDTH_WIDE/SHORT/NARROW: sideways alias for missing data + * - -1: no-inheritance marker + */ + UMeasureFormatWidth widthFallback[WIDTH_INDEX_COUNT]; + + // Path to current data. + UMeasureFormatWidth width; + const char *type; + int32_t unitIndex; + UBool hasPatterns; + + UnitTypeSink typeSink; + UnitSubtypeSink subtypeSink; + UnitCompoundSink compoundSink; + UnitPatternSink patternSink; +}; + +UnitPatternSink::~UnitPatternSink() {} + +void UnitPatternSink::put(const char *key, UResourceValue &value, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + if (uprv_strcmp(key, "dnam") == 0) { + // Skip display name for now. + } else if (uprv_strcmp(key, "per") == 0) { + if (dataSink.cacheData.perUnitFormatters[dataSink.unitIndex][dataSink.width] == NULL) { + dataSink.cacheData.perUnitFormatters[dataSink.unitIndex][dataSink.width] = + new SimplePatternFormatter(value.getUnicodeString(errorCode)); + } + } else { + // The key must be one of the plural form strings. + if (!dataSink.hasPatterns) { + dataSink.cacheData.formatters[dataSink.unitIndex][dataSink.width].add( + key, value.getUnicodeString(errorCode), errorCode); + } + } +} + +UnitSubtypeSink::~UnitSubtypeSink() {} + +UResourceTableSink *UnitSubtypeSink::getOrCreateTableSink( + const char *key, int32_t /* initialSize */, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return NULL; } + // Find the unit from its type and subtype. + // TODO: There must be a better way to do this. Should be easy inside MeasureUnit. + // There, map type & subtype each to ints, compute unit index. + for (int32_t i = 0; i < dataSink.unitsLength; ++i) { + const MeasureUnit &unit = dataSink.units[i]; + if (uprv_strcmp(unit.getType(), dataSink.type) == 0 && + uprv_strcmp(unit.getSubtype(), key) == 0) { + dataSink.unitIndex = unit.getIndex(); + dataSink.hasPatterns = + dataSink.cacheData.formatters[dataSink.unitIndex][dataSink.width].isValid(); + return &dataSink.patternSink; + } + } + return NULL; +} + +UnitCompoundSink::~UnitCompoundSink() {} + +void UnitCompoundSink::put(const char *key, UResourceValue &value, UErrorCode &errorCode) { + if (U_SUCCESS(errorCode) && uprv_strcmp(key, "per") == 0) { + dataSink.cacheData.perFormatters[dataSink.width]. + compile(value.getUnicodeString(errorCode), errorCode); + } +} + +UnitTypeSink::~UnitTypeSink() {} + +UResourceTableSink *UnitTypeSink::getOrCreateTableSink( + const char *key, int32_t /* initialSize */, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return NULL; } + if (uprv_strcmp(key, "currency") == 0) { + } else if (uprv_strcmp(key, "compound") == 0) { + if (!dataSink.cacheData.hasPerFormatter(dataSink.width)) { + return &dataSink.compoundSink; + } + } else { + dataSink.type = key; + return &dataSink.subtypeSink; + } + return NULL; +} + +UnitDataSink::UnitDataSink(const MeasureUnit *u, int32_t len, MeasureFormatCacheData &outputData) + : units(u), unitsLength(len), cacheData(outputData), + width(UMEASFMT_WIDTH_COUNT), type(NULL), unitIndex(0), hasPatterns(FALSE), + typeSink(*this), subtypeSink(*this), compoundSink(*this), patternSink(*this) { + for (int32_t i = 0; i < WIDTH_INDEX_COUNT; ++i) { + widthFallback[i] = UMEASFMT_WIDTH_COUNT; + } +} + +UnitDataSink::~UnitDataSink() {} + +void UnitDataSink::put(const char *key, UResourceValue &value, UErrorCode &errorCode) { + // Handle aliases like + // units:alias{"/LOCALE/unitsShort"} + // which should only occur in the root bundle. + if (U_FAILURE(errorCode) || value.getType() != URES_ALIAS) { return; } + UMeasureFormatWidth sourceWidth = widthFromKey(key); + if (sourceWidth == UMEASFMT_WIDTH_COUNT) { + // Alias from something we don't care about. + return; + } + UMeasureFormatWidth targetWidth = widthFromAlias(value, errorCode); + if (targetWidth == UMEASFMT_WIDTH_COUNT) { + // We do not recognize what to fall back to. + errorCode = U_UNSUPPORTED_ERROR; + return; + } + widthFallback[sourceWidth] = targetWidth; +} + +UResourceTableSink *UnitDataSink::getOrCreateTableSink( + const char *key, int32_t /* initialSize */, UErrorCode &errorCode) { + if (U_SUCCESS(errorCode) && + (width = widthFromKey(key)) != UMEASFMT_WIDTH_COUNT) { + return &typeSink; + } + return NULL; +} + +} // namespace static UBool loadMeasureUnitData( const UResourceBundle *resource, @@ -184,110 +407,62 @@ static UBool loadMeasureUnitData( if (U_FAILURE(status)) { return FALSE; } - static const char *widthPath[] = {"units", "unitsShort", "unitsNarrow"}; - MeasureUnit *units = NULL; - int32_t unitCount = MeasureUnit::getAvailable(units, 0, status); - while (status == U_BUFFER_OVERFLOW_ERROR) { - status = U_ZERO_ERROR; - delete [] units; - units = new MeasureUnit[unitCount]; - if (units == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - unitCount = MeasureUnit::getAvailable(units, unitCount, status); + int32_t unitCount = MeasureUnit::getAvailable(NULL, 0, status); + if (status != U_BUFFER_OVERFLOW_ERROR) { + return FALSE; } - for (int32_t currentWidth = 0; currentWidth < WIDTH_INDEX_COUNT; ++currentWidth) { - // Be sure status is clear since next resource bundle lookup may fail. - if (U_FAILURE(status)) { - delete [] units; + status = U_ZERO_ERROR; + LocalArray units(new MeasureUnit[unitCount], status); + unitCount = MeasureUnit::getAvailable(units.getAlias(), unitCount, status); + UnitDataSink sink(units.getAlias(), unitCount, cacheData); + ures_getAllTableItemsWithFallback(resource, "", sink, status); + if (U_FAILURE(status)) { + return FALSE; + } + // Check that we do not fall back to another fallback. + for (int32_t width = 0; width < WIDTH_INDEX_COUNT; ++width) { + UMeasureFormatWidth targetWidth = sink.widthFallback[width]; + if (targetWidth != UMEASFMT_WIDTH_COUNT && + sink.widthFallback[targetWidth] != UMEASFMT_WIDTH_COUNT) { + status = U_UNSUPPORTED_ERROR; return FALSE; } - LocalUResourceBundlePointer widthBundle( - ures_getByKeyWithFallback( - resource, widthPath[currentWidth], NULL, &status)); - // We may not have data for all widths in all locales. - if (status == U_MISSING_RESOURCE_ERROR) { - status = U_ZERO_ERROR; + } + // Copy fallback-width patterns where they are missing. + // Assumption: All plural forms are stored together. + // This means we can fall back from one whole set to another, + // rather than fall back for individual patterns. + for (int32_t width = 0; width < WIDTH_INDEX_COUNT; ++width) { + UMeasureFormatWidth targetWidth = sink.widthFallback[width]; + if (targetWidth == UMEASFMT_WIDTH_COUNT) { continue; } - { - // compound per - LocalUResourceBundlePointer compoundPerBundle( - ures_getByKeyWithFallback( - widthBundle.getAlias(), - "compound/per", - NULL, - &status)); - if (U_FAILURE(status)) { - status = U_ZERO_ERROR; - } else { - UnicodeString perPattern; - getString(compoundPerBundle.getAlias(), perPattern, status); - cacheData.perFormatters[currentWidth].compile(perPattern, status); - } + if (!cacheData.hasPerFormatter(width) && cacheData.hasPerFormatter(targetWidth)) { + cacheData.perFormatters[width] = cacheData.perFormatters[targetWidth]; } - for (int32_t currentUnit = 0; currentUnit < unitCount; ++currentUnit) { - // Be sure status is clear next lookup may fail. - if (U_FAILURE(status)) { - delete [] units; - return FALSE; - } - if (isCurrency(units[currentUnit])) { + for (int32_t i = 0; i < unitCount; ++i) { + if (isCurrency(units[i])) { continue; } - CharString pathBuffer; - pathBuffer.append(units[currentUnit].getType(), status) - .append("/", status) - .append(units[currentUnit].getSubtype(), status); - LocalUResourceBundlePointer unitBundle( - ures_getByKeyWithFallback( - widthBundle.getAlias(), - pathBuffer.data(), - NULL, - &status)); - // We may not have data for all units in all widths - if (status == U_MISSING_RESOURCE_ERROR) { - status = U_ZERO_ERROR; - continue; + int32_t unitIndex = units[i].getIndex(); + if (!cacheData.formatters[unitIndex][width].isValid() && + cacheData.formatters[unitIndex][targetWidth].isValid()) { + cacheData.formatters[unitIndex][width] = + cacheData.formatters[unitIndex][targetWidth]; } - // We must have the unit bundle to proceed - if (U_FAILURE(status)) { - delete [] units; - return FALSE; - } - int32_t size = ures_getSize(unitBundle.getAlias()); - for (int32_t plIndex = 0; plIndex < size; ++plIndex) { - LocalUResourceBundlePointer pluralBundle( - ures_getByIndex( - unitBundle.getAlias(), plIndex, NULL, &status)); - if (U_FAILURE(status)) { - delete [] units; - return FALSE; - } - const char * resKey = ures_getKey(pluralBundle.getAlias()); - if (uprv_strcmp(resKey, "dnam") == 0) { - continue; // skip display name & per pattern (new in CLDR 26 / ICU 54) for now, not part of plurals - } - if (uprv_strcmp(resKey, "per") == 0) { - UnicodeString perPattern; - getString(pluralBundle.getAlias(), perPattern, status); - cacheData.adoptPerUnitFormatter( - units[currentUnit].getIndex(), - currentWidth, - new SimplePatternFormatter(perPattern)); - continue; - } - UnicodeString rawPattern; - getString(pluralBundle.getAlias(), rawPattern, status); - cacheData.formatters[units[currentUnit].getIndex()][currentWidth].add( - resKey, - rawPattern, - status); + if (cacheData.perUnitFormatters[unitIndex][width] == NULL && + cacheData.perUnitFormatters[unitIndex][targetWidth] != NULL) { + cacheData.perUnitFormatters[unitIndex][width] = + new SimplePatternFormatter( + *cacheData.perUnitFormatters[unitIndex][targetWidth]); } } } - delete [] units; + // TODO: Rather than copy patterns, record the width fallback in the cacheData + // and handle it while formatting. + // TODO: Maybe store more sparsely in general, with pointers rather than potentially-empty objects. + // TODO: Maybe change the cache data into an array[WIDTH_INDEX_COUNT] of unit patterns, + // to correspond to the resource data and its aliases. return U_SUCCESS(status); } diff --git a/icu4c/source/test/intltest/itformat.cpp b/icu4c/source/test/intltest/itformat.cpp index f9f815d93e6..deae7ba0b61 100644 --- a/icu4c/source/test/intltest/itformat.cpp +++ b/icu4c/source/test/intltest/itformat.cpp @@ -53,7 +53,6 @@ #include "plurfmts.h" // PluralFormatTest #include "selfmts.h" // PluralFormatTest #include "dtifmtts.h" // DateIntervalFormatTest -#include "tufmtts.h" // TimeUnitTest #include "locnmtst.h" // LocaleDisplayNamesTest #include "dcfmtest.h" // DecimalFormatTest #include "listformattertest.h" // ListFormatterTest @@ -64,6 +63,7 @@ extern IntlTest *createGenderInfoTest(); #if !UCONFIG_NO_BREAK_ITERATION extern IntlTest *createRelativeDateTimeFormatterTest(); #endif +extern IntlTest *createTimeUnitTest(); extern IntlTest *createMeasureFormatTest(); extern IntlTest *createNumberFormatSpecificationTest(); extern IntlTest *createScientificNumberFormatterTest(); @@ -139,7 +139,15 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam TESTCLASS(35,PluralRulesTest); TESTCLASS(36,PluralFormatTest); TESTCLASS(37,DateIntervalFormatTest); - TESTCLASS(38,TimeUnitTest); + case 38: + name = "TimeUnitTest"; + if (exec) { + logln("TimeUnitTest test---"); + logln((UnicodeString)""); + LocalPointer test(createTimeUnitTest()); + callTest(*test, par); + } + break; TESTCLASS(39,SelectFormatTest); TESTCLASS(40,LocaleDisplayNamesTest); #if !UCONFIG_NO_REGULAR_EXPRESSIONS diff --git a/icu4c/source/test/intltest/tufmtts.cpp b/icu4c/source/test/intltest/tufmtts.cpp index 8ed9b9f7956..6b1ed5a0080 100644 --- a/icu4c/source/test/intltest/tufmtts.cpp +++ b/icu4c/source/test/intltest/tufmtts.cpp @@ -1,5 +1,5 @@ /******************************************************************** - * Copyright (c) 2008-2014, International Business Machines Corporation and + * Copyright (c) 2008-2015, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ @@ -11,9 +11,9 @@ #include "unicode/tmunit.h" #include "unicode/tmutamt.h" #include "unicode/tmutfmt.h" -#include "tufmtts.h" -#include "cmemory.h" #include "unicode/ustring.h" +#include "cmemory.h" +#include "intltest.h" //TODO: put as compilation flag //#define TUFMTTS_DEBUG 1 @@ -22,16 +22,60 @@ #include #endif -void TimeUnitTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) { - if (exec) logln("TestSuite TimeUnitTest"); - switch (index) { - TESTCASE(0, testBasic); - TESTCASE(1, testAPI); - TESTCASE(2, testGreekWithFallback); - TESTCASE(3, testGreekWithSanitization); - TESTCASE(4, test10219Plurals); - default: name = ""; break; +class TimeUnitTest : public IntlTest { + void runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/ ) { + if (exec) logln("TestSuite TimeUnitTest"); + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(testBasic); + TESTCASE_AUTO(testAPI); + TESTCASE_AUTO(testGreekWithFallback); + TESTCASE_AUTO(testGreekWithSanitization); + TESTCASE_AUTO(test10219Plurals); + TESTCASE_AUTO(TestBritishShortHourFallback); + TESTCASE_AUTO_END; } + +public: + /** + * Performs basic tests + **/ + void testBasic(); + + /** + * Performs API tests + **/ + void testAPI(); + + /** + * Performs tests for Greek + * This tests that requests for short unit names correctly fall back + * to long unit names for a locale where the locale data does not + * provide short unit names. As of CLDR 1.9, Greek is one such language. + **/ + void testGreekWithFallback(); + + /** + * Performs tests for Greek + * This tests that if the plural count listed in time unit format does not + * match those in the plural rules for the locale, those plural count in + * time unit format will be ingored and subsequently, fall back will kick in + * which is tested above. + * Without data sanitization, setNumberFormat() would crash. + * As of CLDR shiped in ICU4.8, Greek is one such language. + */ + void testGreekWithSanitization(); + + /** + * Performs unit test for ticket 10219 making sure that plurals work + * correctly with rounding. + */ + void test10219Plurals(); + + void TestBritishShortHourFallback(); +}; + +extern IntlTest *createTimeUnitTest() { + return new TimeUnitTest(); } // This function is more lenient than equals operator as it considers integer 3 hours and @@ -469,4 +513,16 @@ void TimeUnitTest::test10219Plurals() { } } +void TimeUnitTest::TestBritishShortHourFallback() { + // See ticket #11986 "incomplete fallback in MeasureFormat". + UErrorCode status = U_ZERO_ERROR; + Formattable oneHour(new TimeUnitAmount(1, TimeUnit::UTIMEUNIT_HOUR, status)); + Locale en_GB("en_GB"); + TimeUnitFormat formatter(en_GB, UTMUTFMT_ABBREVIATED_STYLE, status); + UnicodeString result; + formatter.format(oneHour, result, status); + assertSuccess("TestBritishShortHourFallback()", status); + assertEquals("TestBritishShortHourFallback()", UNICODE_STRING_SIMPLE("1 hr"), result); +} + #endif diff --git a/icu4c/source/test/intltest/tufmtts.h b/icu4c/source/test/intltest/tufmtts.h deleted file mode 100644 index cd4a48542f1..00000000000 --- a/icu4c/source/test/intltest/tufmtts.h +++ /dev/null @@ -1,63 +0,0 @@ -/******************************************************************** - * COPYRIGHT: - * Copyright (c) 2008-2013, International Business Machines Corporation - * and others. All Rights Reserved. - ********************************************************************/ - -#ifndef __INTLTESTTIMEUNITTEST__ -#define __INTLTESTTIMEUNITTEST__ - - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/utypes.h" -#include "unicode/locid.h" -#include "intltest.h" - -/** - * Test basic functionality of various API functions - **/ -class TimeUnitTest: public IntlTest { - void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ); - -public: - /** - * Performs basic tests - **/ - void testBasic(); - - /** - * Performs API tests - **/ - void testAPI(); - - /** - * Performs tests for Greek - * This tests that requests for short unit names correctly fall back - * to long unit names for a locale where the locale data does not - * provide short unit names. As of CLDR 1.9, Greek is one such language. - **/ - void testGreekWithFallback(); - - /** - * Performs tests for Greek - * This tests that if the plural count listed in time unit format does not - * match those in the plural rules for the locale, those plural count in - * time unit format will be ingored and subsequently, fall back will kick in - * which is tested above. - * Without data sanitization, setNumberFormat() would crash. - * As of CLDR shiped in ICU4.8, Greek is one such language. - */ - void testGreekWithSanitization(); - - /** - * Performs unit test for ticket 10219 making sure that plurals work - * correctly with rounding. - */ - void test10219Plurals(); - -}; - -#endif /* #if !UCONFIG_NO_FORMATTING */ - -#endif