ICU-13697 Adding data-loading logic for parseLenients sets in CLDR. Ties the sets in with number and currency parsing in ICU4C and ICU4J.

X-SVN-Rev: 41223
This commit is contained in:
Shane Carr 2018-04-12 10:59:37 +00:00
parent af0f8e62e4
commit 354afa4e79
16 changed files with 390 additions and 554 deletions

View file

@ -111,7 +111,8 @@ util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o p
ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o ulistformatter.o \
sharedobject.o simpleformatter.o unifiedcache.o uloc_keytype.o \
ubiditransform.o \
pluralmap.o
pluralmap.o \
numparse_unisets.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h

View file

@ -0,0 +1,200 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
#include "numparse_unisets.h"
#include "umutex.h"
#include "ucln_cmn.h"
#include "unicode/uniset.h"
#include "uresimp.h"
#include "cstring.h"
#include "uassert.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
using namespace icu::numparse::impl::unisets;
namespace {
static UnicodeSet* gUnicodeSets[COUNT] = {};
// Builds a new, frozen UnicodeSet containing the union of the cached sets for k1 and k2.
// A key whose cached set has not been populated is treated as the empty set rather than
// being dereferenced. Returns nullptr if the result set cannot be allocated.
UnicodeSet* computeUnion(Key k1, Key k2) {
    UnicodeSet* result = new UnicodeSet();
    if (result == nullptr) {
        return nullptr;
    }
    const Key keys[] = {k1, k2};
    for (Key key : keys) {
        const UnicodeSet* part = gUnicodeSets[key];
        if (part != nullptr) {
            result->addAll(*part);
        }
    }
    result->freeze();
    return result;
}
// Three-key overload: builds a new, frozen UnicodeSet containing the union of the cached
// sets for k1, k2 and k3. A key whose cached set has not been populated is treated as the
// empty set rather than being dereferenced. Returns nullptr if allocation fails.
UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
    UnicodeSet* result = new UnicodeSet();
    if (result == nullptr) {
        return nullptr;
    }
    const Key keys[] = {k1, k2, k3};
    for (Key key : keys) {
        const UnicodeSet* part = gUnicodeSets[key];
        if (part != nullptr) {
            result->addAll(*part);
        }
    }
    result->freeze();
    return result;
}
// Parses unicodeSetPattern into a new UnicodeSet and stores it in the cache slot for key.
// Pattern errors are reported through status by the UnicodeSet constructor; an allocation
// failure sets status to U_MEMORY_ALLOCATION_ERROR and leaves the slot untouched.
void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) {
    // Each key is expected to be saved only once (Java: assert unicodeSets.get(key) == null).
    U_ASSERT(gUnicodeSets[key] == nullptr);
    UnicodeSet* parsed = new UnicodeSet(unicodeSetPattern, status);
    if (parsed == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Release any earlier set so that a repeated key does not leak in release builds.
    delete gUnicodeSets[key];
    gUnicodeSets[key] = parsed;
}
// Resource sink for the "parse" table. It walks context -> strictness -> array of
// UnicodeSet pattern strings and stores each recognized pattern in the cache via saveSet().
// A pattern is recognized by an exemplar character that appears in the pattern text.
class ParseDataSink : public ResourceSink {
  public:
    // key/value are reused as cursors for the nested tables; any failure reported through
    // status aborts the walk immediately.
    void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
        ResourceTable contextsTable = value.getTable(status);
        if (U_FAILURE(status)) { return; }
        for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) {
            if (uprv_strcmp(key, "date") == 0) {
                // ignore
                continue;
            }
            ResourceTable strictnessTable = value.getTable(status);
            if (U_FAILURE(status)) { return; }
            for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) {
                // Any strictness key other than "lenient" is treated as strict data.
                bool isLenient = (uprv_strcmp(key, "lenient") == 0);
                ResourceArray array = value.getArray(status);
                if (U_FAILURE(status)) { return; }
                for (int k = 0; k < array.getSize(); k++) {
                    array.getValue(k, value);
                    UnicodeString str = value.getUnicodeString(status);
                    if (U_FAILURE(status)) { return; }
                    // There is both lenient and strict data for comma/period,
                    // but not for any of the other symbols.
                    if (str.indexOf(u'.') != -1) {
                        saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status);
                    } else if (str.indexOf(u',') != -1) {
                        saveSet(isLenient ? COMMA : STRICT_COMMA, str, status);
                    } else if (str.indexOf(u'+') != -1) {
                        saveSet(PLUS_SIGN, str, status);
                    } else if (str.indexOf(u'\u2012') != -1) {
                        // U+2012 FIGURE DASH identifies the minus-sign pattern. Written as an
                        // escape because the literal character had been lost (empty literal).
                        saveSet(MINUS_SIGN, str, status);
                    } else if (str.indexOf(u'$') != -1) {
                        saveSet(DOLLAR_SIGN, str, status);
                    } else if (str.indexOf(u'£') != -1) {
                        saveSet(POUND_SIGN, str, status);
                    } else if (str.indexOf(u'\u20A8') != -1) {
                        // U+20A8 RUPEE SIGN identifies the rupee pattern (escape, see above).
                        saveSet(RUPEE_SIGN, str, status);
                    }
                    if (U_FAILURE(status)) { return; }
                }
            }
        }
    }
};
icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
// Cleanup callback registered by initNumberParseUniSets(): deletes every cached set and
// re-arms the init-once guard so that a later get() rebuilds the cache instead of handing
// out the cleared (null) entries.
UBool U_CALLCONV cleanupNumberParseUniSets() {
    for (int32_t i = 0; i < COUNT; i++) {
        delete gUnicodeSets[i];
        gUnicodeSets[i] = nullptr;
    }
    gNumberParseUniSetsInitOnce.reset();
    return TRUE;
}
// One-time initializer for the static set cache. Hard-coded sets are created directly,
// the comma/period/sign/currency sets are loaded from the "parse" table of the root
// resource bundle, and the union sets are derived from those. On a resource failure the
// function returns early with status set; the registered cleanup releases whatever was
// already stored.
void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
    ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets);

    gUnicodeSets[EMPTY] = new UnicodeSet();

    // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
    gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet(
            u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status);
    gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status);

    // Load the locale-independent parse data; ParseDataSink fills the cache through saveSet().
    LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status));
    if (U_FAILURE(status)) { return; }
    ParseDataSink sink;
    ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status);
    if (U_FAILURE(status)) { return; }

    // TODO: Should there be fallback behavior if for some reason these sets didn't get populated?
    U_ASSERT(gUnicodeSets[COMMA] != nullptr);
    U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
    U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
    U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr);

    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet(
            u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);
    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
            STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);

    U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr);
    U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr);

    gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status);
    gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status);
    gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status);

    U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
    U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr);
    U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr);
    // YEN SIGN (U+00A5) and FULLWIDTH YEN SIGN (U+FFE5). The pattern needs its opening
    // bracket, and must contain U+00A5 because that is the exemplar used for YEN_SIGN.
    gUnicodeSets[YEN_SIGN] = new UnicodeSet(u"[\\u00A5\\uffe5]", status);

    gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);

    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);

    // Freeze everything that was created; a slot may still be null if an allocation failed
    // or the resource data did not provide that set.
    for (int32_t i = 0; i < COUNT; i++) {
        if (gUnicodeSets[i] != nullptr) {
            gUnicodeSets[i]->freeze();
        }
    }
}
}
// Returns the cached set for key, running the one-time initialization on first use.
// Yields nullptr when that initialization reported an error.
const UnicodeSet* unisets::get(Key key) {
    UErrorCode initStatus = U_ZERO_ERROR;
    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, initStatus);
    // TODO: This returns non-null in Java, and callers assume that.
    return U_FAILURE(initStatus) ? nullptr : gUnicodeSets[key];
}
// Returns key1 if its set contains str, otherwise COUNT (the "null" key).
// A set that is unavailable (get() returned nullptr) is treated as not containing str.
Key unisets::chooseFrom(UnicodeString str, Key key1) {
    const UnicodeSet* candidate = get(key1);
    return (candidate != nullptr && candidate->contains(str)) ? key1 : COUNT;
}
// Returns the first of key1, key2 whose set contains str, or COUNT if neither does.
// A set that is unavailable (get() returned nullptr) is treated as not containing str.
Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
    const UnicodeSet* candidate = get(key1);
    if (candidate != nullptr && candidate->contains(str)) {
        return key1;
    }
    return chooseFrom(str, key2);
}
//Key unisets::chooseCurrency(UnicodeString str) {
// if (get(DOLLAR_SIGN)->contains(str)) {
// return DOLLAR_SIGN;
// } else if (get(POUND_SIGN)->contains(str)) {
// return POUND_SIGN;
// } else if (get(RUPEE_SIGN)->contains(str)) {
// return RUPEE_SIGN;
// } else if (get(YEN_SIGN)->contains(str)) {
// return YEN_SIGN;
// } else {
// return COUNT;
// }
//}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -1,14 +1,16 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// This file is in common instead of i18n because it is needed by ucurr.cpp.
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#ifndef __NUMPARSE_UNISETS_H__
#define __NUMPARSE_UNISETS_H__
#include "numparse_types.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
@ -18,8 +20,6 @@ enum Key {
EMPTY,
// Ignorables
BIDI,
WHITESPACE,
DEFAULT_IGNORABLES,
STRICT_IGNORABLES,
@ -29,7 +29,7 @@ enum Key {
// - PERIOD is a superset of STRICT_PERIOD
// - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
// - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GRP_SEPARATORS
COMMA,
COMMA,
PERIOD,
STRICT_COMMA,
STRICT_PERIOD,
@ -38,23 +38,27 @@ enum Key {
STRICT_ALL_SEPARATORS,
// Symbols
// TODO: NaN?
MINUS_SIGN,
MINUS_SIGN,
PLUS_SIGN,
PERCENT_SIGN,
PERMILLE_SIGN,
INFINITY_KEY, // INFINITY is defined in cmath
// Currency Symbols
DOLLAR_SIGN,
POUND_SIGN,
RUPEE_SIGN,
YEN_SIGN, // not in CLDR data, but Currency.java wants it
// Other
DIGITS,
CWCF,
DIGITS,
// Combined Separators with Digits (for lead code points)
DIGITS_OR_ALL_SEPARATORS,
DIGITS_OR_ALL_SEPARATORS,
DIGITS_OR_STRICT_ALL_SEPARATORS,
// The number of elements in the enum. Also used to indicate null.
COUNT
COUNT
};
const UnicodeSet* get(Key key);
@ -63,6 +67,19 @@ Key chooseFrom(UnicodeString str, Key key1);
Key chooseFrom(UnicodeString str, Key key1, Key key2);
// Unused in C++:
// Key chooseCurrency(UnicodeString str);
// Used instead:
// Table pairing each currency-symbol set key with one exemplar symbol for that set.
static const struct {
    Key key;
    UChar32 exemplar;
} kCurrencyEntries[] = {
    {DOLLAR_SIGN, u'$'},
    {POUND_SIGN, u'£'},
    // U+20A8 RUPEE SIGN, written as an escape: the literal character had been lost,
    // leaving an empty (ill-formed) character literal.
    {RUPEE_SIGN, u'\u20A8'},
    {YEN_SIGN, u'¥'},
};
} // namespace unisets
} // namespace impl
} // namespace numparse

View file

@ -33,6 +33,7 @@ Please keep the order of enums declared in same order
as the cleanup functions are supposed to be called. */
typedef enum ECleanupCommonType {
UCLN_COMMON_START = -1,
UCLN_COMMON_NUMPARSE_UNISETS,
UCLN_COMMON_USPREP,
UCLN_COMMON_BREAKITERATOR,
UCLN_COMMON_RBBI,

View file

@ -17,11 +17,13 @@
#include "unicode/ustring.h"
#include "unicode/parsepos.h"
#include "unicode/uniset.h"
#include "unicode/usetiter.h"
#include "unicode/utf16.h"
#include "ustr_imp.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "numparse_unisets.h"
#include "uassert.h"
#include "umutex.h"
#include "ucln_cmn.h"
@ -67,14 +69,6 @@ static const int32_t POW10[] = { 1, 10, 100, 1000, 10000, 100000,
static const int32_t MAX_POW10 = UPRV_LENGTHOF(POW10) - 1;
// Defines equivalent currency symbols.
static const char *EQUIV_CURRENCY_SYMBOLS[][2] = {
{"\\u00a5", "\\uffe5"},
{"$", "\\ufe69"},
{"$", "\\uff04"},
{"\\u20a8", "\\u20b9"},
{"\\u00a3", "\\u20a4"}};
#define ISO_CURRENCY_CODE_LENGTH 3
//------------------------------------------------------------
@ -2207,16 +2201,21 @@ static void U_CALLCONV initIsoCodes(UErrorCode &status) {
}
static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
int32_t length = UPRV_LENGTHOF(EQUIV_CURRENCY_SYMBOLS);
for (int32_t i = 0; i < length; ++i) {
icu::UnicodeString lhs(EQUIV_CURRENCY_SYMBOLS[i][0], -1, US_INV);
icu::UnicodeString rhs(EQUIV_CURRENCY_SYMBOLS[i][1], -1, US_INV);
makeEquivalent(lhs.unescape(), rhs.unescape(), hash, status);
if (U_FAILURE(status)) {
return;
using namespace icu::numparse::impl;
if (U_FAILURE(status)) { return; }
for (auto& entry : unisets::kCurrencyEntries) {
UnicodeString exemplar(entry.exemplar);
const UnicodeSet* set = unisets::get(entry.key);
if (set == nullptr) { return; }
UnicodeSetIterator it(*set);
while (it.next()) {
UnicodeString value = it.getString();
if (value == exemplar) {
// No need to mark the exemplar character as an equivalent
continue;
}
makeEquivalent(exemplar, value, hash, status);
if (U_FAILURE(status)) { return; }
}
}
}

View file

@ -92,7 +92,7 @@ csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.
wintzimpl.o windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o zonemeta.o \
standardplural.o upluralrules.o plurrule.o plurfmt.o selfmt.o dtitvfmt.o dtitvinf.o udateintervalformat.o \
tmunit.o tmutamt.o tmutfmt.o currpinf.o \
uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o decfmtst.o smpdtfst.o \
uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o smpdtfst.o \
ztrans.o zrule.o vzone.o fphdlimp.o fpositer.o ufieldpositer.o \
decNumber.o decContext.o alphaindex.o tznames.o tznames_impl.o tzgnames.o \
tzfmt.o compactdecimalformat.o gender.o region.o scriptset.o \
@ -107,7 +107,7 @@ number_mapper.o number_multiplier.o number_currencysymbols.o number_skeletons.o
double-conversion.o double-conversion-bignum-dtoa.o double-conversion-bignum.o \
double-conversion-cached-powers.o double-conversion-diy-fp.o \
double-conversion-fast-dtoa.o double-conversion-strtod.o \
numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o numparse_impl.o \
numparse_stringsegment.o numparse_parsednumber.o numparse_impl.o \
numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o \
numparse_affixes.o numparse_compositions.o numparse_validators.o \

View file

@ -1,251 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2009-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* This file contains the class DecimalFormatStaticSets
*
* DecimalFormatStaticSets holds the UnicodeSets that are needed for lenient
* parsing of decimal and group separators.
********************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/unistr.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
#include "ucln_in.h"
#include "umutex.h"
#include "decfmtst.h"
U_NAMESPACE_BEGIN
//------------------------------------------------------------------------------
//
// Unicode Set pattern strings for all of the required constant sets.
// Initialized with hex values for portability to EBCDIC based machines.
// Really ugly, but there's no good way to avoid it.
//
//------------------------------------------------------------------------------
static const UChar gDotEquivalentsPattern[] = {
// [ . \u2024 \u3002 \uFE12 \uFE52 \uFF0E \uFF61 ]
0x005B, 0x002E, 0x2024, 0x3002, 0xFE12, 0xFE52, 0xFF0E, 0xFF61, 0x005D, 0x0000};
static const UChar gCommaEquivalentsPattern[] = {
// [ , \u060C \u066B \u3001 \uFE10 \uFE11 \uFE50 \uFE51 \uFF0C \uFF64 ]
0x005B, 0x002C, 0x060C, 0x066B, 0x3001, 0xFE10, 0xFE11, 0xFE50, 0xFE51, 0xFF0C, 0xFF64, 0x005D, 0x0000};
static const UChar gOtherGroupingSeparatorsPattern[] = {
// [ \ SPACE ' NBSP \u066C \u2000 - \u200A \u2018 \u2019 \u202F \u205F \u3000 \uFF07 ]
0x005B, 0x005C, 0x0020, 0x0027, 0x00A0, 0x066C, 0x2000, 0x002D, 0x200A, 0x2018, 0x2019, 0x202F, 0x205F, 0x3000, 0xFF07, 0x005D, 0x0000};
static const UChar gDashEquivalentsPattern[] = {
// [ \ - HYPHEN F_DASH N_DASH MINUS ]
0x005B, 0x005C, 0x002D, 0x2010, 0x2012, 0x2013, 0x2212, 0x005D, 0x0000};
static const UChar gStrictDotEquivalentsPattern[] = {
// [ . \u2024 \uFE52 \uFF0E \uFF61 ]
0x005B, 0x002E, 0x2024, 0xFE52, 0xFF0E, 0xFF61, 0x005D, 0x0000};
static const UChar gStrictCommaEquivalentsPattern[] = {
// [ , \u066B \uFE10 \uFE50 \uFF0C ]
0x005B, 0x002C, 0x066B, 0xFE10, 0xFE50, 0xFF0C, 0x005D, 0x0000};
static const UChar gStrictOtherGroupingSeparatorsPattern[] = {
// [ \ SPACE ' NBSP \u066C \u2000 - \u200A \u2018 \u2019 \u202F \u205F \u3000 \uFF07 ]
0x005B, 0x005C, 0x0020, 0x0027, 0x00A0, 0x066C, 0x2000, 0x002D, 0x200A, 0x2018, 0x2019, 0x202F, 0x205F, 0x3000, 0xFF07, 0x005D, 0x0000};
static const UChar gStrictDashEquivalentsPattern[] = {
// [ \ - MINUS ]
0x005B, 0x005C, 0x002D, 0x2212, 0x005D, 0x0000};
static const UChar32 gMinusSigns[] = {
0x002D,
0x207B,
0x208B,
0x2212,
0x2796,
0xFE63,
0xFF0D};
static const UChar32 gPlusSigns[] = {
0x002B,
0x207A,
0x208A,
0x2795,
0xfB29,
0xFE62,
0xFF0B};
static void initUnicodeSet(const UChar32 *raw, int32_t len, UnicodeSet *s) {
for (int32_t i = 0; i < len; ++i) {
s->add(raw[i]);
}
}
// Creates and freezes all static sets. Pattern errors are reported through status by the
// UnicodeSet constructors. If any allocation fails, everything allocated so far is released
// and status is set to U_MEMORY_ALLOCATION_ERROR.
DecimalFormatStaticSets::DecimalFormatStaticSets(UErrorCode &status)
: fDotEquivalents(NULL),
  fCommaEquivalents(NULL),
  fOtherGroupingSeparators(NULL),
  fDashEquivalents(NULL),
  fStrictDotEquivalents(NULL),
  fStrictCommaEquivalents(NULL),
  fStrictOtherGroupingSeparators(NULL),
  fStrictDashEquivalents(NULL),
  fDefaultGroupingSeparators(NULL),
  fStrictDefaultGroupingSeparators(NULL),
  fMinusSigns(NULL),
  fPlusSigns(NULL)
{
    fDotEquivalents = new UnicodeSet(UnicodeString(TRUE, gDotEquivalentsPattern, -1), status);
    fCommaEquivalents = new UnicodeSet(UnicodeString(TRUE, gCommaEquivalentsPattern, -1), status);
    fOtherGroupingSeparators = new UnicodeSet(UnicodeString(TRUE, gOtherGroupingSeparatorsPattern, -1), status);
    fDashEquivalents = new UnicodeSet(UnicodeString(TRUE, gDashEquivalentsPattern, -1), status);

    fStrictDotEquivalents = new UnicodeSet(UnicodeString(TRUE, gStrictDotEquivalentsPattern, -1), status);
    fStrictCommaEquivalents = new UnicodeSet(UnicodeString(TRUE, gStrictCommaEquivalentsPattern, -1), status);
    fStrictOtherGroupingSeparators = new UnicodeSet(UnicodeString(TRUE, gStrictOtherGroupingSeparatorsPattern, -1), status);
    fStrictDashEquivalents = new UnicodeSet(UnicodeString(TRUE, gStrictDashEquivalentsPattern, -1), status);

    fMinusSigns = new UnicodeSet();
    fPlusSigns = new UnicodeSet();

    // Check the base sets before any of them is dereferenced to build the unions below.
    if (fDotEquivalents == NULL || fCommaEquivalents == NULL || fOtherGroupingSeparators == NULL || fDashEquivalents == NULL ||
        fStrictDotEquivalents == NULL || fStrictCommaEquivalents == NULL || fStrictOtherGroupingSeparators == NULL || fStrictDashEquivalents == NULL ||
        fMinusSigns == NULL || fPlusSigns == NULL) {
        cleanup();
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    fDefaultGroupingSeparators = new UnicodeSet(*fDotEquivalents);
    fStrictDefaultGroupingSeparators = new UnicodeSet(*fStrictDotEquivalents);
    if (fDefaultGroupingSeparators == NULL || fStrictDefaultGroupingSeparators == NULL) {
        cleanup();
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // Default grouping separators = dot + comma + other grouping separators.
    fDefaultGroupingSeparators->addAll(*fCommaEquivalents);
    fDefaultGroupingSeparators->addAll(*fOtherGroupingSeparators);

    fStrictDefaultGroupingSeparators->addAll(*fStrictCommaEquivalents);
    fStrictDefaultGroupingSeparators->addAll(*fStrictOtherGroupingSeparators);

    initUnicodeSet(
            gMinusSigns,
            UPRV_LENGTHOF(gMinusSigns),
            fMinusSigns);
    initUnicodeSet(
            gPlusSigns,
            UPRV_LENGTHOF(gPlusSigns),
            fPlusSigns);

    // Freeze all the sets
    fDotEquivalents->freeze();
    fCommaEquivalents->freeze();
    fOtherGroupingSeparators->freeze();
    fDashEquivalents->freeze();
    fStrictDotEquivalents->freeze();
    fStrictCommaEquivalents->freeze();
    fStrictOtherGroupingSeparators->freeze();
    fStrictDashEquivalents->freeze();
    fDefaultGroupingSeparators->freeze();
    fStrictDefaultGroupingSeparators->freeze();
    fMinusSigns->freeze();
    fPlusSigns->freeze();
}
// Destructor: releases every owned set by delegating to cleanup().
DecimalFormatStaticSets::~DecimalFormatStaticSets() {
cleanup();
}
// Deletes every owned set and nulls the corresponding member, so the function can be
// called more than once (constructor failure path and destructor).
void DecimalFormatStaticSets::cleanup() { // Be sure to clean up newly added fields!
    delete fDotEquivalents; fDotEquivalents = NULL;
    delete fCommaEquivalents; fCommaEquivalents = NULL;
    delete fOtherGroupingSeparators; fOtherGroupingSeparators = NULL;
    delete fDashEquivalents; fDashEquivalents = NULL;
    delete fStrictDotEquivalents; fStrictDotEquivalents = NULL;
    delete fStrictCommaEquivalents; fStrictCommaEquivalents = NULL;
    delete fStrictOtherGroupingSeparators; fStrictOtherGroupingSeparators = NULL;
    delete fStrictDashEquivalents; fStrictDashEquivalents = NULL;
    delete fDefaultGroupingSeparators; fDefaultGroupingSeparators = NULL;
    delete fStrictDefaultGroupingSeparators; fStrictDefaultGroupingSeparators = NULL;
    delete fMinusSigns; fMinusSigns = NULL;
    delete fPlusSigns; fPlusSigns = NULL;
}
static DecimalFormatStaticSets *gStaticSets;
static icu::UInitOnce gStaticSetsInitOnce = U_INITONCE_INITIALIZER;
//------------------------------------------------------------------------------
//
// decfmt_cleanup Memory cleanup function, free/delete all
// cached memory. Called by ICU's u_cleanup() function.
//
//------------------------------------------------------------------------------
U_CDECL_BEGIN
// Cleanup callback registered by initSets(): deletes the singleton, clears the pointer,
// and resets the init-once guard so the singleton can be created again later.
// Always returns TRUE.
static UBool U_CALLCONV
decimfmt_cleanup(void)
{
delete gStaticSets;
gStaticSets = NULL;
gStaticSetsInitOnce.reset();
return TRUE;
}
// Init-once callback: registers the cleanup hook and creates the singleton. On a
// construction error the singleton stays NULL; if the allocation itself fails, status is
// set to U_MEMORY_ALLOCATION_ERROR.
static void U_CALLCONV initSets(UErrorCode &status) {
    U_ASSERT(gStaticSets == NULL);
    ucln_i18n_registerCleanup(UCLN_I18N_DECFMT, decimfmt_cleanup);
    DecimalFormatStaticSets *created = new DecimalFormatStaticSets(status);
    if (U_FAILURE(status)) {
        // Construction reported an error: discard the partially built object.
        delete created;
        created = NULL;
    } else if (created == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    gStaticSets = created;
}
U_CDECL_END
// Returns the singleton, creating it through initSets() on first use.
// The result is NULL when initialization failed; errors are reported through status.
const DecimalFormatStaticSets *DecimalFormatStaticSets::getStaticSets(UErrorCode &status) {
umtx_initOnce(gStaticSetsInitOnce, initSets, status);
return gStaticSets;
}
// Returns the set of characters equivalent to the given decimal separator: the dot or
// comma equivalents, in their strict or lenient form depending on strictParse.
// Returns NULL if initialization failed or decimal is neither dot-like nor comma-like.
const UnicodeSet *DecimalFormatStaticSets::getSimilarDecimals(UChar32 decimal, UBool strictParse)
{
    UErrorCode status = U_ZERO_ERROR;
    umtx_initOnce(gStaticSetsInitOnce, initSets, status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    const UnicodeSet *lenientMatch = NULL;
    const UnicodeSet *strictMatch = NULL;
    // Membership is always tested against the lenient sets; strictness only selects
    // which variant is handed back.
    if (gStaticSets->fDotEquivalents->contains(decimal)) {
        lenientMatch = gStaticSets->fDotEquivalents;
        strictMatch = gStaticSets->fStrictDotEquivalents;
    } else if (gStaticSets->fCommaEquivalents->contains(decimal)) {
        lenientMatch = gStaticSets->fCommaEquivalents;
        strictMatch = gStaticSets->fStrictCommaEquivalents;
    } else {
        // if there is no match, return NULL
        return NULL;
    }
    return strictParse ? strictMatch : lenientMatch;
}
U_NAMESPACE_END
#endif // !UCONFIG_NO_FORMATTING

View file

@ -1,69 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2009-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* This file contains declarations for the class DecimalFormatStaticSets
*
* DecimalFormatStaticSets holds the UnicodeSets that are needed for lenient
* parsing of decimal and group separators.
********************************************************************************
*/
#ifndef DECFMTST_H
#define DECFMTST_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/uobject.h"
U_NAMESPACE_BEGIN
class UnicodeSet;
// Holder for the UnicodeSets used for lenient and strict parsing of decimal and
// grouping separators and of sign characters.
class DecimalFormatStaticSets : public UMemory
{
public:
// Constructor and Destructor not for general use.
// Public to permit access from plain C implementation functions.
DecimalFormatStaticSets(UErrorCode &status);
~DecimalFormatStaticSets();
/**
* Return a pointer to a lazy-initialized singleton instance of this class.
*/
static const DecimalFormatStaticSets *getStaticSets(UErrorCode &status);
// Returns the dot or comma equivalents set (strict or lenient variant) that matches
// the given decimal separator.
static const UnicodeSet *getSimilarDecimals(UChar32 decimal, UBool strictParse);
// Lenient sets.
UnicodeSet *fDotEquivalents;
UnicodeSet *fCommaEquivalents;
UnicodeSet *fOtherGroupingSeparators;
UnicodeSet *fDashEquivalents;
// Strict counterparts of the sets above.
UnicodeSet *fStrictDotEquivalents;
UnicodeSet *fStrictCommaEquivalents;
UnicodeSet *fStrictOtherGroupingSeparators;
UnicodeSet *fStrictDashEquivalents;
// Unions of the dot, comma and other-grouping-separator sets (lenient / strict).
UnicodeSet *fDefaultGroupingSeparators;
UnicodeSet *fStrictDefaultGroupingSeparators;
// Sign characters.
UnicodeSet *fMinusSigns;
UnicodeSet *fPlusSigns;
private:
// Deletes all owned sets and nulls the members.
void cleanup();
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_FORMATTING
#endif // DECFMTST_H

View file

@ -1,127 +0,0 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
#include "numparse_unisets.h"
#include "numparse_types.h"
#include "umutex.h"
#include "ucln_in.h"
#include "unicode/uniset.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
using namespace icu::numparse::impl::unisets;
namespace {
static UnicodeSet* gUnicodeSets[COUNT] = {};
// Builds a new, frozen UnicodeSet containing the union of the cached sets for k1 and k2.
// A key whose cached set is missing is treated as the empty set rather than being
// dereferenced. Returns nullptr if the result set cannot be allocated.
UnicodeSet* computeUnion(Key k1, Key k2) {
    UnicodeSet* result = new UnicodeSet();
    if (result == nullptr) {
        return nullptr;
    }
    const Key keys[] = {k1, k2};
    for (Key key : keys) {
        const UnicodeSet* part = gUnicodeSets[key];
        if (part != nullptr) {
            result->addAll(*part);
        }
    }
    result->freeze();
    return result;
}
// Three-key overload: builds a new, frozen UnicodeSet containing the union of the cached
// sets for k1, k2 and k3. A key whose cached set is missing is treated as the empty set
// rather than being dereferenced. Returns nullptr if allocation fails.
UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
    UnicodeSet* result = new UnicodeSet();
    if (result == nullptr) {
        return nullptr;
    }
    const Key keys[] = {k1, k2, k3};
    for (Key key : keys) {
        const UnicodeSet* part = gUnicodeSets[key];
        if (part != nullptr) {
            result->addAll(*part);
        }
    }
    result->freeze();
    return result;
}
icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
// Cleanup callback registered by initNumberParseUniSets(): deletes every cached set and
// re-arms the init-once guard so that a later get() rebuilds the cache instead of handing
// out the cleared (null) entries.
UBool U_CALLCONV cleanupNumberParseUniSets() {
    for (int32_t i = 0; i < COUNT; i++) {
        delete gUnicodeSets[i];
        gUnicodeSets[i] = nullptr;
    }
    gNumberParseUniSetsInitOnce.reset();
    return TRUE;
}
// One-time initializer for the static set cache: creates every base set from a hard-coded
// pattern, derives the union sets from them, and freezes the results.
void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
    ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUniSets);

    gUnicodeSets[EMPTY] = new UnicodeSet();

    // These characters are skipped over and ignored at any point in the string, even in strict mode.
    // See ticket #13084.
    gUnicodeSets[BIDI] = new UnicodeSet(u"[[:DI:]]", status);

    // This set was decided after discussion with icu-design@. See ticket #13309.
    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
    gUnicodeSets[WHITESPACE] = new UnicodeSet(u"[[:Zs:][\\u0009]]", status);

    gUnicodeSets[DEFAULT_IGNORABLES] = computeUnion(BIDI, WHITESPACE);
    gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(*gUnicodeSets[BIDI]);

    // TODO: Re-generate these sets from the UCD. They probably haven't been updated in a while.
    gUnicodeSets[COMMA] = new UnicodeSet(u"[,،٫、︐︑﹐﹑,、]", status);
    gUnicodeSets[STRICT_COMMA] = new UnicodeSet(u"[,٫︐﹐,]", status);
    gUnicodeSets[PERIOD] = new UnicodeSet(u"[.․。︒﹒.。]", status);
    gUnicodeSets[STRICT_PERIOD] = new UnicodeSet(u"[.․﹒.。]", status);
    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet(
            u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);
    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
            STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);

    gUnicodeSets[MINUS_SIGN] = new UnicodeSet(u"[-⁻₋−➖﹣-]", status);
    gUnicodeSets[PLUS_SIGN] = new UnicodeSet(u"[+⁺₊➕﬩﹢+]", status);

    gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status);
    gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status);
    gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status);

    gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
    gUnicodeSets[CWCF] = new UnicodeSet(u"[:CWCF:]", status);

    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);

    // Freeze everything that was created; skip any slot left null by a failed allocation.
    for (int32_t i = 0; i < COUNT; i++) {
        if (gUnicodeSets[i] != nullptr) {
            gUnicodeSets[i]->freeze();
        }
    }
}
}
// Returns the cached set for key, running the one-time initialization on first use.
// Yields nullptr when that initialization reported an error.
const UnicodeSet* unisets::get(Key key) {
    UErrorCode initStatus = U_ZERO_ERROR;
    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, initStatus);
    // TODO: This returns non-null in Java, and callers assume that.
    return U_FAILURE(initStatus) ? nullptr : gUnicodeSets[key];
}
// Returns key1 if its set contains str, otherwise COUNT (the "null" key).
// A set that is unavailable (get() returned nullptr) is treated as not containing str.
Key unisets::chooseFrom(UnicodeString str, Key key1) {
    const UnicodeSet* candidate = get(key1);
    return (candidate != nullptr && candidate->contains(str)) ? key1 : COUNT;
}
// Returns the first of key1, key2 whose set contains str, or COUNT if neither does.
// A set that is unavailable (get() returned nullptr) is treated as not containing str.
Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
    const UnicodeSet* candidate = get(key1);
    if (candidate != nullptr && candidate->contains(str)) {
        return key1;
    }
    return chooseFrom(str, key2);
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -15,8 +15,8 @@
#include "unicode/fpositer.h"
#include "unicode/utf16.h"
#include "unicode/uniset.h"
#include "decfmtst.h"
#include "unicode/decimfmt.h"
#include "numparse_unisets.h"
U_NAMESPACE_BEGIN
@ -129,7 +129,6 @@ UnicodeString &ScientificNumberFormatter::SuperscriptStyle::format(
const UnicodeString &original,
FieldPositionIterator &fpi,
const UnicodeString &preExponent,
const DecimalFormatStaticSets &staticSets,
UnicodeString &appendTo,
UErrorCode &status) const {
if (U_FAILURE(status)) {
@ -149,16 +148,17 @@ UnicodeString &ScientificNumberFormatter::SuperscriptStyle::format(
break;
case UNUM_EXPONENT_SIGN_FIELD:
{
using namespace icu::numparse::impl;
int32_t beginIndex = fp.getBeginIndex();
int32_t endIndex = fp.getEndIndex();
UChar32 aChar = original.char32At(beginIndex);
if (staticSets.fMinusSigns->contains(aChar)) {
if (unisets::get(unisets::MINUS_SIGN)->contains(aChar)) {
appendTo.append(
original,
copyFromOffset,
beginIndex - copyFromOffset);
appendTo.append(kSuperscriptMinusSign);
} else if (staticSets.fPlusSigns->contains(aChar)) {
} else if (unisets::get(unisets::PLUS_SIGN)->contains(aChar)) {
appendTo.append(
original,
copyFromOffset,
@ -203,7 +203,6 @@ UnicodeString &ScientificNumberFormatter::MarkupStyle::format(
const UnicodeString &original,
FieldPositionIterator &fpi,
const UnicodeString &preExponent,
const DecimalFormatStaticSets & /*unusedDecimalFormatSets*/,
UnicodeString &appendTo,
UErrorCode &status) const {
if (U_FAILURE(status)) {
@ -243,8 +242,7 @@ ScientificNumberFormatter::ScientificNumberFormatter(
DecimalFormat *fmtToAdopt, Style *styleToAdopt, UErrorCode &status)
: fPreExponent(),
fDecimalFormat(fmtToAdopt),
fStyle(styleToAdopt),
fStaticSets(NULL) {
fStyle(styleToAdopt) {
if (U_FAILURE(status)) {
return;
}
@ -258,7 +256,6 @@ ScientificNumberFormatter::ScientificNumberFormatter(
return;
}
getPreExponent(*sym, fPreExponent);
fStaticSets = DecimalFormatStaticSets::getStaticSets(status);
}
ScientificNumberFormatter::ScientificNumberFormatter(
@ -266,8 +263,7 @@ ScientificNumberFormatter::ScientificNumberFormatter(
: UObject(other),
fPreExponent(other.fPreExponent),
fDecimalFormat(NULL),
fStyle(NULL),
fStaticSets(other.fStaticSets) {
fStyle(NULL) {
fDecimalFormat = static_cast<DecimalFormat *>(
other.fDecimalFormat->clone());
fStyle = other.fStyle->clone();
@ -292,7 +288,6 @@ UnicodeString &ScientificNumberFormatter::format(
original,
fpi,
fPreExponent,
*fStaticSets,
appendTo,
status);
}

View file

@ -27,7 +27,6 @@ It's usually best to have child dependencies called first. */
typedef enum ECleanupI18NType {
UCLN_I18N_START = -1,
UCLN_I18N_NUMBER_SKELETONS,
UCLN_I18N_NUMPARSE_UNISETS,
UCLN_I18N_CURRENCY_SPACING,
UCLN_I18N_SPOOF,
UCLN_I18N_SPOOFDATA,

View file

@ -24,7 +24,6 @@
U_NAMESPACE_BEGIN
class FieldPositionIterator;
class DecimalFormatStaticSets;
class DecimalFormatSymbols;
class DecimalFormat;
class Formattable;
@ -150,7 +149,6 @@ public:
const UnicodeString &original,
FieldPositionIterator &fpi,
const UnicodeString &preExponent,
const DecimalFormatStaticSets &decimalFormatSets,
UnicodeString &appendTo,
UErrorCode &status) const = 0;
private:
@ -165,7 +163,6 @@ public:
const UnicodeString &original,
FieldPositionIterator &fpi,
const UnicodeString &preExponent,
const DecimalFormatStaticSets &decimalFormatSets,
UnicodeString &appendTo,
UErrorCode &status) const;
};
@ -184,7 +181,6 @@ public:
const UnicodeString &original,
FieldPositionIterator &fpi,
const UnicodeString &preExponent,
const DecimalFormatStaticSets &decimalFormatSets,
UnicodeString &appendTo,
UErrorCode &status) const;
private:
@ -211,7 +207,6 @@ public:
UnicodeString fPreExponent;
DecimalFormat *fDecimalFormat;
Style *fStyle;
const DecimalFormatStaticSets *fStaticSets;
};

View file

@ -1412,7 +1412,7 @@ static const char *lenientAffixTestCases[] = {
static const char *lenientMinusTestCases[] = {
"-5",
"\\u22125",
"\\u20105"
"\\u27965"
};
static const char *lenientCurrencyTestCases[] = {

View file

@ -5,7 +5,13 @@ package com.ibm.icu.impl.number.parse;
import java.util.EnumMap;
import java.util.Map;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.impl.UResource.Value;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
/**
* This class statically initializes UnicodeSets useful for number parsing. Microbenchmarks show this to
@ -20,8 +26,6 @@ import com.ibm.icu.text.UnicodeSet;
public class UnicodeSetStaticCache {
public static enum Key {
// Ignorables
BIDI,
WHITESPACE,
DEFAULT_IGNORABLES,
STRICT_IGNORABLES,
@ -47,9 +51,14 @@ public class UnicodeSetStaticCache {
PERMILLE_SIGN,
INFINITY,
// Currency Symbols
DOLLAR_SIGN,
POUND_SIGN,
RUPEE_SIGN,
YEN_SIGN, // not in CLDR data, but Currency.java wants it
// Other
DIGITS,
CWCF, // TODO: Check if this is being used and remove it if not.
// Combined Separators with Digits (for lead code points)
DIGITS_OR_ALL_SEPARATORS,
@ -70,6 +79,20 @@ public class UnicodeSetStaticCache {
return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
}
/**
 * Finds the currency-symbol set that contains the given string.
 *
 * The candidate sets are probed in a fixed order: dollar, pound, rupee, yen.
 *
 * @param str the symbol to look up
 * @return the key of the first currency set containing {@code str}, or
 *         {@code null} when none of the four sets contains it
 */
public static Key chooseCurrency(String str) {
    // Order matters only if the sets overlap; it mirrors the original chain.
    final Key[] candidates = { Key.DOLLAR_SIGN, Key.POUND_SIGN, Key.RUPEE_SIGN, Key.YEN_SIGN };
    for (Key candidate : candidates) {
        if (get(candidate).contains(str)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Builds a frozen set holding every element of the two cached sets.
 *
 * @param k1 key of the first cached set
 * @param k2 key of the second cached set
 * @return a new frozen UnicodeSet that is the union of both sets
 */
private static UnicodeSet computeUnion(Key k1, Key k2) {
    UnicodeSet union = new UnicodeSet();
    union.addAll(get(k1));
    union.addAll(get(k2));
    return union.freeze();
}
@ -78,23 +101,98 @@ public class UnicodeSetStaticCache {
return new UnicodeSet().addAll(get(k1)).addAll(get(k2)).addAll(get(k3)).freeze();
}
/**
 * Parses a UnicodeSet pattern, freezes the result and stores it under the given key.
 *
 * With assertions enabled, this fails if the key already has a set stored.
 *
 * @param key               the cache slot to fill
 * @param unicodeSetPattern the UnicodeSet pattern to parse
 */
private static void saveSet(Key key, String unicodeSetPattern) {
    assert unicodeSets.get(key) == null;
    UnicodeSet frozenSet = new UnicodeSet(unicodeSetPattern).freeze();
    unicodeSets.put(key, frozenSet);
}
/*
parse{
date{
lenient{
"[\\--/]",
"[\\:]",
}
}
general{
lenient{
"[.․。︒﹒.。]",
"[\$﹩$$]",
"[£₤]",
"[₨₹{Rp}{Rs}]",
}
}
number{
lenient{
"[\\-‒⁻₋−➖﹣-]",
"[,،٫、︐︑﹐﹑,、]",
"[+⁺₊➕﬩﹢+]",
}
stricter{
"[,٫︐﹐,]",
"[.․﹒.。]",
}
}
}
*/
static class ParseDataSink extends UResource.Sink {
@Override
public void put(com.ibm.icu.impl.UResource.Key key, Value value, boolean noFallback) {
UResource.Table contextsTable = value.getTable();
for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) {
if (key.contentEquals("date")) {
// ignore
} else {
assert key.contentEquals("general") || key.contentEquals("number");
UResource.Table strictnessTable = value.getTable();
for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) {
boolean isLenient = key.contentEquals("lenient");
UResource.Array array = value.getArray();
for (int k = 0; k < array.getSize(); k++) {
array.getValue(k, value);
String str = value.toString();
// There is both lenient and strict data for comma/period,
// but not for any of the other symbols.
if (str.indexOf('.') != -1) {
saveSet(isLenient ? Key.PERIOD : Key.STRICT_PERIOD, str);
} else if (str.indexOf(',') != -1) {
saveSet(isLenient ? Key.COMMA : Key.STRICT_COMMA, str);
} else if (str.indexOf('+') != -1) {
saveSet(Key.PLUS_SIGN, str);
} else if (str.indexOf('') != -1) {
saveSet(Key.MINUS_SIGN, str);
} else if (str.indexOf('$') != -1) {
saveSet(Key.DOLLAR_SIGN, str);
} else if (str.indexOf('£') != -1) {
saveSet(Key.POUND_SIGN, str);
} else if (str.indexOf('₨') != -1) {
saveSet(Key.RUPEE_SIGN, str);
}
}
}
}
}
}
}
static {
// These characters are skipped over and ignored at any point in the string, even in strict mode.
// See ticket #13084.
unicodeSets.put(Key.BIDI, new UnicodeSet("[[:DI:]]").freeze());
// This set was decided after discussion with icu-design@. See ticket #13309.
// These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
unicodeSets.put(Key.WHITESPACE, new UnicodeSet("[[:Zs:][\\u0009]]").freeze());
unicodeSets.put(Key.DEFAULT_IGNORABLES,
new UnicodeSet("[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]").freeze());
unicodeSets.put(Key.STRICT_IGNORABLES, new UnicodeSet("[[:Bidi_Control:]]").freeze());
unicodeSets.put(Key.DEFAULT_IGNORABLES, computeUnion(Key.BIDI, Key.WHITESPACE));
unicodeSets.put(Key.STRICT_IGNORABLES, get(Key.BIDI));
// CLDR provides data for comma, period, minus sign, and plus sign.
ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle
.getBundleInstance(ICUData.ICU_BASE_NAME, ULocale.ROOT);
rb.getAllItemsWithFallback("parse", new ParseDataSink());
// TODO: Should there be fallback behavior if for some reason these sets didn't get populated?
assert unicodeSets.containsKey(Key.COMMA);
assert unicodeSets.containsKey(Key.STRICT_COMMA);
assert unicodeSets.containsKey(Key.PERIOD);
assert unicodeSets.containsKey(Key.STRICT_PERIOD);
// TODO: Re-generate these sets from the UCD. They probably haven't been updated in a while.
unicodeSets.put(Key.COMMA, new UnicodeSet("[,،٫、︐︑﹐﹑,、]").freeze());
unicodeSets.put(Key.STRICT_COMMA, new UnicodeSet("[,٫︐﹐,]").freeze());
unicodeSets.put(Key.PERIOD, new UnicodeSet("[.․。︒﹒.。]").freeze());
unicodeSets.put(Key.STRICT_PERIOD, new UnicodeSet("[.․﹒.。]").freeze());
unicodeSets.put(Key.OTHER_GROUPING_SEPARATORS,
new UnicodeSet("['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]").freeze());
unicodeSets.put(Key.ALL_SEPARATORS,
@ -102,15 +200,19 @@ public class UnicodeSetStaticCache {
unicodeSets.put(Key.STRICT_ALL_SEPARATORS,
computeUnion(Key.STRICT_COMMA, Key.STRICT_PERIOD, Key.OTHER_GROUPING_SEPARATORS));
unicodeSets.put(Key.MINUS_SIGN, new UnicodeSet("[-⁻₋−➖﹣-]").freeze());
unicodeSets.put(Key.PLUS_SIGN, new UnicodeSet("[+⁺₊➕﬩﹢+]").freeze());
assert unicodeSets.containsKey(Key.MINUS_SIGN);
assert unicodeSets.containsKey(Key.PLUS_SIGN);
unicodeSets.put(Key.PERCENT_SIGN, new UnicodeSet("[%٪]").freeze());
unicodeSets.put(Key.PERMILLE_SIGN, new UnicodeSet("[‰؉]").freeze());
unicodeSets.put(Key.INFINITY, new UnicodeSet("[∞]").freeze());
assert unicodeSets.containsKey(Key.DOLLAR_SIGN);
assert unicodeSets.containsKey(Key.POUND_SIGN);
assert unicodeSets.containsKey(Key.RUPEE_SIGN);
unicodeSets.put(Key.YEN_SIGN, new UnicodeSet("\\uffe5]").freeze());
unicodeSets.put(Key.DIGITS, new UnicodeSet("[:digit:]").freeze());
unicodeSets.put(Key.CWCF, new UnicodeSet("[:CWCF:]").freeze());
unicodeSets.put(Key.DIGITS_OR_ALL_SEPARATORS, computeUnion(Key.DIGITS, Key.ALL_SEPARATORS));
unicodeSets.put(Key.DIGITS_OR_STRICT_ALL_SEPARATORS,

View file

@ -14,7 +14,6 @@ import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@ -31,10 +30,12 @@ import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.SimpleCache;
import com.ibm.icu.impl.SoftCache;
import com.ibm.icu.impl.TextTrieMap;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
import com.ibm.icu.text.CurrencyDisplayNames;
import com.ibm.icu.text.CurrencyMetaInfo;
import com.ibm.icu.text.CurrencyMetaInfo.CurrencyDigits;
import com.ibm.icu.text.CurrencyMetaInfo.CurrencyFilter;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale.Category;
/**
@ -98,13 +99,6 @@ public class Currency extends MeasureUnit {
*/
public static final int NARROW_SYMBOL_NAME = 3;
private static final EquivalenceRelation<String> EQUIVALENT_CURRENCY_SYMBOLS =
new EquivalenceRelation<String>()
.add("\u00a5", "\uffe5")
.add("$", "\ufe69", "\uff04")
.add("\u20a8", "\u20b9")
.add("\u00a3", "\u20a4");
/**
* Currency Usage used for Decimal Format
* @stable ICU 54
@ -778,8 +772,16 @@ public class Currency extends MeasureUnit {
String isoCode = e.getValue();
// Register under not just symbol, but under every equivalent symbol as well
// e.g short width yen and long width yen.
for (String equivalentSymbol : EQUIVALENT_CURRENCY_SYMBOLS.get(symbol)) {
symTrie.put(equivalentSymbol, new CurrencyStringInfo(isoCode, symbol));
UnicodeSetStaticCache.Key key = UnicodeSetStaticCache.chooseCurrency(symbol);
CurrencyStringInfo value = new CurrencyStringInfo(isoCode, symbol);
if (key != null) {
UnicodeSet equivalents = UnicodeSetStaticCache.get(key);
// The symbol itself is included in the UnicodeSet
for (String equivalentSymbol : equivalents) {
symTrie.put(equivalentSymbol, value);
}
} else {
symTrie.put(symbol, value);
}
}
for (Map.Entry<String, String> e : names.nameMap().entrySet()) {
@ -1039,34 +1041,6 @@ public class Currency extends MeasureUnit {
return info.currencies(filter.withTender());
}
/**
 * A collection of disjoint groups of items, queryable by any member.
 *
 * @param <T> the item type; used as a hash key
 */
private static final class EquivalenceRelation<T> {
    // Maps each registered item to the (shared) group it belongs to.
    private Map<T, Set<T>> data = new HashMap<T, Set<T>>();

    /**
     * Registers a new group of mutually equivalent items.
     *
     * @param items the members of the group
     * @return this relation, to allow chaining
     * @throws IllegalArgumentException if any item already belongs to a
     *         previously added group; nothing is registered in that case
     */
    @SuppressWarnings("unchecked") // See ticket #11395, this is safe.
    public EquivalenceRelation<T> add(T... items) {
        // Validate everything up front so a failure leaves the map untouched.
        for (T candidate : items) {
            if (data.containsKey(candidate)) {
                throw new IllegalArgumentException("All groups passed to add must be disjoint.");
            }
        }
        Set<T> members = new HashSet<T>();
        Collections.addAll(members, items);
        for (T member : items) {
            data.put(member, members);
        }
        return this;
    }

    /**
     * Looks up the group an item belongs to.
     *
     * @param item the item to look up
     * @return an unmodifiable view of the item's group, or a singleton set
     *         containing only {@code item} when it was never registered
     */
    public Set<T> get(T item) {
        Set<T> members = data.get(item);
        return (members != null)
                ? Collections.unmodifiableSet(members)
                : Collections.singleton(item);
    }
}
/**
 * Serialization hook: substitutes a MeasureUnitProxy built from this
 * object's {@code type} and {@code subType} for the object itself.
 *
 * @return the proxy object to serialize in place of this instance
 * @throws ObjectStreamException declared by the serialization contract
 */
private Object writeReplace() throws ObjectStreamException {
    MeasureUnitProxy proxy = new MeasureUnitProxy(type, subType);
    return proxy;
}

View file

@ -1764,7 +1764,7 @@ public class NumberFormatTest extends TestFmwk {
}
// Test default ignorable characters. These should work in both lenient and strict.
UnicodeSet defaultIgnorables = new UnicodeSet("[[:Default_Ignorable_Code_Point:]]").freeze();
UnicodeSet defaultIgnorables = new UnicodeSet("[[:Bidi_Control:]]").freeze();
fmt.setParseStrict(false);
for (String ignorable : defaultIgnorables) {
String str = "a b " + ignorable + "1234c ";