ICU-13697 Renaming numparse_unisets.h to static_unicode_sets.h and refactoring to guarantee safety in no-data builds.

X-SVN-Rev: 41389
This commit is contained in:
Shane Carr 2018-05-16 22:46:40 +00:00
parent bebf3c0983
commit 2e2805df9a
38 changed files with 282 additions and 151 deletions

View file

@ -112,7 +112,7 @@ ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o ulistformatter.o \
sharedobject.o simpleformatter.o unifiedcache.o uloc_keytype.o \
ubiditransform.o \
pluralmap.o \
numparse_unisets.o
static_unicode_sets.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h

View file

@ -334,7 +334,7 @@
<ClCompile Include="utf_impl.cpp" />
<ClCompile Include="listformatter.cpp" />
<ClCompile Include="ulistformatter.cpp" />
<ClCompile Include="numparse_unisets.cpp" />
<ClCompile Include="static_unicode_sets.cpp" />
<ClInclude Include="localsvc.h" />
<ClInclude Include="msvcres.h" />
<ClInclude Include="pluralmap.h" />
@ -440,7 +440,7 @@
<ClInclude Include="uinvchar.h" />
<ClInclude Include="ustr_cnv.h" />
<ClInclude Include="ustr_imp.h" />
<ClInclude Include="numparse_unisets.h" />
<ClInclude Include="static_unicode_sets.h" />
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="common.rc" />

View file

@ -607,7 +607,7 @@
<ClCompile Include="ubiditransform.cpp">
<Filter>bidi</Filter>
</ClCompile>
<ClCompile Include="numparse_unisets.cpp">
<ClCompile Include="static_unicode_sets.cpp">
<Filter>formatting</Filter>
</ClCompile>
</ItemGroup>
@ -939,7 +939,7 @@
<ClInclude Include="unicode\ubiditransform.h">
<Filter>bidi</Filter>
</ClInclude>
<ClInclude Include="numparse_unisets.h">
<ClInclude Include="static_unicode_sets.h">
<Filter>formatting</Filter>
</ClInclude>
</ItemGroup>

View file

@ -459,7 +459,7 @@
<ClCompile Include="utf_impl.cpp" />
<ClCompile Include="listformatter.cpp" />
<ClCompile Include="ulistformatter.cpp" />
<ClCompile Include="numparse_unisets.cpp" />
<ClCompile Include="static_unicode_sets.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="localsvc.h" />
@ -566,7 +566,7 @@
<ClInclude Include="uinvchar.h" />
<ClInclude Include="ustr_cnv.h" />
<ClInclude Include="ustr_imp.h" />
<ClInclude Include="numparse_unisets.h" />
<ClInclude Include="static_unicode_sets.h" />
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="common.rc" />

View file

@ -9,7 +9,7 @@
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
#include "umutex.h"
#include "ucln_cmn.h"
#include "unicode/uniset.h"
@ -18,22 +18,35 @@
#include "uassert.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
using namespace icu::numparse::impl::unisets;
using namespace icu::unisets;
namespace {
static UnicodeSet* gUnicodeSets[COUNT] = {};
UnicodeSet* gUnicodeSets[COUNT] = {};
// Save the empty instance in static memory to have well-defined behavior if a
// regular UnicodeSet cannot be allocated.
char gEmptyUnicodeSet[sizeof(UnicodeSet)];
// Whether the gEmptyUnicodeSet is initialized and ready to use.
UBool gEmptyUnicodeSetInitialized = FALSE;
/**
 * Returns the cached set stored for `key`, or the static empty set when no
 * set is available for that key.
 *
 * @param key Index into gUnicodeSets. Keys outside [0, COUNT) -- notably
 *            NONE (-1), which chooseFrom() returns for "no match" -- are
 *            not valid array indices and yield the empty set instead of
 *            reading out of bounds.
 * @return Never nullptr; either the populated slot or the fallback stored in
 *         gEmptyUnicodeSet.
 */
inline UnicodeSet* getImpl(Key key) {
    UnicodeSet* emptySet = reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
    if (key < 0 || key >= COUNT) {
        return emptySet;
    }
    UnicodeSet* candidate = gUnicodeSets[key];
    // A slot stays null when its data could not be loaded (e.g. no-data build).
    return candidate != nullptr ? candidate : emptySet;
}
UnicodeSet* computeUnion(Key k1, Key k2) {
UnicodeSet* result = new UnicodeSet();
if (result == nullptr) {
return nullptr;
}
result->addAll(*gUnicodeSets[k1]);
result->addAll(*gUnicodeSets[k2]);
result->addAll(*getImpl(k1));
result->addAll(*getImpl(k2));
result->freeze();
return result;
}
@ -43,9 +56,9 @@ UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
if (result == nullptr) {
return nullptr;
}
result->addAll(*gUnicodeSets[k1]);
result->addAll(*gUnicodeSets[k2]);
result->addAll(*gUnicodeSets[k3]);
result->addAll(*getImpl(k1));
result->addAll(*getImpl(k2));
result->addAll(*getImpl(k3));
result->freeze();
return result;
}
@ -104,6 +117,10 @@ class ParseDataSink : public ResourceSink {
icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
UBool U_CALLCONV cleanupNumberParseUniSets() {
if (gEmptyUnicodeSetInitialized) {
reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->~UnicodeSet();
gEmptyUnicodeSetInitialized = FALSE;
}
for (int32_t i = 0; i < COUNT; i++) {
delete gUnicodeSets[i];
gUnicodeSets[i] = nullptr;
@ -115,7 +132,10 @@ UBool U_CALLCONV cleanupNumberParseUniSets() {
void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
gUnicodeSets[EMPTY] = new UnicodeSet();
// Initialize the empty instance for well-defined fallback behavior
new(gEmptyUnicodeSet) UnicodeSet();
reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze();
gEmptyUnicodeSetInitialized = TRUE;
// These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
@ -129,7 +149,7 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status);
if (U_FAILURE(status)) { return; }
// TODO: Should there be fallback behavior if for some reason these sets didn't get populated?
// NOTE: It is OK for these assertions to fail if there was a no-data build.
U_ASSERT(gUnicodeSets[COMMA] != nullptr);
U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
@ -158,8 +178,10 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
for (int32_t i = 0; i < COUNT; i++) {
gUnicodeSets[i]->freeze();
for (auto* uniset : gUnicodeSets) {
if (uniset != nullptr) {
uniset->freeze();
}
}
}
@ -169,14 +191,13 @@ const UnicodeSet* unisets::get(Key key) {
UErrorCode localStatus = U_ZERO_ERROR;
umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus);
if (U_FAILURE(localStatus)) {
// TODO: This returns non-null in Java, and callers assume that.
return nullptr;
return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
}
return gUnicodeSets[key];
return getImpl(key);
}
Key unisets::chooseFrom(UnicodeString str, Key key1) {
return get(key1)->contains(str) ? key1 : COUNT;
return get(key1)->contains(str) ? key1 : NONE;
}
Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
@ -193,7 +214,7 @@ Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
// } else if (get(YEN_SIGN)->contains(str)) {
// return YEN_SIGN;
// } else {
// return COUNT;
// return NONE;
// }
//}

View file

@ -6,18 +6,20 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#ifndef __NUMPARSE_UNISETS_H__
#define __NUMPARSE_UNISETS_H__
#ifndef __STATIC_UNICODE_SETS_H__
#define __STATIC_UNICODE_SETS_H__
#include "unicode/uniset.h"
#include "unicode/unistr.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
U_NAMESPACE_BEGIN
namespace unisets {
enum Key {
EMPTY,
// NONE is used to indicate null in chooseFrom().
// EMPTY is used to get an empty UnicodeSet.
NONE = -1,
EMPTY = 0,
// Ignorables
DEFAULT_IGNORABLES,
@ -57,17 +59,44 @@ enum Key {
DIGITS_OR_ALL_SEPARATORS,
DIGITS_OR_STRICT_ALL_SEPARATORS,
// The number of elements in the enum. Also used to indicate null.
// The number of elements in the enum.
COUNT
};
// Exported as U_COMMON_API for ucurr.cpp
/**
* Gets the static-allocated UnicodeSet according to the provided key. The
* pointer will be deleted during u_cleanup(); the caller should NOT delete it.
*
* Exported as U_COMMON_API for ucurr.cpp
*
* @param key The desired UnicodeSet according to the enum in this file.
* @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but
* may be empty if an error occurred during data loading.
*/
U_COMMON_API const UnicodeSet* get(Key key);
// Exported as U_COMMON_API for numparse_decimal.cpp
/**
* Checks if the UnicodeSet given by key1 contains the given string.
*
* Exported as U_COMMON_API for numparse_decimal.cpp
*
* @param str The string to check.
* @param key1 The set to check.
* @return key1 if the set contains str, or NONE if not.
*/
U_COMMON_API Key chooseFrom(UnicodeString str, Key key1);
// Exported as U_COMMON_API for numparse_decimal.cpp
/**
* Checks if the UnicodeSet given by either key1 or key2 contains the string.
*
* Exported as U_COMMON_API for numparse_decimal.cpp
*
* @param str The string to check.
* @param key1 The first set to check.
* @param key2 The second set to check.
* @return key1 if that set contains str; key2 if that set contains str; or
* NONE if neither set contains str.
*/
U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2);
// Unused in C++:
@ -84,9 +113,7 @@ static const struct {
};
} // namespace unisets
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
#endif //__NUMPARSE_UNISETS_H__
#endif //__STATIC_UNICODE_SETS_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -23,7 +23,7 @@
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
#include "uassert.h"
#include "umutex.h"
#include "ucln_cmn.h"
@ -2201,7 +2201,6 @@ static void U_CALLCONV initIsoCodes(UErrorCode &status) {
}
static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) {
using namespace icu::numparse::impl;
if (U_FAILURE(status)) { return; }
for (auto& entry : unisets::kCurrencyEntries) {
UnicodeString exemplar(entry.exemplar);

View file

@ -11,7 +11,7 @@
#include "numparse_types.h"
#include "numparse_decimal.h"
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
#include "numparse_utils.h"
#include "unicode/uchar.h"
#include "putilimp.h"
@ -41,7 +41,7 @@ DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Groupe
decimalSeparator,
strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA,
strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
if (decimalKey != unisets::COUNT) {
if (decimalKey >= 0) {
decimalUniSet = unisets::get(decimalKey);
} else {
auto* set = new UnicodeSet();
@ -51,7 +51,7 @@ DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Groupe
fLocalDecimalUniSet.adoptInstead(set);
}
if (groupingKey != unisets::COUNT && decimalKey != unisets::COUNT) {
if (groupingKey >= 0 && decimalKey >= 0) {
// Everything is available in the static cache
separatorSet = groupingUniSet;
leadSet = unisets::get(

View file

@ -21,7 +21,7 @@
#include "unicode/numberformatter.h"
#include "cstr.h"
#include "number_mapper.h"
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
using namespace icu;
using namespace icu::number;

View file

@ -11,7 +11,7 @@
#include "numparse_types.h"
#include "numparse_scientific.h"
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
using namespace icu;
using namespace icu::numparse;

View file

@ -9,7 +9,7 @@
#include "numparse_types.h"
#include "unicode/uniset.h"
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {

View file

@ -11,7 +11,7 @@
#include "numparse_types.h"
#include "numparse_validators.h"
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
using namespace icu;
using namespace icu::numparse;

View file

@ -8,7 +8,7 @@
#define __SOURCE_NUMPARSE_VALIDATORS_H__
#include "numparse_types.h"
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {

View file

@ -16,7 +16,7 @@
#include "unicode/utf16.h"
#include "unicode/uniset.h"
#include "unicode/decimfmt.h"
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
U_NAMESPACE_BEGIN

View file

@ -64,8 +64,9 @@ scientificnumberformattertest.o datadrivennumberformattestsuite.o \
numberformattesttuple.o pluralmaptest.o \
numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o \
numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o
numbertest_stringbuilder.o numbertest_stringsegment.o \
numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o \
static_unisets_test.o
DEPS = $(OBJECTS:.o=.d)

View file

@ -1901,9 +1901,13 @@ UBool IntlTest::assertTrue(const char* message, UBool condition, UBool quiet, UB
return condition;
}
UBool IntlTest::assertFalse(const char* message, UBool condition, UBool quiet) {
UBool IntlTest::assertFalse(const char* message, UBool condition, UBool quiet, UBool possibleDataError) {
if (condition) {
errln("FAIL: assertFalse() failed: %s", message);
// possibleDataError selects dataerrln() over errln() for the failure report.
// The text must name assertFalse(), the assertion that actually failed.
if (possibleDataError) {
    dataerrln("FAIL: assertFalse() failed: %s", message);
} else {
    errln("FAIL: assertFalse() failed: %s", message);
}
} else if (!quiet) {
logln("Ok: %s", message);
}
@ -2111,12 +2115,12 @@ static const char* extractToAssertBuf(const UnicodeString& message) {
return ASSERT_BUF;
}
UBool IntlTest::assertTrue(const UnicodeString& message, UBool condition, UBool quiet) {
return assertTrue(extractToAssertBuf(message), condition, quiet);
UBool IntlTest::assertTrue(const UnicodeString& message, UBool condition, UBool quiet, UBool possibleDataError) {
return assertTrue(extractToAssertBuf(message), condition, quiet, possibleDataError);
}
UBool IntlTest::assertFalse(const UnicodeString& message, UBool condition, UBool quiet) {
return assertFalse(extractToAssertBuf(message), condition, quiet);
UBool IntlTest::assertFalse(const UnicodeString& message, UBool condition, UBool quiet, UBool possibleDataError) {
return assertFalse(extractToAssertBuf(message), condition, quiet, possibleDataError);
}
UBool IntlTest::assertSuccess(const UnicodeString& message, UErrorCode ec) {

View file

@ -282,7 +282,7 @@ public:
/* JUnit-like assertions. Each returns TRUE if it succeeds. */
UBool assertTrue(const char* message, UBool condition, UBool quiet=FALSE, UBool possibleDataError=FALSE, const char *file=NULL, int line=0);
UBool assertFalse(const char* message, UBool condition, UBool quiet=FALSE);
UBool assertFalse(const char* message, UBool condition, UBool quiet=FALSE, UBool possibleDataError=FALSE);
/**
* @param possibleDataError - if TRUE, use dataerrln instead of errcheckln on failure
* @return TRUE on success, FALSE on failure.
@ -303,8 +303,8 @@ public:
UBool assertEquals(const UnicodeString& message, const Formattable& expected,
const Formattable& actual);
#endif
UBool assertTrue(const UnicodeString& message, UBool condition, UBool quiet=FALSE);
UBool assertFalse(const UnicodeString& message, UBool condition, UBool quiet=FALSE);
UBool assertTrue(const UnicodeString& message, UBool condition, UBool quiet=FALSE, UBool possibleDataError=FALSE);
UBool assertFalse(const UnicodeString& message, UBool condition, UBool quiet=FALSE, UBool possibleDataError=FALSE);
UBool assertSuccess(const UnicodeString& message, UErrorCode ec);
UBool assertEquals(const UnicodeString& message, const UnicodeString& expected,
const UnicodeString& actual, UBool possibleDataError=FALSE);

View file

@ -253,7 +253,6 @@
<ClCompile Include="numbertest_patternstring.cpp" />
<ClCompile Include="numbertest_stringbuilder.cpp" />
<ClCompile Include="numbertest_stringsegment.cpp" />
<ClCompile Include="numbertest_unisets.cpp" />
<ClCompile Include="numbertest_parse.cpp" />
<ClCompile Include="numbertest_doubleconversion.cpp" />
<ClCompile Include="numbertest_skeletons.cpp" />
@ -270,8 +269,9 @@
<ClCompile Include="scientificnumberformattertest.cpp" />
<ClCompile Include="sdtfmtts.cpp" />
<ClCompile Include="selfmts.cpp" />
<ClCompile Include="tchcfmt.cpp" />
<ClCompile Include="simpleformattertest.cpp" />
<ClCompile Include="static_unisets_test.cpp" />
<ClCompile Include="tchcfmt.cpp" />
<ClCompile Include="tfsmalls.cpp" />
<ClCompile Include="tmsgfmt.cpp" />
<ClCompile Include="tsdate.cpp" />

View file

@ -280,9 +280,6 @@
<ClCompile Include="numbertest_stringsegment.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="numbertest_unisets.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="numbertest_parse.cpp">
<Filter>formatting</Filter>
</ClCompile>
@ -328,6 +325,9 @@
<ClCompile Include="simpleformattertest.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="static_unisets_test.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="tchcfmt.cpp">
<Filter>formatting</Filter>
</ClCompile>

View file

@ -39,7 +39,8 @@ static IntlTest *createEnumSetTest();
extern IntlTest *createSimpleFormatterTest();
extern IntlTest *createUnifiedCacheTest();
extern IntlTest *createQuantityFormatterTest();
extern IntlTest *createPluralMapTest();
extern IntlTest *createPluralMapTest();
extern IntlTest *createStaticUnicodeSetsTest();
#define CASE(id, test) case id: \
@ -135,6 +136,14 @@ void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &
callTest(*test, par);
}
break;
case 24:
name = "StaticUnicodeSetsTest";
if (exec) {
logln("TestSuite StaticUnicodeSetsTest---"); logln();
LocalPointer<IntlTest> test(createStaticUnicodeSetsTest());
callTest(*test, par);
}
break;
default: name = ""; break; //needed to end loop
}
}

View file

@ -214,19 +214,6 @@ class StringSegmentTest : public IntlTest {
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
};
class UniSetsTest : public IntlTest {
public:
void testSetCoverage();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
private:
void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
const UnicodeSet& set, const UnicodeString& str);
void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
const UnicodeSet& set, UChar32 cp);
};
class NumberParserTest : public IntlTest {
public:
void testBasic();
@ -287,9 +274,8 @@ class NumberTest : public IntlTest {
TESTCLASS(6, NumberStringBuilderTest);
TESTCLASS(7, DoubleConversionTest);
TESTCLASS(8, StringSegmentTest);
TESTCLASS(9, UniSetsTest);
TESTCLASS(10, NumberParserTest);
TESTCLASS(11, NumberSkeletonTest);
TESTCLASS(9, NumberParserTest);
TESTCLASS(10, NumberSkeletonTest);
default: name = ""; break; // needed to end loop
}
}

View file

@ -7,14 +7,14 @@
#include "numbertest.h"
#include "numparse_impl.h"
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
#include "unicode/dcfmtsym.h"
#include "unicode/testlog.h"
#include <cmath>
#include <numparse_affixes.h>
using icu::numparse::impl::unisets::get;
using icu::unisets::get;
void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
if (exec) {

View file

@ -6,21 +6,40 @@
#if !UCONFIG_NO_FORMATTING
#include "numbertest.h"
#include "numparse_unisets.h"
#include "static_unicode_sets.h"
#include "unicode/dcfmtsym.h"
using icu::numparse::impl::unisets::get;
using icu::unisets::get;
void UniSetsTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) {
// IntlTest suite for the sets exposed through static_unicode_sets.h.
class StaticUnicodeSetsTest : public IntlTest {
public:
void testSetCoverage();
// Checks that every key other than EMPTY maps to a non-empty set.
void testNonEmpty();
// Dispatches to the test cases above by index.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
private:
// String overload: ignores strings that are not exactly one code point and
// otherwise forwards the first code point to the UChar32 overload.
void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
const UnicodeSet& set, const UnicodeString& str);
void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
const UnicodeSet& set, UChar32 cp);
};
// Factory for the suite. Returns a heap-allocated instance; the caller in
// IntlTestUtilities::runIndexedTest adopts it into a LocalPointer.
extern IntlTest *createStaticUnicodeSetsTest() {
return new StaticUnicodeSetsTest();
}
void StaticUnicodeSetsTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) {
if (exec) {
logln("TestSuite UniSetsTest: ");
logln("TestSuite StaticUnicodeSetsTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testSetCoverage);
TESTCASE_AUTO(testNonEmpty);
TESTCASE_AUTO_END;
}
void UniSetsTest::testSetCoverage() {
void StaticUnicodeSetsTest::testSetCoverage() {
UErrorCode status = U_ZERO_ERROR;
// Lenient comma/period should be supersets of strict comma/period;
@ -67,7 +86,18 @@ void UniSetsTest::testSetCoverage() {
}
}
void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
void StaticUnicodeSetsTest::testNonEmpty() {
for (int32_t i=0; i<unisets::COUNT; i++) {
if (i == unisets::EMPTY) {
continue;
}
const UnicodeSet* uset = get(static_cast<unisets::Key>(i));
// Can fail if no data:
assertFalse(UnicodeString("Set should not be empty: ") + i, uset->isEmpty(), FALSE, TRUE);
}
}
void StaticUnicodeSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
const UnicodeSet &set, const UnicodeString &str) {
if (str.countChar32(0, str.length()) != 1) {
// Ignore locale strings with more than one code point (usually a bidi mark)
@ -76,7 +106,7 @@ void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeStri
assertInSet(localeName, setName, set, str.char32At(0));
}
void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
void StaticUnicodeSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
const UnicodeSet &set, UChar32 cp) {
// If this test case fails, add the specified code point to the corresponding set in
// StaticUnicodeSets.java and static_unicode_sets.cpp

View file

@ -1,29 +1,26 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
package com.ibm.icu.impl;
import java.util.EnumMap;
import java.util.Map;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.impl.UResource.Value;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
/**
* This class statically initializes UnicodeSets useful for number parsing. Microbenchmarks show this to
* bring a very sizeable performance boost.
* This class statically initializes UnicodeSets, originally built for number parsing. Microbenchmarks
* show this to bring a very sizeable performance boost.
*
* IMPORTANT ASSUMPTION: All of the sets contain code points (no strings) and they are all case-folded.
* If this assumption were ever broken, logic in classes such as SymbolMatcher would need to be updated
* in order to return well-formed sets upon calls to getLeadCodePoints().
* IMPORTANT ASSUMPTION FOR NUMBER PARSING: All of the sets contain code points (no strings) and they are
* all case-folded. If this assumption were ever broken, logic in classes such as SymbolMatcher would
* need to be updated in order to return well-formed sets upon calls to getLeadCodePoints().
*
* @author sffc
*/
public class UnicodeSetStaticCache {
public class StaticUnicodeSets {
public static enum Key {
// Ignorables
DEFAULT_IGNORABLES,
@ -67,18 +64,57 @@ public class UnicodeSetStaticCache {
private static final Map<Key, UnicodeSet> unicodeSets = new EnumMap<Key, UnicodeSet>(Key.class);
/**
* Gets the static-allocated UnicodeSet according to the provided key.
*
* @param key
* The desired UnicodeSet according to the enum in this file.
* @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but may be empty if an
* error occurred during data loading.
*/
public static UnicodeSet get(Key key) {
return unicodeSets.get(key);
UnicodeSet candidate = unicodeSets.get(key);
if (candidate == null) {
return UnicodeSet.EMPTY;
}
return candidate;
}
/**
* Checks if the UnicodeSet given by key1 contains the given string.
*
* @param str
* The string to check.
* @param key1
* The set to check.
* @return key1 if the set contains str, or null if not.
*/
public static Key chooseFrom(String str, Key key1) {
return get(key1).contains(str) ? key1 : null;
}
/**
* Checks if the UnicodeSet given by either key1 or key2 contains the string.
*
* Used by DecimalMatcher to look up separators in the static cache.
*
* @param str
* The string to check.
* @param key1
* The first set to check.
* @param key2
* The second set to check.
* @return key1 if that set contains str; key2 if that set contains str; or null if neither set
* contains str.
*/
public static Key chooseFrom(String str, Key key1, Key key2) {
return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
}
/**
* Looks through all Currency-related sets for the given string, returning the first match or null if
* no match was found.
*/
public static Key chooseCurrency(String str) {
if (get(Key.DOLLAR_SIGN).contains(str)) {
return Key.DOLLAR_SIGN;
@ -187,7 +223,7 @@ public class UnicodeSetStaticCache {
.getBundleInstance(ICUData.ICU_BASE_NAME, ULocale.ROOT);
rb.getAllItemsWithFallback("parse", new ParseDataSink());
// TODO: Should there be fallback behavior if for some reason these sets didn't get populated?
// NOTE: It is OK for these assertions to fail if there was a no-data build.
assert unicodeSets.containsKey(Key.COMMA);
assert unicodeSets.containsKey(Key.STRICT_COMMA);
assert unicodeSets.containsKey(Key.PERIOD);

View file

@ -3,9 +3,10 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.impl.StaticUnicodeSets.Key;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
import com.ibm.icu.impl.number.Grouper;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
@ -63,12 +64,12 @@ public class DecimalMatcher implements NumberParseMatcher {
// Attempt to find separators in the static cache
groupingUniSet = UnicodeSetStaticCache.get(groupingKey);
Key decimalKey = UnicodeSetStaticCache.chooseFrom(decimalSeparator,
groupingUniSet = StaticUnicodeSets.get(groupingKey);
Key decimalKey = StaticUnicodeSets.chooseFrom(decimalSeparator,
strictSeparators ? Key.STRICT_COMMA : Key.COMMA,
strictSeparators ? Key.STRICT_PERIOD : Key.PERIOD);
if (decimalKey != null) {
decimalUniSet = UnicodeSetStaticCache.get(decimalKey);
decimalUniSet = StaticUnicodeSets.get(decimalKey);
} else {
decimalUniSet = new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze();
}
@ -76,7 +77,7 @@ public class DecimalMatcher implements NumberParseMatcher {
if (groupingKey != null && decimalKey != null) {
// Everything is available in the static cache
separatorSet = groupingUniSet;
leadSet = UnicodeSetStaticCache.get(strictSeparators ? Key.DIGITS_OR_ALL_SEPARATORS
leadSet = StaticUnicodeSets.get(strictSeparators ? Key.DIGITS_OR_ALL_SEPARATORS
: Key.DIGITS_OR_STRICT_ALL_SEPARATORS);
} else {
separatorSet = new UnicodeSet().addAll(groupingUniSet).addAll(decimalUniSet).freeze();

View file

@ -3,6 +3,7 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.UnicodeSet;
/**
@ -12,10 +13,10 @@ import com.ibm.icu.text.UnicodeSet;
public class IgnorablesMatcher extends SymbolMatcher implements NumberParseMatcher.Flexible {
public static final IgnorablesMatcher DEFAULT = new IgnorablesMatcher(
UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.DEFAULT_IGNORABLES));
StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES));
public static final IgnorablesMatcher STRICT = new IgnorablesMatcher(
UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.STRICT_IGNORABLES));
StaticUnicodeSets.get(StaticUnicodeSets.Key.STRICT_IGNORABLES));
public static IgnorablesMatcher getInstance(UnicodeSet ignorables) {
assert ignorables.isFrozen();

View file

@ -3,6 +3,7 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
@ -27,7 +28,7 @@ public class InfinityMatcher extends SymbolMatcher {
}
private InfinityMatcher() {
super(UnicodeSetStaticCache.Key.INFINITY);
super(StaticUnicodeSets.Key.INFINITY);
}
@Override

View file

@ -3,6 +3,7 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
@ -31,7 +32,7 @@ public class MinusSignMatcher extends SymbolMatcher {
}
private MinusSignMatcher(boolean allowTrailing) {
super(UnicodeSetStaticCache.Key.MINUS_SIGN);
super(StaticUnicodeSets.Key.MINUS_SIGN);
this.allowTrailing = allowTrailing;
}

View file

@ -3,6 +3,7 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
@ -27,7 +28,7 @@ public class PercentMatcher extends SymbolMatcher {
}
private PercentMatcher() {
super(UnicodeSetStaticCache.Key.PERCENT_SIGN);
super(StaticUnicodeSets.Key.PERCENT_SIGN);
}
@Override

View file

@ -3,6 +3,7 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
@ -27,7 +28,7 @@ public class PermilleMatcher extends SymbolMatcher {
}
private PermilleMatcher() {
super(UnicodeSetStaticCache.Key.PERMILLE_SIGN);
super(StaticUnicodeSets.Key.PERMILLE_SIGN);
}
@Override

View file

@ -3,6 +3,7 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
@ -31,7 +32,7 @@ public class PlusSignMatcher extends SymbolMatcher {
}
private PlusSignMatcher(boolean allowTrailing) {
super(UnicodeSetStaticCache.Key.PLUS_SIGN);
super(StaticUnicodeSets.Key.PLUS_SIGN);
this.allowTrailing = allowTrailing;
}

View file

@ -3,6 +3,7 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
import com.ibm.icu.impl.number.Grouper;
import com.ibm.icu.text.DecimalFormatSymbols;
@ -37,11 +38,11 @@ public class ScientificMatcher implements NumberParseMatcher {
}
private static UnicodeSet minusSignSet() {
return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN);
return StaticUnicodeSets.get(StaticUnicodeSets.Key.MINUS_SIGN);
}
private static UnicodeSet plusSignSet() {
return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN);
return StaticUnicodeSets.get(StaticUnicodeSets.Key.PLUS_SIGN);
}
@Override

View file

@ -3,6 +3,7 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.UnicodeSet;
/**
@ -22,9 +23,9 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
uniSet = symbolUniSet;
}
protected SymbolMatcher(UnicodeSetStaticCache.Key key) {
protected SymbolMatcher(StaticUnicodeSets.Key key) {
string = "";
uniSet = UnicodeSetStaticCache.get(key);
uniSet = StaticUnicodeSets.get(key);
}
public UnicodeSet getSet() {

View file

@ -13,7 +13,7 @@ import java.text.AttributedCharacterIterator.Attribute;
import java.text.CharacterIterator;
import java.util.Map;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.ULocale;
@ -230,14 +230,14 @@ public final class ScientificNumberFormatter {
int start = iterator.getRunStart(NumberFormat.Field.EXPONENT_SIGN);
int limit = iterator.getRunLimit(NumberFormat.Field.EXPONENT_SIGN);
int aChar = char32AtAndAdvance(iterator);
if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(aChar)) {
if (StaticUnicodeSets.get(StaticUnicodeSets.Key.MINUS_SIGN).contains(aChar)) {
append(
iterator,
copyFromOffset,
start,
result);
result.append(SUPERSCRIPT_MINUS_SIGN);
} else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(aChar)) {
} else if (StaticUnicodeSets.get(StaticUnicodeSets.Key.PLUS_SIGN).contains(aChar)) {
append(
iterator,
copyFromOffset,

View file

@ -30,7 +30,7 @@ import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.SimpleCache;
import com.ibm.icu.impl.SoftCache;
import com.ibm.icu.impl.TextTrieMap;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.CurrencyDisplayNames;
import com.ibm.icu.text.CurrencyMetaInfo;
import com.ibm.icu.text.CurrencyMetaInfo.CurrencyDigits;
@ -772,10 +772,10 @@ public class Currency extends MeasureUnit {
String isoCode = e.getValue();
// Register under not just symbol, but under every equivalent symbol as well
// e.g short width yen and long width yen.
UnicodeSetStaticCache.Key key = UnicodeSetStaticCache.chooseCurrency(symbol);
StaticUnicodeSets.Key key = StaticUnicodeSets.chooseCurrency(symbol);
CurrencyStringInfo value = new CurrencyStringInfo(isoCode, symbol);
if (key != null) {
UnicodeSet equivalents = UnicodeSetStaticCache.get(key);
UnicodeSet equivalents = StaticUnicodeSets.get(key);
// The symbol itself is included in the UnicodeSet
for (String equivalentSymbol : equivalents) {
symTrie.put(equivalentSymbol, value);

View file

@ -2,7 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.dev.test.number;
import static com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.get;
import static com.ibm.icu.impl.StaticUnicodeSets.get;
import java.math.BigDecimal;
import java.util.Random;
@ -11,8 +11,8 @@ import org.junit.Before;
import org.junit.Test;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.StaticUnicodeSets.Key;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.number.NumberFormatter;
import com.ibm.icu.number.Precision;

View file

@ -1,24 +0,0 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.dev.test.number;
import static com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.get;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
/**
* This test class is thin; most of it was moved to ExhaustiveNumberTest.
* @author sffc
*/
public class UnicodeSetStaticCacheTest {
@Test
public void testFrozen() {
for (Key key : Key.values()) {
assertTrue(get(key).isFrozen());
}
}
}

View file

@ -0,0 +1,33 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.dev.test.util;
import static com.ibm.icu.impl.StaticUnicodeSets.get;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
import com.ibm.icu.impl.StaticUnicodeSets.Key;
/**
* NOTE: The test testSetCoverage can be found in ExhaustiveNumberTest.java
*
* @author sffc
*/
public class StaticUnicodeSetsTest {

    /** Every set returned by StaticUnicodeSets.get() must be frozen. */
    @Test
    public void testFrozen() {
        for (Key key : Key.values()) {
            assertTrue(get(key).isFrozen());
        }
    }

    /**
     * Every key must resolve to a set with content. get() falls back to an empty set when a
     * key was never populated, so an empty result means the data for that key failed to load.
     */
    @Test
    public void testNonEmpty() {
        for (Key key : Key.values()) {
            // NOTE: No key EMPTY in Java
            assertTrue("Set should not be empty: " + key, !get(key).isEmpty());
        }
    }
}