ICU-13574 Fixing CurrencyAnyMatcher. Up-to-date with tests.

X-SVN-Rev: 40893
This commit is contained in:
Shane Carr 2018-02-10 11:32:18 +00:00
parent 59587ad9db
commit a335b723c7
6 changed files with 99 additions and 22 deletions

View file

@ -40,16 +40,16 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
// Case 1: the token is a symbol.
switch (type) {
case TYPE_MINUS_SIGN:
addMatcher(fWarehouse.minusSign = {fWarehouse.dfs, true});
addMatcher(fWarehouse.minusSign());
break;
case TYPE_PLUS_SIGN:
addMatcher(fWarehouse.plusSign = {fWarehouse.dfs, true});
addMatcher(fWarehouse.plusSign());
break;
case TYPE_PERCENT:
addMatcher(fWarehouse.percent = {fWarehouse.dfs});
addMatcher(fWarehouse.percent());
break;
case TYPE_PERMILLE:
addMatcher(fWarehouse.permille = {fWarehouse.dfs});
addMatcher(fWarehouse.permille());
break;
case TYPE_CURRENCY_SINGLE:
case TYPE_CURRENCY_DOUBLE:
@ -57,13 +57,7 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
case TYPE_CURRENCY_QUAD:
case TYPE_CURRENCY_QUINT:
// All currency symbols use the same matcher
addMatcher(
fWarehouse.currency = {
CurrencyNamesMatcher(
fWarehouse.locale, status), CurrencyCustomMatcher(
fWarehouse.currencyCode,
fWarehouse.currency1,
fWarehouse.currency2)});
addMatcher(fWarehouse.currency(status));
break;
default:
U_ASSERT(FALSE);
@ -109,12 +103,32 @@ AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode
AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() {
// Delete the variable number of batches of code point matchers
for (int32_t i=0; i<codePointNumBatches; i++) {
for (int32_t i = 0; i < codePointNumBatches; i++) {
delete[] codePointsOverflow[i];
}
}
CodePointMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
/**
 * Re-initializes the warehouse-owned minus sign matcher from the `dfs` member
 * and hands it back through the generic NumberParseMatcher interface.
 *
 * The returned reference points at storage owned by this warehouse, so every
 * call overwrites the same object.
 */
NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() {
    fMinusSign = {dfs, true};
    return fMinusSign;
}
/**
 * Re-initializes the warehouse-owned plus sign matcher from the `dfs` member
 * and hands it back through the generic NumberParseMatcher interface.
 *
 * The returned reference points at storage owned by this warehouse, so every
 * call overwrites the same object.
 */
NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
    fPlusSign = {dfs, true};
    return fPlusSign;
}
/**
 * Re-initializes the warehouse-owned percent matcher from the `dfs` member
 * and hands it back through the generic NumberParseMatcher interface.
 *
 * The returned reference points at storage owned by this warehouse, so every
 * call overwrites the same object.
 */
NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
    fPercent = {dfs};
    return fPercent;
}
/**
 * Re-initializes the warehouse-owned permille matcher from the `dfs` member
 * and hands it back through the generic NumberParseMatcher interface.
 *
 * The returned reference points at storage owned by this warehouse, so every
 * call overwrites the same object.
 */
NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
    fPermille = {dfs};
    return fPermille;
}
/**
 * Re-initializes the warehouse-owned currency matcher and hands it back
 * through the generic NumberParseMatcher interface.
 *
 * The matcher is assembled from two brace-initialized pieces: one built from
 * (locale, status) and one built from (currencyCode, currency1, currency2).
 *
 * @param status Passed through to the first piece's initializer.
 * @return Reference to the matcher stored inside this warehouse; every call
 *         overwrites the same object.
 */
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
    fCurrency = {{locale, status}, {currencyCode, currency1, currency2}};
    return fCurrency;
}
NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
if (codePointCount < CODE_POINT_STACK_CAPACITY) {
return codePoints[codePointCount++] = {cp};
}
@ -129,7 +143,7 @@ CodePointMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
codePointsOverflow[codePointNumBatches++] = nextBatch;
}
return codePointsOverflow[codePointNumBatches - 1][(codePointCount++ - CODE_POINT_STACK_CAPACITY) %
CODE_POINT_BATCH_SIZE] = {cp};
CODE_POINT_BATCH_SIZE] = {cp};
}

View file

@ -57,7 +57,17 @@ class AffixTokenMatcherWarehouse {
~AffixTokenMatcherWarehouse();
CodePointMatcher& nextCodePointMatcher(UChar32 cp);
NumberParseMatcher& minusSign();
NumberParseMatcher& plusSign();
NumberParseMatcher& percent();
NumberParseMatcher& permille();
NumberParseMatcher& currency(UErrorCode& status);
NumberParseMatcher& nextCodePointMatcher(UChar32 cp);
private:
UChar currencyCode[4];
@ -68,11 +78,11 @@ class AffixTokenMatcherWarehouse {
const Locale locale;
// NOTE: These are default-constructed and should not be used until initialized.
MinusSignMatcher minusSign;
PlusSignMatcher plusSign;
PercentMatcher percent;
PermilleMatcher permille;
CurrencyAnyMatcher currency;
MinusSignMatcher fMinusSign;
PlusSignMatcher fPlusSign;
PercentMatcher fPercent;
PermilleMatcher fPermille;
CurrencyAnyMatcher fCurrency;
CodePointMatcher codePoints[CODE_POINT_STACK_CAPACITY]; // By value
MaybeStackArray<CodePointMatcher*, 3> codePointsOverflow; // On heap in "batches"

View file

@ -19,7 +19,7 @@ bool AnyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&
bool maybeMore = false;
// NOTE: The range-based for loop calls the virtual begin() and end() methods.
for (auto* matcher : *this) {
for (auto& matcher : *this) {
maybeMore = maybeMore || matcher->match(segment, result, status);
if (segment.getOffset() != initialOffset) {
// Match succeeded.

View file

@ -119,6 +119,19 @@ CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher,
fMatcherArray[1] = &fCustomMatcher;
}
/**
 * Move constructor.
 *
 * The two sub-matchers are moved out of `src`. The pointer table is NOT taken
 * from `src`: it is rebuilt so that its entries refer to this object's own
 * members rather than to the members of the moved-from object.
 */
CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyAnyMatcher&& src) U_NOEXCEPT
        : fNamesMatcher(std::move(src.fNamesMatcher)),
          fCustomMatcher(std::move(src.fCustomMatcher)) {
    // Point each slot at the freshly constructed member of *this.
    fMatcherArray[1] = &fCustomMatcher;
    fMatcherArray[0] = &fNamesMatcher;
}
/**
 * Move assignment.
 *
 * Only the two sub-matchers are transferred from `src`. fMatcherArray is
 * deliberately left untouched: copying it from `src` would make this object
 * refer to the other object's members.
 *
 * NOTE(review): fLocalLeadCodePoints is not reset here; confirm that keeping
 * any previously cached value across an assignment is intended.
 */
CurrencyAnyMatcher& CurrencyAnyMatcher::operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT {
    this->fNamesMatcher = std::move(src.fNamesMatcher);
    this->fCustomMatcher = std::move(src.fCustomMatcher);
    return *this;
}
const UnicodeSet& CurrencyAnyMatcher::getLeadCodePoints() {
if (fLocalLeadCodePoints.isNull()) {
auto* leadCodePoints = new UnicodeSet();

View file

@ -67,6 +67,12 @@ class CurrencyAnyMatcher : public AnyMatcher, public UMemory {
CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher, CurrencyCustomMatcher customMatcher);
// Needs custom move constructor/operator since constructor is nontrivial
CurrencyAnyMatcher(CurrencyAnyMatcher&& src) U_NOEXCEPT;
CurrencyAnyMatcher& operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT;
const UnicodeSet& getLeadCodePoints() override;
protected:

View file

@ -23,6 +23,7 @@ void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& na
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testBasic);
TESTCASE_AUTO(testSeriesMatcher);
TESTCASE_AUTO(testCurrencyAnyMatcher);
TESTCASE_AUTO(testAffixPatternMatcher);
TESTCASE_AUTO_END;
}
@ -211,6 +212,39 @@ void NumberParserTest::testSeriesMatcher() {
}
}
/**
 * Exercises the currency matcher handed out by AffixTokenMatcherWarehouse.
 *
 * Each case feeds one input string to the matcher and checks two things:
 * the currency code recorded in the ParsedNumber, and that charEnd is the full
 * input length when a code is expected (0 when the expected code is empty).
 */
void NumberParserTest::testCurrencyAnyMatcher() {
    IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");

    // Warehouse configured with "ICU" / "IU$" / "ICU" as its currency strings.
    IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
    AffixTokenMatcherWarehouse warehouse(u"ICU", u"IU$", u"ICU", {"en", status}, &ignorables, "en");
    NumberParseMatcher& matcher = warehouse.currency(status);

    static const struct TestCase {
        const char16_t* input;
        const char16_t* expectedCurrencyCode;
    } cases[] {
        { u"", u"\x00" },
        { u"FOO", u"\x00" },
        { u"USD", u"USD" },
        { u"$", u"USD" },
        { u"US dollars", u"USD" },
        { u"eu", u"\x00" },
        { u"euros", u"EUR" },
        { u"ICU", u"ICU" },
        { u"IU$", u"ICU" }
    };

    for (const auto& testCase : cases) {
        UnicodeString input(testCase.input);
        StringSegment segment(input, 0);
        ParsedNumber result;
        matcher.match(segment, result, status);

        // An expected code starting with NUL means "no currency should be recognized".
        const bool expectMatch = testCase.expectedCurrencyCode[0] != 0;
        assertEquals("Parsing " + input, testCase.expectedCurrencyCode, result.currencyCode);
        assertEquals("Whole string on " + input, expectMatch ? input.length() : 0, result.charEnd);
    }
}
void NumberParserTest::testAffixPatternMatcher() {
IcuTestErrorCode status(*this, "testAffixPatternMatcher");
@ -227,7 +261,7 @@ void NumberParserTest::testAffixPatternMatcher() {
{true, u"+-%", 3, u"+-%"},
{false, u"ab c", 5, u"a bc"},
{true, u"abc", 3, u"abc"},
//{false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"}
{false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"}
};
for (auto& cas : cases) {