ICU-13574 Adding more matchers derived from SymbolMatcher.

X-SVN-Rev: 40876
This commit is contained in:
Shane Carr 2018-02-09 02:35:02 +00:00
parent 8393405113
commit 12764fa082
9 changed files with 218 additions and 33 deletions

View file

@ -29,8 +29,8 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
auto* parser = new NumberParserImpl(parseFlags, true);
DecimalFormatSymbols symbols(locale, status);
// IgnorablesMatcher* ignorables = IgnorablesMatcher.getDefault();
//
IgnorablesMatcher* ignorables = new IgnorablesMatcher(unisets::DEFAULT_IGNORABLES);
// MatcherFactory factory = new MatcherFactory();
// factory.currency = Currency.getInstance("USD");
// factory.symbols = symbols;
@ -45,10 +45,13 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO);
grouper.setLocaleData(patternInfo, locale);
// parser.addMatcher({ignorables, false});
parser->addAndAdoptMatcher(ignorables);
parser->addAndAdoptMatcher(new DecimalMatcher(symbols, grouper, parseFlags));
parser->addAndAdoptMatcher(new MinusSignMatcher(symbols, false));
// parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
parser->addAndAdoptMatcher(new PlusSignMatcher(symbols, false));
parser->addAndAdoptMatcher(new PercentMatcher(symbols));
parser->addAndAdoptMatcher(new PermilleMatcher(symbols));
parser->addAndAdoptMatcher(new NanMatcher(symbols));
// parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
// parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
// parser.addMatcher(new RequireNumberMatcher());

View file

@ -16,7 +16,6 @@ using namespace icu::numparse::impl;
SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key) {
fUniSet = unisets::get(key);
fOwnsUniSet = false;
if (fUniSet->contains(symbolString)) {
fString.setToBogus();
} else {
@ -24,13 +23,6 @@ SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key
}
}
// Releases the UnicodeSet only when this matcher is flagged as its owner;
// otherwise the pointer is left untouched.
SymbolMatcher::~SymbolMatcher() {
    if (!fOwnsUniSet) {
        return;
    }
    delete fUniSet;
    fUniSet = nullptr;
}
// Exposes the UnicodeSet pointer held by this matcher (may be shared; the
// caller must not delete it).
const UnicodeSet* SymbolMatcher::getSet() {
    return this->fUniSet;
}
@ -76,14 +68,30 @@ const UnicodeSet* SymbolMatcher::getLeadCodePoints() const {
}
MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing) : SymbolMatcher(
dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol),
unisets::MINUS_SIGN), fAllowTrailing(allowTrailing) {
// Builds an ignorables matcher from a set key alone: an empty symbol string
// is forwarded to SymbolMatcher together with the key.
IgnorablesMatcher::IgnorablesMatcher(unisets::Key key)
        : SymbolMatcher(UnicodeString(), key) {
}
// Ignorables always report themselves as flexible.
bool IgnorablesMatcher::isFlexible() const {
    return true;
}
// Never disabled: the parse state passed in is not consulted.
bool IgnorablesMatcher::isDisabled(const ParsedNumber& /*result*/) const {
    return false;
}
// Accepting an ignorable records nothing: neither the segment nor the
// result is touched.
void IgnorablesMatcher::accept(StringSegment& /*segment*/, ParsedNumber& /*result*/) const {
    // Intentionally empty.
}
// Matches the minus sign: uses the kMinusSignSymbol string from `dfs` and the
// MINUS_SIGN set. `allowTrailing` is stored and consulted by isDisabled().
MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)
        : SymbolMatcher(
                  dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol),
                  unisets::MINUS_SIGN),
          fAllowTrailing(allowTrailing) {
}
// The minus sign matcher is disabled when FLAG_NEGATIVE is already set on the
// result, or, unless trailing signs are allowed, once result.seenNumber()
// reports true.
bool MinusSignMatcher::isDisabled(const ParsedNumber& result) const {
    // Single return: the previous body repeated this expression in a second,
    // unreachable return statement.
    return 0 != (result.flags & FLAG_NEGATIVE) || (fAllowTrailing ? false : result.seenNumber());
}
void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
@ -92,4 +100,85 @@ void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) cons
}
// Matches the NaN symbol: uses the kNaNSymbol string from `dfs`, paired with
// the EMPTY set key.
NanMatcher::NanMatcher(const DecimalFormatSymbols& dfs)
        : SymbolMatcher(
                  dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol),
                  unisets::EMPTY) {
}
// Overridden so the shared NAN_LEAD set can be used when it applies.
// If the first code point of the symbol string is in NAN_LEAD, a newly
// allocated copy of that set is returned; otherwise the base-class
// implementation decides.
// NOTE(review): the copy is heap-allocated; presumably the caller takes
// ownership of the returned set. Confirm against the caller.
const UnicodeSet* NanMatcher::getLeadCodePoints() const {
    const int firstCodePoint = fString.char32At(0);
    const UnicodeSet* nanLeads = unisets::get(unisets::NAN_LEAD);
    if (!nanLeads->contains(firstCodePoint)) {
        return SymbolMatcher::getLeadCodePoints();
    }
    return new UnicodeSet(*nanLeads);
}
// NaN is no longer matched once result.seenNumber() reports true.
bool NanMatcher::isDisabled(const ParsedNumber& result) const {
    const bool numberAlreadySeen = result.seenNumber();
    return numberAlreadySeen;
}
// Records a NaN match: sets FLAG_NAN on the result, then marks the segment's
// characters as consumed.
void NanMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.flags |= FLAG_NAN;
    result.setCharsConsumed(segment);
}
// Matches the percent sign: uses the kPercentSymbol string from `dfs` and the
// PERCENT_SIGN set.
PercentMatcher::PercentMatcher(const DecimalFormatSymbols& dfs)
        : SymbolMatcher(
                  dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol),
                  unisets::PERCENT_SIGN) {
}
// After the base-class post-processing, scales the parsed quantity by 10^-2
// when a percent sign was seen and the quantity is not bogus.
void PercentMatcher::postProcess(ParsedNumber& result) const {
    SymbolMatcher::postProcess(result);
    const bool sawPercent = (result.flags & FLAG_PERCENT) != 0;
    if (sawPercent && !result.quantity.bogus) {
        result.quantity.adjustMagnitude(-2);
    }
}
// A second percent sign is not matched: disabled once FLAG_PERCENT is set.
bool PercentMatcher::isDisabled(const ParsedNumber& result) const {
    return (result.flags & FLAG_PERCENT) != 0;
}
// Records a percent match: sets FLAG_PERCENT on the result, then marks the
// segment's characters as consumed.
void PercentMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.flags |= FLAG_PERCENT;
    result.setCharsConsumed(segment);
}
// Matches the permille sign: uses the kPerMillSymbol string from `dfs` and
// the PERMILLE_SIGN set.
PermilleMatcher::PermilleMatcher(const DecimalFormatSymbols& dfs)
        : SymbolMatcher(
                  dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol),
                  unisets::PERMILLE_SIGN) {
}
// After the base-class post-processing, scales the parsed quantity by 10^-3
// when a permille sign was seen and the quantity is not bogus.
void PermilleMatcher::postProcess(ParsedNumber& result) const {
    SymbolMatcher::postProcess(result);
    const bool sawPermille = (result.flags & FLAG_PERMILLE) != 0;
    if (sawPermille && !result.quantity.bogus) {
        result.quantity.adjustMagnitude(-3);
    }
}
// A second permille sign is not matched: disabled once FLAG_PERMILLE is set.
bool PermilleMatcher::isDisabled(const ParsedNumber& result) const {
    return (result.flags & FLAG_PERMILLE) != 0;
}
// Records a permille match: sets FLAG_PERMILLE on the result, then marks the
// segment's characters as consumed.
void PermilleMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.flags |= FLAG_PERMILLE;
    result.setCharsConsumed(segment);
}
// Matches the plus sign: uses the kPlusSignSymbol string from `dfs` and the
// PLUS_SIGN set. `allowTrailing` is stored and consulted by isDisabled().
PlusSignMatcher::PlusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)
        : SymbolMatcher(
                  dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol),
                  unisets::PLUS_SIGN),
          fAllowTrailing(allowTrailing) {
}
// When trailing signs are allowed the matcher is never disabled; otherwise it
// is disabled once result.seenNumber() reports true.
bool PlusSignMatcher::isDisabled(const ParsedNumber& result) const {
    if (fAllowTrailing) {
        return false;
    }
    return result.seenNumber();
}
// A plus sign sets no flag; the segment's characters are simply marked as
// consumed on the result.
void PlusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.setCharsConsumed(segment);
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -17,8 +17,6 @@ namespace impl {
class SymbolMatcher : public NumberParseMatcher, public UMemory {
public:
~SymbolMatcher() override;
const UnicodeSet* getSet();
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
@ -31,13 +29,25 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory {
protected:
UnicodeString fString;
const UnicodeSet* fUniSet;
bool fOwnsUniSet;
const UnicodeSet* fUniSet; // a reference from numparse_unisets.h; never owned
SymbolMatcher(const UnicodeString& symbolString, unisets::Key key);
};
/**
 * SymbolMatcher for ignorable characters, constructed from a unisets key only
 * (no locale-specific symbol string).
 */
class IgnorablesMatcher : public SymbolMatcher {
public:
/** @param key Identifies the UnicodeSet, from unisets, that this matcher uses. */
explicit IgnorablesMatcher(unisets::Key key);
bool isFlexible() const override;
protected:
bool isDisabled(const ParsedNumber& result) const override;
void accept(StringSegment& segment, ParsedNumber& result) const override;
};
class MinusSignMatcher : public SymbolMatcher {
public:
MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);
@ -52,6 +62,59 @@ class MinusSignMatcher : public SymbolMatcher {
};
/**
 * SymbolMatcher for the NaN symbol taken from a DecimalFormatSymbols object.
 * Overrides getLeadCodePoints() in addition to the isDisabled()/accept() hooks.
 */
class NanMatcher : public SymbolMatcher {
public:
/** @param dfs Source of the NaN symbol string. */
explicit NanMatcher(const DecimalFormatSymbols& dfs);
const UnicodeSet* getLeadCodePoints() const override;
protected:
bool isDisabled(const ParsedNumber& result) const override;
void accept(StringSegment& segment, ParsedNumber& result) const override;
};
/**
 * SymbolMatcher for the percent sign taken from a DecimalFormatSymbols object.
 * Overrides postProcess() so the parsed result can be adjusted after matching.
 */
class PercentMatcher : public SymbolMatcher {
public:
/** @param dfs Source of the percent symbol string. */
explicit PercentMatcher(const DecimalFormatSymbols& dfs);
void postProcess(ParsedNumber& result) const override;
protected:
bool isDisabled(const ParsedNumber& result) const override;
void accept(StringSegment& segment, ParsedNumber& result) const override;
};
/**
 * SymbolMatcher for the permille sign taken from a DecimalFormatSymbols object.
 * Overrides postProcess() so the parsed result can be adjusted after matching.
 */
class PermilleMatcher : public SymbolMatcher {
public:
/** @param dfs Source of the permille symbol string. */
explicit PermilleMatcher(const DecimalFormatSymbols& dfs);
void postProcess(ParsedNumber& result) const override;
protected:
bool isDisabled(const ParsedNumber& result) const override;
void accept(StringSegment& segment, ParsedNumber& result) const override;
};
/**
 * SymbolMatcher for the plus sign taken from a DecimalFormatSymbols object.
 */
class PlusSignMatcher : public SymbolMatcher {
public:
/**
 * @param dfs Source of the plus sign symbol string.
 * @param allowTrailing Stored in fAllowTrailing; when false, isDisabled()
 *        depends on whether a number has already been seen.
 */
PlusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);
protected:
bool isDisabled(const ParsedNumber& result) const override;
void accept(StringSegment& segment, ParsedNumber& result) const override;
private:
// Whether the sign may still be matched after a number has been seen.
bool fAllowTrailing;
};
} // namespace impl
} // namespace numparse
U_NAMESPACE_END

View file

@ -58,6 +58,8 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode &status) {
ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUnitSets);
#define NEW_UNISET(pattern, status) new UnicodeSet(UnicodeString(pattern), status)
gUnicodeSets[EMPTY] = new UnicodeSet();
// BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
gUnicodeSets[BIDI] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status);

View file

@ -15,6 +15,8 @@ namespace impl {
namespace unisets {
enum Key {
EMPTY,
// Ignorables
BIDI,
WHITESPACE,

View file

@ -18,6 +18,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cmath>
#include "unicode/ctest.h" // for str_timeDelta
#include "unicode/curramt.h"
@ -1998,7 +1999,8 @@ UBool IntlTest::assertEquals(const char* message,
UBool IntlTest::assertEquals(const char* message,
double expected,
double actual) {
if (expected != actual) {
bool bothNaN = std::isnan(expected) && std::isnan(actual);
if (expected != actual && !bothNaN) {
errln((UnicodeString)"FAIL: " + message + "; got " +
actual +
"; expected " + expected);

View file

@ -41,6 +41,9 @@ void NumberParserTest::testBasic() {
{3, u"𝟱𝟭𝟰𝟮𝟯x", u"0", 10, 51423.},
{3, u" 𝟱𝟭𝟰𝟮𝟯", u"0", 11, 51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯 ", u"0", 10, 51423.},
{7, u"51,423", u"#,##,##0", 6, 51423.},
{7, u" 51,423", u"#,##,##0", 7, 51423.},
{7, u"51,423 ", u"#,##,##0", 6, 51423.},
{7, u"𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 11, 51423.},
{7, u"𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 19, 78951423.},
{7, u"𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", u"#,##,##0", 18, 78951.423},
@ -48,8 +51,16 @@ void NumberParserTest::testBasic() {
{7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", u"#,##,##0", 18, 78000.},
{7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 18, 78000.023},
{7, u"𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 11, 78.},
{3, u"-𝟱𝟭𝟰𝟮𝟯", u"0", 11, -51423.},
{3, u"-𝟱𝟭𝟰𝟮𝟯-", u"0", 11, -51423.},
{3, u"-51423", u"0", 6, -51423.},
{3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
{3, u"+51423", u"0", 6, 51423.},
{3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
{3, u"%51423", u"0", 6, 514.23},
{3, u"51423%", u"0", 6, 514.23},
{3, u"51423%%", u"0", 6, 514.23},
{3, u"‰51423", u"0", 6, 51.423},
{3, u"51423‰", u"0", 6, 51.423},
{3, u"51423‰‰", u"0", 6, 51.423},
// {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
// {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
// {3, u"514.23 USD", u"¤0", 10, 514.23},
@ -77,12 +88,11 @@ void NumberParserTest::testBasic() {
// {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
// {3, u"📺1.23", u"📺0;📻0", 6, 1.23},
// {3, u"📻1.23", u"📺0;📻0", 6, -1.23},
// {3, u".00", u"0", 3, 0.0},
// {3, u" 0", u"a0", 31, 0.0}, // should not hang
// {3, u"NaN", u"0", 3, NAN},
// {3, u"NaN E5", u"0", 3, NAN},
// {3, u"0", u"0", 1, 0.0}
};
{3, u".00", u"0", 3, 0.0},
{3, u" 1,234", u"a0", 35, 1234.}, // should not hang
{3, u"NaN", u"0", 3, NAN},
{3, u"NaN E5", u"0", 3, NAN},
{3, u"0", u"0", 1, 0.0}};
parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
for (auto cas : cases) {

View file

@ -88,6 +88,9 @@ public class NumberParserImpl {
parser.addMatcher(ignorables);
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
parser.addMatcher(PercentMatcher.getInstance(symbols));
parser.addMatcher(PermilleMatcher.getInstance(symbols));
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));

View file

@ -47,6 +47,9 @@ public class NumberParserTest {
{ 3, "𝟱𝟭𝟰𝟮𝟯x", "0", 10, 51423. },
{ 3, " 𝟱𝟭𝟰𝟮𝟯", "0", 11, 51423. },
{ 3, "𝟱𝟭𝟰𝟮𝟯 ", "0", 10, 51423. },
{ 7, "51,423", "#,##,##0", 6, 51423. },
{ 7, " 51,423", "#,##,##0", 7, 51423. },
{ 7, "51,423 ", "#,##,##0", 6, 51423. },
{ 7, "𝟱𝟭,𝟰𝟮𝟯", "#,##,##0", 11, 51423. },
{ 7, "𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", "#,##,##0", 19, 78951423. },
{ 7, "𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", "#,##,##0", 18, 78951.423 },
@ -54,8 +57,16 @@ public class NumberParserTest {
{ 7, "𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", "#,##,##0", 18, 78000. },
{ 7, "𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", "#,##,##0", 18, 78000.023 },
{ 7, "𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", "#,##,##0", 11, 78. },
{ 3, "-𝟱𝟭𝟰𝟮𝟯", "0", 11, -51423. },
{ 3, "-𝟱𝟭𝟰𝟮𝟯-", "0", 11, -51423. },
{ 3, "-51423", "0", 6, -51423. },
{ 3, "51423-", "0", 5, 51423. }, // plus and minus sign by default do NOT match after
{ 3, "+51423", "0", 6, 51423. },
{ 3, "51423+", "0", 5, 51423. }, // plus and minus sign by default do NOT match after
{ 3, "%51423", "0", 6, 514.23 },
{ 3, "51423%", "0", 6, 514.23 },
{ 3, "51423%%", "0", 6, 514.23 },
{ 3, "‰51423", "0", 6, 51.423 },
{ 3, "51423‰", "0", 6, 51.423 },
{ 3, "51423‰‰", "0", 6, 51.423 },
{ 3, "a51423US dollars", "a0¤¤¤", 16, 51423. },
{ 3, "a 51423 US dollars", "a0¤¤¤", 18, 51423. },
{ 3, "514.23 USD", "¤0", 10, 514.23 },
@ -84,7 +95,7 @@ public class NumberParserTest {
{ 3, "📺1.23", "📺0;📻0", 6, 1.23 },
{ 3, "📻1.23", "📺0;📻0", 6, -1.23 },
{ 3, ".00", "0", 3, 0.0 },
{ 3, " 0", "a0", 31, 0.0 }, // should not hang
{ 3, " 1,234", "a0", 35, 1234. }, // should not hang
{ 3, "NaN", "0", 3, Double.NaN },
{ 3, "NaN E5", "0", 3, Double.NaN },
{ 3, "0", "0", 1, 0.0 } };