mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-13574 Adding more matchers derived from SymbolMatcher.
X-SVN-Rev: 40876
This commit is contained in:
parent
8393405113
commit
12764fa082
9 changed files with 218 additions and 33 deletions
|
@ -29,8 +29,8 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
|
|||
auto* parser = new NumberParserImpl(parseFlags, true);
|
||||
DecimalFormatSymbols symbols(locale, status);
|
||||
|
||||
// IgnorablesMatcher* ignorables = IgnorablesMatcher.getDefault();
|
||||
//
|
||||
IgnorablesMatcher* ignorables = new IgnorablesMatcher(unisets::DEFAULT_IGNORABLES);
|
||||
|
||||
// MatcherFactory factory = new MatcherFactory();
|
||||
// factory.currency = Currency.getInstance("USD");
|
||||
// factory.symbols = symbols;
|
||||
|
@ -45,10 +45,13 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
|
|||
Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO);
|
||||
grouper.setLocaleData(patternInfo, locale);
|
||||
|
||||
// parser.addMatcher({ignorables, false});
|
||||
parser->addAndAdoptMatcher(ignorables);
|
||||
parser->addAndAdoptMatcher(new DecimalMatcher(symbols, grouper, parseFlags));
|
||||
parser->addAndAdoptMatcher(new MinusSignMatcher(symbols, false));
|
||||
// parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
|
||||
parser->addAndAdoptMatcher(new PlusSignMatcher(symbols, false));
|
||||
parser->addAndAdoptMatcher(new PercentMatcher(symbols));
|
||||
parser->addAndAdoptMatcher(new PermilleMatcher(symbols));
|
||||
parser->addAndAdoptMatcher(new NanMatcher(symbols));
|
||||
// parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
// parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
|
||||
// parser.addMatcher(new RequireNumberMatcher());
|
||||
|
|
|
@ -16,7 +16,6 @@ using namespace icu::numparse::impl;
|
|||
|
||||
SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key) {
|
||||
fUniSet = unisets::get(key);
|
||||
fOwnsUniSet = false;
|
||||
if (fUniSet->contains(symbolString)) {
|
||||
fString.setToBogus();
|
||||
} else {
|
||||
|
@ -24,13 +23,6 @@ SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key
|
|||
}
|
||||
}
|
||||
|
||||
// Destructor: frees the UnicodeSet only when fOwnsUniSet says this matcher owns it.
// When the flag is false, fUniSet is left untouched.
SymbolMatcher::~SymbolMatcher() {
    if (!fOwnsUniSet) {
        return;
    }
    delete fUniSet;
    fUniSet = nullptr;
}
|
||||
|
||||
// Accessor for the UnicodeSet this matcher was constructed with.
// Returns the stored pointer itself; no copy is made.
const UnicodeSet* SymbolMatcher::getSet() {
    return fUniSet;
}
|
||||
|
@ -76,14 +68,30 @@ const UnicodeSet* SymbolMatcher::getLeadCodePoints() const {
|
|||
}
|
||||
|
||||
|
||||
MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing) : SymbolMatcher(
|
||||
dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol),
|
||||
unisets::MINUS_SIGN), fAllowTrailing(allowTrailing) {
|
||||
// Constructs an ignorables matcher for the set identified by `key`.
// The symbol string passed to SymbolMatcher is a default-constructed `{}`,
// so only the UnicodeSet looked up from `key` is supplied.
IgnorablesMatcher::IgnorablesMatcher(unisets::Key key) : SymbolMatcher({}, key) {}
|
||||
|
||||
// Ignorables always report themselves as flexible.
bool IgnorablesMatcher::isFlexible() const {
    return true;
}
|
||||
|
||||
// Ignorables are never disabled, whatever the current parse state is
// (the ParsedNumber argument is intentionally unused).
bool IgnorablesMatcher::isDisabled(const ParsedNumber&) const {
    return false;
}
|
||||
|
||||
// Accepting an ignorable records nothing in the result: both arguments are unused.
void IgnorablesMatcher::accept(StringSegment&, ParsedNumber&) const {
    // Intentionally empty.
}
|
||||
|
||||
|
||||
// Constructs a minus-sign matcher from the locale's minus sign symbol and the
// shared MINUS_SIGN set.
//
// @param dfs            Symbols from which kMinusSignSymbol is read.
// @param allowTrailing  Stored in fAllowTrailing; consulted by isDisabled().
MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)
        : SymbolMatcher(
                  dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol),
                  unisets::MINUS_SIGN),
          fAllowTrailing(allowTrailing) {}
|
||||
|
||||
bool MinusSignMatcher::isDisabled(const ParsedNumber& result) const {
|
||||
return 0 != (result.flags & FLAG_NEGATIVE) ||
|
||||
(fAllowTrailing ? false : result.seenNumber());
|
||||
return 0 != (result.flags & FLAG_NEGATIVE) || (fAllowTrailing ? false : result.seenNumber());
|
||||
}
|
||||
|
||||
void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
|
||||
|
@ -92,4 +100,85 @@ void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) cons
|
|||
}
|
||||
|
||||
|
||||
// Constructs a NaN matcher from the locale's NaN symbol.
// The set key is unisets::EMPTY, so no shared symbol set is supplied for NaN.
//
// @param dfs  Symbols from which kNaNSymbol is read.
NanMatcher::NanMatcher(const DecimalFormatSymbols& dfs)
        : SymbolMatcher(
                  dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol),
                  unisets::EMPTY) {}
|
||||
|
||||
// Returns the set of code points that can start a NaN match.
//
// This override exists so the statically allocated NAN_LEAD set can be used:
// when the first code point of fString is in that set, a newly allocated copy
// of NAN_LEAD is returned. Otherwise the SymbolMatcher implementation is used.
//
// NOTE(review): the copy is created with `new`; the caller presumably takes
// ownership of the returned pointer -- confirm against the call site.
const UnicodeSet* NanMatcher::getLeadCodePoints() const {
    int firstCodePoint = fString.char32At(0);
    const UnicodeSet* nanLeadSet = unisets::get(unisets::NAN_LEAD);
    if (!nanLeadSet->contains(firstCodePoint)) {
        return SymbolMatcher::getLeadCodePoints();
    }
    return new UnicodeSet(*nanLeadSet);
}
|
||||
|
||||
bool NanMatcher::isDisabled(const ParsedNumber& result) const {
|
||||
return result.seenNumber();
|
||||
}
|
||||
|
||||
// Records a successful NaN match: sets FLAG_NAN on the result, then passes the
// segment to result.setCharsConsumed().
void NanMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.flags |= FLAG_NAN;
    result.setCharsConsumed(segment);
}
|
||||
|
||||
|
||||
// Constructs a percent matcher from the locale's percent symbol and the shared
// PERCENT_SIGN set.
//
// @param dfs  Symbols from which kPercentSymbol is read.
PercentMatcher::PercentMatcher(const DecimalFormatSymbols& dfs)
        : SymbolMatcher(
                  dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol),
                  unisets::PERCENT_SIGN) {}
|
||||
|
||||
// Post-processing step for percent: after running the base-class step, shifts
// the parsed quantity's magnitude by -2 when a percent sign was matched
// (FLAG_PERCENT set) and the quantity is not bogus.
void PercentMatcher::postProcess(ParsedNumber& result) const {
    SymbolMatcher::postProcess(result);

    const bool sawPercent = (result.flags & FLAG_PERCENT) != 0;
    if (!sawPercent || result.quantity.bogus) {
        return;
    }
    result.quantity.adjustMagnitude(-2);
}
|
||||
|
||||
bool PercentMatcher::isDisabled(const ParsedNumber& result) const {
|
||||
return 0 != (result.flags & FLAG_PERCENT);
|
||||
}
|
||||
|
||||
// Records a successful percent match: sets FLAG_PERCENT on the result, then
// passes the segment to result.setCharsConsumed().
void PercentMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.flags |= FLAG_PERCENT;
    result.setCharsConsumed(segment);
}
|
||||
|
||||
|
||||
// Constructs a permille matcher from the locale's per-mille symbol and the
// shared PERMILLE_SIGN set.
//
// @param dfs  Symbols from which kPerMillSymbol is read.
PermilleMatcher::PermilleMatcher(const DecimalFormatSymbols& dfs)
        : SymbolMatcher(
                  dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol),
                  unisets::PERMILLE_SIGN) {}
|
||||
|
||||
// Post-processing step for permille: after running the base-class step, shifts
// the parsed quantity's magnitude by -3 when a permille sign was matched
// (FLAG_PERMILLE set) and the quantity is not bogus.
void PermilleMatcher::postProcess(ParsedNumber& result) const {
    SymbolMatcher::postProcess(result);

    const bool sawPermille = (result.flags & FLAG_PERMILLE) != 0;
    if (!sawPermille || result.quantity.bogus) {
        return;
    }
    result.quantity.adjustMagnitude(-3);
}
|
||||
|
||||
bool PermilleMatcher::isDisabled(const ParsedNumber& result) const {
|
||||
return 0 != (result.flags & FLAG_PERMILLE);
|
||||
}
|
||||
|
||||
// Records a successful permille match: sets FLAG_PERMILLE on the result, then
// passes the segment to result.setCharsConsumed().
void PermilleMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.flags |= FLAG_PERMILLE;
    result.setCharsConsumed(segment);
}
|
||||
|
||||
|
||||
// Constructs a plus-sign matcher from the locale's plus sign symbol and the
// shared PLUS_SIGN set.
//
// @param dfs            Symbols from which kPlusSignSymbol is read.
// @param allowTrailing  Stored in fAllowTrailing; consulted by isDisabled().
PlusSignMatcher::PlusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)
        : SymbolMatcher(
                  dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol),
                  unisets::PLUS_SIGN),
          fAllowTrailing(allowTrailing) {}
|
||||
|
||||
bool PlusSignMatcher::isDisabled(const ParsedNumber& result) const {
|
||||
return fAllowTrailing ? false : result.seenNumber();
|
||||
}
|
||||
|
||||
// Records a successful plus-sign match. No flag is set for a plus sign; the
// segment is only passed to result.setCharsConsumed().
void PlusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.setCharsConsumed(segment);
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -17,8 +17,6 @@ namespace impl {
|
|||
|
||||
class SymbolMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
~SymbolMatcher() override;
|
||||
|
||||
const UnicodeSet* getSet();
|
||||
|
||||
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
@ -31,13 +29,25 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory {
|
|||
|
||||
protected:
|
||||
UnicodeString fString;
|
||||
const UnicodeSet* fUniSet;
|
||||
bool fOwnsUniSet;
|
||||
const UnicodeSet* fUniSet; // a reference from numparse_unisets.h; never owned
|
||||
|
||||
SymbolMatcher(const UnicodeString& symbolString, unisets::Key key);
|
||||
};
|
||||
|
||||
|
||||
class IgnorablesMatcher : public SymbolMatcher {
|
||||
public:
|
||||
explicit IgnorablesMatcher(unisets::Key key);
|
||||
|
||||
bool isFlexible() const override;
|
||||
|
||||
protected:
|
||||
bool isDisabled(const ParsedNumber& result) const override;
|
||||
|
||||
void accept(StringSegment& segment, ParsedNumber& result) const override;
|
||||
};
|
||||
|
||||
|
||||
class MinusSignMatcher : public SymbolMatcher {
|
||||
public:
|
||||
MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);
|
||||
|
@ -52,6 +62,59 @@ class MinusSignMatcher : public SymbolMatcher {
|
|||
};
|
||||
|
||||
|
||||
class NanMatcher : public SymbolMatcher {
|
||||
public:
|
||||
explicit NanMatcher(const DecimalFormatSymbols& dfs);
|
||||
|
||||
const UnicodeSet* getLeadCodePoints() const override;
|
||||
|
||||
protected:
|
||||
bool isDisabled(const ParsedNumber& result) const override;
|
||||
|
||||
void accept(StringSegment& segment, ParsedNumber& result) const override;
|
||||
};
|
||||
|
||||
|
||||
class PercentMatcher : public SymbolMatcher {
|
||||
public:
|
||||
explicit PercentMatcher(const DecimalFormatSymbols& dfs);
|
||||
|
||||
void postProcess(ParsedNumber& result) const override;
|
||||
|
||||
protected:
|
||||
bool isDisabled(const ParsedNumber& result) const override;
|
||||
|
||||
void accept(StringSegment& segment, ParsedNumber& result) const override;
|
||||
};
|
||||
|
||||
|
||||
class PermilleMatcher : public SymbolMatcher {
|
||||
public:
|
||||
explicit PermilleMatcher(const DecimalFormatSymbols& dfs);
|
||||
|
||||
void postProcess(ParsedNumber& result) const override;
|
||||
|
||||
protected:
|
||||
bool isDisabled(const ParsedNumber& result) const override;
|
||||
|
||||
void accept(StringSegment& segment, ParsedNumber& result) const override;
|
||||
};
|
||||
|
||||
|
||||
class PlusSignMatcher : public SymbolMatcher {
|
||||
public:
|
||||
PlusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);
|
||||
|
||||
protected:
|
||||
bool isDisabled(const ParsedNumber& result) const override;
|
||||
|
||||
void accept(StringSegment& segment, ParsedNumber& result) const override;
|
||||
|
||||
private:
|
||||
bool fAllowTrailing;
|
||||
};
|
||||
|
||||
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -58,6 +58,8 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode &status) {
|
|||
ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUnitSets);
|
||||
#define NEW_UNISET(pattern, status) new UnicodeSet(UnicodeString(pattern), status)
|
||||
|
||||
gUnicodeSets[EMPTY] = new UnicodeSet();
|
||||
|
||||
// BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
|
||||
gUnicodeSets[BIDI] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status);
|
||||
|
||||
|
|
|
@ -15,6 +15,8 @@ namespace impl {
|
|||
namespace unisets {
|
||||
|
||||
enum Key {
|
||||
EMPTY,
|
||||
|
||||
// Ignorables
|
||||
BIDI,
|
||||
WHITESPACE,
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <cmath>
|
||||
|
||||
#include "unicode/ctest.h" // for str_timeDelta
|
||||
#include "unicode/curramt.h"
|
||||
|
@ -1998,7 +1999,8 @@ UBool IntlTest::assertEquals(const char* message,
|
|||
UBool IntlTest::assertEquals(const char* message,
|
||||
double expected,
|
||||
double actual) {
|
||||
if (expected != actual) {
|
||||
bool bothNaN = std::isnan(expected) && std::isnan(actual);
|
||||
if (expected != actual && !bothNaN) {
|
||||
errln((UnicodeString)"FAIL: " + message + "; got " +
|
||||
actual +
|
||||
"; expected " + expected);
|
||||
|
|
|
@ -41,6 +41,9 @@ void NumberParserTest::testBasic() {
|
|||
{3, u"𝟱𝟭𝟰𝟮𝟯x", u"0", 10, 51423.},
|
||||
{3, u" 𝟱𝟭𝟰𝟮𝟯", u"0", 11, 51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯 ", u"0", 10, 51423.},
|
||||
{7, u"51,423", u"#,##,##0", 6, 51423.},
|
||||
{7, u" 51,423", u"#,##,##0", 7, 51423.},
|
||||
{7, u"51,423 ", u"#,##,##0", 6, 51423.},
|
||||
{7, u"𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 11, 51423.},
|
||||
{7, u"𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 19, 78951423.},
|
||||
{7, u"𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", u"#,##,##0", 18, 78951.423},
|
||||
|
@ -48,8 +51,16 @@ void NumberParserTest::testBasic() {
|
|||
{7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", u"#,##,##0", 18, 78000.},
|
||||
{7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 18, 78000.023},
|
||||
{7, u"𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 11, 78.},
|
||||
{3, u"-𝟱𝟭𝟰𝟮𝟯", u"0", 11, -51423.},
|
||||
{3, u"-𝟱𝟭𝟰𝟮𝟯-", u"0", 11, -51423.},
|
||||
{3, u"-51423", u"0", 6, -51423.},
|
||||
{3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
|
||||
{3, u"+51423", u"0", 6, 51423.},
|
||||
{3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
|
||||
{3, u"%51423", u"0", 6, 514.23},
|
||||
{3, u"51423%", u"0", 6, 514.23},
|
||||
{3, u"51423%%", u"0", 6, 514.23},
|
||||
{3, u"‰51423", u"0", 6, 51.423},
|
||||
{3, u"51423‰", u"0", 6, 51.423},
|
||||
{3, u"51423‰‰", u"0", 6, 51.423},
|
||||
// {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
|
||||
// {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
|
||||
// {3, u"514.23 USD", u"¤0", 10, 514.23},
|
||||
|
@ -77,12 +88,11 @@ void NumberParserTest::testBasic() {
|
|||
// {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
|
||||
// {3, u"📺1.23", u"📺0;📻0", 6, 1.23},
|
||||
// {3, u"📻1.23", u"📺0;📻0", 6, -1.23},
|
||||
// {3, u".00", u"0", 3, 0.0},
|
||||
// {3, u" 0", u"a0", 31, 0.0}, // should not hang
|
||||
// {3, u"NaN", u"0", 3, NAN},
|
||||
// {3, u"NaN E5", u"0", 3, NAN},
|
||||
// {3, u"0", u"0", 1, 0.0}
|
||||
};
|
||||
{3, u".00", u"0", 3, 0.0},
|
||||
{3, u" 1,234", u"a0", 35, 1234.}, // should not hang
|
||||
{3, u"NaN", u"0", 3, NAN},
|
||||
{3, u"NaN E5", u"0", 3, NAN},
|
||||
{3, u"0", u"0", 1, 0.0}};
|
||||
|
||||
parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
|
||||
for (auto cas : cases) {
|
||||
|
|
|
@ -88,6 +88,9 @@ public class NumberParserImpl {
|
|||
parser.addMatcher(ignorables);
|
||||
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
|
||||
parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
|
||||
parser.addMatcher(PercentMatcher.getInstance(symbols));
|
||||
parser.addMatcher(PermilleMatcher.getInstance(symbols));
|
||||
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
|
||||
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
|
||||
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
|
||||
|
|
|
@ -47,6 +47,9 @@ public class NumberParserTest {
|
|||
{ 3, "𝟱𝟭𝟰𝟮𝟯x", "0", 10, 51423. },
|
||||
{ 3, " 𝟱𝟭𝟰𝟮𝟯", "0", 11, 51423. },
|
||||
{ 3, "𝟱𝟭𝟰𝟮𝟯 ", "0", 10, 51423. },
|
||||
{ 7, "51,423", "#,##,##0", 6, 51423. },
|
||||
{ 7, " 51,423", "#,##,##0", 7, 51423. },
|
||||
{ 7, "51,423 ", "#,##,##0", 6, 51423. },
|
||||
{ 7, "𝟱𝟭,𝟰𝟮𝟯", "#,##,##0", 11, 51423. },
|
||||
{ 7, "𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", "#,##,##0", 19, 78951423. },
|
||||
{ 7, "𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", "#,##,##0", 18, 78951.423 },
|
||||
|
@ -54,8 +57,16 @@ public class NumberParserTest {
|
|||
{ 7, "𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", "#,##,##0", 18, 78000. },
|
||||
{ 7, "𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", "#,##,##0", 18, 78000.023 },
|
||||
{ 7, "𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", "#,##,##0", 11, 78. },
|
||||
{ 3, "-𝟱𝟭𝟰𝟮𝟯", "0", 11, -51423. },
|
||||
{ 3, "-𝟱𝟭𝟰𝟮𝟯-", "0", 11, -51423. },
|
||||
{ 3, "-51423", "0", 6, -51423. },
|
||||
{ 3, "51423-", "0", 5, 51423. }, // plus and minus sign by default do NOT match after
|
||||
{ 3, "+51423", "0", 6, 51423. },
|
||||
{ 3, "51423+", "0", 5, 51423. }, // plus and minus sign by default do NOT match after
|
||||
{ 3, "%51423", "0", 6, 514.23 },
|
||||
{ 3, "51423%", "0", 6, 514.23 },
|
||||
{ 3, "51423%%", "0", 6, 514.23 },
|
||||
{ 3, "‰51423", "0", 6, 51.423 },
|
||||
{ 3, "51423‰", "0", 6, 51.423 },
|
||||
{ 3, "51423‰‰", "0", 6, 51.423 },
|
||||
{ 3, "a51423US dollars", "a0¤¤¤", 16, 51423. },
|
||||
{ 3, "a 51423 US dollars", "a0¤¤¤", 18, 51423. },
|
||||
{ 3, "514.23 USD", "¤0", 10, 514.23 },
|
||||
|
@ -84,7 +95,7 @@ public class NumberParserTest {
|
|||
{ 3, "📺1.23", "📺0;📻0", 6, 1.23 },
|
||||
{ 3, "📻1.23", "📺0;📻0", 6, -1.23 },
|
||||
{ 3, ".00", "0", 3, 0.0 },
|
||||
{ 3, " 0", "a0", 31, 0.0 }, // should not hang
|
||||
{ 3, " 1,234", "a0", 35, 1234. }, // should not hang
|
||||
{ 3, "NaN", "0", 3, Double.NaN },
|
||||
{ 3, "NaN E5", "0", 3, Double.NaN },
|
||||
{ 3, "0", "0", 1, 0.0 } };
|
||||
|
|
Loading…
Add table
Reference in a new issue