mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-13574 Implementing final two SymbolMatchers in ICU4C (infinity and padding).
X-SVN-Rev: 40878
This commit is contained in:
parent
fb3ff21caf
commit
b8bab89cb5
10 changed files with 105 additions and 22 deletions
|
@ -5,6 +5,9 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "number_types.h"
|
||||
#include "number_patternstring.h"
|
||||
#include "numparse_types.h"
|
||||
|
@ -52,6 +55,8 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
|
|||
parser->addMatcher(parser->fLocalMatchers.percent = {symbols});
|
||||
parser->addMatcher(parser->fLocalMatchers.permille = {symbols});
|
||||
parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
|
||||
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
|
||||
parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
|
||||
// parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
// parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
|
||||
// parser.addMatcher(new RequireNumberMatcher());
|
||||
|
|
|
@ -46,8 +46,10 @@ class NumberParserImpl {
|
|||
// You must use an assignment operator on them before using.
|
||||
struct {
|
||||
IgnorablesMatcher ignorables;
|
||||
InfinityMatcher infinity;
|
||||
MinusSignMatcher minusSign;
|
||||
NanMatcher nan;
|
||||
PaddingMatcher padding;
|
||||
PercentMatcher percent;
|
||||
PermilleMatcher permille;
|
||||
PlusSignMatcher plusSign;
|
||||
|
|
|
@ -85,6 +85,20 @@ void IgnorablesMatcher::accept(StringSegment&, ParsedNumber&) const {
|
|||
}
|
||||
|
||||
|
||||
/**
 * Builds a matcher for the infinity symbol.
 *
 * The symbol string is taken from the kInfinitySymbol slot of the given DecimalFormatSymbols
 * (not the NaN slot), and unisets::INFINITY is handed to SymbolMatcher as the accompanying set.
 *
 * @param dfs The symbols from which the infinity string is read.
 */
InfinityMatcher::InfinityMatcher(const DecimalFormatSymbols& dfs)
        : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol), unisets::INFINITY) {
}
|
||||
|
||||
bool InfinityMatcher::isDisabled(const ParsedNumber& result) const {
|
||||
return 0 != (result.flags & FLAG_INFINITY);
|
||||
}
|
||||
|
||||
// Marks the result with FLAG_INFINITY, then records the consumed characters from the segment.
void InfinityMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.flags = result.flags | FLAG_INFINITY;
    result.setCharsConsumed(segment);
}
|
||||
|
||||
|
||||
MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)
|
||||
: SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol), unisets::MINUS_SIGN),
|
||||
fAllowTrailing(allowTrailing) {
|
||||
|
@ -125,6 +139,22 @@ void NanMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
|
|||
}
|
||||
|
||||
|
||||
// Builds a matcher for the given pad string; unisets::EMPTY is passed as the accompanying set.
PaddingMatcher::PaddingMatcher(const UnicodeString& padString)
        : SymbolMatcher(padString, unisets::EMPTY) {
}
|
||||
|
||||
// The padding matcher always reports itself as flexible.
bool PaddingMatcher::isFlexible() const {
    constexpr bool kAlwaysFlexible = true;
    return kAlwaysFlexible;
}
|
||||
|
||||
// The padding matcher is never disabled. The ParsedNumber argument is not consulted, so it is
// left unnamed (as IgnorablesMatcher::accept does) to avoid unused-parameter warnings.
bool PaddingMatcher::isDisabled(const ParsedNumber&) const {
    return false;
}
|
||||
|
||||
void PaddingMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
|
||||
// No-op
|
||||
}
|
||||
|
||||
|
||||
PercentMatcher::PercentMatcher(const DecimalFormatSymbols& dfs)
|
||||
: SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol), unisets::PERCENT_SIGN) {
|
||||
}
|
||||
|
|
|
@ -15,6 +15,11 @@ U_NAMESPACE_BEGIN namespace numparse {
|
|||
namespace impl {
|
||||
|
||||
|
||||
/**
|
||||
* A base class for many matchers that performs a simple match against a UnicodeString and/or UnicodeSet.
|
||||
*
|
||||
* @author sffc
|
||||
*/
|
||||
class SymbolMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
SymbolMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
@ -52,6 +57,19 @@ class IgnorablesMatcher : public SymbolMatcher {
|
|||
};
|
||||
|
||||
|
||||
/**
 * A SymbolMatcher for the infinity symbol.
 *
 * On a successful match, accept() sets FLAG_INFINITY on the ParsedNumber; isDisabled() reports
 * true once that flag is already set.
 */
class InfinityMatcher : public SymbolMatcher {
|
||||
public:
|
||||
InfinityMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
// Builds the matcher from the given symbols. Intentionally not explicit: the parser assigns
// to its local matcher from a braced initializer list.
InfinityMatcher(const DecimalFormatSymbols& dfs);
|
||||
|
||||
protected:
|
||||
// True when the result already carries FLAG_INFINITY.
bool isDisabled(const ParsedNumber& result) const override;
|
||||
|
||||
// Sets FLAG_INFINITY on the result and records the characters consumed from the segment.
void accept(StringSegment& segment, ParsedNumber& result) const override;
|
||||
};
|
||||
|
||||
|
||||
class MinusSignMatcher : public SymbolMatcher {
|
||||
public:
|
||||
MinusSignMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
@ -83,6 +101,21 @@ class NanMatcher : public SymbolMatcher {
|
|||
};
|
||||
|
||||
|
||||
/**
 * A SymbolMatcher for a pad string.
 *
 * The matcher is always flexible, never disabled, and accepting a match leaves the
 * ParsedNumber unchanged.
 */
class PaddingMatcher : public SymbolMatcher {
|
||||
public:
|
||||
PaddingMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
// Builds the matcher for the given pad string. Intentionally not explicit: the parser assigns
// to its local matcher from a braced initializer list.
PaddingMatcher(const UnicodeString& padString);
|
||||
|
||||
// Always returns true.
bool isFlexible() const override;
|
||||
|
||||
protected:
|
||||
// Always returns false.
bool isDisabled(const ParsedNumber& result) const override;
|
||||
|
||||
// No-op.
void accept(StringSegment& segment, ParsedNumber& result) const override;
|
||||
};
|
||||
|
||||
|
||||
class PercentMatcher : public SymbolMatcher {
|
||||
public:
|
||||
PercentMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
|
|
@ -5,6 +5,10 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file
|
||||
// (useful for UnicodeSet constructor)
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_unisets.h"
|
||||
#include "numparse_types.h"
|
||||
#include "umutex.h"
|
||||
|
@ -56,44 +60,42 @@ UBool U_CALLCONV cleanupNumberParseUniSets() {
|
|||
|
||||
void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
|
||||
ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
|
||||
#define NEW_UNISET(pattern, status) new UnicodeSet(UnicodeString(pattern), status)
|
||||
|
||||
gUnicodeSets[EMPTY] = new UnicodeSet();
|
||||
|
||||
// BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
|
||||
gUnicodeSets[BIDI] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status);
|
||||
gUnicodeSets[BIDI] = new UnicodeSet(u"[[\\u200E\\u200F\\u061C]]", status);
|
||||
|
||||
// This set was decided after discussion with icu-design@. See ticket #13309.
|
||||
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
|
||||
gUnicodeSets[WHITESPACE] = NEW_UNISET(u"[[:Zs:][\\u0009]]", status);
|
||||
gUnicodeSets[WHITESPACE] = new UnicodeSet(u"[[:Zs:][\\u0009]]", status);
|
||||
|
||||
gUnicodeSets[DEFAULT_IGNORABLES] = computeUnion(BIDI, WHITESPACE);
|
||||
gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(*gUnicodeSets[BIDI]);
|
||||
|
||||
// TODO: Re-generate these sets from the UCD. They probably haven't been updated in a while.
|
||||
gUnicodeSets[COMMA] = NEW_UNISET(u"[,،٫、︐︑﹐﹑,、]", status);
|
||||
gUnicodeSets[STRICT_COMMA] = NEW_UNISET(u"[,٫︐﹐,]", status);
|
||||
gUnicodeSets[PERIOD] = NEW_UNISET(u"[.․。︒﹒.。]", status);
|
||||
gUnicodeSets[STRICT_PERIOD] = NEW_UNISET(u"[.․﹒.。]", status);
|
||||
gUnicodeSets[OTHER_GROUPING_SEPARATORS] = NEW_UNISET(
|
||||
u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]",
|
||||
status);
|
||||
gUnicodeSets[COMMA] = new UnicodeSet(u"[,،٫、︐︑﹐﹑,、]", status);
|
||||
gUnicodeSets[STRICT_COMMA] = new UnicodeSet(u"[,٫︐﹐,]", status);
|
||||
gUnicodeSets[PERIOD] = new UnicodeSet(u"[.․。︒﹒.。]", status);
|
||||
gUnicodeSets[STRICT_PERIOD] = new UnicodeSet(u"[.․﹒.。]", status);
|
||||
gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet(
|
||||
u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);
|
||||
gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
|
||||
gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
|
||||
STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
|
||||
|
||||
gUnicodeSets[MINUS_SIGN] = NEW_UNISET(u"[-⁻₋−➖﹣-]", status);
|
||||
gUnicodeSets[PLUS_SIGN] = NEW_UNISET(u"[+⁺₊➕﬩﹢+]", status);
|
||||
gUnicodeSets[MINUS_SIGN] = new UnicodeSet(u"[-⁻₋−➖﹣-]", status);
|
||||
gUnicodeSets[PLUS_SIGN] = new UnicodeSet(u"[+⁺₊➕﬩﹢+]", status);
|
||||
|
||||
gUnicodeSets[PERCENT_SIGN] = NEW_UNISET(u"[%٪]", status);
|
||||
gUnicodeSets[PERMILLE_SIGN] = NEW_UNISET(u"[‰؉]", status);
|
||||
gUnicodeSets[INFINITY] = NEW_UNISET(u"[∞]", status);
|
||||
gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status);
|
||||
gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status);
|
||||
gUnicodeSets[INFINITY] = new UnicodeSet(u"[∞]", status);
|
||||
|
||||
gUnicodeSets[DIGITS] = NEW_UNISET(u"[:digit:]", status);
|
||||
gUnicodeSets[NAN_LEAD] = NEW_UNISET(u"[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]",
|
||||
status);
|
||||
gUnicodeSets[SCIENTIFIC_LEAD] = NEW_UNISET(u"[Ee×·е\u0627]", status);
|
||||
gUnicodeSets[CWCF] = NEW_UNISET(u"[:CWCF:]", status);
|
||||
gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
|
||||
gUnicodeSets[NAN_LEAD] = new UnicodeSet(
|
||||
u"[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]", status);
|
||||
gUnicodeSets[SCIENTIFIC_LEAD] = new UnicodeSet(u"[Ee×·е\u0627]", status);
|
||||
gUnicodeSets[CWCF] = new UnicodeSet(u"[:CWCF:]", status);
|
||||
|
||||
gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
|
||||
gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
|
||||
|
|
|
@ -61,6 +61,10 @@ void NumberParserTest::testBasic() {
|
|||
{3, u"‰51423", u"0", 6, 51.423},
|
||||
{3, u"51423‰", u"0", 6, 51.423},
|
||||
{3, u"51423‰‰", u"0", 6, 51.423},
|
||||
{3, u"∞", u"0", 1, INFINITY},
|
||||
{3, u"-∞", u"0", 2, -INFINITY},
|
||||
{3, u"@@@123 @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
|
||||
{3, u"@@@123@@ ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
|
||||
// {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
|
||||
// {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
|
||||
// {3, u"514.23 USD", u"¤0", 10, 514.23},
|
||||
|
|
|
@ -42,6 +42,6 @@ public class InfinityMatcher extends SymbolMatcher {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "<PercentMatcher>";
|
||||
return "<InfinityMatcher>";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,6 +92,8 @@ public class NumberParserImpl {
|
|||
parser.addMatcher(PercentMatcher.getInstance(symbols));
|
||||
parser.addMatcher(PermilleMatcher.getInstance(symbols));
|
||||
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
|
||||
parser.addMatcher(InfinityMatcher.getInstance(symbols));
|
||||
parser.addMatcher(PaddingMatcher.getInstance("@"));
|
||||
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
|
||||
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
|
||||
parser.addMatcher(new RequireNumberMatcher());
|
||||
|
|
|
@ -5,8 +5,9 @@ package com.ibm.icu.impl.number.parse;
|
|||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* @author sffc
|
||||
* A base class for many matchers that performs a simple match against a UnicodeString and/or UnicodeSet.
|
||||
*
|
||||
* @author sffc
|
||||
*/
|
||||
public abstract class SymbolMatcher implements NumberParseMatcher {
|
||||
protected final String string;
|
||||
|
|
|
@ -67,6 +67,10 @@ public class NumberParserTest {
|
|||
{ 3, "‰51423", "0", 6, 51.423 },
|
||||
{ 3, "51423‰", "0", 6, 51.423 },
|
||||
{ 3, "51423‰‰", "0", 6, 51.423 },
|
||||
{ 3, "∞", "0", 1, Double.POSITIVE_INFINITY },
|
||||
{ 3, "-∞", "0", 2, Double.NEGATIVE_INFINITY },
|
||||
{ 3, "@@@123 @@", "0", 6, 123. }, // TODO: Should padding be strong instead of weak?
|
||||
{ 3, "@@@123@@ ", "0", 6, 123. }, // TODO: Should padding be strong instead of weak?
|
||||
{ 3, "a51423US dollars", "a0¤¤¤", 16, 51423. },
|
||||
{ 3, "a 51423 US dollars", "a0¤¤¤", 18, 51423. },
|
||||
{ 3, "514.23 USD", "¤0", 10, 514.23 },
|
||||
|
|
Loading…
Add table
Reference in a new issue