ICU-13634 Changes NumberParseMatcher getLeadCodePoints() to smokeTest() in C++ and Java. The new method is more versatile and eliminates the requirement to maintain two code paths for "lead chars" and "no lead chars".

X-SVN-Rev: 41131
This commit is contained in:
Shane Carr 2018-03-21 06:30:29 +00:00
parent 8b4c367468
commit 01916cad11
40 changed files with 266 additions and 408 deletions

View file

@ -206,21 +206,15 @@ CodePointMatcher::CodePointMatcher(UChar32 cp)
: fCp(cp) {}
bool CodePointMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
if (segment.matches(fCp)) {
if (segment.startsWith(fCp)) {
segment.adjustOffsetByCodePoint();
result.setCharsConsumed(segment);
}
return false;
}
const UnicodeSet& CodePointMatcher::getLeadCodePoints() {
if (fLocalLeadCodePoints.isNull()) {
auto* leadCodePoints = new UnicodeSet();
leadCodePoints->add(fCp);
leadCodePoints->freeze();
fLocalLeadCodePoints.adoptInstead(leadCodePoints);
}
return *fLocalLeadCodePoints;
bool CodePointMatcher::smokeTest(const StringSegment& segment) const {
return segment.startsWith(fCp);
}
UnicodeString CodePointMatcher::toString() const {
@ -427,19 +421,9 @@ bool AffixMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCod
}
}
const UnicodeSet& AffixMatcher::getLeadCodePoints() {
if (fLocalLeadCodePoints.isNull()) {
auto* leadCodePoints = new UnicodeSet();
if (fPrefix != nullptr) {
leadCodePoints->addAll(fPrefix->getLeadCodePoints());
}
if (fSuffix != nullptr) {
leadCodePoints->addAll(fSuffix->getLeadCodePoints());
}
leadCodePoints->freeze();
fLocalLeadCodePoints.adoptInstead(leadCodePoints);
}
return *fLocalLeadCodePoints;
bool AffixMatcher::smokeTest(const StringSegment& segment) const {
return (fPrefix != nullptr && fPrefix->smokeTest(segment)) ||
(fSuffix != nullptr && fSuffix->smokeTest(segment));
}
void AffixMatcher::postProcess(ParsedNumber& result) const {

View file

@ -35,7 +35,7 @@ class CodePointMatcher : public NumberParseMatcher, public UMemory {
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
const UnicodeSet& getLeadCodePoints() override;
bool smokeTest(const StringSegment& segment) const override;
UnicodeString toString() const override;
@ -190,7 +190,7 @@ class AffixMatcher : public NumberParseMatcher, public UMemory {
void postProcess(ParsedNumber& result) const override;
const UnicodeSet& getLeadCodePoints() override;
bool smokeTest(const StringSegment& segment) const override;
int8_t compareTo(const AffixMatcher& rhs) const;

View file

@ -38,9 +38,19 @@ bool AnyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&
return maybeMore;
}
bool AnyMatcher::smokeTest(const StringSegment& segment) const {
// NOTE: The range-based for loop calls the virtual begin() and end() methods.
for (auto& matcher : *this) {
if (matcher->smokeTest(segment)) {
return true;
}
}
return false;
}
void AnyMatcher::postProcess(ParsedNumber& result) const {
// NOTE: The range-based for loop calls the virtual begin() and end() methods.
for (auto* matcher : *this) {
for (auto& matcher : *this) {
matcher->postProcess(result);
}
}
@ -83,6 +93,17 @@ bool SeriesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCo
return maybeMore;
}
bool SeriesMatcher::smokeTest(const StringSegment& segment) const {
// NOTE: The range-based for loop calls the virtual begin() and end() methods.
// NOTE: We only want the first element. Use the for loop for boundary checking.
for (auto& matcher : *this) {
// SeriesMatchers are never allowed to start with a Flexible matcher.
U_ASSERT(!matcher->isFlexible());
return matcher->smokeTest(segment);
}
return false;
}
void SeriesMatcher::postProcess(ParsedNumber& result) const {
// NOTE: The range-based for loop calls the virtual begin() and end() methods.
for (auto* matcher : *this) {
@ -99,12 +120,6 @@ ArraySeriesMatcher::ArraySeriesMatcher(MatcherArray& matchers, int32_t matchersL
: fMatchers(std::move(matchers)), fMatchersLen(matchersLen) {
}
const UnicodeSet& ArraySeriesMatcher::getLeadCodePoints() {
// SeriesMatchers are never allowed to start with a Flexible matcher.
U_ASSERT(!fMatchers[0]->isFlexible());
return fMatchers[0]->getLeadCodePoints();
}
int32_t ArraySeriesMatcher::length() const {
return fMatchersLen;
}

View file

@ -42,6 +42,8 @@ class AnyMatcher : public CompositionMatcher {
public:
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
bool smokeTest(const StringSegment& segment) const override;
void postProcess(ParsedNumber& result) const override;
protected:
@ -61,6 +63,8 @@ class SeriesMatcher : public CompositionMatcher {
public:
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
bool smokeTest(const StringSegment& segment) const override;
void postProcess(ParsedNumber& result) const override;
virtual int32_t length() const = 0;
@ -80,13 +84,11 @@ class ArraySeriesMatcher : public SeriesMatcher {
public:
ArraySeriesMatcher(); // WARNING: Leaves the object in an unusable state
typedef MaybeStackArray<NumberParseMatcher*, 3> MatcherArray;
typedef MaybeStackArray<const NumberParseMatcher*, 3> MatcherArray;
/** The array is std::move'd */
ArraySeriesMatcher(MatcherArray& matchers, int32_t matchersLen);
const UnicodeSet& getLeadCodePoints() override;
UnicodeString toString() const override;
int32_t length() const override;

View file

@ -21,7 +21,12 @@ using namespace icu::numparse::impl;
CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status)
: fLocaleName(locale.getName(), -1, status) {}
: fLocaleName(locale.getName(), -1, status) {
uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
// Always apply case mapping closure for currencies
fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
fLeadCodePoints.freeze();
}
bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
if (result.currencyCode[0] != 0) {
@ -57,17 +62,8 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U
return partialMatch;
}
const UnicodeSet& CurrencyNamesMatcher::getLeadCodePoints() {
if (fLocalLeadCodePoints.isNull()) {
ErrorCode status;
auto* leadCodePoints = new UnicodeSet();
uprv_currencyLeads(fLocaleName.data(), *leadCodePoints, status);
// Always apply case mapping closure for currencies
leadCodePoints->closeOver(USET_ADD_CASE_MAPPINGS);
leadCodePoints->freeze();
fLocalLeadCodePoints.adoptInstead(leadCodePoints);
}
return *fLocalLeadCodePoints;
bool CurrencyNamesMatcher::smokeTest(const StringSegment& segment) const {
return segment.startsWith(fLeadCodePoints);
}
UnicodeString CurrencyNamesMatcher::toString() const {
@ -103,15 +99,8 @@ bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result,
return overlap1 == segment.length() || overlap2 == segment.length();
}
const UnicodeSet& CurrencyCustomMatcher::getLeadCodePoints() {
if (fLocalLeadCodePoints.isNull()) {
auto* leadCodePoints = new UnicodeSet();
utils::putLeadCodePoint(fCurrency1, leadCodePoints);
utils::putLeadCodePoint(fCurrency2, leadCodePoints);
leadCodePoints->freeze();
fLocalLeadCodePoints.adoptInstead(leadCodePoints);
}
return *fLocalLeadCodePoints;
bool CurrencyCustomMatcher::smokeTest(const StringSegment& segment) const {
return segment.startsWith(fCurrency1) || segment.startsWith(fCurrency2);
}
UnicodeString CurrencyCustomMatcher::toString() const {
@ -144,17 +133,6 @@ CurrencyAnyMatcher& CurrencyAnyMatcher::operator=(CurrencyAnyMatcher&& src) U_NO
return *this;
}
const UnicodeSet& CurrencyAnyMatcher::getLeadCodePoints() {
if (fLocalLeadCodePoints.isNull()) {
auto* leadCodePoints = new UnicodeSet();
leadCodePoints->addAll(fNamesMatcher.getLeadCodePoints());
leadCodePoints->addAll(fCustomMatcher.getLeadCodePoints());
leadCodePoints->freeze();
fLocalLeadCodePoints.adoptInstead(leadCodePoints);
}
return *fLocalLeadCodePoints;
}
const NumberParseMatcher* const* CurrencyAnyMatcher::begin() const {
return fMatcherArray;
}

View file

@ -11,6 +11,7 @@
#include "numparse_compositions.h"
#include "charstr.h"
#include "number_currencysymbols.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
@ -32,7 +33,7 @@ class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
const UnicodeSet& getLeadCodePoints() override;
bool smokeTest(const StringSegment& segment) const override;
UnicodeString toString() const override;
@ -41,6 +42,7 @@ class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
// Locale has a non-trivial default constructor.
CharString fLocaleName;
UnicodeSet fLeadCodePoints;
};
@ -52,7 +54,7 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
const UnicodeSet& getLeadCodePoints() override;
bool smokeTest(const StringSegment& segment) const override;
UnicodeString toString() const override;
@ -78,8 +80,6 @@ class CurrencyAnyMatcher : public AnyMatcher, public UMemory {
CurrencyAnyMatcher& operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT;
const UnicodeSet& getLeadCodePoints() override;
UnicodeString toString() const override;
protected:

View file

@ -295,25 +295,23 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
return segment.length() == 0 || hasPartialPrefix;
}
const UnicodeSet& DecimalMatcher::getLeadCodePoints() {
bool DecimalMatcher::smokeTest(const StringSegment& segment) const {
// The common case uses a static leadSet for efficiency.
if (fLocalDigitStrings.isNull() && leadSet != nullptr) {
return *leadSet;
return segment.startsWith(*leadSet);
}
if (fLocalLeadCodePoints.isNull()) {
auto* leadCodePoints = new UnicodeSet();
// Assumption: the sets are all single code points.
leadCodePoints->addAll(*unisets::get(unisets::DIGITS));
leadCodePoints->addAll(*separatorSet);
if (!fLocalDigitStrings.isNull()) {
for (int i = 0; i < 10; i++) {
utils::putLeadCodePoint(fLocalDigitStrings[i], leadCodePoints);
}
if (segment.startsWith(*separatorSet) || u_isdigit(segment.getCodePoint())) {
return true;
}
if (fLocalDigitStrings.isNull()) {
return false;
}
for (int i = 0; i < 10; i++) {
if (segment.startsWith(fLocalDigitStrings[i])) {
return true;
}
leadCodePoints->freeze();
fLocalLeadCodePoints.adoptInstead(leadCodePoints);
}
return *fLocalLeadCodePoints;
return false;
}
UnicodeString DecimalMatcher::toString() const {

View file

@ -27,7 +27,7 @@ class DecimalMatcher : public NumberParseMatcher, public UMemory {
bool
match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, UErrorCode& status) const;
const UnicodeSet& getLeadCodePoints() override;
bool smokeTest(const StringSegment& segment) const override;
UnicodeString toString() const override;

View file

@ -34,7 +34,7 @@ NumberParserImpl*
NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& patternString,
parse_flags_t parseFlags, UErrorCode& status) {
LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags, true));
LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags));
DecimalFormatSymbols symbols(locale, status);
parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES};
@ -117,7 +117,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
}
IgnorablesMatcher ignorables(isStrict ? unisets::DEFAULT_IGNORABLES : unisets::STRICT_IGNORABLES);
LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags, status));
LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags));
//////////////////////
/// AFFIX MATCHERS ///
@ -197,52 +197,22 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
return parser.orphan();
}
NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags, bool computeLeads)
: fParseFlags(parseFlags), fComputeLeads(computeLeads) {
NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags)
: fParseFlags(parseFlags) {
}
NumberParserImpl::~NumberParserImpl() {
if (fComputeLeads) {
for (int32_t i = 0; i < fNumMatchers; i++) {
delete (fLeads[i]);
}
}
fNumMatchers = 0;
}
void NumberParserImpl::addMatcher(NumberParseMatcher& matcher) {
if (fNumMatchers + 1 > fMatchers.getCapacity()) {
fMatchers.resize(fNumMatchers * 2, fNumMatchers);
if (fComputeLeads) {
// The two arrays should grow in tandem:
U_ASSERT(fNumMatchers >= fLeads.getCapacity());
fLeads.resize(fNumMatchers * 2, fNumMatchers);
}
}
fMatchers[fNumMatchers] = &matcher;
if (fComputeLeads) {
addLeadCodePointsForMatcher(matcher);
}
fNumMatchers++;
}
void NumberParserImpl::addLeadCodePointsForMatcher(NumberParseMatcher& matcher) {
const UnicodeSet& leadCodePoints = matcher.getLeadCodePoints();
// TODO: Avoid the clone operation here.
if (0 != (fParseFlags & PARSE_FLAG_IGNORE_CASE)) {
auto* copy = dynamic_cast<UnicodeSet*>(leadCodePoints.cloneAsThawed());
copy->closeOver(USET_ADD_CASE_MAPPINGS);
copy->freeze();
fLeads[fNumMatchers] = copy;
} else {
// FIXME: new here because we still take ownership
fLeads[fNumMatchers] = new UnicodeSet(leadCodePoints);
}
}
void NumberParserImpl::freeze() {
fFrozen = true;
}
@ -276,12 +246,11 @@ void NumberParserImpl::parseGreedyRecursive(StringSegment& segment, ParsedNumber
}
int initialOffset = segment.getOffset();
int leadCp = segment.getCodePoint();
for (int32_t i = 0; i < fNumMatchers; i++) {
if (fComputeLeads && !fLeads[i]->contains(leadCp)) {
const NumberParseMatcher* matcher = fMatchers[i];
if (!matcher->smokeTest(segment)) {
continue;
}
const NumberParseMatcher* matcher = fMatchers[i];
matcher->match(segment, result, status);
if (U_FAILURE(status)) {
return;
@ -313,8 +282,10 @@ void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumbe
int initialOffset = segment.getOffset();
for (int32_t i = 0; i < fNumMatchers; i++) {
// TODO: Check leadChars here?
const NumberParseMatcher* matcher = fMatchers[i];
if (!matcher->smokeTest(segment)) {
continue;
}
// In a non-greedy parse, we attempt all possible matches and pick the best.
for (int32_t charsToConsume = 0; charsToConsume < segment.length();) {

View file

@ -32,6 +32,11 @@ class NumberParserImpl : public MutableMatcherCollection {
const number::impl::DecimalFormatProperties& properties, const DecimalFormatSymbols& symbols,
bool parseCurrency, bool optimize, UErrorCode& status);
/**
* Does NOT take ownership of the matcher. The matcher MUST remain valid for the lifespan of the
* NumberParserImpl.
* @param matcher The matcher to reference.
*/
void addMatcher(NumberParseMatcher& matcher) override;
void freeze();
@ -48,8 +53,6 @@ class NumberParserImpl : public MutableMatcherCollection {
int32_t fNumMatchers = 0;
// NOTE: The stack capacity for fMatchers and fLeads should be the same
MaybeStackArray<const NumberParseMatcher*, 10> fMatchers;
MaybeStackArray<const UnicodeSet*, 10> fLeads;
bool fComputeLeads;
bool fFrozen = false;
// WARNING: All of these matchers start in an undefined state (default-constructed).
@ -78,9 +81,7 @@ class NumberParserImpl : public MutableMatcherCollection {
RequireNumberValidator number;
} fLocalValidators;
NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);
void addLeadCodePointsForMatcher(NumberParseMatcher& matcher);
explicit NumberParserImpl(parse_flags_t parseFlags);
void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;

View file

@ -44,10 +44,10 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
// Allow a sign, and then try to match digits.
int8_t exponentSign = 1;
if (segment.matches(*unisets::get(unisets::MINUS_SIGN))) {
if (segment.startsWith(*unisets::get(unisets::MINUS_SIGN))) {
exponentSign = -1;
segment.adjustOffsetByCodePoint();
} else if (segment.matches(*unisets::get(unisets::PLUS_SIGN))) {
} else if (segment.startsWith(*unisets::get(unisets::PLUS_SIGN))) {
segment.adjustOffsetByCodePoint();
}
@ -71,20 +71,8 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
return false;
}
const UnicodeSet& ScientificMatcher::getLeadCodePoints() {
UChar32 leadCp = fExponentSeparatorString.char32At(0);
const UnicodeSet* s = unisets::get(unisets::SCIENTIFIC_LEAD);
if (s->contains(leadCp)) {
return *s;
}
if (fLocalLeadCodePoints.isNull()) {
auto* leadCodePoints = new UnicodeSet();
leadCodePoints->add(leadCp);
leadCodePoints->freeze();
fLocalLeadCodePoints.adoptInstead(leadCodePoints);
}
return *fLocalLeadCodePoints;
bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
return segment.startsWith(fExponentSeparatorString);
}
UnicodeString ScientificMatcher::toString() const {

View file

@ -25,7 +25,7 @@ class ScientificMatcher : public NumberParseMatcher, public UMemory {
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
const UnicodeSet& getLeadCodePoints() override;
bool smokeTest(const StringSegment& segment) const override;
UnicodeString toString() const override;

View file

@ -75,11 +75,11 @@ UChar32 StringSegment::getCodePoint() const {
}
}
bool StringSegment::matches(UChar32 otherCp) const {
bool StringSegment::startsWith(UChar32 otherCp) const {
return codePointsEqual(getCodePoint(), otherCp, fFoldCase);
}
bool StringSegment::matches(const UnicodeSet& uniset) const {
bool StringSegment::startsWith(const UnicodeSet& uniset) const {
// TODO: Move UnicodeSet case-folding logic here.
// TODO: Handle string matches here instead of separately.
UChar32 cp = getCodePoint();
@ -89,6 +89,15 @@ bool StringSegment::matches(const UnicodeSet& uniset) const {
return uniset.contains(cp);
}
bool StringSegment::startsWith(const UnicodeString& other) const {
    // A bogus or empty string, or an empty segment, can never share a leading code point.
    const bool otherUnusable = other.isBogus() || other.length() == 0;
    if (otherUnusable || length() == 0) {
        return false;
    }
    // Only the leading code points are compared; fFoldCase is forwarded to the comparison.
    return codePointsEqual(getCodePoint(), other.char32At(0), fFoldCase);
}
int32_t StringSegment::getCommonPrefixLength(const UnicodeString& other) {
return getPrefixLengthInternal(other, fFoldCase);
}

View file

@ -58,20 +58,8 @@ bool SymbolMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCo
return overlap == segment.length();
}
const UnicodeSet& SymbolMatcher::getLeadCodePoints() {
if (fString.isEmpty()) {
// Assumption: for sets from UnicodeSetStaticCache, uniSet == leadCodePoints.
return *fUniSet;
}
if (fLocalLeadCodePoints.isNull()) {
auto* leadCodePoints = new UnicodeSet();
utils::putLeadCodePoints(fUniSet, leadCodePoints);
utils::putLeadCodePoint(fString, leadCodePoints);
leadCodePoints->freeze();
fLocalLeadCodePoints.adoptInstead(leadCodePoints);
}
return *fLocalLeadCodePoints;
bool SymbolMatcher::smokeTest(const StringSegment& segment) const {
return segment.startsWith(*fUniSet) || segment.startsWith(fString);
}
UnicodeString SymbolMatcher::toString() const {
@ -134,17 +122,6 @@ NanMatcher::NanMatcher(const DecimalFormatSymbols& dfs)
: SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol), unisets::EMPTY) {
}
const UnicodeSet& NanMatcher::getLeadCodePoints() {
// Overriding this here to allow use of statically allocated sets
int leadCp = fString.char32At(0);
const UnicodeSet* s = unisets::get(unisets::NAN_LEAD);
if (s->contains(leadCp)) {
return *s;
}
return SymbolMatcher::getLeadCodePoints();
}
bool NanMatcher::isDisabled(const ParsedNumber& result) const {
return result.seenNumber();
}

View file

@ -28,7 +28,7 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory {
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
const UnicodeSet& getLeadCodePoints() override;
bool smokeTest(const StringSegment& segment) const override;
UnicodeString toString() const override;
@ -96,8 +96,6 @@ class NanMatcher : public SymbolMatcher {
NanMatcher(const DecimalFormatSymbols& dfs);
const UnicodeSet& getLeadCodePoints() override;
protected:
bool isDisabled(const ParsedNumber& result) const override;

View file

@ -26,7 +26,7 @@ enum ResultFlags {
FLAG_PERCENT = 0x0002,
FLAG_PERMILLE = 0x0004,
FLAG_HAS_EXPONENT = 0x0008,
FLAG_HAS_DEFAULT_CURRENCY = 0x0010,
// FLAG_HAS_DEFAULT_CURRENCY = 0x0010, // no longer used
FLAG_HAS_DECIMAL_SEPARATOR = 0x0020,
FLAG_NAN = 0x0040,
FLAG_INFINITY = 0x0080,
@ -46,6 +46,7 @@ enum ParseFlags {
PARSE_FLAG_USE_FULL_AFFIXES = 0x0100,
PARSE_FLAG_EXACT_AFFIX = 0x0200,
PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400,
PARSE_FLAG_OPTIMIZE = 0x0800,
};
@ -216,12 +217,18 @@ class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
* <p>
* This method will perform case folding if case folding is enabled for the parser.
*/
bool matches(UChar32 otherCp) const;
bool startsWith(UChar32 otherCp) const;
/**
* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
*/
bool matches(const UnicodeSet& uniset) const;
bool startsWith(const UnicodeSet& uniset) const;
/**
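* Returns true if the first code point of this StringSegment equals the first code point of the
* given UnicodeString, i.e., there is at least one code point of overlap between the two. Returns
* false if the UnicodeString is bogus or empty, or if this StringSegment is empty.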
*/
bool startsWith(const UnicodeString& other) const;
/**
* Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
@ -294,17 +301,18 @@ class NumberParseMatcher {
virtual bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const = 0;
/**
* Should return a set representing all possible chars (UTF-16 code units) that could be the first
* char that this matcher can consume. This method is only called during construction phase, and its
* return value is used to skip this matcher unless a segment begins with a char in this set. To make
* this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
* Performs a fast "smoke check" for whether or not this matcher could possibly match against the
* given string segment. The test should be as fast as possible but also as restrictive as possible.
* For example, matchers can maintain a UnicodeSet of all code points that could possibly start a
* match. Matchers should use the {@link StringSegment#startsWith} method in order to correctly
* handle case folding.
*
* The returned UnicodeSet does not need adoption and is guaranteed to be alive for as long as the
* object that returned it.
*
* This method is NOT thread-safe.
* @param segment
* The segment to check against.
* @return true if the matcher might be able to match against this segment; false if it definitely
* will not be able to match.
*/
virtual const UnicodeSet& getLeadCodePoints() = 0;
virtual bool smokeTest(const StringSegment& segment) const = 0;
/**
* Method called at the end of a parse, after all matchers have failed to consume any more chars.
@ -324,9 +332,6 @@ class NumberParseMatcher {
protected:
// No construction except by subclasses!
NumberParseMatcher() = default;
// Optional ownership of the leadCodePoints set
LocalPointer<const UnicodeSet> fLocalLeadCodePoints;
};

View file

@ -93,9 +93,6 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status);
gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
gUnicodeSets[NAN_LEAD] = new UnicodeSet(
u"[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]", status);
gUnicodeSets[SCIENTIFIC_LEAD] = new UnicodeSet(u"[Ee×·е\u0627]", status);
gUnicodeSets[CWCF] = new UnicodeSet(u"[:CWCF:]", status);
gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);

View file

@ -47,8 +47,6 @@ enum Key {
// Other
DIGITS,
NAN_LEAD,
SCIENTIFIC_LEAD,
CWCF,
// Combined Separators with Digits (for lead code points)

View file

@ -21,12 +21,12 @@ class ValidationMatcher : public NumberParseMatcher {
return false;
}
const UnicodeSet& getLeadCodePoints() U_OVERRIDE {
bool smokeTest(const StringSegment&) const U_OVERRIDE {
// No-op
return *unisets::get(unisets::EMPTY);
return false;
}
virtual void postProcess(ParsedNumber& result) const U_OVERRIDE = 0;
void postProcess(ParsedNumber& result) const U_OVERRIDE = 0;
};

View file

@ -176,10 +176,9 @@ void NumberParserTest::testSeriesMatcher() {
matchers[4] = &m4;
ArraySeriesMatcher series(matchers, 5);
assertEquals(
"Lead set should be equal to lead set of lead matcher",
*unisets::get(unisets::PLUS_SIGN),
series.getLeadCodePoints());
assertFalse("", series.smokeTest(StringSegment(u"x", false)));
assertFalse("", series.smokeTest(StringSegment(u"-", false)));
assertTrue("", series.smokeTest(StringSegment(u"+", false)));
static const struct TestCase {
const char16_t* input;

View file

@ -46,8 +46,6 @@ void UniSetsTest::testSetCoverage() {
const UnicodeSet &percent = *get(unisets::PERCENT_SIGN);
const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN);
const UnicodeSet &infinity = *get(unisets::INFINITY_KEY);
const UnicodeSet &nanLead = *get(unisets::NAN_LEAD);
const UnicodeSet &scientificLead = *get(unisets::SCIENTIFIC_LEAD);
int32_t localeCount;
const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount);
@ -66,11 +64,6 @@ void UniSetsTest::testSetCoverage() {
ASSERT_IN_SET(percent, dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol));
ASSERT_IN_SET(permille, dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol));
ASSERT_IN_SET(infinity, dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol));
ASSERT_IN_SET(nanLead, dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol).char32At(0));
ASSERT_IN_SET(nanLead,
u_foldCase(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol).char32At(0), 0));
ASSERT_IN_SET(scientificLead,
u_foldCase(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol).char32At(0), 0));
}
}

View file

@ -134,6 +134,19 @@ public class StringSegment implements CharSequence {
return uniset.contains(cp);
}
/**
 * Checks whether this StringSegment and the given CharSequence begin with the same code point,
 * i.e., whether there is at least one code point of overlap between them. The segment's foldCase
 * setting is passed through to the code point comparison. Null-safe: returns false when the
 * CharSequence is null or empty, or when this StringSegment is empty.
 */
public boolean startsWith(CharSequence other) {
    boolean otherEmpty = (other == null) || (other.length() == 0);
    if (otherEmpty || length() == 0) {
        return false;
    }
    int segmentLead = Character.codePointAt(this, 0);
    int otherLead = Character.codePointAt(other, 0);
    return codePointsEqual(segmentLead, otherLead, foldCase);
}
/**
* Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
* example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,

View file

@ -13,7 +13,6 @@ import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.AffixUtils;
import com.ibm.icu.impl.number.PatternStringUtils;
import com.ibm.icu.number.NumberFormatter.SignDisplay;
import com.ibm.icu.text.UnicodeSet;
/**
* @author sffc
@ -206,15 +205,9 @@ public class AffixMatcher implements NumberParseMatcher {
}
@Override
public UnicodeSet getLeadCodePoints() {
UnicodeSet leadCodePoints = new UnicodeSet();
if (prefix != null) {
leadCodePoints.addAll(prefix.getLeadCodePoints());
}
if (suffix != null) {
leadCodePoints.addAll(suffix.getLeadCodePoints());
}
return leadCodePoints.freeze();
public boolean smokeTest(StringSegment segment) {
return (prefix != null && prefix.smokeTest(segment))
|| (suffix != null && suffix.smokeTest(segment));
}
@Override

View file

@ -6,7 +6,6 @@ import java.util.ArrayList;
import java.util.List;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
* Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
@ -58,22 +57,18 @@ public class AnyMatcher implements NumberParseMatcher {
}
@Override
public UnicodeSet getLeadCodePoints() {
public boolean smokeTest(StringSegment segment) {
assert frozen;
if (matchers == null) {
return UnicodeSet.EMPTY;
return false;
}
if (matchers.size() == 1) {
return matchers.get(0).getLeadCodePoints();
}
UnicodeSet leadCodePoints = new UnicodeSet();
for (int i = 0; i < matchers.size(); i++) {
NumberParseMatcher matcher = matchers.get(i);
leadCodePoints.addAll(matcher.getLeadCodePoints());
if (matchers.get(i).smokeTest(segment)) {
return true;
}
}
return leadCodePoints.freeze();
return false;
}
@Override

View file

@ -3,7 +3,6 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
* Matches a single code point, performing no other logic.
@ -33,8 +32,8 @@ public class CodePointMatcher implements NumberParseMatcher {
}
@Override
public UnicodeSet getLeadCodePoints() {
return new UnicodeSet().add(cp).freeze();
public boolean smokeTest(StringSegment segment) {
return segment.startsWith(cp);
}
@Override

View file

@ -3,7 +3,6 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.ULocale;
@ -52,11 +51,8 @@ public class CurrencyCustomMatcher implements NumberParseMatcher {
}
@Override
public UnicodeSet getLeadCodePoints() {
UnicodeSet leadCodePoints = new UnicodeSet();
ParsingUtils.putLeadCodePoint(currency1, leadCodePoints);
ParsingUtils.putLeadCodePoint(currency2, leadCodePoints);
return leadCodePoints.freeze();
public boolean smokeTest(StringSegment segment) {
return segment.startsWith(currency1) || segment.startsWith(currency2);
}
@Override

View file

@ -23,6 +23,8 @@ public class CurrencyNamesMatcher implements NumberParseMatcher {
private final TextTrieMap<CurrencyStringInfo> longNameTrie;
private final TextTrieMap<CurrencyStringInfo> symbolTrie;
private final UnicodeSet leadCodePoints;
public static CurrencyNamesMatcher getInstance(ULocale locale) {
// TODO: Pre-compute some of the more popular locales?
return new CurrencyNamesMatcher(locale);
@ -33,6 +35,15 @@ public class CurrencyNamesMatcher implements NumberParseMatcher {
// case folding on long-names but not symbols.
longNameTrie = Currency.getParsingTrie(locale, Currency.LONG_NAME);
symbolTrie = Currency.getParsingTrie(locale, Currency.SYMBOL_NAME);
// Compute the full set of characters that could be the first in a currency to allow for
// efficient smoke test.
leadCodePoints = new UnicodeSet();
longNameTrie.putLeadCodePoints(leadCodePoints);
symbolTrie.putLeadCodePoints(leadCodePoints);
// Always apply case mapping closure for currencies
leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
leadCodePoints.freeze();
}
@Override
@ -55,13 +66,8 @@ public class CurrencyNamesMatcher implements NumberParseMatcher {
}
@Override
public UnicodeSet getLeadCodePoints() {
UnicodeSet leadCodePoints = new UnicodeSet();
longNameTrie.putLeadCodePoints(leadCodePoints);
symbolTrie.putLeadCodePoints(leadCodePoints);
// Always apply case mapping closure for currencies
leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
return leadCodePoints.freeze();
public boolean smokeTest(StringSegment segment) {
return segment.startsWith(leadCodePoints);
}
@Override

View file

@ -329,21 +329,23 @@ public class DecimalMatcher implements NumberParseMatcher {
}
@Override
public UnicodeSet getLeadCodePoints() {
public boolean smokeTest(StringSegment segment) {
// The common case uses a static leadSet for efficiency.
if (digitStrings == null && leadSet != null) {
return leadSet;
return segment.startsWith(leadSet);
}
UnicodeSet leadCodePoints = new UnicodeSet();
// Assumption: the sets are all single code points.
leadCodePoints.addAll(UnicodeSetStaticCache.get(Key.DIGITS));
leadCodePoints.addAll(separatorSet);
if (digitStrings != null) {
for (int i = 0; i < digitStrings.length; i++) {
ParsingUtils.putLeadCodePoint(digitStrings[i], leadCodePoints);
if (segment.startsWith(separatorSet) || UCharacter.isDigit(segment.getCodePoint())) {
return true;
}
if (digitStrings == null) {
return false;
}
for (int i = 0; i < digitStrings.length; i++) {
if (segment.startsWith(digitStrings[i])) {
return true;
}
}
return leadCodePoints.freeze();
return false;
}
@Override

View file

@ -27,18 +27,6 @@ public class NanMatcher extends SymbolMatcher {
super(symbolString, UnicodeSet.EMPTY);
}
@Override
public UnicodeSet getLeadCodePoints() {
    // Reuse the statically allocated NAN_LEAD set whenever it already covers the first
    // code point of this matcher's string; otherwise defer to the superclass computation.
    final int firstCodePoint = string.codePointAt(0);
    final UnicodeSet sharedLeads = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.NAN_LEAD);
    return sharedLeads.contains(firstCodePoint) ? sharedLeads : super.getLeadCodePoints();
}
@Override
protected boolean isDisabled(ParsedNumber result) {
return result.seenNumber();

View file

@ -3,7 +3,6 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
* The core interface implemented by all matchers used for number parsing.
@ -45,12 +44,18 @@ public interface NumberParseMatcher {
public boolean match(StringSegment segment, ParsedNumber result);
/**
* Should return a set representing all possible chars (UTF-16 code units) that could be the first
* char that this matcher can consume. This method is only called during construction phase, and its
* return value is used to skip this matcher unless a segment begins with a char in this set. To make
* this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
* Performs a fast "smoke check" for whether or not this matcher could possibly match against the
* given string segment. The test should be as fast as possible but also as restrictive as possible.
* For example, matchers can maintain a UnicodeSet of all code points that could possibly start a
* match. Matchers should use the {@link StringSegment#startsWith} method in order to correctly
* handle case folding.
*
* @param segment
* The segment to check against.
* @return true if the matcher might be able to match against this segment; false if it definitely
* will not be able to match.
*/
public UnicodeSet getLeadCodePoints();
public boolean smokeTest(StringSegment segment);
/**
* Method called at the end of a parse, after all matchers have failed to consume any more chars.

View file

@ -19,7 +19,6 @@ import com.ibm.icu.impl.number.PropertiesAffixPatternProvider;
import com.ibm.icu.impl.number.RoundingUtils;
import com.ibm.icu.number.NumberFormatter.GroupingStrategy;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.CurrencyAmount;
import com.ibm.icu.util.ULocale;
@ -250,7 +249,6 @@ public class NumberParserImpl {
private final int parseFlags;
private final List<NumberParseMatcher> matchers;
private final List<UnicodeSet> leads;
private boolean frozen;
/**
@ -261,11 +259,6 @@ public class NumberParserImpl {
*/
public NumberParserImpl(int parseFlags) {
matchers = new ArrayList<NumberParseMatcher>();
if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_OPTIMIZE)) {
leads = new ArrayList<UnicodeSet>();
} else {
leads = null;
}
this.parseFlags = parseFlags;
frozen = false;
}
@ -273,30 +266,11 @@ public class NumberParserImpl {
public void addMatcher(NumberParseMatcher matcher) {
    assert !frozen;
    this.matchers.add(matcher);
    if (leads == null) {
        // Lead-code-point filtering is disabled; nothing more to record.
        return;
    }
    addLeadCodePointsForMatcher(matcher);
}
public void addMatchers(Collection<? extends NumberParseMatcher> matchers) {
    assert !frozen;
    this.matchers.addAll(matchers);
    if (leads == null) {
        // Lead-code-point filtering is disabled; nothing more to record.
        return;
    }
    // Record one lead set per newly added matcher, in the collection's iteration order.
    for (NumberParseMatcher added : matchers) {
        addLeadCodePointsForMatcher(added);
    }
}
private void addLeadCodePointsForMatcher(NumberParseMatcher matcher) {
    UnicodeSet leadSet = matcher.getLeadCodePoints();
    assert leadSet.isFrozen();
    final boolean ignoreCase = (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) != 0;
    if (ignoreCase) {
        // Case-insensitive parsing: work on a thawed copy, close it over case mappings,
        // and freeze it again before storing.
        // TODO: Avoid the clone operation here.
        UnicodeSet caseClosed = leadSet.cloneAsThawed();
        caseClosed.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
        caseClosed.freeze();
        leadSet = caseClosed;
    }
    this.leads.add(leadSet);
}
public void freeze() {
@ -343,12 +317,11 @@ public class NumberParserImpl {
}
int initialOffset = segment.getOffset();
int leadCp = segment.getCodePoint();
for (int i = 0; i < matchers.size(); i++) {
if (leads != null && !leads.get(i).contains(leadCp)) {
NumberParseMatcher matcher = matchers.get(i);
if (!matcher.smokeTest(segment)) {
continue;
}
NumberParseMatcher matcher = matchers.get(i);
matcher.match(segment, result);
if (segment.getOffset() != initialOffset) {
// In a greedy parse, recurse on only the first match.
@ -377,8 +350,10 @@ public class NumberParserImpl {
int initialOffset = segment.getOffset();
for (int i = 0; i < matchers.size(); i++) {
// TODO: Check leadChars here?
NumberParseMatcher matcher = matchers.get(i);
if (!matcher.smokeTest(segment)) {
continue;
}
// In a non-greedy parse, we attempt all possible matches and pick the best.
for (int charsToConsume = 0; charsToConsume < segment.length();) {

View file

@ -5,7 +5,6 @@ package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.Grouper;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
/**
* @author sffc
@ -78,14 +77,8 @@ public class ScientificMatcher implements NumberParseMatcher {
}
@Override
public UnicodeSet getLeadCodePoints() {
int leadCp = exponentSeparatorString.codePointAt(0);
UnicodeSet s = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.SCIENTIFIC_LEAD);
if (s.contains(leadCp)) {
return s;
} else {
return new UnicodeSet().add(leadCp).freeze();
}
public boolean smokeTest(StringSegment segment) {
return segment.startsWith(exponentSeparatorString);
}
@Override

View file

@ -6,7 +6,6 @@ import java.util.ArrayList;
import java.util.List;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
* Composes a number of matchers, running one after another. Matches the input string only if all of the
@ -82,15 +81,15 @@ public class SeriesMatcher implements NumberParseMatcher {
}
@Override
public UnicodeSet getLeadCodePoints() {
public boolean smokeTest(StringSegment segment) {
assert frozen;
if (matchers == null) {
return UnicodeSet.EMPTY;
return false;
}
// SeriesMatchers are never allowed to start with a Flexible matcher.
assert !(matchers.get(0) instanceof NumberParseMatcher.Flexible);
return matchers.get(0).getLeadCodePoints();
return matchers.get(0).smokeTest(segment);
}
@Override

View file

@ -59,16 +59,8 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
}
@Override
public UnicodeSet getLeadCodePoints() {
if (string.isEmpty()) {
// Assumption: for sets from UnicodeSetStaticCache, uniSet == leadCodePoints.
return uniSet;
}
UnicodeSet leadCodePoints = new UnicodeSet();
ParsingUtils.putLeadCodePoints(uniSet, leadCodePoints);
ParsingUtils.putLeadCodePoint(string, leadCodePoints);
return leadCodePoints.freeze();
public boolean smokeTest(StringSegment segment) {
return segment.startsWith(uniSet) || segment.startsWith(string);
}
@Override

View file

@ -49,8 +49,6 @@ public class UnicodeSetStaticCache {
// Other
DIGITS,
NAN_LEAD,
SCIENTIFIC_LEAD,
CWCF, // TODO: Check if this is being used and remove it if not.
// Combined Separators with Digits (for lead code points)
@ -112,10 +110,6 @@ public class UnicodeSetStaticCache {
unicodeSets.put(Key.INFINITY, new UnicodeSet("[∞]").freeze());
unicodeSets.put(Key.DIGITS, new UnicodeSet("[:digit:]").freeze());
unicodeSets.put(Key.NAN_LEAD,
new UnicodeSet("[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]")
.freeze());
unicodeSets.put(Key.SCIENTIFIC_LEAD, new UnicodeSet("[Ee×·е\u0627]").freeze());
unicodeSets.put(Key.CWCF, new UnicodeSet("[:CWCF:]").freeze());
unicodeSets.put(Key.DIGITS_OR_ALL_SEPARATORS, computeUnion(Key.DIGITS, Key.ALL_SEPARATORS));

View file

@ -3,7 +3,6 @@
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
* A Matcher used only for post-process validation, not for consuming characters at runtime.
@ -16,8 +15,8 @@ public abstract class ValidationMatcher implements NumberParseMatcher {
}
@Override
public UnicodeSet getLeadCodePoints() {
return UnicodeSet.EMPTY;
public boolean smokeTest(StringSegment segment) {
return false;
}
}

View file

@ -2,6 +2,8 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.dev.test.number;
import static com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.get;
import java.math.BigDecimal;
import java.util.Random;
@ -10,8 +12,12 @@ import org.junit.Test;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.number.NumberFormatter;
import com.ibm.icu.number.Rounder;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
/**
@ -27,6 +33,60 @@ public class ExhaustiveNumberTest extends TestFmwk {
org.junit.Assume.assumeTrue(getExhaustiveness() > 5);
}
@Test
public void testSetCoverage() {
    // The lenient comma/period sets are expected to contain their strict counterparts;
    // that relationship also keeps the coverage logic below cheaper.
    boolean commaCoversStrict = get(Key.COMMA).containsAll(get(Key.STRICT_COMMA));
    assertTrue("COMMA should be superset of STRICT_COMMA", commaCoversStrict);
    boolean periodCoversStrict = get(Key.PERIOD).containsAll(get(Key.STRICT_PERIOD));
    assertTrue("PERIOD should be superset of STRICT_PERIOD", periodCoversStrict);

    // Decimal separators: union of the strict comma and strict period sets.
    UnicodeSet decimalSet = get(Key.STRICT_COMMA).cloneAsThawed();
    decimalSet.addAll(get(Key.STRICT_PERIOD));
    decimalSet.freeze();

    // Grouping separators: the decimal separators plus the other grouping separators.
    UnicodeSet groupingSet = decimalSet.cloneAsThawed();
    groupingSet.addAll(get(Key.OTHER_GROUPING_SEPARATORS));
    groupingSet.freeze();

    UnicodeSet plusSet = get(Key.PLUS_SIGN);
    UnicodeSet minusSet = get(Key.MINUS_SIGN);
    UnicodeSet percentSet = get(Key.PERCENT_SIGN);
    UnicodeSet permilleSet = get(Key.PERMILLE_SIGN);
    UnicodeSet infinitySet = get(Key.INFINITY);

    // Every available locale's symbols must be found in the matching static set.
    for (ULocale loc : ULocale.getAvailableLocales()) {
        DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(loc);

        assertInSet(loc, decimalSet, symbols.getDecimalSeparatorString());
        assertInSet(loc, groupingSet, symbols.getGroupingSeparatorString());
        assertInSet(loc, plusSet, symbols.getPlusSignString());
        assertInSet(loc, minusSet, symbols.getMinusSignString());
        assertInSet(loc, percentSet, symbols.getPercentString());
        assertInSet(loc, permilleSet, symbols.getPerMillString());
        assertInSet(loc, infinitySet, symbols.getInfinity());
    }
}
static void assertInSet(ULocale locale, UnicodeSet set, String str) {
    // Only strings made of exactly one code point are checked; locale strings with more
    // than one code point (usually a bidi mark) are ignored.
    final boolean singleCodePoint = str.codePointCount(0, str.length()) == 1;
    if (singleCodePoint) {
        assertInSet(locale, set, str.codePointAt(0));
    }
}
static void assertInSet(ULocale locale, UnicodeSet set, int cp) {
    // If this test case fails, add the specified code point to the corresponding set in
    // UnicodeSetStaticCache.java and numparse_unisets.cpp
    String hex = Integer.toHexString(cp);
    String glyph = UCharacter.toString(cp);
    String message = locale + " U+" + hex + " (" + glyph + ") should be in " + set;
    boolean present = set.contains(cp);
    assertTrue(message, present);
}
@Test
public void unlimitedRoundingBigDecimal() {
BigDecimal ten10000 = BigDecimal.valueOf(10).pow(10000);

View file

@ -3,6 +3,7 @@
package com.ibm.icu.dev.test.number;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
@ -21,8 +22,6 @@ import com.ibm.icu.impl.number.parse.ParsingUtils;
import com.ibm.icu.impl.number.parse.PercentMatcher;
import com.ibm.icu.impl.number.parse.PlusSignMatcher;
import com.ibm.icu.impl.number.parse.SeriesMatcher;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.ULocale;
@ -196,7 +195,9 @@ public class NumberParserTest {
series.addMatcher(IgnorablesMatcher.DEFAULT);
series.freeze();
assertEquals(UnicodeSetStaticCache.get(Key.PLUS_SIGN), series.getLeadCodePoints());
assertFalse(series.smokeTest(new StringSegment("x", false)));
assertFalse(series.smokeTest(new StringSegment("-", false)));
assertTrue(series.smokeTest(new StringSegment("+", false)));
Object[][] cases = new Object[][] {
{ "", 0, true },

View file

@ -30,9 +30,9 @@ import org.junit.Test;
import com.ibm.icu.dev.test.serializable.SerializableTestUtility;
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.DecimalFormatProperties.ParseMode;
import com.ibm.icu.impl.number.Padder.PadPosition;
import com.ibm.icu.impl.number.PatternStringParser;
import com.ibm.icu.impl.number.parse.NumberParserImpl.ParseMode;
import com.ibm.icu.text.CompactDecimalFormat.CompactStyle;
import com.ibm.icu.text.CurrencyPluralInfo;
import com.ibm.icu.text.MeasureFormat.FormatWidth;

View file

@ -8,82 +8,17 @@ import static org.junit.Assert.assertTrue;
import org.junit.Test;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
/**
* This test class is thin; most of it was moved to ExhaustiveNumberTest.
* @author sffc
*
*/
public class UnicodeSetStaticCacheTest {

    /**
     * Verifies that the static sets cover the number symbols of every available locale.
     */
    @Test
    public void testSetCoverage() {
        // The lenient comma/period sets are expected to contain their strict counterparts;
        // that relationship also keeps the coverage logic below cheaper.
        boolean commaCoversStrict = get(Key.COMMA).containsAll(get(Key.STRICT_COMMA));
        assertTrue("COMMA should be superset of STRICT_COMMA", commaCoversStrict);
        boolean periodCoversStrict = get(Key.PERIOD).containsAll(get(Key.STRICT_PERIOD));
        assertTrue("PERIOD should be superset of STRICT_PERIOD", periodCoversStrict);

        // Decimal separators: union of the strict comma and strict period sets.
        UnicodeSet decimalSet = get(Key.STRICT_COMMA).cloneAsThawed();
        decimalSet.addAll(get(Key.STRICT_PERIOD));
        decimalSet.freeze();

        // Grouping separators: the decimal separators plus the other grouping separators.
        UnicodeSet groupingSet = decimalSet.cloneAsThawed();
        groupingSet.addAll(get(Key.OTHER_GROUPING_SEPARATORS));
        groupingSet.freeze();

        UnicodeSet plusSet = get(Key.PLUS_SIGN);
        UnicodeSet minusSet = get(Key.MINUS_SIGN);
        UnicodeSet percentSet = get(Key.PERCENT_SIGN);
        UnicodeSet permilleSet = get(Key.PERMILLE_SIGN);
        UnicodeSet infinitySet = get(Key.INFINITY);
        UnicodeSet nanLeadSet = get(Key.NAN_LEAD);
        UnicodeSet scientificLeadSet = get(Key.SCIENTIFIC_LEAD);

        for (ULocale loc : ULocale.getAvailableLocales()) {
            DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(loc);

            assertInSet(loc, decimalSet, symbols.getDecimalSeparatorString());
            assertInSet(loc, groupingSet, symbols.getGroupingSeparatorString());
            assertInSet(loc, plusSet, symbols.getPlusSignString());
            assertInSet(loc, minusSet, symbols.getMinusSignString());
            assertInSet(loc, percentSet, symbols.getPercentString());
            assertInSet(loc, permilleSet, symbols.getPerMillString());
            assertInSet(loc, infinitySet, symbols.getInfinity());

            // For NaN, both the raw and the case-folded first code point are checked;
            // for the exponent separator, only the case-folded first code point.
            int nanFirst = symbols.getNaN().codePointAt(0);
            assertInSet(loc, nanLeadSet, nanFirst);
            int nanFoldedFirst = UCharacter.foldCase(symbols.getNaN(), true).codePointAt(0);
            assertInSet(loc, nanLeadSet, nanFoldedFirst);
            int exponentFoldedFirst =
                    UCharacter.foldCase(symbols.getExponentSeparator(), true).codePointAt(0);
            assertInSet(loc, scientificLeadSet, exponentFoldedFirst);
        }
    }

    /**
     * Verifies that the set returned for every key is frozen.
     */
    @Test
    public void testFrozen() {
        Key[] allKeys = Key.values();
        for (int i = 0; i < allKeys.length; i++) {
            assertTrue(get(allKeys[i]).isFrozen());
        }
    }

    /**
     * Checks a symbol string against a set, but only when it is exactly one code point long.
     */
    static void assertInSet(ULocale locale, UnicodeSet set, String str) {
        // Locale strings with more than one code point (usually a bidi mark) are ignored.
        final boolean singleCodePoint = str.codePointCount(0, str.length()) == 1;
        if (singleCodePoint) {
            assertInSet(locale, set, str.codePointAt(0));
        }
    }

    /**
     * Asserts that the given code point is contained in the given set.
     */
    static void assertInSet(ULocale locale, UnicodeSet set, int cp) {
        // If this test case fails, add the specified code point to the corresponding set in
        // UnicodeSetStaticCache.java and numparse_unisets.cpp
        String hex = Integer.toHexString(cp);
        String glyph = UCharacter.toString(cp);
        String message = locale + " U+" + hex + " (" + glyph + ") should be in " + set;
        boolean present = set.contains(cp);
        assertTrue(message, present);
    }
}