mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-13574 Adding initial AffixPatternMatcher to ICU4C. Not completely safe yet. Still needs work.
X-SVN-Rev: 40891
This commit is contained in:
parent
513f123a8c
commit
e7a42e17f6
17 changed files with 401 additions and 37 deletions
|
@ -239,6 +239,20 @@ UnicodeString AffixUtils::replaceType(const CharSequence &affixPattern, AffixPat
|
|||
return output;
|
||||
}
|
||||
|
||||
void AffixUtils::iterateWithConsumer(const CharSequence& affixPattern, TokenConsumer& consumer,
|
||||
UErrorCode& status) {
|
||||
if (affixPattern.length() == 0) {
|
||||
return;
|
||||
};
|
||||
AffixTag tag;
|
||||
while (hasNext(tag, affixPattern)) {
|
||||
tag = nextToken(tag, affixPattern, status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
consumer.consumeToken(tag.type, tag.codePoint, status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
}
|
||||
}
|
||||
|
||||
AffixTag AffixUtils::nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status) {
|
||||
int32_t offset = tag.offset;
|
||||
int32_t state = tag.state;
|
||||
|
|
|
@ -46,6 +46,11 @@ struct AffixTag {
|
|||
{}
|
||||
};
|
||||
|
||||
class TokenConsumer {
|
||||
public:
|
||||
virtual void consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) = 0;
|
||||
};
|
||||
|
||||
// Exported as U_I18N_API because it is a base class for other exported types
|
||||
class U_I18N_API SymbolProvider {
|
||||
public:
|
||||
|
@ -180,6 +185,12 @@ class U_I18N_API AffixUtils {
|
|||
replaceType(const CharSequence &affixPattern, AffixPatternType type, char16_t replacementChar,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Iterates over the affix pattern, calling the TokenConsumer for each token.
|
||||
*/
|
||||
static void iterateWithConsumer(const CharSequence& affixPattern, TokenConsumer& consumer,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Returns the next token from the affix pattern.
|
||||
*
|
||||
|
|
|
@ -7,14 +7,153 @@
|
|||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_affixes.h"
|
||||
#include "numparse_utils.h"
|
||||
#include "number_utils.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
using namespace icu::number;
|
||||
using namespace icu::number::impl;
|
||||
|
||||
|
||||
AffixPatternMatcherBuilder::AffixPatternMatcherBuilder(const UnicodeString& pattern,
|
||||
AffixTokenMatcherFactory& factory,
|
||||
IgnorablesMatcher* ignorables)
|
||||
: fMatchersLen(0),
|
||||
fLastTypeOrCp(0),
|
||||
fCodePointMatchers(new CodePointMatcher[100]),
|
||||
fCodePointMatchersLen(0),
|
||||
fPattern(pattern),
|
||||
fFactory(factory),
|
||||
fIgnorables(ignorables) {}
|
||||
|
||||
void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) {
|
||||
// This is called by AffixUtils.iterateWithConsumer() for each token.
|
||||
|
||||
// Add an ignorables matcher between tokens except between two literals, and don't put two
|
||||
// ignorables matchers in a row.
|
||||
if (fIgnorables != nullptr && fMatchersLen > 0 &&
|
||||
(fLastTypeOrCp < 0 || !fIgnorables->getSet()->contains(fLastTypeOrCp))) {
|
||||
addMatcher(*fIgnorables);
|
||||
}
|
||||
|
||||
if (type != TYPE_CODEPOINT) {
|
||||
// Case 1: the token is a symbol.
|
||||
switch (type) {
|
||||
case TYPE_MINUS_SIGN:
|
||||
addMatcher(fFactory.minusSign = {fFactory.dfs, true});
|
||||
break;
|
||||
case TYPE_PLUS_SIGN:
|
||||
addMatcher(fFactory.plusSign = {fFactory.dfs, true});
|
||||
break;
|
||||
case TYPE_PERCENT:
|
||||
addMatcher(fFactory.percent = {fFactory.dfs});
|
||||
break;
|
||||
case TYPE_PERMILLE:
|
||||
addMatcher(fFactory.permille = {fFactory.dfs});
|
||||
break;
|
||||
case TYPE_CURRENCY_SINGLE:
|
||||
case TYPE_CURRENCY_DOUBLE:
|
||||
case TYPE_CURRENCY_TRIPLE:
|
||||
case TYPE_CURRENCY_QUAD:
|
||||
case TYPE_CURRENCY_QUINT:
|
||||
// All currency symbols use the same matcher
|
||||
addMatcher(
|
||||
fFactory.currency = {
|
||||
CurrencyNamesMatcher(
|
||||
fFactory.locale, status), CurrencyCustomMatcher(
|
||||
fFactory.currencyCode, fFactory.currency1, fFactory.currency2)});
|
||||
break;
|
||||
default:
|
||||
U_ASSERT(FALSE);
|
||||
}
|
||||
|
||||
} else if (fIgnorables != nullptr && fIgnorables->getSet()->contains(cp)) {
|
||||
// Case 2: the token is an ignorable literal.
|
||||
// No action necessary: the ignorables matcher has already been added.
|
||||
|
||||
} else {
|
||||
// Case 3: the token is a non-ignorable literal.
|
||||
// TODO: This is really clunky. Just trying to get something that works.
|
||||
fCodePointMatchers[fCodePointMatchersLen] = {cp};
|
||||
addMatcher(fCodePointMatchers[fCodePointMatchersLen]);
|
||||
fCodePointMatchersLen++;
|
||||
}
|
||||
fLastTypeOrCp = type != TYPE_CODEPOINT ? type : cp;
|
||||
}
|
||||
|
||||
// Appends a pointer to `matcher` to the pending matcher list, doubling the array's capacity
// first when it is full. The matcher itself is not copied, only its address is stored.
// NOTE(review): the result of resize() is not inspected here — confirm how an allocation
// failure inside resize() is meant to be handled.
void AffixPatternMatcherBuilder::addMatcher(NumberParseMatcher& matcher) {
    const bool arrayIsFull = fMatchersLen >= fMatchers.getCapacity();
    if (arrayIsFull) {
        fMatchers.resize(fMatchersLen * 2, fMatchersLen);
    }
    fMatchers[fMatchersLen] = &matcher;
    fMatchersLen++;
}
|
||||
|
||||
// Produces the finished AffixPatternMatcher. The code point matcher array is released from
// this builder (orphan()) and handed to the new matcher, which is why build() may only be
// called once per builder.
AffixPatternMatcher AffixPatternMatcherBuilder::build() {
    CodePointMatcher* releasedCodePointMatchers = fCodePointMatchers.orphan();
    return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern, releasedCodePointMatchers);
}
|
||||
|
||||
|
||||
// Records the caller's inputs. currency1, currency2 and dfs are bound to reference members,
// so the arguments must outlive the factory; locale is copied; the currency code is copied
// into the factory's own fixed-size buffer.
AffixTokenMatcherFactory::AffixTokenMatcherFactory(const UChar* currencyCode,
                                                   const UnicodeString& currency1,
                                                   const UnicodeString& currency2,
                                                   const DecimalFormatSymbols& dfs,
                                                   IgnorablesMatcher* ignorables, const Locale& locale)
        : currency1(currency1),
          currency2(currency2),
          dfs(dfs),
          ignorables(ignorables),
          locale(locale) {
    UChar* const ownCurrencyCode = this->currencyCode;
    utils::copyCurrencyCode(ownCurrencyCode, currencyCode);
}
|
||||
|
||||
|
||||
// Creates a matcher for the single code point `cp`.
CodePointMatcher::CodePointMatcher(UChar32 cp) : fCp(cp) {
}
|
||||
|
||||
// If the segment currently matches this matcher's code point, advances the segment past that
// code point and records the consumed characters in `result`. The error code is unused.
// Returns false in both the matching and the non-matching case.
// NOTE(review): presumably `false` tells the caller that more input could not change the
// outcome — confirm against the NumberParseMatcher::match contract.
bool CodePointMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
    const bool atCodePoint = segment.matches(fCp);
    if (!atCodePoint) {
        return false;
    }
    segment.adjustOffsetByCodePoint();
    result.setCharsConsumed(segment);
    return false;
}
|
||||
|
||||
// Returns the set containing just this matcher's code point. The set is created and frozen
// on the first call, stored in fLocalLeadCodePoints, and reused on later calls.
const UnicodeSet& CodePointMatcher::getLeadCodePoints() {
    if (!fLocalLeadCodePoints.isNull()) {
        return *fLocalLeadCodePoints;
    }
    auto* singleCodePointSet = new UnicodeSet();
    singleCodePointSet->add(fCp);
    singleCodePointSet->freeze();
    fLocalLeadCodePoints.adoptInstead(singleCodePointSet);
    return *fLocalLeadCodePoints;
}
|
||||
|
||||
|
||||
/**
 * Builds an AffixPatternMatcher for the given affix pattern.
 *
 * @param affixPattern The affix pattern to tokenize.
 * @param factory      Source of symbol/currency data and storage for symbol matchers.
 * @param parseFlags   If PARSE_FLAG_EXACT_AFFIX is set, no ignorables matchers are inserted
 *                     between tokens; otherwise factory.ignorables is used.
 * @param success      Output: set to true only when a usable matcher is returned. It is set
 *                     to false when the pattern is empty or when tokenizing/building failed.
 * @param status       In/out error code.
 * @return The matcher, or a default-constructed (unusable) matcher when *success is false.
 */
AffixPatternMatcher
AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern, AffixTokenMatcherFactory& factory,
                                      parse_flags_t parseFlags, bool* success, UErrorCode& status) {
    if (affixPattern.isEmpty()) {
        *success = false;
        return {};
    }

    IgnorablesMatcher* ignorables;
    if (0 != (parseFlags & PARSE_FLAG_EXACT_AFFIX)) {
        ignorables = nullptr;
    } else {
        ignorables = factory.ignorables;
    }

    AffixPatternMatcherBuilder builder(affixPattern, factory, ignorables);
    AffixUtils::iterateWithConsumer(UnicodeStringCharSequence(affixPattern), builder, status);
    if (U_FAILURE(status)) {
        // Do not hand back a partially built matcher; the builder's destructor releases
        // whatever was allocated so far.
        *success = false;
        return {};
    }

    *success = true;
    return builder.build();
}
|
||||
|
||||
// Assembles a matcher from the builder's output: the matcher array and its length go to the
// ArraySeriesMatcher base, the pattern string is copied, and the raw codePointMatchers array
// is adopted by the fCodePointMatchers LocalArray.
AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen,
                                         const UnicodeString& pattern, CodePointMatcher* codePointMatchers)
        : ArraySeriesMatcher(matchers, matchersLen),
          fPattern(pattern),
          fCodePointMatchers(codePointMatchers) {}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -8,13 +8,104 @@
|
|||
#define __NUMPARSE_AFFIXES_H__
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_symbols.h"
|
||||
#include "numparse_currency.h"
|
||||
#include "number_affixutils.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
namespace numparse {
|
||||
U_NAMESPACE_BEGIN namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
// Forward-declaration of implementation classes for friending
|
||||
class AffixPatternMatcherBuilder;
|
||||
class AffixPatternMatcher;
|
||||
|
||||
// Bundles the inputs needed to create matchers for affix-pattern tokens (currency code and
// symbols, decimal format symbols, ignorables matcher, locale) together with storage for the
// symbol and currency matchers that AffixPatternMatcherBuilder initializes on demand.
//
// Lifetime: currency1, currency2 and dfs are stored as references, not copies. The objects
// passed to the constructor must therefore outlive the factory; passing temporaries leaves
// the members dangling.
class AffixTokenMatcherFactory {
|
||||
public:
|
||||
// Copies currencyCode and locale; keeps references to currency1, currency2 and dfs; stores
// the ignorables pointer as given.
AffixTokenMatcherFactory(const UChar* currencyCode, const UnicodeString& currency1,
|
||||
const UnicodeString& currency2, const DecimalFormatSymbols& dfs,
|
||||
IgnorablesMatcher* ignorables, const Locale& locale);
|
||||
|
||||
private:
|
||||
// Own copy of the currency code: three code units plus a terminating zero.
UChar currencyCode[4];
|
||||
// Reference to caller-owned string; see the lifetime note on the class.
const UnicodeString& currency1;
|
||||
// Reference to caller-owned string; see the lifetime note on the class.
const UnicodeString& currency2;
|
||||
// Reference to caller-owned symbols; see the lifetime note on the class.
const DecimalFormatSymbols& dfs;
|
||||
// Not owned by the factory as far as this class shows (stored as a raw pointer).
IgnorablesMatcher* ignorables;
|
||||
// Stored by value.
const Locale locale;
|
||||
|
||||
// NOTE: These are default-constructed and should not be used until initialized.
// AffixPatternMatcherBuilder::consumeToken() assigns each one when the matching token type
// is first encountered.
|
||||
MinusSignMatcher minusSign;
|
||||
PlusSignMatcher plusSign;
|
||||
PercentMatcher percent;
|
||||
PermilleMatcher permille;
|
||||
CurrencyAnyMatcher currency;
|
||||
|
||||
// The builder and the matcher read the private members above directly.
friend class AffixPatternMatcherBuilder;
|
||||
friend class AffixPatternMatcher;
|
||||
};
|
||||
|
||||
|
||||
class CodePointMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
CodePointMatcher(UChar32 cp);
|
||||
|
||||
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
private:
|
||||
UChar32 fCp;
|
||||
};
|
||||
|
||||
|
||||
// TokenConsumer implementation that turns the tokens of an affix pattern into a list of
// matchers. Feed it tokens through consumeToken() (normally via
// AffixUtils::iterateWithConsumer()), then call build() to obtain the AffixPatternMatcher.
class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer {
|
||||
public:
|
||||
// pattern and factory are kept as references and ignorables as a raw pointer, so all three
// must remain valid while the builder is in use. ignorables may be nullptr.
AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherFactory& factory,
|
||||
IgnorablesMatcher* ignorables);
|
||||
|
||||
// Appends the matcher(s) corresponding to one token of the pattern.
void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override;
|
||||
|
||||
/** NOTE: You can build only once! build() gives up ownership of the code point matchers. */
|
||||
AffixPatternMatcher build();
|
||||
|
||||
private:
|
||||
// Pointers to the matchers collected so far; fMatchersLen is the number in use.
ArraySeriesMatcher::MatcherArray fMatchers;
|
||||
int32_t fMatchersLen;
|
||||
// Type (for symbol tokens) or code point (for literal tokens) of the previous token.
int32_t fLastTypeOrCp;
|
||||
|
||||
// Owned storage for the literal-code-point matchers referenced from fMatchers;
// fCodePointMatchersLen is the number of slots in use.
LocalArray<CodePointMatcher> fCodePointMatchers;
|
||||
int32_t fCodePointMatchersLen;
|
||||
|
||||
const UnicodeString& fPattern;
|
||||
AffixTokenMatcherFactory& fFactory;
|
||||
IgnorablesMatcher* fIgnorables;
|
||||
|
||||
// Appends the address of `matcher` to fMatchers, growing the array when needed.
void addMatcher(NumberParseMatcher& matcher);
|
||||
};
|
||||
|
||||
|
||||
// A series matcher built from an affix pattern string: one child matcher per pattern token,
// optionally separated by ignorables matchers. Instances are created through
// fromAffixPattern(); the constructors are private.
class AffixPatternMatcher : public ArraySeriesMatcher {
|
||||
public:
|
||||
// Builds a matcher for affixPattern using the data in factory. *success is set to false
// when affixPattern is empty, in which case the returned object is default-constructed and
// must not be used. Callers should also check status after the call.
static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern,
|
||||
AffixTokenMatcherFactory& factory,
|
||||
parse_flags_t parseFlags, bool* success,
|
||||
UErrorCode& status);
|
||||
|
||||
private:
|
||||
// Copy of the affix pattern this matcher was built from.
UnicodeString fPattern;
|
||||
|
||||
// We need to own the variable number of CodePointMatchers.
|
||||
LocalArray<CodePointMatcher> fCodePointMatchers;
|
||||
|
||||
AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
// Used by AffixPatternMatcherBuilder::build(). The matcher array is passed on to the
// ArraySeriesMatcher base and the codePointMatchers array is adopted by this object.
AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern,
|
||||
CodePointMatcher* codePointMatchers);
|
||||
|
||||
friend class AffixPatternMatcherBuilder;
|
||||
};
|
||||
|
||||
|
||||
} // namespace impl
|
||||
|
|
|
@ -87,8 +87,13 @@ void SeriesMatcher::postProcess(ParsedNumber& result) const {
|
|||
}
|
||||
|
||||
|
||||
ArraySeriesMatcher::ArraySeriesMatcher(NumberParseMatcher** matchers, int32_t matchersLen)
|
||||
: fMatchers(matchers), fMatchersLen(matchersLen) {}
|
||||
// Creates a series with zero matchers; fMatchers is default-constructed.
ArraySeriesMatcher::ArraySeriesMatcher() : fMatchersLen(0) {}
|
||||
|
||||
// Takes over the contents of `matchers` with std::move, so the caller's array is left in a
// moved-from state; matchersLen is the number of valid entries.
ArraySeriesMatcher::ArraySeriesMatcher(MatcherArray& matchers, int32_t matchersLen)
        : fMatchers(std::move(matchers)),
          fMatchersLen(matchersLen) {}
|
||||
|
||||
const UnicodeSet& ArraySeriesMatcher::getLeadCodePoints() {
|
||||
// SeriesMatchers are never allowed to start with a Flexible matcher.
|
||||
|
@ -96,6 +101,10 @@ const UnicodeSet& ArraySeriesMatcher::getLeadCodePoints() {
|
|||
return fMatchers[0]->getLeadCodePoints();
|
||||
}
|
||||
|
||||
// Number of matchers in the series.
int32_t ArraySeriesMatcher::length() const {
    const int32_t matcherCount = fMatchersLen;
    return matcherCount;
}
|
||||
|
||||
// Pointer to the first entry of the matcher array.
const NumberParseMatcher* const* ArraySeriesMatcher::begin() const {
    const NumberParseMatcher* const* firstEntry = fMatchers.getAlias();
    return firstEntry;
}
|
||||
|
|
|
@ -63,6 +63,8 @@ class SeriesMatcher : public CompositionMatcher {
|
|||
|
||||
void postProcess(ParsedNumber& result) const override;
|
||||
|
||||
virtual int32_t length() const = 0;
|
||||
|
||||
protected:
|
||||
// No construction except by subclasses!
|
||||
SeriesMatcher() = default;
|
||||
|
@ -76,18 +78,24 @@ class SeriesMatcher : public CompositionMatcher {
|
|||
*/
|
||||
class ArraySeriesMatcher : public SeriesMatcher {
|
||||
public:
|
||||
/** The array is adopted, but NOT the matchers inside the array. */
|
||||
ArraySeriesMatcher(NumberParseMatcher** matchers, int32_t matchersLen);
|
||||
ArraySeriesMatcher(); // WARNING: Leaves the object in an unusable state
|
||||
|
||||
typedef MaybeStackArray<NumberParseMatcher*, 3> MatcherArray;
|
||||
|
||||
/** The array is std::move'd */
|
||||
ArraySeriesMatcher(MatcherArray& matchers, int32_t matchersLen);
|
||||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
int32_t length() const override;
|
||||
|
||||
protected:
|
||||
const NumberParseMatcher* const* begin() const override;
|
||||
|
||||
const NumberParseMatcher* const* end() const override;
|
||||
|
||||
private:
|
||||
LocalArray<NumberParseMatcher*> fMatchers;
|
||||
MatcherArray fMatchers;
|
||||
int32_t fMatchersLen;
|
||||
};
|
||||
|
||||
|
|
|
@ -16,16 +16,6 @@ using namespace icu::numparse;
|
|||
using namespace icu::numparse::impl;
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
inline void copyCurrencyCode(UChar* dest, const UChar* src) {
|
||||
uprv_memcpy(dest, src, sizeof(UChar) * 3);
|
||||
dest[3] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Stores the locale's name in fLocaleName; errors from that copy are reported through status.
// NOTE(review): the -1 length presumably means "NUL-terminated" — confirm against the type
// of fLocaleName.
CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status)
        : fLocaleName(locale.getName(), -1, status) {
}
|
||||
|
||||
|
@ -80,7 +70,7 @@ const UnicodeSet& CurrencyNamesMatcher::getLeadCodePoints() {
|
|||
CurrencyCustomMatcher::CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1,
|
||||
const UnicodeString& currency2)
|
||||
: fCurrency1(currency1), fCurrency2(currency2) {
|
||||
copyCurrencyCode(fCurrencyCode, currencyCode);
|
||||
utils::copyCurrencyCode(fCurrencyCode, currencyCode);
|
||||
}
|
||||
|
||||
bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
|
||||
|
@ -90,14 +80,14 @@ bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result,
|
|||
|
||||
int overlap1 = segment.getCommonPrefixLength(fCurrency1);
|
||||
if (overlap1 == fCurrency1.length()) {
|
||||
copyCurrencyCode(result.currencyCode, fCurrencyCode);
|
||||
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
|
||||
segment.adjustOffset(overlap1);
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
|
||||
int overlap2 = segment.getCommonPrefixLength(fCurrency2);
|
||||
if (overlap2 == fCurrency2.length()) {
|
||||
copyCurrencyCode(result.currencyCode, fCurrencyCode);
|
||||
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
|
||||
segment.adjustOffset(overlap2);
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
|
@ -117,6 +107,11 @@ const UnicodeSet& CurrencyCustomMatcher::getLeadCodePoints() {
|
|||
}
|
||||
|
||||
|
||||
// Default construction: the two member matchers are default-constructed, and the matcher
// array is pointed at them (names matcher first, custom matcher second).
CurrencyAnyMatcher::CurrencyAnyMatcher() {
    fMatcherArray[1] = &fCustomMatcher;
    fMatcherArray[0] = &fNamesMatcher;
}
|
||||
|
||||
CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher,
|
||||
CurrencyCustomMatcher customMatcher)
|
||||
: fNamesMatcher(std::move(namesMatcher)), fCustomMatcher(std::move(customMatcher)) {
|
||||
|
|
|
@ -42,6 +42,8 @@ class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
|
|||
|
||||
class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
CurrencyCustomMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1,
|
||||
const UnicodeString& currency2);
|
||||
|
||||
|
@ -61,7 +63,8 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
|
|||
*/
|
||||
class CurrencyAnyMatcher : public AnyMatcher, public UMemory {
|
||||
public:
|
||||
/** Calls std::move on the two arguments. */
|
||||
CurrencyAnyMatcher(); // WARNING: Leaves the object in an unusable state
|
||||
|
||||
CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher, CurrencyCustomMatcher customMatcher);
|
||||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
|
|
@ -28,7 +28,6 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory {
|
|||
|
||||
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
||||
/** NOTE: This method is not guaranteed to be thread-safe. */
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
virtual bool isDisabled(const ParsedNumber& result) const = 0;
|
||||
|
|
|
@ -28,6 +28,11 @@ inline static void putLeadCodePoint(const UnicodeString& input, UnicodeSet* outp
|
|||
}
|
||||
}
|
||||
|
||||
// Copies a three-unit currency code from src to dest and appends a terminating zero, so dest
// must have room for at least four UChars and src must provide at least three.
inline static void copyCurrencyCode(UChar* dest, const UChar* src) {
    constexpr int32_t kCurrencyCodeLength = 3;
    uprv_memcpy(dest, src, kCurrencyCodeLength * sizeof(UChar));
    dest[kCurrencyCodeLength] = 0;
}
|
||||
|
||||
|
||||
} // namespace utils
|
||||
} // namespace impl
|
||||
|
|
|
@ -213,6 +213,7 @@ class NumberParserTest : public IntlTest {
|
|||
void testLocaleFi();
|
||||
void testSeriesMatcher();
|
||||
void testCurrencyAnyMatcher();
|
||||
void testAffixPatternMatcher();
|
||||
void testGroupingDisabled();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "unicode/testlog.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <numparse_affixes.h>
|
||||
|
||||
using icu::numparse::impl::unisets::get;
|
||||
|
||||
|
@ -22,6 +23,7 @@ void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& na
|
|||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(testBasic);
|
||||
TESTCASE_AUTO(testSeriesMatcher);
|
||||
TESTCASE_AUTO(testAffixPatternMatcher);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -165,7 +167,13 @@ void NumberParserTest::testSeriesMatcher() {
|
|||
PercentMatcher m3(symbols);
|
||||
IgnorablesMatcher m4(unisets::DEFAULT_IGNORABLES);
|
||||
|
||||
ArraySeriesMatcher series(new NumberParseMatcher* [5]{&m0, &m1, &m2, &m3, &m4}, 5);
|
||||
ArraySeriesMatcher::MatcherArray matchers(5);
|
||||
matchers[0] = &m0;
|
||||
matchers[1] = &m1;
|
||||
matchers[2] = &m2;
|
||||
matchers[3] = &m3;
|
||||
matchers[4] = &m4;
|
||||
ArraySeriesMatcher series(matchers, 5);
|
||||
|
||||
assertEquals(
|
||||
"Lead set should be equal to lead set of lead matcher",
|
||||
|
@ -203,5 +211,45 @@ void NumberParserTest::testSeriesMatcher() {
|
|||
}
|
||||
}
|
||||
|
||||
void NumberParserTest::testAffixPatternMatcher() {
|
||||
IcuTestErrorCode status(*this, "testAffixPatternMatcher");
|
||||
|
||||
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
|
||||
AffixTokenMatcherFactory factory(u"EUR", u"foo", u"bar", {"en", status}, &ignorables, "en");
|
||||
|
||||
static const struct TestCase {
|
||||
bool exactMatch;
|
||||
const char16_t* affixPattern;
|
||||
int32_t expectedMatcherLength;
|
||||
const char16_t* sampleParseableString;
|
||||
} cases[] = {{false, u"-", 1, u"-"},
|
||||
{false, u"+-%", 5, u"+-%"},
|
||||
{true, u"+-%", 3, u"+-%"},
|
||||
{false, u"ab c", 5, u"a bc"},
|
||||
{true, u"abc", 3, u"abc"},
|
||||
//{false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"}
|
||||
};
|
||||
|
||||
for (auto& cas : cases) {
|
||||
UnicodeString affixPattern(cas.affixPattern);
|
||||
UnicodeString sampleParseableString(cas.sampleParseableString);
|
||||
int parseFlags = cas.exactMatch ? PARSE_FLAG_EXACT_AFFIX : 0;
|
||||
|
||||
bool success;
|
||||
AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern(
|
||||
affixPattern, factory, parseFlags, &success, status);
|
||||
assertTrue("Creation should be successful", success);
|
||||
|
||||
// Check that the matcher has the expected number of children
|
||||
assertEquals(affixPattern + " " + cas.exactMatch, cas.expectedMatcherLength, matcher.length());
|
||||
|
||||
// Check that the matcher works on a sample string
|
||||
StringSegment segment(sampleParseableString, 0);
|
||||
ParsedNumber result;
|
||||
matcher.match(segment, result, status);
|
||||
assertEquals(affixPattern + " " + cas.exactMatch, sampleParseableString.length(), result.charEnd);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -78,7 +78,7 @@ public class AffixMatcher implements NumberParseMatcher {
|
|||
public static void createMatchers(
|
||||
AffixPatternProvider patternInfo,
|
||||
NumberParserImpl output,
|
||||
MatcherFactory factory,
|
||||
AffixTokenMatcherFactory factory,
|
||||
IgnorablesMatcher ignorables,
|
||||
int parseFlags) {
|
||||
if (!isInteresting(patternInfo, ignorables, parseFlags)) {
|
||||
|
|
|
@ -15,7 +15,7 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
|
|||
private final String affixPattern;
|
||||
|
||||
// Used during construction only:
|
||||
private MatcherFactory factory;
|
||||
private AffixTokenMatcherFactory factory;
|
||||
private IgnorablesMatcher ignorables;
|
||||
private int lastTypeOrCp;
|
||||
|
||||
|
@ -29,7 +29,7 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
|
|||
*/
|
||||
public static AffixPatternMatcher fromAffixPattern(
|
||||
String affixPattern,
|
||||
MatcherFactory factory,
|
||||
AffixTokenMatcherFactory factory,
|
||||
int parseFlags) {
|
||||
if (affixPattern.isEmpty()) {
|
||||
return null;
|
||||
|
@ -71,10 +71,10 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
|
|||
// Case 1: the token is a symbol.
|
||||
switch (typeOrCp) {
|
||||
case AffixUtils.TYPE_MINUS_SIGN:
|
||||
addMatcher(factory.minusSign(true));
|
||||
addMatcher(factory.minusSign());
|
||||
break;
|
||||
case AffixUtils.TYPE_PLUS_SIGN:
|
||||
addMatcher(factory.plusSign(true));
|
||||
addMatcher(factory.plusSign());
|
||||
break;
|
||||
case AffixUtils.TYPE_PERCENT:
|
||||
addMatcher(factory.percent());
|
||||
|
|
|
@ -11,18 +11,18 @@ import com.ibm.icu.util.ULocale;
|
|||
*
|
||||
* @author sffc
|
||||
*/
|
||||
public class MatcherFactory {
|
||||
public class AffixTokenMatcherFactory {
|
||||
public Currency currency;
|
||||
public DecimalFormatSymbols symbols;
|
||||
public IgnorablesMatcher ignorables;
|
||||
public ULocale locale;
|
||||
|
||||
public MinusSignMatcher minusSign(boolean allowTrailing) {
|
||||
return MinusSignMatcher.getInstance(symbols, allowTrailing);
|
||||
public MinusSignMatcher minusSign() {
|
||||
return MinusSignMatcher.getInstance(symbols, true);
|
||||
}
|
||||
|
||||
public PlusSignMatcher plusSign(boolean allowTrailing) {
|
||||
return PlusSignMatcher.getInstance(symbols, allowTrailing);
|
||||
public PlusSignMatcher plusSign() {
|
||||
return PlusSignMatcher.getInstance(symbols, true);
|
||||
}
|
||||
|
||||
public PercentMatcher percent() {
|
|
@ -74,7 +74,7 @@ public class NumberParserImpl {
|
|||
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
|
||||
IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
|
||||
|
||||
MatcherFactory factory = new MatcherFactory();
|
||||
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
|
||||
factory.currency = Currency.getInstance("USD");
|
||||
factory.symbols = symbols;
|
||||
factory.ignorables = ignorables;
|
||||
|
@ -195,7 +195,7 @@ public class NumberParserImpl {
|
|||
|
||||
NumberParserImpl parser = new NumberParserImpl(parseFlags);
|
||||
|
||||
MatcherFactory factory = new MatcherFactory();
|
||||
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
|
||||
factory.currency = currency;
|
||||
factory.symbols = symbols;
|
||||
factory.ignorables = ignorables;
|
||||
|
|
|
@ -9,9 +9,10 @@ import org.junit.Test;
|
|||
|
||||
import com.ibm.icu.impl.number.CustomSymbolCurrency;
|
||||
import com.ibm.icu.impl.number.DecimalFormatProperties;
|
||||
import com.ibm.icu.impl.number.parse.AffixPatternMatcher;
|
||||
import com.ibm.icu.impl.number.parse.AffixTokenMatcherFactory;
|
||||
import com.ibm.icu.impl.number.parse.AnyMatcher;
|
||||
import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
|
||||
import com.ibm.icu.impl.number.parse.MatcherFactory;
|
||||
import com.ibm.icu.impl.number.parse.MinusSignMatcher;
|
||||
import com.ibm.icu.impl.number.parse.NumberParserImpl;
|
||||
import com.ibm.icu.impl.number.parse.ParsedNumber;
|
||||
|
@ -23,6 +24,7 @@ import com.ibm.icu.impl.number.parse.StringSegment;
|
|||
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
|
||||
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
import com.ibm.icu.util.Currency;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
|
@ -227,7 +229,7 @@ public class NumberParserTest {
|
|||
|
||||
@Test
|
||||
public void testCurrencyAnyMatcher() {
|
||||
MatcherFactory factory = new MatcherFactory();
|
||||
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
|
||||
factory.locale = ULocale.ENGLISH;
|
||||
CustomSymbolCurrency currency = new CustomSymbolCurrency("ICU", "IU$", "ICU");
|
||||
factory.currency = currency;
|
||||
|
@ -257,6 +259,45 @@ public class NumberParserTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAffixPatternMatcher() {
|
||||
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
|
||||
factory.currency = Currency.getInstance("EUR");
|
||||
factory.symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
|
||||
factory.ignorables = IgnorablesMatcher.DEFAULT;
|
||||
factory.locale = ULocale.ENGLISH;
|
||||
|
||||
Object[][] cases = {
|
||||
{ false, "-", 1, "-" },
|
||||
{ false, "+-%", 5, "+-%" },
|
||||
{ true, "+-%", 3, "+-%" },
|
||||
{ false, "ab c", 5, "a bc" },
|
||||
{ true, "abc", 3, "abc" },
|
||||
{ false, "hello-to+this%very¤long‰string", 59, "hello-to+this%very USD long‰string" } };
|
||||
|
||||
for (Object[] cas : cases) {
|
||||
boolean exactMatch = (Boolean) cas[0];
|
||||
String affixPattern = (String) cas[1];
|
||||
int expectedMatcherLength = (Integer) cas[2];
|
||||
String sampleParseableString = (String) cas[3];
|
||||
int parseFlags = exactMatch ? ParsingUtils.PARSE_FLAG_EXACT_AFFIX : 0;
|
||||
|
||||
AffixPatternMatcher matcher = AffixPatternMatcher
|
||||
.fromAffixPattern(affixPattern, factory, parseFlags);
|
||||
|
||||
// Check that the matcher has the expected number of children
|
||||
assertEquals(affixPattern + " " + exactMatch, expectedMatcherLength, matcher.length());
|
||||
|
||||
// Check that the matcher works on a sample string
|
||||
StringSegment segment = new StringSegment(sampleParseableString, 0);
|
||||
ParsedNumber result = new ParsedNumber();
|
||||
matcher.match(segment, result);
|
||||
assertEquals(affixPattern + " " + exactMatch,
|
||||
sampleParseableString.length(),
|
||||
result.charEnd);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupingDisabled() {
|
||||
DecimalFormatProperties properties = new DecimalFormatProperties();
|
||||
|
|
Loading…
Add table
Reference in a new issue