From e7a42e17f624ac57754b779cbca6ccf2f87d813f Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Sat, 10 Feb 2018 10:01:46 +0000 Subject: [PATCH] ICU-13574 Adding initial AffixPatternMatcher to ICU4C. Not completely safe yet. Still needs work. X-SVN-Rev: 40891 --- icu4c/source/i18n/number_affixutils.cpp | 14 ++ icu4c/source/i18n/number_affixutils.h | 11 ++ icu4c/source/i18n/numparse_affixes.cpp | 139 ++++++++++++++++++ icu4c/source/i18n/numparse_affixes.h | 95 +++++++++++- icu4c/source/i18n/numparse_compositions.cpp | 13 +- icu4c/source/i18n/numparse_compositions.h | 14 +- icu4c/source/i18n/numparse_currency.cpp | 21 +-- icu4c/source/i18n/numparse_currency.h | 5 +- icu4c/source/i18n/numparse_symbols.h | 1 - icu4c/source/i18n/numparse_utils.h | 5 + icu4c/source/test/intltest/numbertest.h | 1 + .../source/test/intltest/numbertest_parse.cpp | 50 ++++++- .../icu/impl/number/parse/AffixMatcher.java | 2 +- .../number/parse/AffixPatternMatcher.java | 8 +- ...ory.java => AffixTokenMatcherFactory.java} | 10 +- .../impl/number/parse/NumberParserImpl.java | 4 +- .../icu/dev/test/number/NumberParserTest.java | 45 +++++- 17 files changed, 401 insertions(+), 37 deletions(-) rename icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/{MatcherFactory.java => AffixTokenMatcherFactory.java} (78%) diff --git a/icu4c/source/i18n/number_affixutils.cpp b/icu4c/source/i18n/number_affixutils.cpp index df4b267af5a..072edbb2fa6 100644 --- a/icu4c/source/i18n/number_affixutils.cpp +++ b/icu4c/source/i18n/number_affixutils.cpp @@ -239,6 +239,20 @@ UnicodeString AffixUtils::replaceType(const CharSequence &affixPattern, AffixPat return output; } +void AffixUtils::iterateWithConsumer(const CharSequence& affixPattern, TokenConsumer& consumer, + UErrorCode& status) { + if (affixPattern.length() == 0) { + return; + }; + AffixTag tag; + while (hasNext(tag, affixPattern)) { + tag = nextToken(tag, affixPattern, status); + if (U_FAILURE(status)) { return; } + consumer.consumeToken(tag.type, tag.codePoint, status); + if (U_FAILURE(status)) { return; } + } +} + AffixTag AffixUtils::nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status) { int32_t offset = tag.offset; int32_t state = tag.state; diff --git a/icu4c/source/i18n/number_affixutils.h b/icu4c/source/i18n/number_affixutils.h index fd76c99b975..665a9d84256 100644 --- a/icu4c/source/i18n/number_affixutils.h +++ b/icu4c/source/i18n/number_affixutils.h @@ -46,6 +46,11 @@ struct AffixTag { {} }; +class TokenConsumer { + public: + virtual void consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) = 0; +}; + // Exported as U_I18N_API because it is a base class for other exported types class U_I18N_API SymbolProvider { public: @@ -180,6 +185,12 @@ class U_I18N_API AffixUtils { replaceType(const CharSequence &affixPattern, AffixPatternType type, char16_t replacementChar, UErrorCode &status); + /** + * Iterates over the affix pattern, calling the TokenConsumer for each token. + */ + static void iterateWithConsumer(const CharSequence& affixPattern, TokenConsumer& consumer, + UErrorCode& status); + /** * Returns the next token from the affix pattern. * diff --git a/icu4c/source/i18n/numparse_affixes.cpp b/icu4c/source/i18n/numparse_affixes.cpp index 2ac929d37a2..84a7f751633 100644 --- a/icu4c/source/i18n/numparse_affixes.cpp +++ b/icu4c/source/i18n/numparse_affixes.cpp @@ -7,14 +7,153 @@ #include "numparse_types.h" #include "numparse_affixes.h" +#include "numparse_utils.h" +#include "number_utils.h" using namespace icu; using namespace icu::numparse; using namespace icu::numparse::impl; +using namespace icu::number; +using namespace icu::number::impl; +AffixPatternMatcherBuilder::AffixPatternMatcherBuilder(const UnicodeString& pattern, + AffixTokenMatcherFactory& factory, + IgnorablesMatcher* ignorables) + : fMatchersLen(0), + fLastTypeOrCp(0), + fCodePointMatchers(new CodePointMatcher[100]), + fCodePointMatchersLen(0), + fPattern(pattern), + fFactory(factory), + fIgnorables(ignorables) {} + +void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) { + // This is called by AffixUtils.iterateWithConsumer() for each token. + + // Add an ignorables matcher between tokens except between two literals, and don't put two + // ignorables matchers in a row. + if (fIgnorables != nullptr && fMatchersLen > 0 && + (fLastTypeOrCp < 0 || !fIgnorables->getSet()->contains(fLastTypeOrCp))) { + addMatcher(*fIgnorables); + } + + if (type != TYPE_CODEPOINT) { + // Case 1: the token is a symbol. + switch (type) { + case TYPE_MINUS_SIGN: + addMatcher(fFactory.minusSign = {fFactory.dfs, true}); + break; + case TYPE_PLUS_SIGN: + addMatcher(fFactory.plusSign = {fFactory.dfs, true}); + break; + case TYPE_PERCENT: + addMatcher(fFactory.percent = {fFactory.dfs}); + break; + case TYPE_PERMILLE: + addMatcher(fFactory.permille = {fFactory.dfs}); + break; + case TYPE_CURRENCY_SINGLE: + case TYPE_CURRENCY_DOUBLE: + case TYPE_CURRENCY_TRIPLE: + case TYPE_CURRENCY_QUAD: + case TYPE_CURRENCY_QUINT: + // All currency symbols use the same matcher + addMatcher( + fFactory.currency = { + CurrencyNamesMatcher( + fFactory.locale, status), CurrencyCustomMatcher( + fFactory.currencyCode, fFactory.currency1, fFactory.currency2)}); + break; + default: + U_ASSERT(FALSE); + } + + } else if (fIgnorables != nullptr && fIgnorables->getSet()->contains(cp)) { + // Case 2: the token is an ignorable literal. + // No action necessary: the ignorables matcher has already been added. + + } else { + // Case 3: the token is a non-ignorable literal. + // TODO: This is really clunky. Just trying to get something that works. + fCodePointMatchers[fCodePointMatchersLen] = {cp}; + addMatcher(fCodePointMatchers[fCodePointMatchersLen]); + fCodePointMatchersLen++; + } + fLastTypeOrCp = type != TYPE_CODEPOINT ? type : cp; +} + +void AffixPatternMatcherBuilder::addMatcher(NumberParseMatcher& matcher) { + if (fMatchersLen >= fMatchers.getCapacity()) { + fMatchers.resize(fMatchersLen * 2, fMatchersLen); + } + fMatchers[fMatchersLen++] = &matcher; +} + +AffixPatternMatcher AffixPatternMatcherBuilder::build() { + return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern, fCodePointMatchers.orphan()); +} +AffixTokenMatcherFactory::AffixTokenMatcherFactory(const UChar* currencyCode, + const UnicodeString& currency1, + const UnicodeString& currency2, + const DecimalFormatSymbols& dfs, + IgnorablesMatcher* ignorables, const Locale& locale) + : currency1(currency1), currency2(currency2), dfs(dfs), ignorables(ignorables), locale(locale) { + utils::copyCurrencyCode(this->currencyCode, currencyCode); +} + + +CodePointMatcher::CodePointMatcher(UChar32 cp) + : fCp(cp) {} + +bool CodePointMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const { + if (segment.matches(fCp)) { + segment.adjustOffsetByCodePoint(); + result.setCharsConsumed(segment); + } + return false; +} + +const UnicodeSet& CodePointMatcher::getLeadCodePoints() { + if (fLocalLeadCodePoints.isNull()) { + auto* leadCodePoints = new UnicodeSet(); + leadCodePoints->add(fCp); + leadCodePoints->freeze(); + fLocalLeadCodePoints.adoptInstead(leadCodePoints); + } + return *fLocalLeadCodePoints; +} + + +AffixPatternMatcher +AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern, AffixTokenMatcherFactory& factory, + parse_flags_t parseFlags, bool* success, UErrorCode& status) { + if (affixPattern.isEmpty()) { + *success = false; + return {}; + } + *success = true; + + IgnorablesMatcher* ignorables; + if (0 != (parseFlags & PARSE_FLAG_EXACT_AFFIX)) { + ignorables = nullptr; + } else { + ignorables = factory.ignorables; + } + + AffixPatternMatcherBuilder builder(affixPattern, factory, ignorables); + AffixUtils::iterateWithConsumer(UnicodeStringCharSequence(affixPattern), builder, status); + return builder.build(); +} + +AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, + const UnicodeString& pattern, CodePointMatcher* codePointMatchers) + : ArraySeriesMatcher(matchers, matchersLen), + fPattern(pattern), + fCodePointMatchers(codePointMatchers) { +} #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/numparse_affixes.h b/icu4c/source/i18n/numparse_affixes.h index 677b50cea08..460034e3fa8 100644 --- a/icu4c/source/i18n/numparse_affixes.h +++ b/icu4c/source/i18n/numparse_affixes.h @@ -8,13 +8,104 @@ #define __NUMPARSE_AFFIXES_H__ #include "numparse_types.h" +#include "numparse_symbols.h" +#include "numparse_currency.h" +#include "number_affixutils.h" -U_NAMESPACE_BEGIN -namespace numparse { +U_NAMESPACE_BEGIN namespace numparse { namespace impl { +// Forward-declaration of implementation classes for friending +class AffixPatternMatcherBuilder; +class AffixPatternMatcher; + +class AffixTokenMatcherFactory { + public: + AffixTokenMatcherFactory(const UChar* currencyCode, const UnicodeString& currency1, + const UnicodeString& currency2, const DecimalFormatSymbols& dfs, + IgnorablesMatcher* ignorables, const Locale& locale); + + private: + UChar currencyCode[4]; + const UnicodeString& currency1; + const UnicodeString& currency2; + const DecimalFormatSymbols& dfs; + IgnorablesMatcher* ignorables; + const Locale locale; + + // NOTE: These are default-constructed and should not be used until initialized. + MinusSignMatcher minusSign; + PlusSignMatcher plusSign; + PercentMatcher percent; + PermilleMatcher permille; + CurrencyAnyMatcher currency; + + friend class AffixPatternMatcherBuilder; + friend class AffixPatternMatcher; +}; +class CodePointMatcher : public NumberParseMatcher, public UMemory { + public: + CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state + + CodePointMatcher(UChar32 cp); + + bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; + + const UnicodeSet& getLeadCodePoints() override; + + private: + UChar32 fCp; +}; + + +class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer { + public: + AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherFactory& factory, + IgnorablesMatcher* ignorables); + + void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override; + + /** NOTE: You can build only once! */ + AffixPatternMatcher build(); + + private: + ArraySeriesMatcher::MatcherArray fMatchers; + int32_t fMatchersLen; + int32_t fLastTypeOrCp; + + LocalArray fCodePointMatchers; + int32_t fCodePointMatchersLen; + + const UnicodeString& fPattern; + AffixTokenMatcherFactory& fFactory; + IgnorablesMatcher* fIgnorables; + + void addMatcher(NumberParseMatcher& matcher); +}; + + +class AffixPatternMatcher : public ArraySeriesMatcher { + public: + static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern, + AffixTokenMatcherFactory& factory, + parse_flags_t parseFlags, bool* success, + UErrorCode& status); + + private: + UnicodeString fPattern; + + // We need to own the variable number of CodePointMatchers. + LocalArray fCodePointMatchers; + + AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state + + AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern, + CodePointMatcher* codePointMatchers); + + friend class AffixPatternMatcherBuilder; +}; } // namespace impl diff --git a/icu4c/source/i18n/numparse_compositions.cpp b/icu4c/source/i18n/numparse_compositions.cpp index 5d4a92b988c..138b45c6dac 100644 --- a/icu4c/source/i18n/numparse_compositions.cpp +++ b/icu4c/source/i18n/numparse_compositions.cpp @@ -87,8 +87,13 @@ void SeriesMatcher::postProcess(ParsedNumber& result) const { } -ArraySeriesMatcher::ArraySeriesMatcher(NumberParseMatcher** matchers, int32_t matchersLen) - : fMatchers(matchers), fMatchersLen(matchersLen) {} +ArraySeriesMatcher::ArraySeriesMatcher() + : fMatchersLen(0) { +} + +ArraySeriesMatcher::ArraySeriesMatcher(MatcherArray& matchers, int32_t matchersLen) + : fMatchers(std::move(matchers)), fMatchersLen(matchersLen) { +} const UnicodeSet& ArraySeriesMatcher::getLeadCodePoints() { // SeriesMatchers are never allowed to start with a Flexible matcher. @@ -96,6 +101,10 @@ const UnicodeSet& ArraySeriesMatcher::getLeadCodePoints() { return fMatchers[0]->getLeadCodePoints(); } +int32_t ArraySeriesMatcher::length() const { + return fMatchersLen; +} + const NumberParseMatcher* const* ArraySeriesMatcher::begin() const { return fMatchers.getAlias(); } diff --git a/icu4c/source/i18n/numparse_compositions.h b/icu4c/source/i18n/numparse_compositions.h index b52bb2fd8af..51501a805c2 100644 --- a/icu4c/source/i18n/numparse_compositions.h +++ b/icu4c/source/i18n/numparse_compositions.h @@ -63,6 +63,8 @@ class SeriesMatcher : public CompositionMatcher { void postProcess(ParsedNumber& result) const override; + virtual int32_t length() const = 0; + protected: // No construction except by subclasses! SeriesMatcher() = default; @@ -76,18 +78,24 @@ class SeriesMatcher : public CompositionMatcher { */ class ArraySeriesMatcher : public SeriesMatcher { public: - /** The array is adopted, but NOT the matchers inside the array. */ - ArraySeriesMatcher(NumberParseMatcher** matchers, int32_t matchersLen); + ArraySeriesMatcher(); // WARNING: Leaves the object in an unusable state + + typedef MaybeStackArray MatcherArray; + + /** The array is std::move'd */ + ArraySeriesMatcher(MatcherArray& matchers, int32_t matchersLen); const UnicodeSet& getLeadCodePoints() override; + int32_t length() const override; + protected: const NumberParseMatcher* const* begin() const override; const NumberParseMatcher* const* end() const override; private: - LocalArray fMatchers; + MatcherArray fMatchers; int32_t fMatchersLen; }; diff --git a/icu4c/source/i18n/numparse_currency.cpp b/icu4c/source/i18n/numparse_currency.cpp index 90b6bed6dd0..1cd730214af 100644 --- a/icu4c/source/i18n/numparse_currency.cpp +++ b/icu4c/source/i18n/numparse_currency.cpp @@ -16,16 +16,6 @@ using namespace icu::numparse; using namespace icu::numparse::impl; -namespace { - -inline void copyCurrencyCode(UChar* dest, const UChar* src) { - uprv_memcpy(dest, src, sizeof(UChar) * 3); - dest[3] = 0; -} - -} - - CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status) : fLocaleName(locale.getName(), -1, status) {} @@ -80,7 +70,7 @@ const UnicodeSet& CurrencyNamesMatcher::getLeadCodePoints() { CurrencyCustomMatcher::CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1, const UnicodeString& currency2) : fCurrency1(currency1), fCurrency2(currency2) { - copyCurrencyCode(fCurrencyCode, currencyCode); + utils::copyCurrencyCode(fCurrencyCode, currencyCode); } bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const { @@ -90,14 +80,14 @@ bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, int overlap1 = segment.getCommonPrefixLength(fCurrency1); if (overlap1 == fCurrency1.length()) { - copyCurrencyCode(result.currencyCode, fCurrencyCode); + utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); segment.adjustOffset(overlap1); result.setCharsConsumed(segment); } int overlap2 = segment.getCommonPrefixLength(fCurrency2); if (overlap2 == fCurrency2.length()) { - copyCurrencyCode(result.currencyCode, fCurrencyCode); + utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); segment.adjustOffset(overlap2); result.setCharsConsumed(segment); } @@ -117,6 +107,11 @@ const UnicodeSet& CurrencyCustomMatcher::getLeadCodePoints() { } +CurrencyAnyMatcher::CurrencyAnyMatcher() { + fMatcherArray[0] = &fNamesMatcher; + fMatcherArray[1] = &fCustomMatcher; +} + CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher, CurrencyCustomMatcher customMatcher) : fNamesMatcher(std::move(namesMatcher)), fCustomMatcher(std::move(customMatcher)) { diff --git a/icu4c/source/i18n/numparse_currency.h b/icu4c/source/i18n/numparse_currency.h index f5f56c86004..bbfa15094a8 100644 --- a/icu4c/source/i18n/numparse_currency.h +++ b/icu4c/source/i18n/numparse_currency.h @@ -42,6 +42,8 @@ class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory { class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory { public: + CurrencyCustomMatcher() = default; // WARNING: Leaves the object in an unusable state + CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1, const UnicodeString& currency2); @@ -61,7 +63,8 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory { */ class CurrencyAnyMatcher : public AnyMatcher, public UMemory { public: - /** Calls std::move on the two arguments. */ + CurrencyAnyMatcher(); // WARNING: Leaves the object in an unusable state + CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher, CurrencyCustomMatcher customMatcher); const UnicodeSet& getLeadCodePoints() override; diff --git a/icu4c/source/i18n/numparse_symbols.h b/icu4c/source/i18n/numparse_symbols.h index 289b8902d96..c06724fbe72 100644 --- a/icu4c/source/i18n/numparse_symbols.h +++ b/icu4c/source/i18n/numparse_symbols.h @@ -28,7 +28,6 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory { bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; - /** NOTE: This method is not guaranteed to be thread-safe. */ const UnicodeSet& getLeadCodePoints() override; virtual bool isDisabled(const ParsedNumber& result) const = 0; diff --git a/icu4c/source/i18n/numparse_utils.h b/icu4c/source/i18n/numparse_utils.h index a25f9ef9df7..590c7943f32 100644 --- a/icu4c/source/i18n/numparse_utils.h +++ b/icu4c/source/i18n/numparse_utils.h @@ -28,6 +28,11 @@ inline static void putLeadCodePoint(const UnicodeString& input, UnicodeSet* outp } } +inline static void copyCurrencyCode(UChar* dest, const UChar* src) { + uprv_memcpy(dest, src, sizeof(UChar) * 3); + dest[3] = 0; +} + } // namespace utils } // namespace impl diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index 945d76d9b32..5d56dab2066 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -213,6 +213,7 @@ class NumberParserTest : public IntlTest { void testLocaleFi(); void testSeriesMatcher(); void testCurrencyAnyMatcher(); + void testAffixPatternMatcher(); void testGroupingDisabled(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); diff --git a/icu4c/source/test/intltest/numbertest_parse.cpp b/icu4c/source/test/intltest/numbertest_parse.cpp index 1dbf73a3d16..776223044bd 100644 --- a/icu4c/source/test/intltest/numbertest_parse.cpp +++ b/icu4c/source/test/intltest/numbertest_parse.cpp @@ -12,6 +12,7 @@ #include "unicode/testlog.h" #include +#include using icu::numparse::impl::unisets::get; @@ -22,6 +23,7 @@ void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& na TESTCASE_AUTO_BEGIN; TESTCASE_AUTO(testBasic); TESTCASE_AUTO(testSeriesMatcher); + TESTCASE_AUTO(testAffixPatternMatcher); TESTCASE_AUTO_END; } @@ -165,7 +167,13 @@ void NumberParserTest::testSeriesMatcher() { PercentMatcher m3(symbols); IgnorablesMatcher m4(unisets::DEFAULT_IGNORABLES); - ArraySeriesMatcher series(new NumberParseMatcher* [5]{&m0, &m1, &m2, &m3, &m4}, 5); + ArraySeriesMatcher::MatcherArray matchers(5); + matchers[0] = &m0; + matchers[1] = &m1; + matchers[2] = &m2; + matchers[3] = &m3; + matchers[4] = &m4; + ArraySeriesMatcher series(matchers, 5); assertEquals( "Lead set should be equal to lead set of lead matcher", @@ -203,5 +211,45 @@ void NumberParserTest::testSeriesMatcher() { } } +void NumberParserTest::testAffixPatternMatcher() { + IcuTestErrorCode status(*this, "testAffixPatternMatcher"); + + IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES); + AffixTokenMatcherFactory factory(u"EUR", u"foo", u"bar", {"en", status}, &ignorables, "en"); + + static const struct TestCase { + bool exactMatch; + const char16_t* affixPattern; + int32_t expectedMatcherLength; + const char16_t* sampleParseableString; + } cases[] = {{false, u"-", 1, u"-"}, + {false, u"+-%", 5, u"+-%"}, + {true, u"+-%", 3, u"+-%"}, + {false, u"ab c", 5, u"a bc"}, + {true, u"abc", 3, u"abc"}, + //{false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"} + }; + + for (auto& cas : cases) { + UnicodeString affixPattern(cas.affixPattern); + UnicodeString sampleParseableString(cas.sampleParseableString); + int parseFlags = cas.exactMatch ? PARSE_FLAG_EXACT_AFFIX : 0; + + bool success; + AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern( + affixPattern, factory, parseFlags, &success, status); + assertTrue("Creation should be successful", success); + + // Check that the matcher has the expected number of children + assertEquals(affixPattern + " " + cas.exactMatch, cas.expectedMatcherLength, matcher.length()); + + // Check that the matcher works on a sample string + StringSegment segment(sampleParseableString, 0); + ParsedNumber result; + matcher.match(segment, result, status); + assertEquals(affixPattern + " " + cas.exactMatch, sampleParseableString.length(), result.charEnd); + } +} + #endif diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java index 5104e29b9da..6fccdc29321 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java @@ -78,7 +78,7 @@ public class AffixMatcher implements NumberParseMatcher { public static void createMatchers( AffixPatternProvider patternInfo, NumberParserImpl output, - MatcherFactory factory, + AffixTokenMatcherFactory factory, IgnorablesMatcher ignorables, int parseFlags) { if (!isInteresting(patternInfo, ignorables, parseFlags)) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java index 43d3888579a..770201533e6 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java @@ -15,7 +15,7 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok private final String affixPattern; // Used during construction only: - private MatcherFactory factory; + private AffixTokenMatcherFactory factory; private IgnorablesMatcher ignorables; private int lastTypeOrCp; @@ -29,7 +29,7 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok */ public static AffixPatternMatcher fromAffixPattern( String affixPattern, - MatcherFactory factory, + AffixTokenMatcherFactory factory, int parseFlags) { if (affixPattern.isEmpty()) { return null; @@ -71,10 +71,10 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok // Case 1: the token is a symbol. switch (typeOrCp) { case AffixUtils.TYPE_MINUS_SIGN: - addMatcher(factory.minusSign(true)); + addMatcher(factory.minusSign()); break; case AffixUtils.TYPE_PLUS_SIGN: - addMatcher(factory.plusSign(true)); + addMatcher(factory.plusSign()); break; case AffixUtils.TYPE_PERCENT: addMatcher(factory.percent()); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java similarity index 78% rename from icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java rename to icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java index 63c37b916ed..142b29faefd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java @@ -11,18 +11,18 @@ import com.ibm.icu.util.ULocale; * * @author sffc */ -public class MatcherFactory { +public class AffixTokenMatcherFactory { public Currency currency; public DecimalFormatSymbols symbols; public IgnorablesMatcher ignorables; public ULocale locale; - public MinusSignMatcher minusSign(boolean allowTrailing) { - return MinusSignMatcher.getInstance(symbols, allowTrailing); + public MinusSignMatcher minusSign() { + return MinusSignMatcher.getInstance(symbols, true); } - public PlusSignMatcher plusSign(boolean allowTrailing) { - return PlusSignMatcher.getInstance(symbols, allowTrailing); + public PlusSignMatcher plusSign() { + return PlusSignMatcher.getInstance(symbols, true); } public PercentMatcher percent() { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java index ed75d2d514e..5698472fa98 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -74,7 +74,7 @@ public class NumberParserImpl { DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale); IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT; - MatcherFactory factory = new MatcherFactory(); + AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory(); factory.currency = Currency.getInstance("USD"); factory.symbols = symbols; factory.ignorables = ignorables; @@ -195,7 +195,7 @@ public class NumberParserImpl { NumberParserImpl parser = new NumberParserImpl(parseFlags); - MatcherFactory factory = new MatcherFactory(); + AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory(); factory.currency = currency; factory.symbols = symbols; factory.ignorables = ignorables; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java index 41312d8399a..c1e5eaf8422 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java @@ -9,9 +9,10 @@ import org.junit.Test; import com.ibm.icu.impl.number.CustomSymbolCurrency; import com.ibm.icu.impl.number.DecimalFormatProperties; +import com.ibm.icu.impl.number.parse.AffixPatternMatcher; +import com.ibm.icu.impl.number.parse.AffixTokenMatcherFactory; import com.ibm.icu.impl.number.parse.AnyMatcher; import com.ibm.icu.impl.number.parse.IgnorablesMatcher; -import com.ibm.icu.impl.number.parse.MatcherFactory; import com.ibm.icu.impl.number.parse.MinusSignMatcher; import com.ibm.icu.impl.number.parse.NumberParserImpl; import com.ibm.icu.impl.number.parse.ParsedNumber; @@ -23,6 +24,7 @@ import com.ibm.icu.impl.number.parse.StringSegment; import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache; import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key; import com.ibm.icu.text.DecimalFormatSymbols; +import com.ibm.icu.util.Currency; import com.ibm.icu.util.ULocale; /** @@ -227,7 +229,7 @@ public class NumberParserTest { @Test public void testCurrencyAnyMatcher() { - MatcherFactory factory = new MatcherFactory(); + AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory(); factory.locale = ULocale.ENGLISH; CustomSymbolCurrency currency = new CustomSymbolCurrency("ICU", "IU$", "ICU"); factory.currency = currency; @@ -257,6 +259,45 @@ public class NumberParserTest { } } + @Test + public void testAffixPatternMatcher() { + AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory(); + factory.currency = Currency.getInstance("EUR"); + factory.symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH); + factory.ignorables = IgnorablesMatcher.DEFAULT; + factory.locale = ULocale.ENGLISH; + + Object[][] cases = { + { false, "-", 1, "-" }, + { false, "+-%", 5, "+-%" }, + { true, "+-%", 3, "+-%" }, + { false, "ab c", 5, "a bc" }, + { true, "abc", 3, "abc" }, + { false, "hello-to+this%very¤long‰string", 59, "hello-to+this%very USD long‰string" } }; + + for (Object[] cas : cases) { + boolean exactMatch = (Boolean) cas[0]; + String affixPattern = (String) cas[1]; + int expectedMatcherLength = (Integer) cas[2]; + String sampleParseableString = (String) cas[3]; + int parseFlags = exactMatch ? ParsingUtils.PARSE_FLAG_EXACT_AFFIX : 0; + + AffixPatternMatcher matcher = AffixPatternMatcher + .fromAffixPattern(affixPattern, factory, parseFlags); + + // Check that the matcher has the expected number of children + assertEquals(affixPattern + " " + exactMatch, expectedMatcherLength, matcher.length()); + + // Check that the matcher works on a sample string + StringSegment segment = new StringSegment(sampleParseableString, 0); + ParsedNumber result = new ParsedNumber(); + matcher.match(segment, result); + assertEquals(affixPattern + " " + exactMatch, + sampleParseableString.length(), + result.charEnd); + } + } + @Test public void testGroupingDisabled() { DecimalFormatProperties properties = new DecimalFormatProperties();