ICU-13574 Adding composition matchers (SeriesMatcher and AnyMatcher) to ICU4C in preparation for affix matchers. Also re-working memory management in getLeadCodePoints().

X-SVN-Rev: 40890
2025-04-07 06:25:30 +00:00 · 2018-02-10 06:36:07 +00:00 · 2018-02-10 06:36:07 +00:00 · 513f123a8c
commit 513f123a8c
parent 852897ba2c
25 changed files with 596 additions and 79 deletions
--- a/icu4c/source/i18n/Makefile.in
+++ b/icu4c/source/i18n/Makefile.in
@ -110,7 +110,7 @@ number_padding.o number_patternmodifier.o number_patternstring.o \
 number_rounding.o number_scientific.o number_stringbuilder.o \
 numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o \
 numparse_impl.o numparse_symbols.o numparse_decimal.o numparse_scientific.o \
-numparse_currency.o
+numparse_currency.o numparse_affixes.o numparse_compositions.o


 ## Header files to install
--- a/icu4c/source/i18n/numparse_affixes.cpp
+++ b/icu4c/source/i18n/numparse_affixes.cpp
@ -0,0 +1,20 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numparse_types.h"
+#include "numparse_affixes.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+
+
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/i18n/numparse_affixes.h
+++ b/icu4c/source/i18n/numparse_affixes.h
@ -0,0 +1,25 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+#ifndef __NUMPARSE_AFFIXES_H__
+#define __NUMPARSE_AFFIXES_H__
+
+#include "numparse_types.h"
+
+U_NAMESPACE_BEGIN
+namespace numparse {
+namespace impl {
+
+
+
+
+
+} // namespace impl
+} // namespace numparse
+U_NAMESPACE_END
+
+#endif //__NUMPARSE_AFFIXES_H__
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/i18n/numparse_compositions.cpp
+++ b/icu4c/source/i18n/numparse_compositions.cpp
@ -0,0 +1,108 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numparse_types.h"
+#include "numparse_compositions.h"
+#include "unicode/uniset.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+/**
+ * Runs the child matchers in list order against the same starting offset and stops at the first one
+ * that consumes input.
+ *
+ * @param segment The input; its offset is advanced by whichever child matcher succeeds.
+ * @param result Receives whatever the successful child matcher records.
+ * @param status Forwarded unchanged to every child matcher.
+ * @return true if any child matcher that was run reported that more input could change the outcome.
+ */
+bool AnyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+    const int32_t initialOffset = segment.getOffset();
+    bool maybeMore = false;
+
+    // NOTE: The range-based for loop calls the virtual begin() and end() methods.
+    for (auto* matcher : *this) {
+        // Evaluate match() FIRST so it can never be short-circuited away: an earlier matcher that only
+        // reported a possible partial match must not stop later matchers from being tried.
+        maybeMore = matcher->match(segment, result, status) || maybeMore;
+        if (segment.getOffset() != initialOffset) {
+            // Match succeeded.
+            // NOTE: Except for a couple edge cases, if a matcher accepted string A, then it will
+            // accept any string starting with A. Therefore, there is no possibility that matchers
+            // later in the list may be evaluated on longer strings, and we can exit the loop here.
+            break;
+        }
+    }
+
+    // Reached both after a successful match (via break) and after every matcher declined.
+    return maybeMore;
+}
+
+/** Forwards the post-processing step to every child matcher, in list order. */
+void AnyMatcher::postProcess(ParsedNumber& result) const {
+    // begin() and end() are the virtual range accessors supplied by the concrete subclass.
+    const NumberParseMatcher* const* cursor = begin();
+    const NumberParseMatcher* const* const last = end();
+    for (; cursor != last; ++cursor) {
+        (*cursor)->postProcess(result);
+    }
+}
+
+
+/**
+ * Runs the child matchers one after another. A flexible matcher is re-run for as long as it keeps
+ * consuming input and is skipped once it consumes nothing; a non-flexible matcher must consume input
+ * or the whole series fails, in which case the segment offset and the result are rolled back.
+ *
+ * @return The "maybe more" flag of the last step that was evaluated.
+ */
+bool SeriesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+    // Snapshot everything that has to be restored if the series fails part-way through.
+    ParsedNumber snapshot(result);
+    const int32_t seriesStart = segment.getOffset();
+
+    bool maybeMore = true;
+    auto* cursor = begin();
+    while (cursor < end()) {
+        const NumberParseMatcher* current = *cursor;
+        const int32_t offsetBefore = segment.getOffset();
+
+        // With no input left there is nothing for this matcher to match; ask for more instead.
+        maybeMore = (segment.length() == 0) || current->match(segment, result, status);
+
+        const bool consumed = (segment.getOffset() != offsetBefore);
+        const bool flexible = current->isFlexible();
+        if (!consumed && !flexible) {
+            // A required matcher failed: undo the partial progress and report the failure.
+            segment.setOffset(seriesStart);
+            result = snapshot;
+            return maybeMore;
+        }
+        if (!(consumed && flexible)) {
+            // Either a required matcher succeeded or a flexible matcher found nothing: move on.
+            ++cursor;
+        }
+        // Otherwise a flexible matcher consumed input; the cursor stays put so that it runs again.
+    }
+
+    // All matchers in the series succeeded.
+    return maybeMore;
+}
+
+/** Forwards the post-processing step to every matcher of the series, in series order. */
+void SeriesMatcher::postProcess(ParsedNumber& result) const {
+    // begin() and end() are the virtual range accessors supplied by the concrete subclass.
+    const NumberParseMatcher* const* cursor = begin();
+    const NumberParseMatcher* const* const last = end();
+    for (; cursor != last; ++cursor) {
+        (*cursor)->postProcess(result);
+    }
+}
+
+
+// Adopts the `matchers` array itself (held in fMatchers) and records how many entries it has.
+// The matcher objects the array points to are NOT adopted.
+ArraySeriesMatcher::ArraySeriesMatcher(NumberParseMatcher** matchers, int32_t matchersLen)
+        : fMatchers(matchers),
+          fMatchersLen(matchersLen) {
+}
+
+/**
+ * Returns the lead code points of the series, which are those of its first matcher: the series
+ * cannot start matching unless that matcher does.
+ */
+const UnicodeSet& ArraySeriesMatcher::getLeadCodePoints() {
+    // There must be a first matcher to delegate to; an empty or missing array is a caller bug.
+    U_ASSERT(!fMatchers.isNull() && fMatchersLen > 0);
+    // SeriesMatchers are never allowed to start with a Flexible matcher.
+    U_ASSERT(!fMatchers[0]->isFlexible());
+    return fMatchers[0]->getLeadCodePoints();
+}
+
+const NumberParseMatcher* const* ArraySeriesMatcher::begin() const {
+    // The adopted array doubles as the iteration range; its first slot is the start.
+    NumberParseMatcher* const* const first = fMatchers.getAlias();
+    return first;
+}
+
+const NumberParseMatcher* const* ArraySeriesMatcher::end() const {
+    // One past the last of the fMatchersLen slots of the adopted array.
+    NumberParseMatcher* const* const first = fMatchers.getAlias();
+    return first + fMatchersLen;
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/i18n/numparse_compositions.h
+++ b/icu4c/source/i18n/numparse_compositions.h
@ -0,0 +1,100 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+#ifndef __SOURCE_NUMPARSE_COMPOSITIONS__
+#define __SOURCE_NUMPARSE_COMPOSITIONS__
+
+#include "numparse_types.h"
+
+U_NAMESPACE_BEGIN namespace numparse {
+namespace impl {
+
+
+/**
+ * Base class for AnyMatcher and SeriesMatcher.
+ *
+ * Declares the begin()/end() pair through which a composition exposes its child matchers as a range
+ * of pointers. That pair is what lets the subclasses iterate over *this, including with range-based
+ * for loops.
+ */
+class CompositionMatcher : public NumberParseMatcher {
+  protected:
+    // No construction except by subclasses!
+    CompositionMatcher() = default;
+
+    // To be overridden by subclasses (used for iteration): pointer to the first child matcher.
+    virtual const NumberParseMatcher* const* begin() const = 0;
+
+    // To be overridden by subclasses (used for iteration): pointer one past the last child matcher.
+    virtual const NumberParseMatcher* const* end() const = 0;
+};
+
+
+/**
+ * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
+ * the first matcher in the list to succeed.
+ *
+ * A child matcher counts as having succeeded when it advances the offset of the StringSegment; the
+ * remaining matchers in the list are then not run. postProcess() is forwarded to every child.
+ *
+ * NOTE: In C++, this is a base class, unlike ICU4J, which uses a factory-style interface.
+ * Subclasses supply the list of child matchers by overriding begin() and end().
+ *
+ * @author sffc
+ * @see SeriesMatcher
+ */
+class AnyMatcher : public CompositionMatcher {
+  public:
+    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+
+    void postProcess(ParsedNumber& result) const override;
+
+  protected:
+    // No construction except by subclasses!
+    AnyMatcher() = default;
+};
+
+
+/**
+ * Composes a number of matchers, running one after another. Matches the input string only if all of the
+ * matchers in the series succeed. Performs greedy matches within the context of the series.
+ *
+ * A matcher whose isFlexible() returns true is re-run for as long as it keeps consuming input and is
+ * skipped once it consumes nothing. When any other matcher consumes nothing, the series fails and the
+ * segment offset and the ParsedNumber are restored to their state from before the series started.
+ *
+ * Subclasses supply the series by overriding begin() and end().
+ *
+ * @author sffc
+ * @see AnyMatcher
+ */
+class SeriesMatcher : public CompositionMatcher {
+  public:
+    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+
+    void postProcess(ParsedNumber& result) const override;
+
+  protected:
+    // No construction except by subclasses!
+    SeriesMatcher() = default;
+};
+
+
+/**
+ * An implementation of SeriesMatcher that references an array of matchers.
+ *
+ * The object adopts the array, but NOT the matchers contained inside the array.
+ *
+ * The lead code points of the series are those of the first matcher in the array, which therefore
+ * must exist and must not be a flexible matcher.
+ */
+class ArraySeriesMatcher : public SeriesMatcher {
+  public:
+    /**
+     * The array is adopted, but NOT the matchers inside the array.
+     *
+     * @param matchers Array of pointers to the matchers of the series, in matching order.
+     * @param matchersLen Number of entries in the array.
+     */
+    ArraySeriesMatcher(NumberParseMatcher** matchers, int32_t matchersLen);
+
+    const UnicodeSet& getLeadCodePoints() override;
+
+  protected:
+    const NumberParseMatcher* const* begin() const override;
+
+    const NumberParseMatcher* const* end() const override;
+
+  private:
+    LocalArray<NumberParseMatcher*> fMatchers;
+    int32_t fMatchersLen;
+};
+
+
+} // namespace impl
+} // namespace numparse
+U_NAMESPACE_END
+
+#endif //__SOURCE_NUMPARSE_COMPOSITIONS__
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/i18n/numparse_currency.cpp
+++ b/icu4c/source/i18n/numparse_currency.cpp
@ -9,12 +9,23 @@
 #include "numparse_currency.h"
 #include "ucurrimp.h"
 #include "unicode/errorcode.h"
+#include "numparse_utils.h"

 using namespace icu;
 using namespace icu::numparse;
 using namespace icu::numparse::impl;


+namespace {
+
+// Copies the three UChars of a currency code from src to dest and NUL-terminates dest, which must
+// therefore have room for at least four UChars.
+inline void copyCurrencyCode(UChar* dest, const UChar* src) {
+    constexpr int32_t kCodeLength = 3;
+    uprv_memcpy(dest, src, sizeof(UChar) * kCodeLength);
+    dest[kCodeLength] = 0;
+}
+
+} // namespace
+
+
 CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status)
        : fLocaleName(locale.getName(), -1, status) {}

@ -52,15 +63,84 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U
    return partialMatch;
 }

-const UnicodeSet* CurrencyNamesMatcher::getLeadCodePoints() const {
-    ErrorCode status;
-    UnicodeSet* leadCodePoints = new UnicodeSet();
-    uprv_currencyLeads(fLocaleName.data(), *leadCodePoints, status);
-    // Always apply case mapping closure for currencies
-    leadCodePoints->closeOver(USET_ADD_CASE_MAPPINGS);
-    leadCodePoints->freeze();
+const UnicodeSet& CurrencyNamesMatcher::getLeadCodePoints() {
+    // The set is computed on first use and cached in fLocalLeadCodePoints afterwards.
+    if (!fLocalLeadCodePoints.isNull()) {
+        return *fLocalLeadCodePoints;
+    }
+
+    ErrorCode status;
+    auto* leads = new UnicodeSet();
+    uprv_currencyLeads(fLocaleName.data(), *leads, status);
+    // Always apply case mapping closure for currencies
+    leads->closeOver(USET_ADD_CASE_MAPPINGS);
+    leads->freeze();
+    fLocalLeadCodePoints.adoptInstead(leads);
+    return *fLocalLeadCodePoints;
+}

-    return leadCodePoints;
+
+// Stores copies of the two currency strings to match and of the currency code to report for them.
+CurrencyCustomMatcher::CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1,
+                                             const UnicodeString& currency2)
+        : fCurrency1(currency1),
+          fCurrency2(currency2) {
+    // fCurrencyCode is a plain array, so it is filled in here rather than in the initializer list.
+    copyCurrencyCode(fCurrencyCode, currencyCode);
+}
+
+/**
+ * Matches the start of the segment against fCurrency1 and then fCurrency2. The first string that is
+ * fully present is consumed and fCurrencyCode is recorded in the result; at most one string is
+ * consumed per call.
+ *
+ * @return true if the remaining input is a prefix of one of the strings tried, i.e. if more input
+ *         could still complete or extend a match.
+ */
+bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
+    // A currency has already been recorded for this number; do not match a second one.
+    if (result.currencyCode[0] != 0) {
+        return false;
+    }
+
+    const UnicodeString* const candidates[] = {&fCurrency1, &fCurrency2};
+    bool maybeMore = false;
+    for (const UnicodeString* candidate : candidates) {
+        if (candidate->isEmpty()) {
+            // An empty string would "match" without consuming anything and claim the currency slot.
+            continue;
+        }
+
+        const int32_t overlap = segment.getCommonPrefixLength(*candidate);
+        // Compare against the segment length BEFORE the offset is adjusted below.
+        maybeMore = maybeMore || overlap == segment.length();
+        if (overlap == candidate->length()) {
+            copyCurrencyCode(result.currencyCode, fCurrencyCode);
+            segment.adjustOffset(overlap);
+            result.setCharsConsumed(segment);
+            // Stop here so that the other string cannot also be consumed from the remaining input.
+            return maybeMore;
+        }
+    }
+
+    return maybeMore;
+}
+
+const UnicodeSet& CurrencyCustomMatcher::getLeadCodePoints() {
+    // The set is computed on first use and cached in fLocalLeadCodePoints afterwards.
+    if (!fLocalLeadCodePoints.isNull()) {
+        return *fLocalLeadCodePoints;
+    }
+
+    // Collect the lead code point of each of the two currency strings.
+    auto* leads = new UnicodeSet();
+    utils::putLeadCodePoint(fCurrency1, leads);
+    utils::putLeadCodePoint(fCurrency2, leads);
+    leads->freeze();
+    fLocalLeadCodePoints.adoptInstead(leads);
+    return *fLocalLeadCodePoints;
+}
+
+
+// Moves both matchers into this object and lists them, names matcher first, in fMatcherArray.
+// NOTE(review): fMatcherArray points at this object's own members, so a compiler-generated copy or
+// move of a CurrencyAnyMatcher would keep pointing into the source object -- confirm that instances
+// are never copied.
+CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher,
+                                       CurrencyCustomMatcher customMatcher)
+        : fNamesMatcher(std::move(namesMatcher)),
+          fCustomMatcher(std::move(customMatcher)) {
+    fMatcherArray[0] = &fNamesMatcher;
+    fMatcherArray[1] = &fCustomMatcher;
+}
+
+const UnicodeSet& CurrencyAnyMatcher::getLeadCodePoints() {
+    // The set is computed on first use and cached in fLocalLeadCodePoints afterwards.
+    if (!fLocalLeadCodePoints.isNull()) {
+        return *fLocalLeadCodePoints;
+    }
+
+    // Either child may match, so the lead set is the union of the two children's lead sets.
+    auto* merged = new UnicodeSet();
+    merged->addAll(fNamesMatcher.getLeadCodePoints());
+    merged->addAll(fCustomMatcher.getLeadCodePoints());
+    merged->freeze();
+    fLocalLeadCodePoints.adoptInstead(merged);
+    return *fLocalLeadCodePoints;
+}
+
+const NumberParseMatcher* const* CurrencyAnyMatcher::begin() const {
+    // First slot of the fixed two-element array filled in by the constructor.
+    return &fMatcherArray[0];
+}
+
+const NumberParseMatcher* const* CurrencyAnyMatcher::end() const {
+    // One past the second (last) slot of the fixed two-element array.
+    return &fMatcherArray[0] + 2;
 }


--- a/icu4c/source/i18n/numparse_currency.h
+++ b/icu4c/source/i18n/numparse_currency.h
@ -8,6 +8,7 @@
 #define __NUMPARSE_CURRENCY_H__

 #include "numparse_types.h"
+#include "numparse_compositions.h"
 #include "charstr.h"

 U_NAMESPACE_BEGIN namespace numparse {
@ -29,7 +30,7 @@ class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {

    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

-    const UnicodeSet* getLeadCodePoints() const override;
+    const UnicodeSet& getLeadCodePoints() override;

  private:
    // We could use Locale instead of CharString here, but
@ -39,6 +40,45 @@ class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
 };


+/**
+ * Matches either of two caller-supplied currency strings at the start of the input and, on a match,
+ * records the associated currency code in the ParsedNumber.
+ */
+class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
+  public:
+    /**
+     * @param currencyCode The code to record on a match; its first three code units are copied.
+     * @param currency1 First string to match.
+     * @param currency2 Second string to match.
+     */
+    CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1,
+                          const UnicodeString& currency2);
+
+    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+
+    const UnicodeSet& getLeadCodePoints() override;
+
+  private:
+    // Three code units plus a terminating NUL.
+    UChar fCurrencyCode[4];
+    UnicodeString fCurrency1;
+    UnicodeString fCurrency2;
+};
+
+
+/**
+ * An implementation of AnyMatcher, allowing for either currency data or locale currency matches.
+ *
+ * The object owns one CurrencyNamesMatcher and one CurrencyCustomMatcher and exposes them, in that
+ * order, as the list of child matchers. Its lead code points are the union of the two children's.
+ */
+class CurrencyAnyMatcher : public AnyMatcher, public UMemory {
+  public:
+    /** Calls std::move on the two arguments. */
+    CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher, CurrencyCustomMatcher customMatcher);
+
+    const UnicodeSet& getLeadCodePoints() override;
+
+  protected:
+    const NumberParseMatcher* const* begin() const override;
+
+    const NumberParseMatcher* const* end() const override;
+
+  private:
+    CurrencyNamesMatcher fNamesMatcher;
+    CurrencyCustomMatcher fCustomMatcher;
+
+    // Points at the two members above; this is the range returned by begin() and end().
+    const NumberParseMatcher* fMatcherArray[2];
+};
+
+
 } // namespace impl
 } // namespace numparse
 U_NAMESPACE_END
--- a/icu4c/source/i18n/numparse_decimal.cpp
+++ b/icu4c/source/i18n/numparse_decimal.cpp
@ -291,22 +291,25 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
    return segment.length() == 0 || hasPartialPrefix;
 }

-const UnicodeSet* DecimalMatcher::getLeadCodePoints() const {
+const UnicodeSet& DecimalMatcher::getLeadCodePoints() {
    if (fLocalDigitStrings.isNull() && leadSet != nullptr) {
-        return new UnicodeSet(*leadSet);
+        return *leadSet;
    }

-    auto* leadCodePoints = new UnicodeSet();
-    // Assumption: the sets are all single code points.
-    leadCodePoints->addAll(*unisets::get(unisets::DIGITS));
-    leadCodePoints->addAll(*separatorSet);
-    if (!fLocalDigitStrings.isNull()) {
-        for (int i = 0; i < 10; i++) {
-            utils::putLeadCodePoint(fLocalDigitStrings[i], leadCodePoints);
+    if (fLocalLeadCodePoints.isNull()) {
+        auto* leadCodePoints = new UnicodeSet();
+        // Assumption: DIGITS and separatorSet hold only single code points, so they can be added as-is.
+        leadCodePoints->addAll(*unisets::get(unisets::DIGITS));
+        leadCodePoints->addAll(*separatorSet);
+        if (!fLocalDigitStrings.isNull()) {
+            for (int i = 0; i < 10; i++) {
+                utils::putLeadCodePoint(fLocalDigitStrings[i], leadCodePoints);
+            }
        }
+        leadCodePoints->freeze();
+        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
    }
-    leadCodePoints->freeze();
-    return leadCodePoints;
+    return *fLocalLeadCodePoints;
 }


--- a/icu4c/source/i18n/numparse_decimal.h
+++ b/icu4c/source/i18n/numparse_decimal.h
@ -27,7 +27,7 @@ class DecimalMatcher : public NumberParseMatcher, public UMemory {
    bool
    match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, UErrorCode& status) const;

-    const UnicodeSet* getLeadCodePoints() const override;
+    const UnicodeSet& getLeadCodePoints() override;

  private:
    /** If true, only accept strings whose grouping sizes match the locale */
@ -56,7 +56,7 @@ class DecimalMatcher : public NumberParseMatcher, public UMemory {
    const UnicodeSet* leadSet;

    // Make this class the owner of a few objects that could be allocated.
-    // The first two LocalPointers are used for assigning ownership only.
+    // The first three LocalPointers are used for assigning ownership only.
    LocalPointer<const UnicodeSet> fLocalDecimalUniSet;
    LocalPointer<const UnicodeSet> fLocalSeparatorSet;
    LocalArray<const UnicodeString> fLocalDigitStrings;
--- a/icu4c/source/i18n/numparse_impl.cpp
+++ b/icu4c/source/i18n/numparse_impl.cpp
@ -32,7 +32,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
    auto* parser = new NumberParserImpl(parseFlags, true);
    DecimalFormatSymbols symbols(locale, status);

-    parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES};
+    parser->fLocalMatchers.ignorables = std::move(IgnorablesMatcher(unisets::DEFAULT_IGNORABLES));

 //    MatcherFactory factory = new MatcherFactory();
 //    factory.currency = Currency.getInstance("USD");
@ -78,7 +78,7 @@ NumberParserImpl::~NumberParserImpl() {
    fNumMatchers = 0;
 }

-void NumberParserImpl::addMatcher(const NumberParseMatcher& matcher) {
+void NumberParserImpl::addMatcher(NumberParseMatcher& matcher) {
    if (fNumMatchers + 1 > fMatchers.getCapacity()) {
        fMatchers.resize(fNumMatchers * 2, fNumMatchers);
        if (fComputeLeads) {
@ -97,17 +97,17 @@ void NumberParserImpl::addMatcher(const NumberParseMatcher& matcher) {
    fNumMatchers++;
 }

-void NumberParserImpl::addLeadCodePointsForMatcher(const NumberParseMatcher& matcher) {
-    const UnicodeSet* leadCodePoints = matcher.getLeadCodePoints();
+void NumberParserImpl::addLeadCodePointsForMatcher(NumberParseMatcher& matcher) {
+    const UnicodeSet& leadCodePoints = matcher.getLeadCodePoints();
    // TODO: Avoid the clone operation here.
    if (0 != (fParseFlags & PARSE_FLAG_IGNORE_CASE)) {
-        UnicodeSet* copy = static_cast<UnicodeSet*>(leadCodePoints->cloneAsThawed());
-        delete leadCodePoints;
+        auto* copy = dynamic_cast<UnicodeSet*>(leadCodePoints.cloneAsThawed());
        copy->closeOver(USET_ADD_CASE_MAPPINGS);
        copy->freeze();
        fLeads[fNumMatchers] = copy;
    } else {
-        fLeads[fNumMatchers] = leadCodePoints;
+        // FIXME: the matcher keeps ownership of its set, but this parser still takes ownership of what it stores, so copy.
+        fLeads[fNumMatchers] = new UnicodeSet(leadCodePoints);
    }
 }

--- a/icu4c/source/i18n/numparse_impl.h
+++ b/icu4c/source/i18n/numparse_impl.h
@ -24,7 +24,7 @@ class NumberParserImpl {
    static NumberParserImpl* createSimpleParser(const Locale& locale, const UnicodeString& patternString,
                                                parse_flags_t parseFlags, UErrorCode& status);

-    void addMatcher(const NumberParseMatcher& matcher);
+    void addMatcher(NumberParseMatcher& matcher);

    void freeze();

@ -62,7 +62,7 @@ class NumberParserImpl {

    NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);

-    void addLeadCodePointsForMatcher(const NumberParseMatcher& matcher);
+    void addLeadCodePointsForMatcher(NumberParseMatcher& matcher);

    void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;

--- a/icu4c/source/i18n/numparse_scientific.cpp
+++ b/icu4c/source/i18n/numparse_scientific.cpp
@ -67,17 +67,20 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
    return false;
 }

-const UnicodeSet* ScientificMatcher::getLeadCodePoints() const {
+const UnicodeSet& ScientificMatcher::getLeadCodePoints() {
    UChar32 leadCp = fExponentSeparatorString.char32At(0);
    const UnicodeSet* s = unisets::get(unisets::SCIENTIFIC_LEAD);
    if (s->contains(leadCp)) {
-        return new UnicodeSet(*s);
-    } else {
-        UnicodeSet* leadCodePoints = new UnicodeSet();
+        // The static set already covers the exponent separator; return it without allocating.
+        return *s;
+    }
+
+    // Otherwise build, on first use only, a one-element set that is cached in this matcher.
+    if (fLocalLeadCodePoints.isNull()) {
+        auto* leadCodePoints = new UnicodeSet();
        leadCodePoints->add(leadCp);
        leadCodePoints->freeze();
-        return leadCodePoints;
+        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
    }
+    return *fLocalLeadCodePoints;
 }


--- a/icu4c/source/i18n/numparse_scientific.h
+++ b/icu4c/source/i18n/numparse_scientific.h
@ -25,7 +25,7 @@ class ScientificMatcher : public NumberParseMatcher, public UMemory {

    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

-    const UnicodeSet* getLeadCodePoints() const override;
+    const UnicodeSet& getLeadCodePoints() override;

  private:
    UnicodeString fExponentSeparatorString;
--- a/icu4c/source/i18n/numparse_symbols.cpp
+++ b/icu4c/source/i18n/numparse_symbols.cpp
@ -54,17 +54,20 @@ bool SymbolMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCo
    return overlap == segment.length();
 }

-const UnicodeSet* SymbolMatcher::getLeadCodePoints() const {
+const UnicodeSet& SymbolMatcher::getLeadCodePoints() {
    if (fString.isEmpty()) {
        // Assumption: for sets from UnicodeSetStaticCache, uniSet == leadCodePoints.
-        return new UnicodeSet(*fUniSet);
+        return *fUniSet;
    }

-    UnicodeSet* leadCodePoints = new UnicodeSet();
-    utils::putLeadCodePoints(fUniSet, leadCodePoints);
-    utils::putLeadCodePoint(fString, leadCodePoints);
-    leadCodePoints->freeze();
-    return leadCodePoints;
+    // Build, on first use only, a set from both fUniSet and fString; it is cached in this matcher.
+    if (fLocalLeadCodePoints.isNull()) {
+        auto* leadCodePoints = new UnicodeSet();
+        utils::putLeadCodePoints(fUniSet, leadCodePoints);
+        utils::putLeadCodePoint(fString, leadCodePoints);
+        leadCodePoints->freeze();
+        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
+    }
+    return *fLocalLeadCodePoints;
 }


@ -86,7 +89,7 @@ void IgnorablesMatcher::accept(StringSegment&, ParsedNumber&) const {


 InfinityMatcher::InfinityMatcher(const DecimalFormatSymbols& dfs)
-        : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol), unisets::INFINITY) {
+        // Pass the locale's infinity symbol to the base class together with the static INFINITY set.
+        : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol), unisets::INFINITY) {
 }

 bool InfinityMatcher::isDisabled(const ParsedNumber& result) const {
@ -118,15 +121,15 @@ NanMatcher::NanMatcher(const DecimalFormatSymbols& dfs)
        : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol), unisets::EMPTY) {
 }

-const UnicodeSet* NanMatcher::getLeadCodePoints() const {
+const UnicodeSet& NanMatcher::getLeadCodePoints() {
    // Overriding this here to allow use of statically allocated sets
    int leadCp = fString.char32At(0);
    const UnicodeSet* s = unisets::get(unisets::NAN_LEAD);
    if (s->contains(leadCp)) {
-        return new UnicodeSet(*s);
-    } else {
-        return SymbolMatcher::getLeadCodePoints();
+        return *s;
    }
+
+    // The static set does not cover this symbol's first code point; use the base-class computation.
+    return SymbolMatcher::getLeadCodePoints();
 }

 bool NanMatcher::isDisabled(const ParsedNumber& result) const {
@ -146,11 +149,11 @@ bool PaddingMatcher::isFlexible() const {
    return true;
 }

-bool PaddingMatcher::isDisabled(const ParsedNumber& result) const {
+// Never disabled: the answer does not depend on the parse state, so the parameter is left unnamed.
+bool PaddingMatcher::isDisabled(const ParsedNumber&) const {
    return false;
 }

-void PaddingMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
+// Records nothing in the result; both parameters are left unnamed because they are unused.
+void PaddingMatcher::accept(StringSegment&, ParsedNumber&) const {
    // No-op
 }

--- a/icu4c/source/i18n/numparse_symbols.h
+++ b/icu4c/source/i18n/numparse_symbols.h
@ -28,7 +28,8 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory {

    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

-    const UnicodeSet* getLeadCodePoints() const override;
+    /** NOTE: This method is not guaranteed to be thread-safe. */
+    const UnicodeSet& getLeadCodePoints() override;

    virtual bool isDisabled(const ParsedNumber& result) const = 0;

@ -92,7 +93,7 @@ class NanMatcher : public SymbolMatcher {

    NanMatcher(const DecimalFormatSymbols& dfs);

-    const UnicodeSet* getLeadCodePoints() const override;
+    const UnicodeSet& getLeadCodePoints() override;

  protected:
    bool isDisabled(const ParsedNumber& result) const override;
--- a/icu4c/source/i18n/numparse_types.h
+++ b/icu4c/source/i18n/numparse_types.h
@ -244,8 +244,6 @@ class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
 */
 class NumberParseMatcher {
  public:
-    virtual ~NumberParseMatcher() = default;
-
    /**
     * Matchers can override this method to return true to indicate that they are optional and can be run
     * repeatedly. Used by SeriesMatcher, primarily in the context of IgnorablesMatcher.
@ -259,6 +257,8 @@ class NumberParseMatcher {
     * something interesting in the StringSegment, it should update the offset of the StringSegment
     * corresponding to how many chars were matched.
     *
+     * This method is thread-safe.
+     *
     * @param segment
     *            The StringSegment to match against. Matches always start at the beginning of the
     *            segment. The segment is guaranteed to contain at least one char.
@ -275,9 +275,12 @@ class NumberParseMatcher {
     * return value is used to skip this matcher unless a segment begins with a char in this set. To make
     * this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
     *
-     * The returned UnicodeSet needs adoption!
+     * The returned UnicodeSet does not need adoption and is guaranteed to be alive for as long as the
+     * object that returned it.
+     *
+     * This method is NOT thread-safe.
     */
-    virtual const UnicodeSet* getLeadCodePoints() const = 0;
+    virtual const UnicodeSet& getLeadCodePoints() = 0;

    /**
     * Method called at the end of a parse, after all matchers have failed to consume any more chars.
@ -290,6 +293,13 @@ class NumberParseMatcher {
    virtual void postProcess(ParsedNumber&) const {
        // Default implementation: no-op
    };
+
+  protected:
+    // No construction except by subclasses!
+    NumberParseMatcher() = default;
+
+    // Optional ownership of the leadCodePoints set
+    LocalPointer<const UnicodeSet> fLocalLeadCodePoints;
 };


--- a/icu4c/source/test/intltest/intltest.cpp
+++ b/icu4c/source/test/intltest/intltest.cpp
@ -238,6 +238,12 @@ UnicodeString toString(UBool b) {
  return b ? UnicodeString("TRUE"):UnicodeString("FALSE");
 }

+// Renders a UnicodeSet as its pattern string, for use in test messages.
+//
+// The error code is accepted for signature compatibility but is deliberately not forwarded:
+// UnicodeSet::toPattern() has no error-code overload, and its second parameter is the
+// escapeUnprintable flag, so passing the error code there would make escaping depend on it.
+UnicodeString toString(const UnicodeSet& uniset, UErrorCode& /*status*/) {
+    UnicodeString result;
+    uniset.toPattern(result, FALSE);
+    return result;
+}
+
 // stephen - cleaned up 05/05/99
 UnicodeString operator+(const UnicodeString& left, char num)
 { return left + (long)num; }
@ -2050,6 +2056,24 @@ UBool IntlTest::assertEquals(const char* message,
    return TRUE;
 }

+// Asserts that two UnicodeSets are equal. On a mismatch, logs an error showing both sets as
+// patterns and returns FALSE; otherwise returns TRUE.
+UBool IntlTest::assertEquals(const char* message,
+                             const UnicodeSet& expected,
+                             const UnicodeSet& actual) {
+    IcuTestErrorCode status(*this, "assertEqualsUniSet");
+    if (expected == actual) {
+#ifdef VERBOSE_ASSERTIONS
+        logln((UnicodeString)"Ok: " + message + "; got " + toString(actual, status));
+#endif
+        return TRUE;
+    }
+
+    errln((UnicodeString)"FAIL: " + message + "; got " +
+          toString(actual, status) +
+          "; expected " + toString(expected, status));
+    return FALSE;
+}
+

 #if !UCONFIG_NO_FORMATTING
 UBool IntlTest::assertEquals(const char* message,
@ -2136,6 +2160,11 @@ UBool IntlTest::assertEquals(const UnicodeString& message,
                             UErrorCode actual) {
    return assertEquals(extractToAssertBuf(message), expected, actual);
 }
+// UnicodeString-message convenience overload; converts the message and defers to the overload above.
+UBool IntlTest::assertEquals(const UnicodeString& message,
+                             const UnicodeSet& expected,
+                             const UnicodeSet& actual) {
+    const char* narrowMessage = extractToAssertBuf(message);
+    return assertEquals(narrowMessage, expected, actual);
+}

 #if !UCONFIG_NO_FORMATTING
 UBool IntlTest::assertEquals(const UnicodeString& message,
--- a/icu4c/source/test/intltest/intltest.h
+++ b/icu4c/source/test/intltest/intltest.h
@ -16,6 +16,7 @@
 // The following includes utypes.h, uobject.h and unistr.h
 #include "unicode/fmtable.h"
 #include "unicode/testlog.h"
+#include "unicode/uniset.h"

 U_NAMESPACE_USE

@ -295,6 +296,7 @@ public:
    UBool assertEquals(const char* message, int64_t expected, int64_t actual);
    UBool assertEquals(const char* message, double expected, double actual);
    UBool assertEquals(const char* message, UErrorCode expected, UErrorCode actual);
+    UBool assertEquals(const char* message, const UnicodeSet& expected, const UnicodeSet& actual);
 #if !UCONFIG_NO_FORMATTING
    UBool assertEquals(const char* message, const Formattable& expected,
                       const Formattable& actual, UBool possibleDataError=FALSE);
@ -312,6 +314,7 @@ public:
    UBool assertEquals(const UnicodeString& message, int64_t expected, int64_t actual);
    UBool assertEquals(const UnicodeString& message, double expected, double actual);
    UBool assertEquals(const UnicodeString& message, UErrorCode expected, UErrorCode actual);
+    UBool assertEquals(const UnicodeString& message, const UnicodeSet& expected, const UnicodeSet& actual);

    virtual void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ); // overide !

--- a/icu4c/source/test/intltest/numbertest.h
+++ b/icu4c/source/test/intltest/numbertest.h
@ -212,6 +212,7 @@ class NumberParserTest : public IntlTest {
    void testBasic();
    void testLocaleFi();
    void testSeriesMatcher();
+    void testCurrencyAnyMatcher();
    void testGroupingDisabled();

    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
--- a/icu4c/source/test/intltest/numbertest_parse.cpp
+++ b/icu4c/source/test/intltest/numbertest_parse.cpp
@ -21,6 +21,7 @@ void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& na
    }
    TESTCASE_AUTO_BEGIN;
        TESTCASE_AUTO(testBasic);
+        TESTCASE_AUTO(testSeriesMatcher);
    TESTCASE_AUTO_END;
 }

@ -99,7 +100,7 @@ void NumberParserTest::testBasic() {
                 {3, u"0", u"0", 1, 0.0}};

    parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
-    for (auto cas : cases) {
+    for (auto& cas : cases) {
        UnicodeString inputString(cas.inputString);
        UnicodeString patternString(cas.patternString);
        LocalPointer<const NumberParserImpl> parser(
@ -153,5 +154,54 @@ void NumberParserTest::testBasic() {
    }
 }

+void NumberParserTest::testSeriesMatcher() {  // Checks ArraySeriesMatcher: its lead code points, then match() over a table of inputs.
+    IcuTestErrorCode status(*this, "testSeriesMatcher");
+
+    DecimalFormatSymbols symbols("en", status);
+    // Component matchers in series order: plus sign, minus sign, ignorables, percent, ignorables.
+    PlusSignMatcher m0(symbols, false);
+    MinusSignMatcher m1(symbols, false);
+    IgnorablesMatcher m2(unisets::DEFAULT_IGNORABLES);
+    PercentMatcher m3(symbols);
+    IgnorablesMatcher m4(unisets::DEFAULT_IGNORABLES);
+    // NOTE(review): the pointer array is heap-allocated here; presumably ArraySeriesMatcher adopts the array (the matchers themselves are locals) -- confirm.
+    ArraySeriesMatcher series(new NumberParseMatcher* [5]{&m0, &m1, &m2, &m3, &m4}, 5);
+    // m0 (the plus-sign matcher) is first in the series, so the lead set is compared against PLUS_SIGN.
+    assertEquals(
+            "Lead set should be equal to lead set of lead matcher",
+            *unisets::get(unisets::PLUS_SIGN),
+            series.getLeadCodePoints());
+    // Each case: input text, segment offset expected after match(), and the value match() is expected to return.
+    static const struct TestCase {
+        const char16_t* input;
+        int32_t expectedOffset;
+        bool expectedMaybeMore;
+    } cases[] = {{u"", 0, true},
+                 {u" ", 0, false},
+                 {u"$", 0, false},
+                 {u"+", 0, true},
+                 {u" +", 0, false},
+                 {u"+-", 0, true},
+                 {u"+ -", 0, false},
+                 {u"+-  ", 0, true},
+                 {u"+-  $", 0, false},
+                 {u"+-%", 3, true},  // expected offset equals the input length
+                 {u"  +-  %  ", 0, false},
+                 {u"+-  %  ", 7, true},  // expected offset equals the input length
+                 {u"+-%$", 3, false}};  // expected offset stops before the trailing '$'
+    // Every case gets a fresh segment (starting at offset 0) and result; the series matcher is shared.
+    for (auto& cas : cases) {
+        UnicodeString input(cas.input);
+
+        StringSegment segment(input, 0);
+        ParsedNumber result;
+        bool actualMaybeMore = series.match(segment, result, status);
+        int actualOffset = segment.getOffset();  // offset of the segment after the match attempt
+
+        assertEquals("'" + input + "'", cas.expectedOffset, actualOffset);
+        assertEquals("'" + input + "'", cas.expectedMaybeMore, actualMaybeMore);
+    }
+}
+

 #endif
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyCustomMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyCustomMatcher.java
@ -9,19 +9,19 @@ import com.ibm.icu.util.ULocale;
 /**
 * A matcher for a single currency instance (not the full trie).
 */
-public class CurrencyMatcher implements NumberParseMatcher {
+public class CurrencyCustomMatcher implements NumberParseMatcher {

    private final String isoCode;
    private final String currency1;
    private final String currency2;

-    public static CurrencyMatcher getInstance(Currency currency, ULocale loc) {
-        return new CurrencyMatcher(currency.getSubtype(),
+    public static CurrencyCustomMatcher getInstance(Currency currency, ULocale loc) {
+        return new CurrencyCustomMatcher(currency.getSubtype(), // ctor order is (isoCode, currency1, currency2): subtype, symbol for loc, currency code
                currency.getSymbol(loc),
                currency.getCurrencyCode());
    }

-    private CurrencyMatcher(String isoCode, String currency1, String currency2) {
+    private CurrencyCustomMatcher(String isoCode, String currency1, String currency2) {
        this.isoCode = isoCode;
        this.currency1 = currency1;
        this.currency2 = currency2;
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyNamesMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyNamesMatcher.java
@ -11,21 +11,24 @@ import com.ibm.icu.util.Currency.CurrencyStringInfo;
 import com.ibm.icu.util.ULocale;

 /**
- * @author sffc
+ * Matches currencies according to all available strings in locale data.
 *
+ * The implementation of this class is different between J and C. See #13584 for a follow-up.
+ *
+ * @author sffc
 */
-public class CurrencyTrieMatcher implements NumberParseMatcher {
+public class CurrencyNamesMatcher implements NumberParseMatcher {

    private final TextTrieMap<CurrencyStringInfo> longNameTrie;
    private final TextTrieMap<CurrencyStringInfo> symbolTrie;

-    public static CurrencyTrieMatcher getInstance(ULocale locale) {
+    public static CurrencyNamesMatcher getInstance(ULocale locale) {
        // TODO: Pre-compute some of the more popular locales?
-        return new CurrencyTrieMatcher(locale);
+        return new CurrencyNamesMatcher(locale); // a new matcher is constructed on every call; nothing is cached here
    }

-    private CurrencyTrieMatcher(ULocale locale) {
-        // TODO: Currency trie does not currently have an option for case folding.  It defaults to use
+    private CurrencyNamesMatcher(ULocale locale) {
+        // TODO: Currency trie does not currently have an option for case folding. It defaults to use
        // case folding on long-names but not symbols.
        longNameTrie = Currency.getParsingTrie(locale, Currency.LONG_NAME);
        symbolTrie = Currency.getParsingTrie(locale, Currency.SYMBOL_NAME);
@ -55,6 +58,8 @@ public class CurrencyTrieMatcher implements NumberParseMatcher {
        UnicodeSet leadCodePoints = new UnicodeSet();
        longNameTrie.putLeadCodePoints(leadCodePoints);
        symbolTrie.putLeadCodePoints(leadCodePoints);
+        // Always apply case mapping closure for currencies
+        leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
        return leadCodePoints.freeze();
    }

--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java
@ -7,14 +7,15 @@ import com.ibm.icu.util.Currency;
 import com.ibm.icu.util.ULocale;

 /**
- * @author sffc
+ * Small helper class that generates matchers for SeriesMatcher.
 *
+ * @author sffc
 */
 public class MatcherFactory {
-    Currency currency;
-    DecimalFormatSymbols symbols;
-    IgnorablesMatcher ignorables;
-    ULocale locale;
+    public Currency currency;
+    public DecimalFormatSymbols symbols;
+    public IgnorablesMatcher ignorables;
+    public ULocale locale;

    public MinusSignMatcher minusSign(boolean allowTrailing) {
        return MinusSignMatcher.getInstance(symbols, allowTrailing);
@ -34,8 +35,8 @@ public class MatcherFactory {

    public AnyMatcher currency() {
        AnyMatcher any = new AnyMatcher();
-        any.addMatcher(CurrencyMatcher.getInstance(currency, locale));
-        any.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+        any.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale)); // matcher for this factory's single configured currency
+        any.addMatcher(CurrencyNamesMatcher.getInstance(locale)); // matcher built from the locale's long-name and symbol tries
        any.freeze();
        return any;
    }
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
@ -95,7 +95,7 @@ public class NumberParserImpl {
        parser.addMatcher(InfinityMatcher.getInstance(symbols));
        parser.addMatcher(PaddingMatcher.getInstance("@"));
        parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
-        parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+        parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
        parser.addMatcher(new RequireNumberMatcher());

        parser.freeze();
@ -213,8 +213,8 @@ public class NumberParserImpl {
        ////////////////////////

        if (parseCurrency || patternInfo.hasCurrencySign()) {
-            parser.addMatcher(CurrencyMatcher.getInstance(currency, locale));
-            parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+            parser.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
+            parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
        }

        ///////////////////////////////
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
@ -7,8 +7,11 @@ import static org.junit.Assert.assertTrue;

 import org.junit.Test;

+import com.ibm.icu.impl.number.CustomSymbolCurrency;
 import com.ibm.icu.impl.number.DecimalFormatProperties;
+import com.ibm.icu.impl.number.parse.AnyMatcher;
 import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
+import com.ibm.icu.impl.number.parse.MatcherFactory;
 import com.ibm.icu.impl.number.parse.MinusSignMatcher;
 import com.ibm.icu.impl.number.parse.NumberParserImpl;
 import com.ibm.icu.impl.number.parse.ParsedNumber;
@ -222,6 +225,38 @@ public class NumberParserTest {
        }
    }

+    @Test
+    public void testCurrencyAnyMatcher() { // Checks the AnyMatcher returned by MatcherFactory.currency() on custom and locale currency strings.
+        MatcherFactory factory = new MatcherFactory();
+        factory.locale = ULocale.ENGLISH; // only locale and currency are assigned; symbols and ignorables are left unset
+        CustomSymbolCurrency currency = new CustomSymbolCurrency("ICU", "IU$", "ICU");
+        factory.currency = currency;
+        AnyMatcher matcher = factory.currency();
+        // Each case is { input, expected currency code }; a null code means no currency is expected to be parsed.
+        Object[][] cases = new Object[][] {
+                { "", null },
+                { "FOO", null },
+                { "USD", "USD" },
+                { "$", "USD" },
+                { "US dollars", "USD" },
+                { "eu", null }, // only a prefix of "euros" (next case): no currency expected
+                { "euros", "EUR" },
+                { "ICU", "ICU" }, // strings of the custom currency constructed above
+                { "IU$", "ICU" } };
+        for (Object[] cas : cases) {
+            String input = (String) cas[0];
+            String expectedCurrencyCode = (String) cas[1];
+            // Fresh segment and result for every case; the matcher itself is reused.
+            StringSegment segment = new StringSegment(input, 0);
+            ParsedNumber result = new ParsedNumber();
+            matcher.match(segment, result);
+            assertEquals("Parsing " + input, expectedCurrencyCode, result.currencyCode);
+            assertEquals("Whole string on " + input,
+                    expectedCurrencyCode == null ? 0 : input.length(), // a match is expected to cover the whole input, a non-match to leave charEnd at 0
+                    result.charEnd);
+        }
+    }
+
    @Test
    public void testGroupingDisabled() {
        DecimalFormatProperties properties = new DecimalFormatProperties();