ICU-13634 Changes NumberParseMatcher getLeadCodePoints() to smokeTest() in C++ and Java. The new method is more versatile and eliminates the requirement to maintain two code paths for "lead chars" and "no lead chars".

X-SVN-Rev: 41131
2025-04-14 01:11:02 +00:00 · 2018-03-21 06:30:29 +00:00 · 2018-03-21 06:30:29 +00:00 · 01916cad11
commit 01916cad11
parent 8b4c367468
40 changed files with 266 additions and 408 deletions
--- a/icu4c/source/i18n/numparse_affixes.cpp
+++ b/icu4c/source/i18n/numparse_affixes.cpp
@ -206,21 +206,15 @@ CodePointMatcher::CodePointMatcher(UChar32 cp)
        : fCp(cp) {}

 bool CodePointMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
-    if (segment.matches(fCp)) {
+    if (segment.startsWith(fCp)) {
        segment.adjustOffsetByCodePoint();
        result.setCharsConsumed(segment);
    }
    return false;
 }

-const UnicodeSet& CodePointMatcher::getLeadCodePoints() {
-    if (fLocalLeadCodePoints.isNull()) {
-        auto* leadCodePoints = new UnicodeSet();
-        leadCodePoints->add(fCp);
-        leadCodePoints->freeze();
-        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
-    }
-    return *fLocalLeadCodePoints;
+bool CodePointMatcher::smokeTest(const StringSegment& segment) const {
+    return segment.startsWith(fCp);
 }

 UnicodeString CodePointMatcher::toString() const {
@ -427,19 +421,9 @@ bool AffixMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCod
    }
 }

-const UnicodeSet& AffixMatcher::getLeadCodePoints() {
-    if (fLocalLeadCodePoints.isNull()) {
-        auto* leadCodePoints = new UnicodeSet();
-        if (fPrefix != nullptr) {
-            leadCodePoints->addAll(fPrefix->getLeadCodePoints());
-        }
-        if (fSuffix != nullptr) {
-            leadCodePoints->addAll(fSuffix->getLeadCodePoints());
-        }
-        leadCodePoints->freeze();
-        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
-    }
-    return *fLocalLeadCodePoints;
+bool AffixMatcher::smokeTest(const StringSegment& segment) const {
+    return (fPrefix != nullptr && fPrefix->smokeTest(segment)) ||
+           (fSuffix != nullptr && fSuffix->smokeTest(segment));
 }

 void AffixMatcher::postProcess(ParsedNumber& result) const {
--- a/icu4c/source/i18n/numparse_affixes.h
+++ b/icu4c/source/i18n/numparse_affixes.h
@ -35,7 +35,7 @@ class CodePointMatcher : public NumberParseMatcher, public UMemory {

    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

-    const UnicodeSet& getLeadCodePoints() override;
+    bool smokeTest(const StringSegment& segment) const override;

    UnicodeString toString() const override;

@ -190,7 +190,7 @@ class AffixMatcher : public NumberParseMatcher, public UMemory {

    void postProcess(ParsedNumber& result) const override;

-    const UnicodeSet& getLeadCodePoints() override;
+    bool smokeTest(const StringSegment& segment) const override;

    int8_t compareTo(const AffixMatcher& rhs) const;

--- a/icu4c/source/i18n/numparse_compositions.cpp
+++ b/icu4c/source/i18n/numparse_compositions.cpp
@ -38,9 +38,19 @@ bool AnyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&
    return maybeMore;
 }

+bool AnyMatcher::smokeTest(const StringSegment& segment) const {
+    // NOTE: The range-based for loop calls the virtual begin() and end() methods.
+    for (auto& matcher : *this) {
+        if (matcher->smokeTest(segment)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 void AnyMatcher::postProcess(ParsedNumber& result) const {
    // NOTE: The range-based for loop calls the virtual begin() and end() methods.
-    for (auto* matcher : *this) {
+    for (auto& matcher : *this) {
        matcher->postProcess(result);
    }
 }
@ -83,6 +93,17 @@ bool SeriesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCo
    return maybeMore;
 }

+bool SeriesMatcher::smokeTest(const StringSegment& segment) const {
+    // NOTE: The range-based for loop calls the virtual begin() and end() methods.
+    // NOTE: We only want the first element. Use the for loop for boundary checking.
+    for (auto& matcher : *this) {
+        // SeriesMatchers are never allowed to start with a Flexible matcher.
+        U_ASSERT(!matcher->isFlexible());
+        return matcher->smokeTest(segment);
+    }
+    return false;
+}
+
 void SeriesMatcher::postProcess(ParsedNumber& result) const {
    // NOTE: The range-based for loop calls the virtual begin() and end() methods.
    for (auto* matcher : *this) {
@ -99,12 +120,6 @@ ArraySeriesMatcher::ArraySeriesMatcher(MatcherArray& matchers, int32_t matchersL
        : fMatchers(std::move(matchers)), fMatchersLen(matchersLen) {
 }

-const UnicodeSet& ArraySeriesMatcher::getLeadCodePoints() {
-    // SeriesMatchers are never allowed to start with a Flexible matcher.
-    U_ASSERT(!fMatchers[0]->isFlexible());
-    return fMatchers[0]->getLeadCodePoints();
-}
-
 int32_t ArraySeriesMatcher::length() const {
    return fMatchersLen;
 }
--- a/icu4c/source/i18n/numparse_compositions.h
+++ b/icu4c/source/i18n/numparse_compositions.h
@ -42,6 +42,8 @@ class AnyMatcher : public CompositionMatcher {
  public:
    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

+    bool smokeTest(const StringSegment& segment) const override;
+
    void postProcess(ParsedNumber& result) const override;

  protected:
@ -61,6 +63,8 @@ class SeriesMatcher : public CompositionMatcher {
  public:
    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

+    bool smokeTest(const StringSegment& segment) const override;
+
    void postProcess(ParsedNumber& result) const override;

    virtual int32_t length() const = 0;
@ -80,13 +84,11 @@ class ArraySeriesMatcher : public SeriesMatcher {
  public:
    ArraySeriesMatcher();  // WARNING: Leaves the object in an unusable state

-    typedef MaybeStackArray<NumberParseMatcher*, 3> MatcherArray;
+    typedef MaybeStackArray<const NumberParseMatcher*, 3> MatcherArray;

    /** The array is std::move'd */
    ArraySeriesMatcher(MatcherArray& matchers, int32_t matchersLen);

-    const UnicodeSet& getLeadCodePoints() override;
-
    UnicodeString toString() const override;

    int32_t length() const override;
--- a/icu4c/source/i18n/numparse_currency.cpp
+++ b/icu4c/source/i18n/numparse_currency.cpp
@ -21,7 +21,12 @@ using namespace icu::numparse::impl;


 CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status)
-        : fLocaleName(locale.getName(), -1, status) {}
+        : fLocaleName(locale.getName(), -1, status) {
+    uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
+    // Always apply case mapping closure for currencies
+    fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
+    fLeadCodePoints.freeze();
+}

 bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    if (result.currencyCode[0] != 0) {
@ -57,17 +62,8 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U
    return partialMatch;
 }

-const UnicodeSet& CurrencyNamesMatcher::getLeadCodePoints() {
-    if (fLocalLeadCodePoints.isNull()) {
-        ErrorCode status;
-        auto* leadCodePoints = new UnicodeSet();
-        uprv_currencyLeads(fLocaleName.data(), *leadCodePoints, status);
-        // Always apply case mapping closure for currencies
-        leadCodePoints->closeOver(USET_ADD_CASE_MAPPINGS);
-        leadCodePoints->freeze();
-        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
-    }
-    return *fLocalLeadCodePoints;
+bool CurrencyNamesMatcher::smokeTest(const StringSegment& segment) const {
+    return segment.startsWith(fLeadCodePoints);
 }

 UnicodeString CurrencyNamesMatcher::toString() const {
@ -103,15 +99,8 @@ bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result,
    return overlap1 == segment.length() || overlap2 == segment.length();
 }

-const UnicodeSet& CurrencyCustomMatcher::getLeadCodePoints() {
-    if (fLocalLeadCodePoints.isNull()) {
-        auto* leadCodePoints = new UnicodeSet();
-        utils::putLeadCodePoint(fCurrency1, leadCodePoints);
-        utils::putLeadCodePoint(fCurrency2, leadCodePoints);
-        leadCodePoints->freeze();
-        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
-    }
-    return *fLocalLeadCodePoints;
+bool CurrencyCustomMatcher::smokeTest(const StringSegment& segment) const {
+    return segment.startsWith(fCurrency1) || segment.startsWith(fCurrency2);
 }

 UnicodeString CurrencyCustomMatcher::toString() const {
@ -144,17 +133,6 @@ CurrencyAnyMatcher& CurrencyAnyMatcher::operator=(CurrencyAnyMatcher&& src) U_NO
    return *this;
 }

-const UnicodeSet& CurrencyAnyMatcher::getLeadCodePoints() {
-    if (fLocalLeadCodePoints.isNull()) {
-        auto* leadCodePoints = new UnicodeSet();
-        leadCodePoints->addAll(fNamesMatcher.getLeadCodePoints());
-        leadCodePoints->addAll(fCustomMatcher.getLeadCodePoints());
-        leadCodePoints->freeze();
-        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
-    }
-    return *fLocalLeadCodePoints;
-}
-
 const NumberParseMatcher* const* CurrencyAnyMatcher::begin() const {
    return fMatcherArray;
 }
--- a/icu4c/source/i18n/numparse_currency.h
+++ b/icu4c/source/i18n/numparse_currency.h
@ -11,6 +11,7 @@
 #include "numparse_compositions.h"
 #include "charstr.h"
 #include "number_currencysymbols.h"
+#include "unicode/uniset.h"

 U_NAMESPACE_BEGIN namespace numparse {
 namespace impl {
@ -32,7 +33,7 @@ class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {

    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

-    const UnicodeSet& getLeadCodePoints() override;
+    bool smokeTest(const StringSegment& segment) const override;

    UnicodeString toString() const override;

@ -41,6 +42,7 @@ class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
    // Locale has a non-trivial default constructor.
    CharString fLocaleName;

+    UnicodeSet fLeadCodePoints;
 };


@ -52,7 +54,7 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {

    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

-    const UnicodeSet& getLeadCodePoints() override;
+    bool smokeTest(const StringSegment& segment) const override;

    UnicodeString toString() const override;

@ -78,8 +80,6 @@ class CurrencyAnyMatcher : public AnyMatcher, public UMemory {

    CurrencyAnyMatcher& operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT;

-    const UnicodeSet& getLeadCodePoints() override;
-
    UnicodeString toString() const override;

  protected:
--- a/icu4c/source/i18n/numparse_decimal.cpp
+++ b/icu4c/source/i18n/numparse_decimal.cpp
@ -295,25 +295,23 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
    return segment.length() == 0 || hasPartialPrefix;
 }

-const UnicodeSet& DecimalMatcher::getLeadCodePoints() {
+bool DecimalMatcher::smokeTest(const StringSegment& segment) const {
+    // The common case uses a static leadSet for efficiency.
    if (fLocalDigitStrings.isNull() && leadSet != nullptr) {
-        return *leadSet;
+        return segment.startsWith(*leadSet);
    }
-
-    if (fLocalLeadCodePoints.isNull()) {
-        auto* leadCodePoints = new UnicodeSet();
-        // Assumption: the sets are all single code points.
-        leadCodePoints->addAll(*unisets::get(unisets::DIGITS));
-        leadCodePoints->addAll(*separatorSet);
-        if (!fLocalDigitStrings.isNull()) {
-            for (int i = 0; i < 10; i++) {
-                utils::putLeadCodePoint(fLocalDigitStrings[i], leadCodePoints);
-            }
+    if (segment.startsWith(*separatorSet) || u_isdigit(segment.getCodePoint())) {
+        return true;
+    }
+    if (fLocalDigitStrings.isNull()) {
+        return false;
+    }
+    for (int i = 0; i < 10; i++) {
+        if (segment.startsWith(fLocalDigitStrings[i])) {
+            return true;
        }
-        leadCodePoints->freeze();
-        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
    }
-    return *fLocalLeadCodePoints;
+    return false;
 }

 UnicodeString DecimalMatcher::toString() const {
--- a/icu4c/source/i18n/numparse_decimal.h
+++ b/icu4c/source/i18n/numparse_decimal.h
@ -27,7 +27,7 @@ class DecimalMatcher : public NumberParseMatcher, public UMemory {
    bool
    match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, UErrorCode& status) const;

-    const UnicodeSet& getLeadCodePoints() override;
+    bool smokeTest(const StringSegment& segment) const override;

    UnicodeString toString() const override;

--- a/icu4c/source/i18n/numparse_impl.cpp
+++ b/icu4c/source/i18n/numparse_impl.cpp
@ -34,7 +34,7 @@ NumberParserImpl*
 NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& patternString,
                                     parse_flags_t parseFlags, UErrorCode& status) {

-    LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags, true));
+    LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags));
    DecimalFormatSymbols symbols(locale, status);

    parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES};
@ -117,7 +117,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
    }
    IgnorablesMatcher ignorables(isStrict ? unisets::DEFAULT_IGNORABLES : unisets::STRICT_IGNORABLES);

-    LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags, status));
+    LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags));

    //////////////////////
    /// AFFIX MATCHERS ///
@ -197,52 +197,22 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
    return parser.orphan();
 }

-NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags, bool computeLeads)
-        : fParseFlags(parseFlags), fComputeLeads(computeLeads) {
+NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags)
+        : fParseFlags(parseFlags) {
 }

 NumberParserImpl::~NumberParserImpl() {
-    if (fComputeLeads) {
-        for (int32_t i = 0; i < fNumMatchers; i++) {
-            delete (fLeads[i]);
-        }
-    }
    fNumMatchers = 0;
 }

 void NumberParserImpl::addMatcher(NumberParseMatcher& matcher) {
    if (fNumMatchers + 1 > fMatchers.getCapacity()) {
        fMatchers.resize(fNumMatchers * 2, fNumMatchers);
-        if (fComputeLeads) {
-            // The two arrays should grow in tandem:
-            U_ASSERT(fNumMatchers >= fLeads.getCapacity());
-            fLeads.resize(fNumMatchers * 2, fNumMatchers);
-        }
    }
-
    fMatchers[fNumMatchers] = &matcher;
-
-    if (fComputeLeads) {
-        addLeadCodePointsForMatcher(matcher);
-    }
-
    fNumMatchers++;
 }

-void NumberParserImpl::addLeadCodePointsForMatcher(NumberParseMatcher& matcher) {
-    const UnicodeSet& leadCodePoints = matcher.getLeadCodePoints();
-    // TODO: Avoid the clone operation here.
-    if (0 != (fParseFlags & PARSE_FLAG_IGNORE_CASE)) {
-        auto* copy = dynamic_cast<UnicodeSet*>(leadCodePoints.cloneAsThawed());
-        copy->closeOver(USET_ADD_CASE_MAPPINGS);
-        copy->freeze();
-        fLeads[fNumMatchers] = copy;
-    } else {
-        // FIXME: new here because we still take ownership
-        fLeads[fNumMatchers] = new UnicodeSet(leadCodePoints);
-    }
-}
-
 void NumberParserImpl::freeze() {
    fFrozen = true;
 }
@ -276,12 +246,11 @@ void NumberParserImpl::parseGreedyRecursive(StringSegment& segment, ParsedNumber
    }

    int initialOffset = segment.getOffset();
-    int leadCp = segment.getCodePoint();
    for (int32_t i = 0; i < fNumMatchers; i++) {
-        if (fComputeLeads && !fLeads[i]->contains(leadCp)) {
+        const NumberParseMatcher* matcher = fMatchers[i];
+        if (!matcher->smokeTest(segment)) {
            continue;
        }
-        const NumberParseMatcher* matcher = fMatchers[i];
        matcher->match(segment, result, status);
        if (U_FAILURE(status)) {
            return;
@ -313,8 +282,10 @@ void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumbe

    int initialOffset = segment.getOffset();
    for (int32_t i = 0; i < fNumMatchers; i++) {
-        // TODO: Check leadChars here?
        const NumberParseMatcher* matcher = fMatchers[i];
+        if (!matcher->smokeTest(segment)) {
+            continue;
+        }

        // In a non-greedy parse, we attempt all possible matches and pick the best.
        for (int32_t charsToConsume = 0; charsToConsume < segment.length();) {
--- a/icu4c/source/i18n/numparse_impl.h
+++ b/icu4c/source/i18n/numparse_impl.h
@ -32,6 +32,11 @@ class NumberParserImpl : public MutableMatcherCollection {
            const number::impl::DecimalFormatProperties& properties, const DecimalFormatSymbols& symbols,
            bool parseCurrency, bool optimize, UErrorCode& status);

+    /**
+     * Does NOT take ownership of the matcher. The matcher MUST remain valid for the lifespan of the
+     * NumberParserImpl.
+     * @param matcher The matcher to reference.
+     */
    void addMatcher(NumberParseMatcher& matcher) override;

    void freeze();
@ -48,8 +53,6 @@ class NumberParserImpl : public MutableMatcherCollection {
    int32_t fNumMatchers = 0;
    // NOTE: The stack capacity for fMatchers and fLeads should be the same
    MaybeStackArray<const NumberParseMatcher*, 10> fMatchers;
-    MaybeStackArray<const UnicodeSet*, 10> fLeads;
-    bool fComputeLeads;
    bool fFrozen = false;

    // WARNING: All of these matchers start in an undefined state (default-constructed).
@ -78,9 +81,7 @@ class NumberParserImpl : public MutableMatcherCollection {
        RequireNumberValidator number;
    } fLocalValidators;

-    NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);
-
-    void addLeadCodePointsForMatcher(NumberParseMatcher& matcher);
+    explicit NumberParserImpl(parse_flags_t parseFlags);

    void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;

--- a/icu4c/source/i18n/numparse_scientific.cpp
+++ b/icu4c/source/i18n/numparse_scientific.cpp
@ -44,10 +44,10 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr

        // Allow a sign, and then try to match digits.
        int8_t exponentSign = 1;
-        if (segment.matches(*unisets::get(unisets::MINUS_SIGN))) {
+        if (segment.startsWith(*unisets::get(unisets::MINUS_SIGN))) {
            exponentSign = -1;
            segment.adjustOffsetByCodePoint();
-        } else if (segment.matches(*unisets::get(unisets::PLUS_SIGN))) {
+        } else if (segment.startsWith(*unisets::get(unisets::PLUS_SIGN))) {
            segment.adjustOffsetByCodePoint();
        }

@ -71,20 +71,8 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
    return false;
 }

-const UnicodeSet& ScientificMatcher::getLeadCodePoints() {
-    UChar32 leadCp = fExponentSeparatorString.char32At(0);
-    const UnicodeSet* s = unisets::get(unisets::SCIENTIFIC_LEAD);
-    if (s->contains(leadCp)) {
-        return *s;
-    }
-
-    if (fLocalLeadCodePoints.isNull()) {
-        auto* leadCodePoints = new UnicodeSet();
-        leadCodePoints->add(leadCp);
-        leadCodePoints->freeze();
-        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
-    }
-    return *fLocalLeadCodePoints;
+bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
+    return segment.startsWith(fExponentSeparatorString);
 }

 UnicodeString ScientificMatcher::toString() const {
--- a/icu4c/source/i18n/numparse_scientific.h
+++ b/icu4c/source/i18n/numparse_scientific.h
@ -25,7 +25,7 @@ class ScientificMatcher : public NumberParseMatcher, public UMemory {

    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

-    const UnicodeSet& getLeadCodePoints() override;
+    bool smokeTest(const StringSegment& segment) const override;

    UnicodeString toString() const override;

--- a/icu4c/source/i18n/numparse_stringsegment.cpp
+++ b/icu4c/source/i18n/numparse_stringsegment.cpp
@ -75,11 +75,11 @@ UChar32 StringSegment::getCodePoint() const {
    }
 }

-bool StringSegment::matches(UChar32 otherCp) const {
+bool StringSegment::startsWith(UChar32 otherCp) const {
    return codePointsEqual(getCodePoint(), otherCp, fFoldCase);
 }

-bool StringSegment::matches(const UnicodeSet& uniset) const {
+bool StringSegment::startsWith(const UnicodeSet& uniset) const {
    // TODO: Move UnicodeSet case-folding logic here.
    // TODO: Handle string matches here instead of separately.
    UChar32 cp = getCodePoint();
@ -89,6 +89,15 @@ bool StringSegment::matches(const UnicodeSet& uniset) const {
    return uniset.contains(cp);
 }

+bool StringSegment::startsWith(const UnicodeString& other) const {
+    if (other.isBogus() || other.length() == 0 || length() == 0) {
+        return false;
+    }
+    int cp1 = getCodePoint();
+    int cp2 = other.char32At(0);
+    return codePointsEqual(cp1, cp2, fFoldCase);
+}
+
 int32_t StringSegment::getCommonPrefixLength(const UnicodeString& other) {
    return getPrefixLengthInternal(other, fFoldCase);
 }
--- a/icu4c/source/i18n/numparse_symbols.cpp
+++ b/icu4c/source/i18n/numparse_symbols.cpp
@ -58,20 +58,8 @@ bool SymbolMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCo
    return overlap == segment.length();
 }

-const UnicodeSet& SymbolMatcher::getLeadCodePoints() {
-    if (fString.isEmpty()) {
-        // Assumption: for sets from UnicodeSetStaticCache, uniSet == leadCodePoints.
-        return *fUniSet;
-    }
-
-    if (fLocalLeadCodePoints.isNull()) {
-        auto* leadCodePoints = new UnicodeSet();
-        utils::putLeadCodePoints(fUniSet, leadCodePoints);
-        utils::putLeadCodePoint(fString, leadCodePoints);
-        leadCodePoints->freeze();
-        fLocalLeadCodePoints.adoptInstead(leadCodePoints);
-    }
-    return *fLocalLeadCodePoints;
+bool SymbolMatcher::smokeTest(const StringSegment& segment) const {
+    return segment.startsWith(*fUniSet) || segment.startsWith(fString);
 }

 UnicodeString SymbolMatcher::toString() const {
@ -134,17 +122,6 @@ NanMatcher::NanMatcher(const DecimalFormatSymbols& dfs)
        : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol), unisets::EMPTY) {
 }

-const UnicodeSet& NanMatcher::getLeadCodePoints() {
-    // Overriding this here to allow use of statically allocated sets
-    int leadCp = fString.char32At(0);
-    const UnicodeSet* s = unisets::get(unisets::NAN_LEAD);
-    if (s->contains(leadCp)) {
-        return *s;
-    }
-
-    return SymbolMatcher::getLeadCodePoints();
-}
-
 bool NanMatcher::isDisabled(const ParsedNumber& result) const {
    return result.seenNumber();
 }
--- a/icu4c/source/i18n/numparse_symbols.h
+++ b/icu4c/source/i18n/numparse_symbols.h
@ -28,7 +28,7 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory {

    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

-    const UnicodeSet& getLeadCodePoints() override;
+    bool smokeTest(const StringSegment& segment) const override;

    UnicodeString toString() const override;

@ -96,8 +96,6 @@ class NanMatcher : public SymbolMatcher {

    NanMatcher(const DecimalFormatSymbols& dfs);

-    const UnicodeSet& getLeadCodePoints() override;
-
  protected:
    bool isDisabled(const ParsedNumber& result) const override;

--- a/icu4c/source/i18n/numparse_types.h
+++ b/icu4c/source/i18n/numparse_types.h
@ -26,7 +26,7 @@ enum ResultFlags {
    FLAG_PERCENT = 0x0002,
    FLAG_PERMILLE = 0x0004,
    FLAG_HAS_EXPONENT = 0x0008,
-    FLAG_HAS_DEFAULT_CURRENCY = 0x0010,
+    // FLAG_HAS_DEFAULT_CURRENCY = 0x0010, // no longer used
    FLAG_HAS_DECIMAL_SEPARATOR = 0x0020,
    FLAG_NAN = 0x0040,
    FLAG_INFINITY = 0x0080,
@ -46,6 +46,7 @@ enum ParseFlags {
    PARSE_FLAG_USE_FULL_AFFIXES = 0x0100,
    PARSE_FLAG_EXACT_AFFIX = 0x0200,
    PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400,
+    PARSE_FLAG_OPTIMIZE = 0x0800,
 };


@ -216,12 +217,18 @@ class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
     * <p>
     * This method will perform case folding if case folding is enabled for the parser.
     */
-    bool matches(UChar32 otherCp) const;
+    bool startsWith(UChar32 otherCp) const;

    /**
     * Returns true if the first code point of this StringSegment is in the given UnicodeSet.
     */
-    bool matches(const UnicodeSet& uniset) const;
+    bool startsWith(const UnicodeSet& uniset) const;
+
+    /**
+     * Returns true if there is at least one code point of overlap between this StringSegment and the
+     * given UnicodeString.
+     */
+    bool startsWith(const UnicodeString& other) const;

    /**
     * Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
@ -294,17 +301,18 @@ class NumberParseMatcher {
    virtual bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const = 0;

    /**
-     * Should return a set representing all possible chars (UTF-16 code units) that could be the first
-     * char that this matcher can consume. This method is only called during construction phase, and its
-     * return value is used to skip this matcher unless a segment begins with a char in this set. To make
-     * this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
+     * Performs a fast "smoke check" for whether or not this matcher could possibly match against the
+     * given string segment. The test should be as fast as possible but also as restrictive as possible.
+     * For example, matchers can maintain a UnicodeSet of all code points that count possibly start a
+     * match. Matchers should use the {@link StringSegment#startsWith} method in order to correctly
+     * handle case folding.
     *
-     * The returned UnicodeSet does not need adoption and is guaranteed to be alive for as long as the
-     * object that returned it.
-     *
-     * This method is NOT thread-safe.
+     * @param segment
+     *            The segment to check against.
+     * @return true if the matcher might be able to match against this segment; false if it definitely
+     *         will not be able to match.
     */
-    virtual const UnicodeSet& getLeadCodePoints() = 0;
+    virtual bool smokeTest(const StringSegment& segment) const = 0;

    /**
     * Method called at the end of a parse, after all matchers have failed to consume any more chars.
@ -324,9 +332,6 @@ class NumberParseMatcher {
  protected:
    // No construction except by subclasses!
    NumberParseMatcher() = default;
-
-    // Optional ownership of the leadCodePoints set
-    LocalPointer<const UnicodeSet> fLocalLeadCodePoints;
 };


--- a/icu4c/source/i18n/numparse_unisets.cpp
+++ b/icu4c/source/i18n/numparse_unisets.cpp
@ -93,9 +93,6 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
    gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status);

    gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
-    gUnicodeSets[NAN_LEAD] = new UnicodeSet(
-            u"[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]", status);
-    gUnicodeSets[SCIENTIFIC_LEAD] = new UnicodeSet(u"[Ee×·е\u0627]", status);
    gUnicodeSets[CWCF] = new UnicodeSet(u"[:CWCF:]", status);

    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
--- a/icu4c/source/i18n/numparse_unisets.h
+++ b/icu4c/source/i18n/numparse_unisets.h
@ -47,8 +47,6 @@ enum Key {

    // Other
            DIGITS,
-    NAN_LEAD,
-    SCIENTIFIC_LEAD,
    CWCF,

    // Combined Separators with Digits (for lead code points)
--- a/icu4c/source/i18n/numparse_validators.h
+++ b/icu4c/source/i18n/numparse_validators.h
@ -21,12 +21,12 @@ class ValidationMatcher : public NumberParseMatcher {
        return false;
    }

-    const UnicodeSet& getLeadCodePoints() U_OVERRIDE {
+    bool smokeTest(const StringSegment&) const U_OVERRIDE {
        // No-op
-        return *unisets::get(unisets::EMPTY);
+        return false;
    }

-    virtual void postProcess(ParsedNumber& result) const U_OVERRIDE = 0;
+    void postProcess(ParsedNumber& result) const U_OVERRIDE = 0;
 };


--- a/icu4c/source/test/intltest/numbertest_parse.cpp
+++ b/icu4c/source/test/intltest/numbertest_parse.cpp
@ -176,10 +176,9 @@ void NumberParserTest::testSeriesMatcher() {
    matchers[4] = &m4;
    ArraySeriesMatcher series(matchers, 5);

-    assertEquals(
-            "Lead set should be equal to lead set of lead matcher",
-            *unisets::get(unisets::PLUS_SIGN),
-            series.getLeadCodePoints());
+    assertFalse("", series.smokeTest(StringSegment(u"x", false)));
+    assertFalse("", series.smokeTest(StringSegment(u"-", false)));
+    assertTrue("", series.smokeTest(StringSegment(u"+", false)));

    static const struct TestCase {
        const char16_t* input;
--- a/icu4c/source/test/intltest/numbertest_unisets.cpp
+++ b/icu4c/source/test/intltest/numbertest_unisets.cpp
@ -46,8 +46,6 @@ void UniSetsTest::testSetCoverage() {
    const UnicodeSet &percent = *get(unisets::PERCENT_SIGN);
    const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN);
    const UnicodeSet &infinity = *get(unisets::INFINITY_KEY);
-    const UnicodeSet &nanLead = *get(unisets::NAN_LEAD);
-    const UnicodeSet &scientificLead = *get(unisets::SCIENTIFIC_LEAD);

    int32_t localeCount;
    const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount);
@ -66,11 +64,6 @@ void UniSetsTest::testSetCoverage() {
        ASSERT_IN_SET(percent, dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol));
        ASSERT_IN_SET(permille, dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol));
        ASSERT_IN_SET(infinity, dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol));
-        ASSERT_IN_SET(nanLead, dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol).char32At(0));
-        ASSERT_IN_SET(nanLead,
-                u_foldCase(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol).char32At(0), 0));
-        ASSERT_IN_SET(scientificLead,
-                u_foldCase(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol).char32At(0), 0));
    }
 }

--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java
@ -134,6 +134,19 @@ public class StringSegment implements CharSequence {
        return uniset.contains(cp);
    }

+    /**
+     * Returns true if there is at least one code point of overlap between this StringSegment and the
+     * given CharSequence. Null-safe.
+     */
+    public boolean startsWith(CharSequence other) {
+        if (other == null || other.length() == 0 || length() == 0) {
+            return false;
+        }
+        int cp1 = Character.codePointAt(this, 0);
+        int cp2 = Character.codePointAt(other, 0);
+        return codePointsEqual(cp1, cp2, foldCase);
+    }
+
    /**
     * Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
     * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java
@ -13,7 +13,6 @@ import com.ibm.icu.impl.number.AffixPatternProvider;
 import com.ibm.icu.impl.number.AffixUtils;
 import com.ibm.icu.impl.number.PatternStringUtils;
 import com.ibm.icu.number.NumberFormatter.SignDisplay;
-import com.ibm.icu.text.UnicodeSet;

 /**
 * @author sffc
@ -206,15 +205,9 @@ public class AffixMatcher implements NumberParseMatcher {
    }

    @Override
-    public UnicodeSet getLeadCodePoints() {
-        UnicodeSet leadCodePoints = new UnicodeSet();
-        if (prefix != null) {
-            leadCodePoints.addAll(prefix.getLeadCodePoints());
-        }
-        if (suffix != null) {
-            leadCodePoints.addAll(suffix.getLeadCodePoints());
-        }
-        return leadCodePoints.freeze();
+    public boolean smokeTest(StringSegment segment) {
+        return (prefix != null && prefix.smokeTest(segment))
+                || (suffix != null && suffix.smokeTest(segment));
    }

    @Override
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java
@ -6,7 +6,6 @@ import java.util.ArrayList;
 import java.util.List;

 import com.ibm.icu.impl.StringSegment;
-import com.ibm.icu.text.UnicodeSet;

 /**
 * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
@ -58,22 +57,18 @@ public class AnyMatcher implements NumberParseMatcher {
    }

    @Override
-    public UnicodeSet getLeadCodePoints() {
+    public boolean smokeTest(StringSegment segment) {
        assert frozen;
        if (matchers == null) {
-            return UnicodeSet.EMPTY;
+            return false;
        }

-        if (matchers.size() == 1) {
-            return matchers.get(0).getLeadCodePoints();
-        }
-
-        UnicodeSet leadCodePoints = new UnicodeSet();
        for (int i = 0; i < matchers.size(); i++) {
-            NumberParseMatcher matcher = matchers.get(i);
-            leadCodePoints.addAll(matcher.getLeadCodePoints());
+            if (matchers.get(i).smokeTest(segment)) {
+                return true;
+            }
        }
-        return leadCodePoints.freeze();
+        return false;
    }

    @Override
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java
@ -3,7 +3,6 @@
 package com.ibm.icu.impl.number.parse;

 import com.ibm.icu.impl.StringSegment;
-import com.ibm.icu.text.UnicodeSet;

 /**
 * Matches a single code point, performing no other logic.
@ -33,8 +32,8 @@ public class CodePointMatcher implements NumberParseMatcher {
    }

    @Override
-    public UnicodeSet getLeadCodePoints() {
-        return new UnicodeSet().add(cp).freeze();
+    public boolean smokeTest(StringSegment segment) {
+        return segment.startsWith(cp);
    }

    @Override
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyCustomMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyCustomMatcher.java
@ -3,7 +3,6 @@
 package com.ibm.icu.impl.number.parse;

 import com.ibm.icu.impl.StringSegment;
-import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.Currency;
 import com.ibm.icu.util.ULocale;

@ -52,11 +51,8 @@ public class CurrencyCustomMatcher implements NumberParseMatcher {
    }

    @Override
-    public UnicodeSet getLeadCodePoints() {
-        UnicodeSet leadCodePoints = new UnicodeSet();
-        ParsingUtils.putLeadCodePoint(currency1, leadCodePoints);
-        ParsingUtils.putLeadCodePoint(currency2, leadCodePoints);
-        return leadCodePoints.freeze();
+    public boolean smokeTest(StringSegment segment) {
+        return segment.startsWith(currency1) || segment.startsWith(currency2);
    }

    @Override
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyNamesMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyNamesMatcher.java
@ -23,6 +23,8 @@ public class CurrencyNamesMatcher implements NumberParseMatcher {
    private final TextTrieMap<CurrencyStringInfo> longNameTrie;
    private final TextTrieMap<CurrencyStringInfo> symbolTrie;

+    private final UnicodeSet leadCodePoints;
+
    public static CurrencyNamesMatcher getInstance(ULocale locale) {
        // TODO: Pre-compute some of the more popular locales?
        return new CurrencyNamesMatcher(locale);
@ -33,6 +35,15 @@ public class CurrencyNamesMatcher implements NumberParseMatcher {
        // case folding on long-names but not symbols.
        longNameTrie = Currency.getParsingTrie(locale, Currency.LONG_NAME);
        symbolTrie = Currency.getParsingTrie(locale, Currency.SYMBOL_NAME);
+
+        // Compute the full set of characters that could be the first in a currency to allow for
+        // efficient smoke test.
+        leadCodePoints = new UnicodeSet();
+        longNameTrie.putLeadCodePoints(leadCodePoints);
+        symbolTrie.putLeadCodePoints(leadCodePoints);
+        // Always apply case mapping closure for currencies
+        leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
+        leadCodePoints.freeze();
    }

    @Override
@ -55,13 +66,8 @@ public class CurrencyNamesMatcher implements NumberParseMatcher {
    }

    @Override
-    public UnicodeSet getLeadCodePoints() {
-        UnicodeSet leadCodePoints = new UnicodeSet();
-        longNameTrie.putLeadCodePoints(leadCodePoints);
-        symbolTrie.putLeadCodePoints(leadCodePoints);
-        // Always apply case mapping closure for currencies
-        leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
-        return leadCodePoints.freeze();
+    public boolean smokeTest(StringSegment segment) {
+        return segment.startsWith(leadCodePoints);
    }

    @Override
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java
@ -329,21 +329,23 @@ public class DecimalMatcher implements NumberParseMatcher {
    }

    @Override
-    public UnicodeSet getLeadCodePoints() {
+    public boolean smokeTest(StringSegment segment) {
+        // The common case uses a static leadSet for efficiency.
        if (digitStrings == null && leadSet != null) {
-            return leadSet;
+            return segment.startsWith(leadSet);
        }
-
-        UnicodeSet leadCodePoints = new UnicodeSet();
-        // Assumption: the sets are all single code points.
-        leadCodePoints.addAll(UnicodeSetStaticCache.get(Key.DIGITS));
-        leadCodePoints.addAll(separatorSet);
-        if (digitStrings != null) {
-            for (int i = 0; i < digitStrings.length; i++) {
-                ParsingUtils.putLeadCodePoint(digitStrings[i], leadCodePoints);
+        if (segment.startsWith(separatorSet) || UCharacter.isDigit(segment.getCodePoint())) {
+            return true;
+        }
+        if (digitStrings == null) {
+            return false;
+        }
+        for (int i = 0; i < digitStrings.length; i++) {
+            if (segment.startsWith(digitStrings[i])) {
+                return true;
            }
        }
-        return leadCodePoints.freeze();
+        return false;
    }

    @Override
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java
@ -27,18 +27,6 @@ public class NanMatcher extends SymbolMatcher {
        super(symbolString, UnicodeSet.EMPTY);
    }

-    @Override
-    public UnicodeSet getLeadCodePoints() {
-        // Overriding this here to allow use of statically allocated sets
-        int leadCp = string.codePointAt(0);
-        UnicodeSet s = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.NAN_LEAD);
-        if (s.contains(leadCp)) {
-            return s;
-        } else {
-            return super.getLeadCodePoints();
-        }
-    }
-
    @Override
    protected boolean isDisabled(ParsedNumber result) {
        return result.seenNumber();
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java
@ -3,7 +3,6 @@
 package com.ibm.icu.impl.number.parse;

 import com.ibm.icu.impl.StringSegment;
-import com.ibm.icu.text.UnicodeSet;

 /**
 * The core interface implemented by all matchers used for number parsing.
@ -45,12 +44,18 @@ public interface NumberParseMatcher {
    public boolean match(StringSegment segment, ParsedNumber result);

    /**
-     * Should return a set representing all possible chars (UTF-16 code units) that could be the first
-     * char that this matcher can consume. This method is only called during construction phase, and its
-     * return value is used to skip this matcher unless a segment begins with a char in this set. To make
-     * this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
+     * Performs a fast "smoke check" for whether or not this matcher could possibly match against the
+     * given string segment. The test should be as fast as possible but also as restrictive as possible.
+     * For example, matchers can maintain a UnicodeSet of all code points that count possibly start a
+     * match. Matchers should use the {@link StringSegment#startsWith} method in order to correctly
+     * handle case folding.
+     *
+     * @param segment
+     *            The segment to check against.
+     * @return true if the matcher might be able to match against this segment; false if it definitely
+     *         will not be able to match.
     */
-    public UnicodeSet getLeadCodePoints();
+    public boolean smokeTest(StringSegment segment);

    /**
     * Method called at the end of a parse, after all matchers have failed to consume any more chars.
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
@ -19,7 +19,6 @@ import com.ibm.icu.impl.number.PropertiesAffixPatternProvider;
 import com.ibm.icu.impl.number.RoundingUtils;
 import com.ibm.icu.number.NumberFormatter.GroupingStrategy;
 import com.ibm.icu.text.DecimalFormatSymbols;
-import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.Currency;
 import com.ibm.icu.util.CurrencyAmount;
 import com.ibm.icu.util.ULocale;
@ -250,7 +249,6 @@ public class NumberParserImpl {

    private final int parseFlags;
    private final List<NumberParseMatcher> matchers;
-    private final List<UnicodeSet> leads;
    private boolean frozen;

    /**
@ -261,11 +259,6 @@ public class NumberParserImpl {
     */
    public NumberParserImpl(int parseFlags) {
        matchers = new ArrayList<NumberParseMatcher>();
-        if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_OPTIMIZE)) {
-            leads = new ArrayList<UnicodeSet>();
-        } else {
-            leads = null;
-        }
        this.parseFlags = parseFlags;
        frozen = false;
    }
@ -273,30 +266,11 @@ public class NumberParserImpl {
    public void addMatcher(NumberParseMatcher matcher) {
        assert !frozen;
        this.matchers.add(matcher);
-        if (leads != null) {
-            addLeadCodePointsForMatcher(matcher);
-        }
    }

    public void addMatchers(Collection<? extends NumberParseMatcher> matchers) {
        assert !frozen;
        this.matchers.addAll(matchers);
-        if (leads != null) {
-            for (NumberParseMatcher matcher : matchers) {
-                addLeadCodePointsForMatcher(matcher);
-            }
-        }
-    }
-
-    private void addLeadCodePointsForMatcher(NumberParseMatcher matcher) {
-        UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
-        assert leadCodePoints.isFrozen();
-        // TODO: Avoid the clone operation here.
-        if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE)) {
-            leadCodePoints = leadCodePoints.cloneAsThawed().closeOver(UnicodeSet.ADD_CASE_MAPPINGS)
-                    .freeze();
-        }
-        this.leads.add(leadCodePoints);
    }

    public void freeze() {
@ -343,12 +317,11 @@ public class NumberParserImpl {
        }

        int initialOffset = segment.getOffset();
-        int leadCp = segment.getCodePoint();
        for (int i = 0; i < matchers.size(); i++) {
-            if (leads != null && !leads.get(i).contains(leadCp)) {
+            NumberParseMatcher matcher = matchers.get(i);
+            if (!matcher.smokeTest(segment)) {
                continue;
            }
-            NumberParseMatcher matcher = matchers.get(i);
            matcher.match(segment, result);
            if (segment.getOffset() != initialOffset) {
                // In a greedy parse, recurse on only the first match.
@ -377,8 +350,10 @@ public class NumberParserImpl {

        int initialOffset = segment.getOffset();
        for (int i = 0; i < matchers.size(); i++) {
-            // TODO: Check leadChars here?
            NumberParseMatcher matcher = matchers.get(i);
+            if (!matcher.smokeTest(segment)) {
+                continue;
+            }

            // In a non-greedy parse, we attempt all possible matches and pick the best.
            for (int charsToConsume = 0; charsToConsume < segment.length();) {
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
@ -5,7 +5,6 @@ package com.ibm.icu.impl.number.parse;
 import com.ibm.icu.impl.StringSegment;
 import com.ibm.icu.impl.number.Grouper;
 import com.ibm.icu.text.DecimalFormatSymbols;
-import com.ibm.icu.text.UnicodeSet;

 /**
 * @author sffc
@ -78,14 +77,8 @@ public class ScientificMatcher implements NumberParseMatcher {
    }

    @Override
-    public UnicodeSet getLeadCodePoints() {
-        int leadCp = exponentSeparatorString.codePointAt(0);
-        UnicodeSet s = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.SCIENTIFIC_LEAD);
-        if (s.contains(leadCp)) {
-            return s;
-        } else {
-            return new UnicodeSet().add(leadCp).freeze();
-        }
+    public boolean smokeTest(StringSegment segment) {
+        return segment.startsWith(exponentSeparatorString);
    }

    @Override
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java
@ -6,7 +6,6 @@ import java.util.ArrayList;
 import java.util.List;

 import com.ibm.icu.impl.StringSegment;
-import com.ibm.icu.text.UnicodeSet;

 /**
 * Composes a number of matchers, running one after another. Matches the input string only if all of the
@ -82,15 +81,15 @@ public class SeriesMatcher implements NumberParseMatcher {
    }

    @Override
-    public UnicodeSet getLeadCodePoints() {
+    public boolean smokeTest(StringSegment segment) {
        assert frozen;
        if (matchers == null) {
-            return UnicodeSet.EMPTY;
+            return false;
        }

        // SeriesMatchers are never allowed to start with a Flexible matcher.
        assert !(matchers.get(0) instanceof NumberParseMatcher.Flexible);
-        return matchers.get(0).getLeadCodePoints();
+        return matchers.get(0).smokeTest(segment);
    }

    @Override
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java
@ -59,16 +59,8 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
    }

    @Override
-    public UnicodeSet getLeadCodePoints() {
-        if (string.isEmpty()) {
-            // Assumption: for sets from UnicodeSetStaticCache, uniSet == leadCodePoints.
-            return uniSet;
-        }
-
-        UnicodeSet leadCodePoints = new UnicodeSet();
-        ParsingUtils.putLeadCodePoints(uniSet, leadCodePoints);
-        ParsingUtils.putLeadCodePoint(string, leadCodePoints);
-        return leadCodePoints.freeze();
+    public boolean smokeTest(StringSegment segment) {
+        return segment.startsWith(uniSet) || segment.startsWith(string);
    }

    @Override
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java
@ -49,8 +49,6 @@ public class UnicodeSetStaticCache {

        // Other
        DIGITS,
-        NAN_LEAD,
-        SCIENTIFIC_LEAD,
        CWCF, // TODO: Check if this is being used and remove it if not.

        // Combined Separators with Digits (for lead code points)
@ -112,10 +110,6 @@ public class UnicodeSetStaticCache {
        unicodeSets.put(Key.INFINITY, new UnicodeSet("[∞]").freeze());

        unicodeSets.put(Key.DIGITS, new UnicodeSet("[:digit:]").freeze());
-        unicodeSets.put(Key.NAN_LEAD,
-                new UnicodeSet("[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]")
-                        .freeze());
-        unicodeSets.put(Key.SCIENTIFIC_LEAD, new UnicodeSet("[Ee×·е\u0627]").freeze());
        unicodeSets.put(Key.CWCF, new UnicodeSet("[:CWCF:]").freeze());

        unicodeSets.put(Key.DIGITS_OR_ALL_SEPARATORS, computeUnion(Key.DIGITS, Key.ALL_SEPARATORS));
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java
@ -3,7 +3,6 @@
 package com.ibm.icu.impl.number.parse;

 import com.ibm.icu.impl.StringSegment;
-import com.ibm.icu.text.UnicodeSet;

 /**
 * A Matcher used only for post-process validation, not for consuming characters at runtime.
@ -16,8 +15,8 @@ public abstract class ValidationMatcher implements NumberParseMatcher {
    }

    @Override
-    public UnicodeSet getLeadCodePoints() {
-        return UnicodeSet.EMPTY;
+    public boolean smokeTest(StringSegment segment) {
+        return false;
    }

 }
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/ExhaustiveNumberTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/ExhaustiveNumberTest.java
@ -2,6 +2,8 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.dev.test.number;

+import static com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.get;
+
 import java.math.BigDecimal;
 import java.util.Random;

@ -10,8 +12,12 @@ import org.junit.Test;

 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
+import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
+import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.number.NumberFormatter;
 import com.ibm.icu.number.Rounder;
+import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.ULocale;

 /**
@ -27,6 +33,60 @@ public class ExhaustiveNumberTest extends TestFmwk {
        org.junit.Assume.assumeTrue(getExhaustiveness() > 5);
    }

+    @Test
+    public void testSetCoverage() {
+        // Lenient comma/period should be supersets of strict comma/period;
+        // it also makes the coverage logic cheaper.
+        assertTrue("COMMA should be superset of STRICT_COMMA",
+                get(Key.COMMA).containsAll(get(Key.STRICT_COMMA)));
+        assertTrue("PERIOD should be superset of STRICT_PERIOD",
+                get(Key.PERIOD).containsAll(get(Key.STRICT_PERIOD)));
+
+        UnicodeSet decimals = get(Key.STRICT_COMMA).cloneAsThawed().addAll(get(Key.STRICT_PERIOD))
+                .freeze();
+        UnicodeSet grouping = decimals.cloneAsThawed().addAll(get(Key.OTHER_GROUPING_SEPARATORS))
+                .freeze();
+        UnicodeSet plusSign = get(Key.PLUS_SIGN);
+        UnicodeSet minusSign = get(Key.MINUS_SIGN);
+        UnicodeSet percent = get(Key.PERCENT_SIGN);
+        UnicodeSet permille = get(Key.PERMILLE_SIGN);
+        UnicodeSet infinity = get(Key.INFINITY);
+
+        for (ULocale locale : ULocale.getAvailableLocales()) {
+            DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale);
+
+            assertInSet(locale, decimals, dfs.getDecimalSeparatorString());
+            assertInSet(locale, grouping, dfs.getGroupingSeparatorString());
+            assertInSet(locale, plusSign, dfs.getPlusSignString());
+            assertInSet(locale, minusSign, dfs.getMinusSignString());
+            assertInSet(locale, percent, dfs.getPercentString());
+            assertInSet(locale, permille, dfs.getPerMillString());
+            assertInSet(locale, infinity, dfs.getInfinity());
+        }
+    }
+
+    static void assertInSet(ULocale locale, UnicodeSet set, String str) {
+        if (str.codePointCount(0, str.length()) != 1) {
+            // Ignore locale strings with more than one code point (usually a bidi mark)
+            return;
+        }
+        assertInSet(locale, set, str.codePointAt(0));
+    }
+
+    static void assertInSet(ULocale locale, UnicodeSet set, int cp) {
+        // If this test case fails, add the specified code point to the corresponding set in
+        // UnicodeSetStaticCache.java and numparse_unisets.cpp
+        assertTrue(
+                locale
+                        + " U+"
+                        + Integer.toHexString(cp)
+                        + " ("
+                        + UCharacter.toString(cp)
+                        + ") should be in "
+                        + set,
+                set.contains(cp));
+    }
+
    @Test
    public void unlimitedRoundingBigDecimal() {
        BigDecimal ten10000 = BigDecimal.valueOf(10).pow(10000);
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
@ -3,6 +3,7 @@
 package com.ibm.icu.dev.test.number;

 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;

 import org.junit.Test;
@ -21,8 +22,6 @@ import com.ibm.icu.impl.number.parse.ParsingUtils;
 import com.ibm.icu.impl.number.parse.PercentMatcher;
 import com.ibm.icu.impl.number.parse.PlusSignMatcher;
 import com.ibm.icu.impl.number.parse.SeriesMatcher;
-import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
-import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
 import com.ibm.icu.text.DecimalFormatSymbols;
 import com.ibm.icu.util.Currency;
 import com.ibm.icu.util.ULocale;
@ -196,7 +195,9 @@ public class NumberParserTest {
        series.addMatcher(IgnorablesMatcher.DEFAULT);
        series.freeze();

-        assertEquals(UnicodeSetStaticCache.get(Key.PLUS_SIGN), series.getLeadCodePoints());
+        assertFalse(series.smokeTest(new StringSegment("x", false)));
+        assertFalse(series.smokeTest(new StringSegment("-", false)));
+        assertTrue(series.smokeTest(new StringSegment("+", false)));

        Object[][] cases = new Object[][] {
                { "", 0, true },
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/PropertiesTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/PropertiesTest.java
@ -30,9 +30,9 @@ import org.junit.Test;

 import com.ibm.icu.dev.test.serializable.SerializableTestUtility;
 import com.ibm.icu.impl.number.DecimalFormatProperties;
+import com.ibm.icu.impl.number.DecimalFormatProperties.ParseMode;
 import com.ibm.icu.impl.number.Padder.PadPosition;
 import com.ibm.icu.impl.number.PatternStringParser;
-import com.ibm.icu.impl.number.parse.NumberParserImpl.ParseMode;
 import com.ibm.icu.text.CompactDecimalFormat.CompactStyle;
 import com.ibm.icu.text.CurrencyPluralInfo;
 import com.ibm.icu.text.MeasureFormat.FormatWidth;
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/UnicodeSetStaticCacheTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/UnicodeSetStaticCacheTest.java
@ -8,82 +8,17 @@ import static org.junit.Assert.assertTrue;
 import org.junit.Test;

 import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
-import com.ibm.icu.lang.UCharacter;
-import com.ibm.icu.text.DecimalFormatSymbols;
-import com.ibm.icu.text.UnicodeSet;
-import com.ibm.icu.util.ULocale;

 /**
+ * This test class is thin; most of it was moved to ExhaustiveNumberTest.
 * @author sffc
- *
 */
 public class UnicodeSetStaticCacheTest {

-    @Test
-    public void testSetCoverage() {
-        // Lenient comma/period should be supersets of strict comma/period;
-        // it also makes the coverage logic cheaper.
-        assertTrue("COMMA should be superset of STRICT_COMMA",
-                get(Key.COMMA).containsAll(get(Key.STRICT_COMMA)));
-        assertTrue("PERIOD should be superset of STRICT_PERIOD",
-                get(Key.PERIOD).containsAll(get(Key.STRICT_PERIOD)));
-
-        UnicodeSet decimals = get(Key.STRICT_COMMA).cloneAsThawed().addAll(get(Key.STRICT_PERIOD))
-                .freeze();
-        UnicodeSet grouping = decimals.cloneAsThawed().addAll(get(Key.OTHER_GROUPING_SEPARATORS))
-                .freeze();
-        UnicodeSet plusSign = get(Key.PLUS_SIGN);
-        UnicodeSet minusSign = get(Key.MINUS_SIGN);
-        UnicodeSet percent = get(Key.PERCENT_SIGN);
-        UnicodeSet permille = get(Key.PERMILLE_SIGN);
-        UnicodeSet infinity = get(Key.INFINITY);
-        UnicodeSet nanLead = get(Key.NAN_LEAD);
-        UnicodeSet scientificLead = get(Key.SCIENTIFIC_LEAD);
-
-        for (ULocale locale : ULocale.getAvailableLocales()) {
-            DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale);
-
-            assertInSet(locale, decimals, dfs.getDecimalSeparatorString());
-            assertInSet(locale, grouping, dfs.getGroupingSeparatorString());
-            assertInSet(locale, plusSign, dfs.getPlusSignString());
-            assertInSet(locale, minusSign, dfs.getMinusSignString());
-            assertInSet(locale, percent, dfs.getPercentString());
-            assertInSet(locale, permille, dfs.getPerMillString());
-            assertInSet(locale, infinity, dfs.getInfinity());
-            assertInSet(locale, nanLead, dfs.getNaN().codePointAt(0));
-            assertInSet(locale, nanLead, UCharacter.foldCase(dfs.getNaN(), true).codePointAt(0));
-            assertInSet(locale,
-                    scientificLead,
-                    UCharacter.foldCase(dfs.getExponentSeparator(), true).codePointAt(0));
-        }
-    }
-
    @Test
    public void testFrozen() {
        for (Key key : Key.values()) {
            assertTrue(get(key).isFrozen());
        }
    }
-
-    static void assertInSet(ULocale locale, UnicodeSet set, String str) {
-        if (str.codePointCount(0, str.length()) != 1) {
-            // Ignore locale strings with more than one code point (usually a bidi mark)
-            return;
-        }
-        assertInSet(locale, set, str.codePointAt(0));
-    }
-
-    static void assertInSet(ULocale locale, UnicodeSet set, int cp) {
-        // If this test case fails, add the specified code point to the corresponding set in
-        // UnicodeSetStaticCache.java and numparse_unisets.cpp
-        assertTrue(
-                locale
-                        + " U+"
-                        + Integer.toHexString(cp)
-                        + " ("
-                        + UCharacter.toString(cp)
-                        + ") should be in "
-                        + set,
-                set.contains(cp));
-    }
 }