ICU-13574 AffixMatcher is working. All simple parsing tests are passing.

X-SVN-Rev: 40903
2025-04-07 06:25:30 +00:00 · 2018-02-13 02:23:52 +00:00 · 2018-02-13 02:23:52 +00:00 · 1ed7deaa8c
commit 1ed7deaa8c
parent 7b1857d0f3
20 changed files with 313 additions and 168 deletions
--- a/icu4c/source/i18n/numparse_affixes.cpp
+++ b/icu4c/source/i18n/numparse_affixes.cpp
@ -5,6 +5,10 @@

 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT

+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
 #include "numparse_types.h"
 #include "numparse_affixes.h"
 #include "numparse_utils.h"
@ -122,52 +126,32 @@ AffixPatternMatcher AffixPatternMatcherBuilder::build() {
 }


-AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode,
-                                                       const UnicodeString* currency1,
-                                                       const UnicodeString* currency2,
-                                                       const DecimalFormatSymbols* dfs,
-                                                       IgnorablesMatcher* ignorables, const Locale* locale)
-        : currency1(currency1),
-          currency2(currency2),
-          dfs(dfs),
-          ignorables(ignorables),
-          locale(locale),
-          codePointCount(0),
-          codePointNumBatches(0) {
-    utils::copyCurrencyCode(this->currencyCode, currencyCode);
-}
+// Creates an empty warehouse: no code point matcher has been handed out yet
+// and no overflow batch is recorded.
+CodePointMatcherWarehouse::CodePointMatcherWarehouse()
+        : codePointCount{0},
+          codePointNumBatches{0} {
+}

-AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(
-        AffixTokenMatcherWarehouse&& src) U_NOEXCEPT = default;
-
-AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() {
+CodePointMatcherWarehouse::~CodePointMatcherWarehouse() {  // delete[]s the first codePointNumBatches entries of codePointsOverflow
    // Delete the variable number of batches of code point matchers
    for (int32_t i = 0; i < codePointNumBatches; i++) {
        delete[] codePointsOverflow[i];
    }
 }

-NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() {
-    return fMinusSign = {*dfs, true};
+// Move constructor: takes over the by-value matchers, the overflow batch
+// pointers and both counters from src.
+//
+// src is then reset to the empty state. The destructor delete[]s the first
+// codePointNumBatches entries of codePointsOverflow, so leaving the old count
+// in src would make its destructor free batches that now belong to this object.
+CodePointMatcherWarehouse::CodePointMatcherWarehouse(CodePointMatcherWarehouse&& src) U_NOEXCEPT
+        : codePoints(std::move(src.codePoints)),
+          codePointsOverflow(std::move(src.codePointsOverflow)),
+          codePointCount(src.codePointCount),
+          codePointNumBatches(src.codePointNumBatches) {
+    src.codePointCount = 0;
+    src.codePointNumBatches = 0;
+}
+
+// Move assignment: releases whatever this object currently owns, then takes
+// over the matchers, overflow batch pointers and counters from src.
+//
+// Returns *this. Self-assignment is a no-op.
+CodePointMatcherWarehouse&
+CodePointMatcherWarehouse::operator=(CodePointMatcherWarehouse&& src) U_NOEXCEPT {
+    if (this == &src) {
+        return *this;
+    }
+    // Free the batches owned so far; otherwise overwriting the pointers below
+    // would leak them.
+    for (int32_t i = 0; i < codePointNumBatches; i++) {
+        delete[] codePointsOverflow[i];
+    }
+    codePoints = std::move(src.codePoints);
+    codePointsOverflow = std::move(src.codePointsOverflow);
+    codePointCount = src.codePointCount;
+    codePointNumBatches = src.codePointNumBatches;
+    // Leave src empty so that its destructor does not delete[] the batches
+    // that were just transferred to this object.
+    src.codePointCount = 0;
+    src.codePointNumBatches = 0;
+    return *this;
 }

-NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
-    return fPlusSign = {*dfs, true};
-}
-
-NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
-    return fPercent = {*dfs};
-}
-
-NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
-    return fPermille = {*dfs};
-}
-
-NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
-    return fCurrency = {{*locale, status}, {currencyCode, *currency1, *currency2}};
-}
-
-NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
+NumberParseMatcher& CodePointMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
    if (codePointCount < CODE_POINT_STACK_CAPACITY) {
        return codePoints[codePointCount++] = {cp};
    }
@ -186,6 +170,39 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp)
 }


+// Remembers the pointer to the setup data; nothing is copied out of it here.
+// The accessors below dereference fSetupData each time they are called.
+AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData)
+        : fSetupData{setupData} {
+}
+
+// Rebuilds the cached minus sign matcher from the DecimalFormatSymbols in the
+// setup data and returns a reference to that cached member.
+NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() {
+    fMinusSign = {fSetupData->dfs, true};
+    return fMinusSign;
+}
+
+// Rebuilds the cached plus sign matcher from the DecimalFormatSymbols in the
+// setup data and returns a reference to that cached member.
+NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
+    fPlusSign = {fSetupData->dfs, true};
+    return fPlusSign;
+}
+
+// Rebuilds the cached percent matcher from the DecimalFormatSymbols in the
+// setup data and returns a reference to that cached member.
+NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
+    fPercent = {fSetupData->dfs};
+    return fPercent;
+}
+
+// Rebuilds the cached permille matcher from the DecimalFormatSymbols in the
+// setup data and returns a reference to that cached member.
+NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
+    fPermille = {fSetupData->dfs};
+    return fPermille;
+}
+
+// Rebuilds the cached currency matcher and returns a reference to it.
+// The first nested initializer receives the locale and the status, the second
+// one the currency code and the two currency strings from the setup data.
+NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
+    const AffixTokenMatcherSetupData& setup = *fSetupData;
+    fCurrency = {{setup.locale, status},
+                 {setup.currencyCode, setup.currency1, setup.currency2}};
+    return fCurrency;
+}
+
+// Returns the IgnorablesMatcher referenced by the setup data; no copy is made.
+IgnorablesMatcher& AffixTokenMatcherWarehouse::ignorables() {
+    IgnorablesMatcher& shared = fSetupData->ignorables;
+    return shared;
+}
+
+// Forwards to the embedded CodePointMatcherWarehouse, which stores the new
+// matcher for cp, and passes its reference through unchanged.
+NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
+    NumberParseMatcher& matcher = fCodePoints.nextCodePointMatcher(cp);
+    return matcher;
+}
+
+
 CodePointMatcher::CodePointMatcher(UChar32 cp)
        : fCp(cp) {}

@ -207,9 +224,13 @@ const UnicodeSet& CodePointMatcher::getLeadCodePoints() {
    return *fLocalLeadCodePoints;
 }

+// Debug label for this matcher; the matched code point is not included.
+UnicodeString CodePointMatcher::toString() const {
+    return UnicodeString(u"<CodePoint>");
+}
+

 AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern,
-                                                          AffixTokenMatcherWarehouse& warehouse,
+                                                          AffixTokenMatcherWarehouse& tokenWarehouse,
                                                          parse_flags_t parseFlags, bool* success,
                                                          UErrorCode& status) {
    if (affixPattern.isEmpty()) {
@ -222,10 +243,10 @@ AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& a
    if (0 != (parseFlags & PARSE_FLAG_EXACT_AFFIX)) {
        ignorables = nullptr;
    } else {
-        ignorables = warehouse.ignorables;
+        ignorables = &tokenWarehouse.ignorables();
    }

-    AffixPatternMatcherBuilder builder(affixPattern, warehouse, ignorables);
+    AffixPatternMatcherBuilder builder(affixPattern, tokenWarehouse, ignorables);
    AffixUtils::iterateWithConsumer(UnicodeStringCharSequence(affixPattern), builder, status);
    return builder.build();
 }
@ -243,10 +264,9 @@ bool AffixPatternMatcher::operator==(const AffixPatternMatcher& other) const {
 }


-AffixMatcherWarehouse::AffixMatcherWarehouse(AffixTokenMatcherWarehouse& warehouse)
-        : fAffixTokenMatcherWarehouse(std::move(warehouse)) {}
-
-AffixMatcherWarehouse& AffixMatcherWarehouse::operator=(AffixMatcherWarehouse&& src) = default;
+// Remembers the token warehouse by pointer. createAffixMatchers() dereferences
+// it when building each AffixPatternMatcher.
+AffixMatcherWarehouse::AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse)
+        : fTokenWarehouse{tokenWarehouse} {}

 bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInfo,
                                          const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
@ -278,18 +298,14 @@ bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInf
    return true;
 }

-AffixMatcherWarehouse AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patternInfo,
-                                                                 MutableMatcherCollection& output,
-                                                                 AffixTokenMatcherWarehouse tokenWarehouse,
-                                                                 const IgnorablesMatcher& ignorables,
-                                                                 parse_flags_t parseFlags,
-                                                                 UErrorCode& status) {
+void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patternInfo,
+                                                MutableMatcherCollection& output,
+                                                const IgnorablesMatcher& ignorables,
+                                                parse_flags_t parseFlags, UErrorCode& status) {
    if (!isInteresting(patternInfo, ignorables, parseFlags, status)) {
-        return {};
+        return;
    }

-    AffixMatcherWarehouse warehouse(tokenWarehouse);
-
    // The affixes have interesting characters, or we are in strict mode.
    // Use initial capacity of 6, the highest possible number of AffixMatchers.
    UnicodeString sb;
@ -309,21 +325,19 @@ AffixMatcherWarehouse AffixMatcherWarehouse::createAffixMatchers(const AffixPatt
        bool hasPrefix = false;
        PatternStringUtils::patternInfoToStringBuilder(
                patternInfo, true, signum, signDisplay, StandardPlural::OTHER, false, sb);
-        warehouse.fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
-                sb, tokenWarehouse, parseFlags, &hasPrefix, status);
-        AffixPatternMatcher* prefix = hasPrefix
-                                      ? &warehouse.fAffixPatternMatchers[numAffixPatternMatchers++]
-                                      : nullptr;
+        fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
+                sb, *fTokenWarehouse, parseFlags, &hasPrefix, status);
+        AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
+                                                : nullptr;

        // Generate Suffix
        bool hasSuffix = false;
        PatternStringUtils::patternInfoToStringBuilder(
                patternInfo, false, signum, signDisplay, StandardPlural::OTHER, false, sb);
-        warehouse.fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
-                sb, tokenWarehouse, parseFlags, &hasSuffix, status);
-        AffixPatternMatcher* suffix = hasSuffix
-                                      ? &warehouse.fAffixPatternMatchers[numAffixPatternMatchers++]
-                                      : nullptr;
+        fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
+                sb, *fTokenWarehouse, parseFlags, &hasSuffix, status);
+        AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
+                                                : nullptr;

        if (signum == 1) {
            posPrefix = prefix;
@ -338,14 +352,14 @@ AffixMatcherWarehouse AffixMatcherWarehouse::createAffixMatchers(const AffixPatt

        // Note: it is indeed possible for posPrefix and posSuffix to both be null.
        // We still need to add that matcher for strict mode to work.
-        warehouse.fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags};
+        fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags};
        if (includeUnpaired && prefix != nullptr && suffix != nullptr) {
            // The following if statements are designed to prevent adding two identical matchers.
-            if (signum == 1 || equals(prefix, posPrefix)) {
-                warehouse.fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags};
+            if (signum == 1 || !equals(prefix, posPrefix)) {
+                fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags};
            }
-            if (signum == 1 || equals(suffix, posSuffix)) {
-                warehouse.fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags};
+            if (signum == 1 || !equals(suffix, posSuffix)) {
+                fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags};
            }
        }
    }
@ -356,19 +370,20 @@ AffixMatcherWarehouse AffixMatcherWarehouse::createAffixMatchers(const AffixPatt
    do {
        madeChanges = false;
        for (int32_t i = 1; i < numAffixMatchers; i++) {
-            if (warehouse.fAffixMatchers[i - 1].compareTo(warehouse.fAffixMatchers[i]) > 0) {
+            if (fAffixMatchers[i - 1].compareTo(fAffixMatchers[i]) > 0) {
                madeChanges = true;
-                AffixMatcher temp = std::move(warehouse.fAffixMatchers[i - 1]);
-                warehouse.fAffixMatchers[i - 1] = std::move(warehouse.fAffixMatchers[i]);
-                warehouse.fAffixMatchers[i] = std::move(temp);
+                AffixMatcher temp = std::move(fAffixMatchers[i - 1]);
+                fAffixMatchers[i - 1] = std::move(fAffixMatchers[i]);
+                fAffixMatchers[i] = std::move(temp);
            }
        }
    } while (madeChanges);
-    for (int32_t i = 0; i < numAffixMatchers; i++) {
-        output.addMatcher(warehouse.fAffixMatchers[i]);
-    }

-    return warehouse;
+    for (int32_t i = 0; i < numAffixMatchers; i++) {
+        // Enable the following line to debug affixes
+        //std::cout << "Adding affix matcher: " << CStr(fAffixMatchers[i].toString())() << std::endl;
+        output.addMatcher(fAffixMatchers[i]);
+    }
 }


@ -454,6 +469,14 @@ int8_t AffixMatcher::compareTo(const AffixMatcher& rhs) const {
    }
 }

+// Debug string of the form "<Affix PREFIX#SUFFIX>", or
+// "<Affix:negative PREFIX#SUFFIX>" when FLAG_NEGATIVE is set.
+// A missing prefix or suffix is printed as "null".
+UnicodeString AffixMatcher::toString() const {
+    UnicodeString prefixText(fPrefix ? fPrefix->getPattern() : u"null");
+    UnicodeString suffixText(fSuffix ? fSuffix->getPattern() : u"null");
+
+    UnicodeString description(u"<Affix");
+    if ((fFlags & FLAG_NEGATIVE) != 0) {
+        description.append(u":negative ", -1);
+    } else {
+        description.append(u' ');
+    }
+    description.append(prefixText);
+    description.append(u'#');
+    description.append(suffixText);
+    description.append(u'>');
+    return description;
+}
+

 #endif /* #if !UCONFIG_NO_FORMATTING */

--- a/icu4c/source/i18n/numparse_affixes.h
+++ b/icu4c/source/i18n/numparse_affixes.h
@ -12,6 +12,8 @@
 #include "numparse_currency.h"
 #include "number_affixutils.h"

+#include <array>
+
 U_NAMESPACE_BEGIN namespace numparse {
 namespace impl {

@ -33,11 +35,57 @@ class CodePointMatcher : public NumberParseMatcher, public UMemory {

    const UnicodeSet& getLeadCodePoints() override;

+    UnicodeString toString() const override;
+
  private:
    UChar32 fCp;
 };


+/**
+ * A warehouse to retain ownership of CodePointMatchers.
+ *
+ * Matchers are handed out one at a time by nextCodePointMatcher(). While fewer
+ * than CODE_POINT_STACK_CAPACITY matchers have been handed out, each one is
+ * stored by value in the codePoints array inside this object. Further matchers
+ * live in heap "batches" whose pointers are kept in codePointsOverflow
+ * (presumably CODE_POINT_BATCH_SIZE matchers per batch; confirm against
+ * nextCodePointMatcher()).
+ *
+ * Only move construction and move assignment are declared; no copy operations
+ * are declared.
+ */
+class CodePointMatcherWarehouse : public UMemory {
+  private:
+    static constexpr int32_t CODE_POINT_STACK_CAPACITY = 5; // Number of matchers stored by value in this object
+    static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation
+
+  public:
+    CodePointMatcherWarehouse();
+
+    // A custom destructor is needed to delete[] the heap batches whose pointers are in codePointsOverflow.
+    // A custom move constructor and move assignment seem to be needed because of the custom destructor.
+
+    ~CodePointMatcherWarehouse();
+
+    CodePointMatcherWarehouse(CodePointMatcherWarehouse&& src) U_NOEXCEPT;
+
+    CodePointMatcherWarehouse& operator=(CodePointMatcherWarehouse&& src) U_NOEXCEPT;
+
+    NumberParseMatcher& nextCodePointMatcher(UChar32 cp); // Stores a new matcher for cp and returns a reference to it
+
+  private:
+    std::array<CodePointMatcher, CODE_POINT_STACK_CAPACITY> codePoints; // By value
+    MaybeStackArray<CodePointMatcher*, 3> codePointsOverflow; // On heap in "batches"
+    int32_t codePointCount; // Total for both the ones by value and on heap
+    int32_t codePointNumBatches; // Number of batches in codePointsOverflow
+};
+
+
+struct AffixTokenMatcherSetupData { // Inputs that AffixTokenMatcherWarehouse reads when it builds token matchers
+    const UChar* currencyCode; // Passed to the currency matcher together with currency1 and currency2
+    const UnicodeString& currency1; // First currency string given to the currency matcher
+    const UnicodeString& currency2; // Second currency string given to the currency matcher
+    const DecimalFormatSymbols& dfs; // Used for the minus sign, plus sign, percent and permille matchers
+    IgnorablesMatcher& ignorables; // Returned as-is by AffixTokenMatcherWarehouse::ignorables()
+    const Locale& locale; // Passed, with the error status, to the currency matcher
+
+//    NOTE: Every member is a pointer or a reference, so this struct owns nothing.
+//    The referenced objects must stay alive for as long as a warehouse
+//    dereferences this data.
+};
+
+
 /**
 * Small helper class that generates matchers for individual tokens for AffixPatternMatcher.
 *
@ -48,21 +96,11 @@ class CodePointMatcher : public NumberParseMatcher, public UMemory {
 *
 * @author sffc
 */
-class AffixTokenMatcherWarehouse {
-  private:
-    static constexpr int32_t CODE_POINT_STACK_CAPACITY = 5; // Number of entries directly on the stack
-    static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation
-
+class AffixTokenMatcherWarehouse : public UMemory {
  public:
    AffixTokenMatcherWarehouse() = default;  // WARNING: Leaves the object in an unusable state

-    AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString* currency1,
-                               const UnicodeString* currency2, const DecimalFormatSymbols* dfs,
-                               IgnorablesMatcher* ignorables, const Locale* locale);
-
-    AffixTokenMatcherWarehouse(AffixTokenMatcherWarehouse&& src) U_NOEXCEPT;
-
-    ~AffixTokenMatcherWarehouse();
+    AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData);

    NumberParseMatcher& minusSign();

@ -74,16 +112,13 @@ class AffixTokenMatcherWarehouse {

    NumberParseMatcher& currency(UErrorCode& status);

+    IgnorablesMatcher& ignorables();
+
    NumberParseMatcher& nextCodePointMatcher(UChar32 cp);

  private:
-    // NOTE: The following fields may be unsafe to access after construction is done!
-    UChar currencyCode[4];
-    const UnicodeString* currency1;
-    const UnicodeString* currency2;
-    const DecimalFormatSymbols* dfs;
-    IgnorablesMatcher* ignorables;
-    const Locale* locale;
+    // NOTE: Not owned. The pointee may already be destroyed once the matchers have been built,
+    // so the following field may be unsafe to dereference after construction is done!
+    const AffixTokenMatcherSetupData* fSetupData;

    // NOTE: These are default-constructed and should not be used until initialized.
    MinusSignMatcher fMinusSign;
@ -92,10 +127,8 @@ class AffixTokenMatcherWarehouse {
    PermilleMatcher fPermille;
    CurrencyAnyMatcher fCurrency;

-    CodePointMatcher codePoints[CODE_POINT_STACK_CAPACITY]; // By value
-    MaybeStackArray<CodePointMatcher*, 3> codePointsOverflow; // On heap in "batches"
-    int32_t codePointCount; // Total for both the ones by value and on heap
-    int32_t codePointNumBatches; // Number of batches in codePointsOverflow
+    // Use a child class for code point matchers, since it requires non-default operators.
+    CodePointMatcherWarehouse fCodePoints;

    friend class AffixPatternMatcherBuilder;
    friend class AffixPatternMatcher;
@ -161,6 +194,8 @@ class AffixMatcher : public NumberParseMatcher, public UMemory {

    int8_t compareTo(const AffixMatcher& rhs) const;

+    UnicodeString toString() const override;
+
  private:
    AffixPatternMatcher* fPrefix;
    AffixPatternMatcher* fSuffix;
@ -175,23 +210,19 @@ class AffixMatcherWarehouse {
  public:
    AffixMatcherWarehouse() = default;  // WARNING: Leaves the object in an unusable state

-    AffixMatcherWarehouse(AffixTokenMatcherWarehouse& warehouse);
+    AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse);

-    AffixMatcherWarehouse& operator=(AffixMatcherWarehouse&& src);
-
-    static AffixMatcherWarehouse createAffixMatchers(const AffixPatternProvider& patternInfo,
-                                                     MutableMatcherCollection& output,
-                                                     AffixTokenMatcherWarehouse tokenWarehouse,
-                                                     const IgnorablesMatcher& ignorables,
-                                                     parse_flags_t parseFlags, UErrorCode& status);
+    void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output,
+                             const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
+                             UErrorCode& status);

  private:
    // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix
    AffixMatcher fAffixMatchers[9];
    // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each
    AffixPatternMatcher fAffixPatternMatchers[6];
-    // Store all the tokens used by the AffixPatternMatchers
-    AffixTokenMatcherWarehouse fAffixTokenMatcherWarehouse;
+    // Reference to the warehouse for tokens used by the AffixPatternMatchers
+    AffixTokenMatcherWarehouse* fTokenWarehouse;

    friend class AffixMatcher;

--- a/icu4c/source/i18n/numparse_compositions.cpp
+++ b/icu4c/source/i18n/numparse_compositions.cpp
@ -5,6 +5,10 @@

 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT

+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
 #include "numparse_types.h"
 #include "numparse_compositions.h"
 #include "unicode/uniset.h"
@ -113,5 +117,9 @@ const NumberParseMatcher* const* ArraySeriesMatcher::end() const {
    return fMatchers.getAlias() + fMatchersLen;
 }

+// Debug label for this matcher; the contained matchers are not listed.
+UnicodeString ArraySeriesMatcher::toString() const {
+    UnicodeString label(u"<ArraySeries>");
+    return label;
+}
+

 #endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/i18n/numparse_compositions.h
+++ b/icu4c/source/i18n/numparse_compositions.h
@ -87,6 +87,8 @@ class ArraySeriesMatcher : public SeriesMatcher {

    const UnicodeSet& getLeadCodePoints() override;

+    UnicodeString toString() const override;
+
    int32_t length() const override;

  protected:
--- a/icu4c/source/i18n/numparse_currency.cpp
+++ b/icu4c/source/i18n/numparse_currency.cpp
@ -5,6 +5,10 @@

 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT

+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
 #include "numparse_types.h"
 #include "numparse_currency.h"
 #include "ucurrimp.h"
@ -66,6 +70,10 @@ const UnicodeSet& CurrencyNamesMatcher::getLeadCodePoints() {
    return *fLocalLeadCodePoints;
 }

+// Fixed debug label identifying this matcher type.
+UnicodeString CurrencyNamesMatcher::toString() const {
+    return UnicodeString(u"<CurrencyNames>");
+}
+

 CurrencyCustomMatcher::CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1,
                                             const UnicodeString& currency2)
@ -106,6 +114,10 @@ const UnicodeSet& CurrencyCustomMatcher::getLeadCodePoints() {
    return *fLocalLeadCodePoints;
 }

+// Fixed debug label; the currency code and strings are not included.
+UnicodeString CurrencyCustomMatcher::toString() const {
+    UnicodeString label(u"<CurrencyCustom>");
+    return label;
+}
+

 CurrencyAnyMatcher::CurrencyAnyMatcher() {
    fMatcherArray[0] = &fNamesMatcher;
@ -151,5 +163,9 @@ const NumberParseMatcher* const* CurrencyAnyMatcher::end() const {
    return fMatcherArray + 2;
 }

+// Fixed debug label; the wrapped matchers are not listed.
+UnicodeString CurrencyAnyMatcher::toString() const {
+    return UnicodeString(u"<CurrencyAny>");
+}
+

 #endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/i18n/numparse_currency.h
+++ b/icu4c/source/i18n/numparse_currency.h
@ -32,6 +32,8 @@ class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {

    const UnicodeSet& getLeadCodePoints() override;

+    UnicodeString toString() const override;
+
  private:
    // We could use Locale instead of CharString here, but
    // Locale has a non-trivial default constructor.
@ -51,6 +53,8 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {

    const UnicodeSet& getLeadCodePoints() override;

+    UnicodeString toString() const override;
+
  private:
    UChar fCurrencyCode[4];
    UnicodeString fCurrency1;
@ -75,6 +79,8 @@ class CurrencyAnyMatcher : public AnyMatcher, public UMemory {

    const UnicodeSet& getLeadCodePoints() override;

+    UnicodeString toString() const override;
+
  protected:
    const NumberParseMatcher* const* begin() const override;

--- a/icu4c/source/i18n/numparse_decimal.cpp
+++ b/icu4c/source/i18n/numparse_decimal.cpp
@ -5,6 +5,10 @@

 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT

+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
 #include "numparse_types.h"
 #include "numparse_decimal.h"
 #include "numparse_unisets.h"
@ -312,5 +316,9 @@ const UnicodeSet& DecimalMatcher::getLeadCodePoints() {
    return *fLocalLeadCodePoints;
 }

+// Fixed debug label identifying this matcher type.
+UnicodeString DecimalMatcher::toString() const {
+    UnicodeString label(u"<Decimal>");
+    return label;
+}
+

 #endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/i18n/numparse_decimal.h
+++ b/icu4c/source/i18n/numparse_decimal.h
@ -29,6 +29,8 @@ class DecimalMatcher : public NumberParseMatcher, public UMemory {

    const UnicodeSet& getLeadCodePoints() override;

+    UnicodeString toString() const override;
+
  private:
    /** If true, only accept strings whose grouping sizes match the locale */
    bool requireGroupingMatch;
--- a/icu4c/source/i18n/numparse_impl.cpp
+++ b/icu4c/source/i18n/numparse_impl.cpp
@ -5,7 +5,8 @@

 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT

-// Allow implicit conversion from char16_t* to UnicodeString for this file
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
 #define UNISTR_FROM_STRING_EXPLICIT

 #include "number_types.h"
@ -17,6 +18,9 @@
 #include "unicode/numberformatter.h"

 #include <typeinfo>
+#include <array>
+#include <iostream>
+#include "cstr.h"

 using namespace icu;
 using namespace icu::number;
@ -35,24 +39,20 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
    parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES};
    IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables;

+    const UChar currencyCode[] = u"USD";
    UnicodeString currency1(u"IU$");
    UnicodeString currency2(u"ICU");

    ParsedPatternInfo patternInfo;
    PatternParser::parseToPatternInfo(patternString, patternInfo, status);

-    // The following statement sets up the affix matchers.
-//    AffixMatcherWarehouse warehouse = ;
-
-    parser->fLocalMatchers.affixMatcherWarehouse = std::move(AffixMatcherWarehouse::createAffixMatchers(
-            patternInfo,
-            *parser,
-            AffixTokenMatcherWarehouse(
-                    u"USD", &currency1, &currency2, &symbols, &ignorables, &locale),
-            ignorables,
-            parseFlags,
-            status));
-
+    // The following statements set up the affix matchers.
+    AffixTokenMatcherSetupData affixSetupData = {
+            currencyCode, currency1, currency2, symbols, ignorables, locale};
+    parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData};
+    parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse};
+    parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers(
+            patternInfo, *parser, ignorables, parseFlags, status);

    Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO);
    grouper.setLocaleData(patternInfo, locale);
@ -233,7 +233,7 @@ UnicodeString NumberParserImpl::toString() const {
    UnicodeString result(u"<NumberParserImpl matchers:[");
    for (int32_t i = 0; i < fNumMatchers; i++) {
        result.append(u' ');
-        result.append(UnicodeString(typeid(*fMatchers[i]).name()));
+        result.append(fMatchers[i]->toString());
    }
    result.append(u" ]>", -1);
    return result;
--- a/icu4c/source/i18n/numparse_impl.h
+++ b/icu4c/source/i18n/numparse_impl.h
@ -60,6 +60,7 @@ class NumberParserImpl : public MutableMatcherCollection {
        ScientificMatcher scientific;
        CurrencyNamesMatcher currencyNames;
        AffixMatcherWarehouse affixMatcherWarehouse;
+        AffixTokenMatcherWarehouse affixTokenMatcherWarehouse;
    } fLocalMatchers;

    NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);
--- a/icu4c/source/i18n/numparse_parsednumber.cpp
+++ b/icu4c/source/i18n/numparse_parsednumber.cpp
@ -5,6 +5,10 @@

 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT

+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
 #include "numparse_types.h"
 #include <cmath>

@ -67,7 +71,11 @@ double ParsedNumber::getDouble() const {
    }

    // TODO: MIN_LONG
-    return quantity.toDouble();
+    double d = quantity.toDouble();
+    if (0 != (flags & FLAG_NEGATIVE)) {
+        d *= -1;
+    }
+    return d;
 }

 bool ParsedNumber::isBetterThan(const ParsedNumber& other) {
--- a/icu4c/source/i18n/numparse_scientific.cpp
+++ b/icu4c/source/i18n/numparse_scientific.cpp
@ -5,6 +5,10 @@

 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT

+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
 #include "numparse_types.h"
 #include "numparse_scientific.h"
 #include "numparse_unisets.h"
@ -83,5 +87,9 @@ const UnicodeSet& ScientificMatcher::getLeadCodePoints() {
    return *fLocalLeadCodePoints;
 }

+// Fixed debug label identifying this matcher type.
+UnicodeString ScientificMatcher::toString() const {
+    return UnicodeString(u"<Scientific>");
+}
+

 #endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/i18n/numparse_scientific.h
+++ b/icu4c/source/i18n/numparse_scientific.h
@ -27,6 +27,8 @@ class ScientificMatcher : public NumberParseMatcher, public UMemory {

    const UnicodeSet& getLeadCodePoints() override;

+    UnicodeString toString() const override;
+
  private:
    UnicodeString fExponentSeparatorString;
    DecimalMatcher fExponentMatcher;
--- a/icu4c/source/i18n/numparse_stringsegment.cpp
+++ b/icu4c/source/i18n/numparse_stringsegment.cpp
@ -5,6 +5,10 @@

 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT

+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
 #include "numparse_types.h"
 #include "numparse_stringsegment.h"
 #include "putilimp.h"
--- a/icu4c/source/i18n/numparse_symbols.cpp
+++ b/icu4c/source/i18n/numparse_symbols.cpp
@ -5,6 +5,10 @@

 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT

+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
 #include "numparse_types.h"
 #include "numparse_symbols.h"
 #include "numparse_utils.h"
@ -70,6 +74,11 @@ const UnicodeSet& SymbolMatcher::getLeadCodePoints() {
    return *fLocalLeadCodePoints;
 }

+// Generic debug label shared by the symbol matchers that do not override it.
+UnicodeString SymbolMatcher::toString() const {
+    // TODO: Customize output for each symbol
+    UnicodeString label(u"<Symbol>");
+    return label;
+}
+

 IgnorablesMatcher::IgnorablesMatcher(unisets::Key key)
        : SymbolMatcher({}, key) {
@ -79,6 +88,10 @@ bool IgnorablesMatcher::isFlexible() const {
    return true;
 }

+// Fixed debug label; replaces the generic SymbolMatcher label.
+UnicodeString IgnorablesMatcher::toString() const {
+    return UnicodeString(u"<Ignorables>");
+}
+
 bool IgnorablesMatcher::isDisabled(const ParsedNumber&) const {
    return false;
 }
--- a/icu4c/source/i18n/numparse_symbols.h
+++ b/icu4c/source/i18n/numparse_symbols.h
@ -30,6 +30,8 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory {

    const UnicodeSet& getLeadCodePoints() override;

+    UnicodeString toString() const override;
+
    virtual bool isDisabled(const ParsedNumber& result) const = 0;

    virtual void accept(StringSegment& segment, ParsedNumber& result) const = 0;
@ -50,6 +52,8 @@ class IgnorablesMatcher : public SymbolMatcher {

    bool isFlexible() const override;

+    UnicodeString toString() const override;
+
  protected:
    bool isDisabled(const ParsedNumber& result) const override;

--- a/icu4c/source/i18n/numparse_types.h
+++ b/icu4c/source/i18n/numparse_types.h
@ -318,6 +318,9 @@ class NumberParseMatcher {
        // Default implementation: no-op
    };

+    // String for debugging
+    virtual UnicodeString toString() const = 0;
+
  protected:
    // No construction except by subclasses!
    NumberParseMatcher() = default;
--- a/icu4c/source/i18n/numparse_unisets.cpp
+++ b/icu4c/source/i18n/numparse_unisets.cpp
@ -5,8 +5,8 @@

 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT

-// Allow implicit conversion from char16_t* to UnicodeString for this file
-// (useful for UnicodeSet constructor)
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
 #define UNISTR_FROM_STRING_EXPLICIT

 #include "numparse_unisets.h"
--- a/icu4c/source/test/intltest/numbertest_parse.cpp
+++ b/icu4c/source/test/intltest/numbertest_parse.cpp
@ -69,33 +69,33 @@ void NumberParserTest::testBasic() {
                 {3, u"-∞", u"0", 2, -INFINITY},
                 {3, u"@@@123  @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
                 {3, u"@@@123@@  ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
-//                 {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
-//                 {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
+                 {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
+                 {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
                 {3, u"514.23 USD", u"¤0", 10, 514.23},
                 {3, u"514.23 GBP", u"¤0", 10, 514.23},
-//                 {3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
-//                 {3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
-//                 {3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
+                 {3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
+                 {3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
+                 {3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
                 {3, u"𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 10, 51423.},
                 {3, u"[𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, 51423.},
                 {3, u"𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 11, 51423.},
                 {3, u"[𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 12, 51423.},
-//                 {3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.},
-//                 {3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.},
-//                 {3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.},
-//                 {3, u"𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 10, 51423.},
-//                 {3, u"{𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 11, 51423.},
-//                 {3, u"𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 11, 51423.},
-//                 {3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
-//                 {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
-//                 {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
+                 {3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.},
+                 {3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.},
+                 {3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.},
+                 {3, u"𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 10, 51423.},
+                 {3, u"{𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 11, 51423.},
+                 {3, u"𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 11, 51423.},
+                 {3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
+                 {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
+                 {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
                 {3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
                 {3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
                 {3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
                 {7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
-//                 {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
-//                 {3, u"📺1.23", u"📺0;📻0", 6, 1.23},
-//                 {3, u"📻1.23", u"📺0;📻0", 6, -1.23},
+                 {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
+                 {3, u"📺1.23", u"📺0;📻0", 6, 1.23},
+                 {3, u"📻1.23", u"📺0;📻0", 6, -1.23},
                 {3, u".00", u"0", 3, 0.0},
                 {3, u"                              1,234", u"a0", 35, 1234.}, // should not hang
                 {3, u"NaN", u"0", 3, NAN},
@ -215,27 +215,29 @@ void NumberParserTest::testSeriesMatcher() {
 void NumberParserTest::testCurrencyAnyMatcher() {
    IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");

-    UnicodeString currency1(u"IU$");
-    UnicodeString currency2(u"ICU");
-    DecimalFormatSymbols symbols("en", status);
    IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
-    Locale locale("en");
-    AffixTokenMatcherWarehouse warehouse(u"ICU", &currency1, &currency2, &symbols, &ignorables, &locale);
+    AffixTokenMatcherSetupData affixSetupData = {
+            u"ICU",
+            u"IU$",
+            u"ICU",
+            {"en", status},
+            ignorables,
+            "en"};
+    AffixTokenMatcherWarehouse warehouse(&affixSetupData);
    NumberParseMatcher& matcher = warehouse.currency(status);

-    static const struct TestCase{
+    static const struct TestCase {
        const char16_t* input;
        const char16_t* expectedCurrencyCode;
-    } cases[] {
-            { u"", u"\x00" },
-            { u"FOO", u"\x00" },
-            { u"USD", u"USD" },
-            { u"$", u"USD" },
-            { u"US dollars", u"USD" },
-            { u"eu", u"\x00" },
-            { u"euros", u"EUR" },
-            { u"ICU", u"ICU" },
-            { u"IU$", u"ICU" } };
+    } cases[]{{u"", u"\x00"},
+              {u"FOO", u"\x00"},
+              {u"USD", u"USD"},
+              {u"$", u"USD"},
+              {u"US dollars", u"USD"},
+              {u"eu", u"\x00"},
+              {u"euros", u"EUR"},
+              {u"ICU", u"ICU"},
+              {u"IU$", u"ICU"}};
    for (auto& cas : cases) {
        UnicodeString input(cas.input);

@ -243,7 +245,8 @@ void NumberParserTest::testCurrencyAnyMatcher() {
        ParsedNumber result;
        matcher.match(segment, result, status);
        assertEquals("Parsing " + input, cas.expectedCurrencyCode, result.currencyCode);
-        assertEquals("Whole string on " + input,
+        assertEquals(
+                "Whole string on " + input,
                cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(),
                result.charEnd);
    }
@ -251,13 +254,15 @@ void NumberParserTest::testCurrencyAnyMatcher() {

 void NumberParserTest::testAffixPatternMatcher() {
    IcuTestErrorCode status(*this, "testAffixPatternMatcher");
-
-    UnicodeString currency1(u"foo");
-    UnicodeString currency2(u"bar");
-    DecimalFormatSymbols symbols("en", status);
    IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
-    Locale locale("en");
-    AffixTokenMatcherWarehouse warehouse(u"EUR", &currency1, &currency2, &symbols, &ignorables, &locale);
+    AffixTokenMatcherSetupData affixSetupData = {
+            u"USD",
+            u"foo",
+            u"bar",
+            {"en", status},
+            ignorables,
+            "en"};
+    AffixTokenMatcherWarehouse warehouse(&affixSetupData);

    static const struct TestCase {
        bool exactMatch;
@ -269,8 +274,7 @@ void NumberParserTest::testAffixPatternMatcher() {
                 {true, u"+-%", 3, u"+-%"},
                 {false, u"ab c", 5, u"a    bc"},
                 {true, u"abc", 3, u"abc"},
-                 {false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"}
-    };
+                 {false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"}};

    for (auto& cas : cases) {
        UnicodeString affixPattern(cas.affixPattern);
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java
@ -9,6 +9,8 @@ import com.ibm.icu.text.UnicodeSet;
 * A mutable class allowing for a String with a variable offset and length. The charAt, length, and
 * subSequence methods all operate relative to the fixed offset into the String.
 *
+ * TODO: Make sure that this operates only on code point boundaries.
+ *
 * @author sffc
 */
 public class StringSegment implements CharSequence {