mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-13574 AffixMatcher is working. All simple parsing tests are passing.
X-SVN-Rev: 40903
This commit is contained in:
parent
7b1857d0f3
commit
1ed7deaa8c
20 changed files with 313 additions and 168 deletions
|
@ -5,6 +5,10 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
||||
// Helpful in toString methods and elsewhere.
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_affixes.h"
|
||||
#include "numparse_utils.h"
|
||||
|
@ -122,52 +126,32 @@ AffixPatternMatcher AffixPatternMatcherBuilder::build() {
|
|||
}
|
||||
|
||||
|
||||
AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode,
|
||||
const UnicodeString* currency1,
|
||||
const UnicodeString* currency2,
|
||||
const DecimalFormatSymbols* dfs,
|
||||
IgnorablesMatcher* ignorables, const Locale* locale)
|
||||
: currency1(currency1),
|
||||
currency2(currency2),
|
||||
dfs(dfs),
|
||||
ignorables(ignorables),
|
||||
locale(locale),
|
||||
codePointCount(0),
|
||||
codePointNumBatches(0) {
|
||||
utils::copyCurrencyCode(this->currencyCode, currencyCode);
|
||||
}
|
||||
CodePointMatcherWarehouse::CodePointMatcherWarehouse()
|
||||
: codePointCount(0), codePointNumBatches(0) {}
|
||||
|
||||
AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(
|
||||
AffixTokenMatcherWarehouse&& src) U_NOEXCEPT = default;
|
||||
|
||||
AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() {
|
||||
CodePointMatcherWarehouse::~CodePointMatcherWarehouse() {
|
||||
// Delete the variable number of batches of code point matchers
|
||||
for (int32_t i = 0; i < codePointNumBatches; i++) {
|
||||
delete[] codePointsOverflow[i];
|
||||
}
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() {
|
||||
return fMinusSign = {*dfs, true};
|
||||
/**
 * Move constructor: takes over the by-value matchers, the overflow batch
 * table, and both counters from src.
 *
 * After the move, src is left empty (zero matchers, zero batches) so that it
 * can be destroyed safely.
 *
 * @param src The warehouse to move from; it no longer owns any batches afterwards.
 */
CodePointMatcherWarehouse::CodePointMatcherWarehouse(CodePointMatcherWarehouse&& src) U_NOEXCEPT
        : codePoints(std::move(src.codePoints)),
          codePointsOverflow(std::move(src.codePointsOverflow)),
          codePointCount(src.codePointCount),
          codePointNumBatches(src.codePointNumBatches) {
    // The destructor deletes codePointNumBatches entries of codePointsOverflow.
    // If src kept its counts, its destructor would try to free batches that are
    // now owned by this object (or read whatever the moved-from array holds).
    src.codePointCount = 0;
    src.codePointNumBatches = 0;
}
|
||||
|
||||
/**
 * Move assignment: releases the batches currently owned by this object, then
 * takes over the by-value matchers, the overflow batch table, and both
 * counters from src.
 *
 * After the assignment, src is left empty (zero matchers, zero batches) so
 * that it can be destroyed safely. Self-assignment is a no-op.
 *
 * @param src The warehouse to move from; it no longer owns any batches afterwards.
 * @return A reference to this object.
 */
CodePointMatcherWarehouse&
CodePointMatcherWarehouse::operator=(CodePointMatcherWarehouse&& src) U_NOEXCEPT {
    if (this == &src) {
        return *this;
    }

    // Free the batches this object already owns; otherwise they would be
    // leaked once the batch table and counter are overwritten below.
    for (int32_t i = 0; i < codePointNumBatches; i++) {
        delete[] codePointsOverflow[i];
    }

    codePoints = std::move(src.codePoints);
    codePointsOverflow = std::move(src.codePointsOverflow);
    codePointCount = src.codePointCount;
    codePointNumBatches = src.codePointNumBatches;

    // The destructor deletes codePointNumBatches entries of codePointsOverflow,
    // so src must report zero batches now that ownership has been transferred.
    src.codePointCount = 0;
    src.codePointNumBatches = 0;
    return *this;
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
|
||||
return fPlusSign = {*dfs, true};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
|
||||
return fPercent = {*dfs};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
|
||||
return fPermille = {*dfs};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
|
||||
return fCurrency = {{*locale, status}, {currencyCode, *currency1, *currency2}};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
|
||||
NumberParseMatcher& CodePointMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
|
||||
if (codePointCount < CODE_POINT_STACK_CAPACITY) {
|
||||
return codePoints[codePointCount++] = {cp};
|
||||
}
|
||||
|
@ -186,6 +170,39 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp)
|
|||
}
|
||||
|
||||
|
||||
AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData)
|
||||
: fSetupData(setupData) {}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() {
|
||||
return fMinusSign = {fSetupData->dfs, true};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
|
||||
return fPlusSign = {fSetupData->dfs, true};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
|
||||
return fPercent = {fSetupData->dfs};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
|
||||
return fPermille = {fSetupData->dfs};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
|
||||
return fCurrency = {{fSetupData->locale, status},
|
||||
{fSetupData->currencyCode, fSetupData->currency1, fSetupData->currency2}};
|
||||
}
|
||||
|
||||
IgnorablesMatcher& AffixTokenMatcherWarehouse::ignorables() {
|
||||
return fSetupData->ignorables;
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
|
||||
return fCodePoints.nextCodePointMatcher(cp);
|
||||
}
|
||||
|
||||
|
||||
CodePointMatcher::CodePointMatcher(UChar32 cp)
|
||||
: fCp(cp) {}
|
||||
|
||||
|
@ -207,9 +224,13 @@ const UnicodeSet& CodePointMatcher::getLeadCodePoints() {
|
|||
return *fLocalLeadCodePoints;
|
||||
}
|
||||
|
||||
UnicodeString CodePointMatcher::toString() const {
|
||||
return u"<CodePoint>";
|
||||
}
|
||||
|
||||
|
||||
AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern,
|
||||
AffixTokenMatcherWarehouse& warehouse,
|
||||
AffixTokenMatcherWarehouse& tokenWarehouse,
|
||||
parse_flags_t parseFlags, bool* success,
|
||||
UErrorCode& status) {
|
||||
if (affixPattern.isEmpty()) {
|
||||
|
@ -222,10 +243,10 @@ AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& a
|
|||
if (0 != (parseFlags & PARSE_FLAG_EXACT_AFFIX)) {
|
||||
ignorables = nullptr;
|
||||
} else {
|
||||
ignorables = warehouse.ignorables;
|
||||
ignorables = &tokenWarehouse.ignorables();
|
||||
}
|
||||
|
||||
AffixPatternMatcherBuilder builder(affixPattern, warehouse, ignorables);
|
||||
AffixPatternMatcherBuilder builder(affixPattern, tokenWarehouse, ignorables);
|
||||
AffixUtils::iterateWithConsumer(UnicodeStringCharSequence(affixPattern), builder, status);
|
||||
return builder.build();
|
||||
}
|
||||
|
@ -243,10 +264,9 @@ bool AffixPatternMatcher::operator==(const AffixPatternMatcher& other) const {
|
|||
}
|
||||
|
||||
|
||||
AffixMatcherWarehouse::AffixMatcherWarehouse(AffixTokenMatcherWarehouse& warehouse)
|
||||
: fAffixTokenMatcherWarehouse(std::move(warehouse)) {}
|
||||
|
||||
AffixMatcherWarehouse& AffixMatcherWarehouse::operator=(AffixMatcherWarehouse&& src) = default;
|
||||
AffixMatcherWarehouse::AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse)
|
||||
: fTokenWarehouse(tokenWarehouse) {
|
||||
}
|
||||
|
||||
bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInfo,
|
||||
const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
|
||||
|
@ -278,18 +298,14 @@ bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInf
|
|||
return true;
|
||||
}
|
||||
|
||||
AffixMatcherWarehouse AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patternInfo,
|
||||
MutableMatcherCollection& output,
|
||||
AffixTokenMatcherWarehouse tokenWarehouse,
|
||||
const IgnorablesMatcher& ignorables,
|
||||
parse_flags_t parseFlags,
|
||||
UErrorCode& status) {
|
||||
void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patternInfo,
|
||||
MutableMatcherCollection& output,
|
||||
const IgnorablesMatcher& ignorables,
|
||||
parse_flags_t parseFlags, UErrorCode& status) {
|
||||
if (!isInteresting(patternInfo, ignorables, parseFlags, status)) {
|
||||
return {};
|
||||
return;
|
||||
}
|
||||
|
||||
AffixMatcherWarehouse warehouse(tokenWarehouse);
|
||||
|
||||
// The affixes have interesting characters, or we are in strict mode.
|
||||
// Use initial capacity of 6, the highest possible number of AffixMatchers.
|
||||
UnicodeString sb;
|
||||
|
@ -309,21 +325,19 @@ AffixMatcherWarehouse AffixMatcherWarehouse::createAffixMatchers(const AffixPatt
|
|||
bool hasPrefix = false;
|
||||
PatternStringUtils::patternInfoToStringBuilder(
|
||||
patternInfo, true, signum, signDisplay, StandardPlural::OTHER, false, sb);
|
||||
warehouse.fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
|
||||
sb, tokenWarehouse, parseFlags, &hasPrefix, status);
|
||||
AffixPatternMatcher* prefix = hasPrefix
|
||||
? &warehouse.fAffixPatternMatchers[numAffixPatternMatchers++]
|
||||
: nullptr;
|
||||
fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
|
||||
sb, *fTokenWarehouse, parseFlags, &hasPrefix, status);
|
||||
AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
|
||||
: nullptr;
|
||||
|
||||
// Generate Suffix
|
||||
bool hasSuffix = false;
|
||||
PatternStringUtils::patternInfoToStringBuilder(
|
||||
patternInfo, false, signum, signDisplay, StandardPlural::OTHER, false, sb);
|
||||
warehouse.fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
|
||||
sb, tokenWarehouse, parseFlags, &hasSuffix, status);
|
||||
AffixPatternMatcher* suffix = hasSuffix
|
||||
? &warehouse.fAffixPatternMatchers[numAffixPatternMatchers++]
|
||||
: nullptr;
|
||||
fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
|
||||
sb, *fTokenWarehouse, parseFlags, &hasSuffix, status);
|
||||
AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
|
||||
: nullptr;
|
||||
|
||||
if (signum == 1) {
|
||||
posPrefix = prefix;
|
||||
|
@ -338,14 +352,14 @@ AffixMatcherWarehouse AffixMatcherWarehouse::createAffixMatchers(const AffixPatt
|
|||
|
||||
// Note: it is indeed possible for posPrefix and posSuffix to both be null.
|
||||
// We still need to add that matcher for strict mode to work.
|
||||
warehouse.fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags};
|
||||
fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags};
|
||||
if (includeUnpaired && prefix != nullptr && suffix != nullptr) {
|
||||
// The following if statements are designed to prevent adding two identical matchers.
|
||||
if (signum == 1 || equals(prefix, posPrefix)) {
|
||||
warehouse.fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags};
|
||||
if (signum == 1 || !equals(prefix, posPrefix)) {
|
||||
fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags};
|
||||
}
|
||||
if (signum == 1 || equals(suffix, posSuffix)) {
|
||||
warehouse.fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags};
|
||||
if (signum == 1 || !equals(suffix, posSuffix)) {
|
||||
fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -356,19 +370,20 @@ AffixMatcherWarehouse AffixMatcherWarehouse::createAffixMatchers(const AffixPatt
|
|||
do {
|
||||
madeChanges = false;
|
||||
for (int32_t i = 1; i < numAffixMatchers; i++) {
|
||||
if (warehouse.fAffixMatchers[i - 1].compareTo(warehouse.fAffixMatchers[i]) > 0) {
|
||||
if (fAffixMatchers[i - 1].compareTo(fAffixMatchers[i]) > 0) {
|
||||
madeChanges = true;
|
||||
AffixMatcher temp = std::move(warehouse.fAffixMatchers[i - 1]);
|
||||
warehouse.fAffixMatchers[i - 1] = std::move(warehouse.fAffixMatchers[i]);
|
||||
warehouse.fAffixMatchers[i] = std::move(temp);
|
||||
AffixMatcher temp = std::move(fAffixMatchers[i - 1]);
|
||||
fAffixMatchers[i - 1] = std::move(fAffixMatchers[i]);
|
||||
fAffixMatchers[i] = std::move(temp);
|
||||
}
|
||||
}
|
||||
} while (madeChanges);
|
||||
for (int32_t i = 0; i < numAffixMatchers; i++) {
|
||||
output.addMatcher(warehouse.fAffixMatchers[i]);
|
||||
}
|
||||
|
||||
return warehouse;
|
||||
for (int32_t i = 0; i < numAffixMatchers; i++) {
|
||||
// Enable the following line to debug affixes
|
||||
//std::cout << "Adding affix matcher: " << CStr(fAffixMatchers[i].toString())() << std::endl;
|
||||
output.addMatcher(fAffixMatchers[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -454,6 +469,14 @@ int8_t AffixMatcher::compareTo(const AffixMatcher& rhs) const {
|
|||
}
|
||||
}
|
||||
|
||||
UnicodeString AffixMatcher::toString() const {
|
||||
bool isNegative = 0 != (fFlags & FLAG_NEGATIVE);
|
||||
return UnicodeString(u"<Affix") + (isNegative ? u":negative " : u" ") +
|
||||
(fPrefix ? fPrefix->getPattern() : u"null") + u"#" +
|
||||
(fSuffix ? fSuffix->getPattern() : u"null") + u">";
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
#include "numparse_currency.h"
|
||||
#include "number_affixutils.h"
|
||||
|
||||
#include <array>
|
||||
|
||||
U_NAMESPACE_BEGIN namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
|
@ -33,11 +35,57 @@ class CodePointMatcher : public NumberParseMatcher, public UMemory {
|
|||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
private:
|
||||
UChar32 fCp;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A warehouse to retain ownership of CodePointMatchers.
|
||||
*/
|
||||
class CodePointMatcherWarehouse : public UMemory {
|
||||
private:
|
||||
static constexpr int32_t CODE_POINT_STACK_CAPACITY = 5; // Number of entries directly on the stack
|
||||
static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation
|
||||
|
||||
public:
|
||||
CodePointMatcherWarehouse();
|
||||
|
||||
// A custom destructor is needed to free the memory from MaybeStackArray.
|
||||
// A custom move constructor and move assignment seem to be needed because of the custom destructor.
|
||||
|
||||
~CodePointMatcherWarehouse();
|
||||
|
||||
CodePointMatcherWarehouse(CodePointMatcherWarehouse&& src) U_NOEXCEPT;
|
||||
|
||||
CodePointMatcherWarehouse& operator=(CodePointMatcherWarehouse&& src) U_NOEXCEPT;
|
||||
|
||||
NumberParseMatcher& nextCodePointMatcher(UChar32 cp);
|
||||
|
||||
private:
|
||||
std::array<CodePointMatcher, CODE_POINT_STACK_CAPACITY> codePoints; // By value
|
||||
MaybeStackArray<CodePointMatcher*, 3> codePointsOverflow; // On heap in "batches"
|
||||
int32_t codePointCount; // Total for both the ones by value and on heap
|
||||
int32_t codePointNumBatches; // Number of batches in codePointsOverflow
|
||||
};
|
||||
|
||||
|
||||
struct AffixTokenMatcherSetupData {
|
||||
const UChar* currencyCode;
|
||||
const UnicodeString& currency1;
|
||||
const UnicodeString& currency2;
|
||||
const DecimalFormatSymbols& dfs;
|
||||
IgnorablesMatcher& ignorables;
|
||||
const Locale& locale;
|
||||
|
||||
// const UChar* currencyCode, const UnicodeString* currency1,
|
||||
// const UnicodeString* currency2, const DecimalFormatSymbols* dfs,
|
||||
// IgnorablesMatcher* ignorables, const Locale* locale
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Small helper class that generates matchers for individual tokens for AffixPatternMatcher.
|
||||
*
|
||||
|
@ -48,21 +96,11 @@ class CodePointMatcher : public NumberParseMatcher, public UMemory {
|
|||
*
|
||||
* @author sffc
|
||||
*/
|
||||
class AffixTokenMatcherWarehouse {
|
||||
private:
|
||||
static constexpr int32_t CODE_POINT_STACK_CAPACITY = 5; // Number of entries directly on the stack
|
||||
static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation
|
||||
|
||||
class AffixTokenMatcherWarehouse : public UMemory {
|
||||
public:
|
||||
AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString* currency1,
|
||||
const UnicodeString* currency2, const DecimalFormatSymbols* dfs,
|
||||
IgnorablesMatcher* ignorables, const Locale* locale);
|
||||
|
||||
AffixTokenMatcherWarehouse(AffixTokenMatcherWarehouse&& src) U_NOEXCEPT;
|
||||
|
||||
~AffixTokenMatcherWarehouse();
|
||||
AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData);
|
||||
|
||||
NumberParseMatcher& minusSign();
|
||||
|
||||
|
@ -74,16 +112,13 @@ class AffixTokenMatcherWarehouse {
|
|||
|
||||
NumberParseMatcher& currency(UErrorCode& status);
|
||||
|
||||
IgnorablesMatcher& ignorables();
|
||||
|
||||
NumberParseMatcher& nextCodePointMatcher(UChar32 cp);
|
||||
|
||||
private:
|
||||
// NOTE: The following fields may be unsafe to access after construction is done!
|
||||
UChar currencyCode[4];
|
||||
const UnicodeString* currency1;
|
||||
const UnicodeString* currency2;
|
||||
const DecimalFormatSymbols* dfs;
|
||||
IgnorablesMatcher* ignorables;
|
||||
const Locale* locale;
|
||||
// NOTE: The following field may be unsafe to access after construction is done!
|
||||
const AffixTokenMatcherSetupData* fSetupData;
|
||||
|
||||
// NOTE: These are default-constructed and should not be used until initialized.
|
||||
MinusSignMatcher fMinusSign;
|
||||
|
@ -92,10 +127,8 @@ class AffixTokenMatcherWarehouse {
|
|||
PermilleMatcher fPermille;
|
||||
CurrencyAnyMatcher fCurrency;
|
||||
|
||||
CodePointMatcher codePoints[CODE_POINT_STACK_CAPACITY]; // By value
|
||||
MaybeStackArray<CodePointMatcher*, 3> codePointsOverflow; // On heap in "batches"
|
||||
int32_t codePointCount; // Total for both the ones by value and on heap
|
||||
int32_t codePointNumBatches; // Number of batches in codePointsOverflow
|
||||
// Use a child class for code point matchers, since it requires non-default operators.
|
||||
CodePointMatcherWarehouse fCodePoints;
|
||||
|
||||
friend class AffixPatternMatcherBuilder;
|
||||
friend class AffixPatternMatcher;
|
||||
|
@ -161,6 +194,8 @@ class AffixMatcher : public NumberParseMatcher, public UMemory {
|
|||
|
||||
int8_t compareTo(const AffixMatcher& rhs) const;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
private:
|
||||
AffixPatternMatcher* fPrefix;
|
||||
AffixPatternMatcher* fSuffix;
|
||||
|
@ -175,23 +210,19 @@ class AffixMatcherWarehouse {
|
|||
public:
|
||||
AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
AffixMatcherWarehouse(AffixTokenMatcherWarehouse& warehouse);
|
||||
AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse);
|
||||
|
||||
AffixMatcherWarehouse& operator=(AffixMatcherWarehouse&& src);
|
||||
|
||||
static AffixMatcherWarehouse createAffixMatchers(const AffixPatternProvider& patternInfo,
|
||||
MutableMatcherCollection& output,
|
||||
AffixTokenMatcherWarehouse tokenWarehouse,
|
||||
const IgnorablesMatcher& ignorables,
|
||||
parse_flags_t parseFlags, UErrorCode& status);
|
||||
void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output,
|
||||
const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
|
||||
UErrorCode& status);
|
||||
|
||||
private:
|
||||
// 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix
|
||||
AffixMatcher fAffixMatchers[9];
|
||||
// 6 is the limit: positive, zero, and negative, a prefix and a suffix for each
|
||||
AffixPatternMatcher fAffixPatternMatchers[6];
|
||||
// Store all the tokens used by the AffixPatternMatchers
|
||||
AffixTokenMatcherWarehouse fAffixTokenMatcherWarehouse;
|
||||
// Reference to the warehouse for tokens used by the AffixPatternMatchers
|
||||
AffixTokenMatcherWarehouse* fTokenWarehouse;
|
||||
|
||||
friend class AffixMatcher;
|
||||
|
||||
|
|
|
@ -5,6 +5,10 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
||||
// Helpful in toString methods and elsewhere.
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_compositions.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
@ -113,5 +117,9 @@ const NumberParseMatcher* const* ArraySeriesMatcher::end() const {
|
|||
return fMatchers.getAlias() + fMatchersLen;
|
||||
}
|
||||
|
||||
UnicodeString ArraySeriesMatcher::toString() const {
|
||||
return u"<ArraySeries>";
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -87,6 +87,8 @@ class ArraySeriesMatcher : public SeriesMatcher {
|
|||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
int32_t length() const override;
|
||||
|
||||
protected:
|
||||
|
|
|
@ -5,6 +5,10 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
||||
// Helpful in toString methods and elsewhere.
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_currency.h"
|
||||
#include "ucurrimp.h"
|
||||
|
@ -66,6 +70,10 @@ const UnicodeSet& CurrencyNamesMatcher::getLeadCodePoints() {
|
|||
return *fLocalLeadCodePoints;
|
||||
}
|
||||
|
||||
UnicodeString CurrencyNamesMatcher::toString() const {
|
||||
return u"<CurrencyNames>";
|
||||
}
|
||||
|
||||
|
||||
CurrencyCustomMatcher::CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1,
|
||||
const UnicodeString& currency2)
|
||||
|
@ -106,6 +114,10 @@ const UnicodeSet& CurrencyCustomMatcher::getLeadCodePoints() {
|
|||
return *fLocalLeadCodePoints;
|
||||
}
|
||||
|
||||
UnicodeString CurrencyCustomMatcher::toString() const {
|
||||
return u"<CurrencyCustom>";
|
||||
}
|
||||
|
||||
|
||||
CurrencyAnyMatcher::CurrencyAnyMatcher() {
|
||||
fMatcherArray[0] = &fNamesMatcher;
|
||||
|
@ -151,5 +163,9 @@ const NumberParseMatcher* const* CurrencyAnyMatcher::end() const {
|
|||
return fMatcherArray + 2;
|
||||
}
|
||||
|
||||
UnicodeString CurrencyAnyMatcher::toString() const {
|
||||
return u"<CurrencyAny>";
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -32,6 +32,8 @@ class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
|
|||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
private:
|
||||
// We could use Locale instead of CharString here, but
|
||||
// Locale has a non-trivial default constructor.
|
||||
|
@ -51,6 +53,8 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
|
|||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
private:
|
||||
UChar fCurrencyCode[4];
|
||||
UnicodeString fCurrency1;
|
||||
|
@ -75,6 +79,8 @@ class CurrencyAnyMatcher : public AnyMatcher, public UMemory {
|
|||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
protected:
|
||||
const NumberParseMatcher* const* begin() const override;
|
||||
|
||||
|
|
|
@ -5,6 +5,10 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
||||
// Helpful in toString methods and elsewhere.
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_decimal.h"
|
||||
#include "numparse_unisets.h"
|
||||
|
@ -312,5 +316,9 @@ const UnicodeSet& DecimalMatcher::getLeadCodePoints() {
|
|||
return *fLocalLeadCodePoints;
|
||||
}
|
||||
|
||||
UnicodeString DecimalMatcher::toString() const {
|
||||
return u"<Decimal>";
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -29,6 +29,8 @@ class DecimalMatcher : public NumberParseMatcher, public UMemory {
|
|||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
private:
|
||||
/** If true, only accept strings whose grouping sizes match the locale */
|
||||
bool requireGroupingMatch;
|
||||
|
|
|
@ -5,7 +5,8 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
||||
// Helpful in toString methods and elsewhere.
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "number_types.h"
|
||||
|
@ -17,6 +18,9 @@
|
|||
#include "unicode/numberformatter.h"
|
||||
|
||||
#include <typeinfo>
|
||||
#include <array>
|
||||
#include <iostream>
|
||||
#include "cstr.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::number;
|
||||
|
@ -35,24 +39,20 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
|
|||
parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES};
|
||||
IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables;
|
||||
|
||||
const UChar currencyCode[] = u"USD";
|
||||
UnicodeString currency1(u"IU$");
|
||||
UnicodeString currency2(u"ICU");
|
||||
|
||||
ParsedPatternInfo patternInfo;
|
||||
PatternParser::parseToPatternInfo(patternString, patternInfo, status);
|
||||
|
||||
// The following statement sets up the affix matchers.
|
||||
// AffixMatcherWarehouse warehouse = ;
|
||||
|
||||
parser->fLocalMatchers.affixMatcherWarehouse = std::move(AffixMatcherWarehouse::createAffixMatchers(
|
||||
patternInfo,
|
||||
*parser,
|
||||
AffixTokenMatcherWarehouse(
|
||||
u"USD", &currency1, &currency2, &symbols, &ignorables, &locale),
|
||||
ignorables,
|
||||
parseFlags,
|
||||
status));
|
||||
|
||||
// The following statements set up the affix matchers.
|
||||
AffixTokenMatcherSetupData affixSetupData = {
|
||||
currencyCode, currency1, currency2, symbols, ignorables, locale};
|
||||
parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData};
|
||||
parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse};
|
||||
parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers(
|
||||
patternInfo, *parser, ignorables, parseFlags, status);
|
||||
|
||||
Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO);
|
||||
grouper.setLocaleData(patternInfo, locale);
|
||||
|
@ -233,7 +233,7 @@ UnicodeString NumberParserImpl::toString() const {
|
|||
UnicodeString result(u"<NumberParserImpl matchers:[");
|
||||
for (int32_t i = 0; i < fNumMatchers; i++) {
|
||||
result.append(u' ');
|
||||
result.append(UnicodeString(typeid(*fMatchers[i]).name()));
|
||||
result.append(fMatchers[i]->toString());
|
||||
}
|
||||
result.append(u" ]>", -1);
|
||||
return result;
|
||||
|
|
|
@ -60,6 +60,7 @@ class NumberParserImpl : public MutableMatcherCollection {
|
|||
ScientificMatcher scientific;
|
||||
CurrencyNamesMatcher currencyNames;
|
||||
AffixMatcherWarehouse affixMatcherWarehouse;
|
||||
AffixTokenMatcherWarehouse affixTokenMatcherWarehouse;
|
||||
} fLocalMatchers;
|
||||
|
||||
NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);
|
||||
|
|
|
@ -5,6 +5,10 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
||||
// Helpful in toString methods and elsewhere.
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include <cmath>
|
||||
|
||||
|
@ -67,7 +71,11 @@ double ParsedNumber::getDouble() const {
|
|||
}
|
||||
|
||||
// TODO: MIN_LONG
|
||||
return quantity.toDouble();
|
||||
double d = quantity.toDouble();
|
||||
if (0 != (flags & FLAG_NEGATIVE)) {
|
||||
d *= -1;
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
bool ParsedNumber::isBetterThan(const ParsedNumber& other) {
|
||||
|
|
|
@ -5,6 +5,10 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
||||
// Helpful in toString methods and elsewhere.
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_scientific.h"
|
||||
#include "numparse_unisets.h"
|
||||
|
@ -83,5 +87,9 @@ const UnicodeSet& ScientificMatcher::getLeadCodePoints() {
|
|||
return *fLocalLeadCodePoints;
|
||||
}
|
||||
|
||||
UnicodeString ScientificMatcher::toString() const {
|
||||
return u"<Scientific>";
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -27,6 +27,8 @@ class ScientificMatcher : public NumberParseMatcher, public UMemory {
|
|||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
private:
|
||||
UnicodeString fExponentSeparatorString;
|
||||
DecimalMatcher fExponentMatcher;
|
||||
|
|
|
@ -5,6 +5,10 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
||||
// Helpful in toString methods and elsewhere.
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_stringsegment.h"
|
||||
#include "putilimp.h"
|
||||
|
|
|
@ -5,6 +5,10 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
||||
// Helpful in toString methods and elsewhere.
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_symbols.h"
|
||||
#include "numparse_utils.h"
|
||||
|
@ -70,6 +74,11 @@ const UnicodeSet& SymbolMatcher::getLeadCodePoints() {
|
|||
return *fLocalLeadCodePoints;
|
||||
}
|
||||
|
||||
UnicodeString SymbolMatcher::toString() const {
|
||||
// TODO: Customize output for each symbol
|
||||
return u"<Symbol>";
|
||||
}
|
||||
|
||||
|
||||
IgnorablesMatcher::IgnorablesMatcher(unisets::Key key)
|
||||
: SymbolMatcher({}, key) {
|
||||
|
@ -79,6 +88,10 @@ bool IgnorablesMatcher::isFlexible() const {
|
|||
return true;
|
||||
}
|
||||
|
||||
UnicodeString IgnorablesMatcher::toString() const {
|
||||
return u"<Ignorables>";
|
||||
}
|
||||
|
||||
bool IgnorablesMatcher::isDisabled(const ParsedNumber&) const {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -30,6 +30,8 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory {
|
|||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
virtual bool isDisabled(const ParsedNumber& result) const = 0;
|
||||
|
||||
virtual void accept(StringSegment& segment, ParsedNumber& result) const = 0;
|
||||
|
@ -50,6 +52,8 @@ class IgnorablesMatcher : public SymbolMatcher {
|
|||
|
||||
bool isFlexible() const override;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
protected:
|
||||
bool isDisabled(const ParsedNumber& result) const override;
|
||||
|
||||
|
|
|
@ -318,6 +318,9 @@ class NumberParseMatcher {
|
|||
// Default implementation: no-op
|
||||
};
|
||||
|
||||
// String for debugging
|
||||
virtual UnicodeString toString() const = 0;
|
||||
|
||||
protected:
|
||||
// No construction except by subclasses!
|
||||
NumberParseMatcher() = default;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file
|
||||
// (useful for UnicodeSet constructor)
|
||||
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
||||
// Helpful in toString methods and elsewhere.
|
||||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_unisets.h"
|
||||
|
|
|
@ -69,33 +69,33 @@ void NumberParserTest::testBasic() {
|
|||
{3, u"-∞", u"0", 2, -INFINITY},
|
||||
{3, u"@@@123 @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
|
||||
{3, u"@@@123@@ ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
|
||||
// {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
|
||||
// {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
|
||||
{3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
|
||||
{3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
|
||||
{3, u"514.23 USD", u"¤0", 10, 514.23},
|
||||
{3, u"514.23 GBP", u"¤0", 10, 514.23},
|
||||
// {3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
|
||||
// {3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
|
||||
// {3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
|
||||
{3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
|
||||
{3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
|
||||
{3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 10, 51423.},
|
||||
{3, u"[𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, 51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 11, 51423.},
|
||||
{3, u"[𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 12, 51423.},
|
||||
// {3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.},
|
||||
// {3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.},
|
||||
// {3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.},
|
||||
// {3, u"𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 10, 51423.},
|
||||
// {3, u"{𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 11, 51423.},
|
||||
// {3, u"𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 11, 51423.},
|
||||
// {3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
|
||||
// {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
|
||||
// {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
|
||||
{3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.},
|
||||
{3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 10, 51423.},
|
||||
{3, u"{𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 11, 51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 11, 51423.},
|
||||
{3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
|
||||
{1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
|
||||
{2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
|
||||
{3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
|
||||
{3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
|
||||
{3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
|
||||
{7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
|
||||
// {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
|
||||
// {3, u"📺1.23", u"📺0;📻0", 6, 1.23},
|
||||
// {3, u"📻1.23", u"📺0;📻0", 6, -1.23},
|
||||
{3, u"a$ b5", u"a ¤ b0", 5, 5.0},
|
||||
{3, u"📺1.23", u"📺0;📻0", 6, 1.23},
|
||||
{3, u"📻1.23", u"📺0;📻0", 6, -1.23},
|
||||
{3, u".00", u"0", 3, 0.0},
|
||||
{3, u"                              1,234", u"a0", 35, 1234.}, // should not hang
|
||||
{3, u"NaN", u"0", 3, NAN},
|
||||
|
@ -215,27 +215,29 @@ void NumberParserTest::testSeriesMatcher() {
|
|||
void NumberParserTest::testCurrencyAnyMatcher() {
|
||||
IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");
|
||||
|
||||
UnicodeString currency1(u"IU$");
|
||||
UnicodeString currency2(u"ICU");
|
||||
DecimalFormatSymbols symbols("en", status);
|
||||
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
|
||||
Locale locale("en");
|
||||
AffixTokenMatcherWarehouse warehouse(u"ICU", &currency1, &currency2, &symbols, &ignorables, &locale);
|
||||
AffixTokenMatcherSetupData affixSetupData = {
|
||||
u"ICU",
|
||||
u"IU$",
|
||||
u"ICU",
|
||||
{"en", status},
|
||||
ignorables,
|
||||
"en"};
|
||||
AffixTokenMatcherWarehouse warehouse(&affixSetupData);
|
||||
NumberParseMatcher& matcher = warehouse.currency(status);
|
||||
|
||||
static const struct TestCase{
|
||||
static const struct TestCase {
|
||||
const char16_t* input;
|
||||
const char16_t* expectedCurrencyCode;
|
||||
} cases[] {
|
||||
{ u"", u"\x00" },
|
||||
{ u"FOO", u"\x00" },
|
||||
{ u"USD", u"USD" },
|
||||
{ u"$", u"USD" },
|
||||
{ u"US dollars", u"USD" },
|
||||
{ u"eu", u"\x00" },
|
||||
{ u"euros", u"EUR" },
|
||||
{ u"ICU", u"ICU" },
|
||||
{ u"IU$", u"ICU" } };
|
||||
} cases[]{{u"", u"\x00"},
|
||||
{u"FOO", u"\x00"},
|
||||
{u"USD", u"USD"},
|
||||
{u"$", u"USD"},
|
||||
{u"US dollars", u"USD"},
|
||||
{u"eu", u"\x00"},
|
||||
{u"euros", u"EUR"},
|
||||
{u"ICU", u"ICU"},
|
||||
{u"IU$", u"ICU"}};
|
||||
for (auto& cas : cases) {
|
||||
UnicodeString input(cas.input);
|
||||
|
||||
|
@ -243,7 +245,8 @@ void NumberParserTest::testCurrencyAnyMatcher() {
|
|||
ParsedNumber result;
|
||||
matcher.match(segment, result, status);
|
||||
assertEquals("Parsing " + input, cas.expectedCurrencyCode, result.currencyCode);
|
||||
assertEquals("Whole string on " + input,
|
||||
assertEquals(
|
||||
"Whole string on " + input,
|
||||
cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(),
|
||||
result.charEnd);
|
||||
}
|
||||
|
@ -251,13 +254,15 @@ void NumberParserTest::testCurrencyAnyMatcher() {
|
|||
|
||||
void NumberParserTest::testAffixPatternMatcher() {
|
||||
IcuTestErrorCode status(*this, "testAffixPatternMatcher");
|
||||
|
||||
UnicodeString currency1(u"foo");
|
||||
UnicodeString currency2(u"bar");
|
||||
DecimalFormatSymbols symbols("en", status);
|
||||
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
|
||||
Locale locale("en");
|
||||
AffixTokenMatcherWarehouse warehouse(u"EUR", &currency1, &currency2, &symbols, &ignorables, &locale);
|
||||
AffixTokenMatcherSetupData affixSetupData = {
|
||||
u"USD",
|
||||
u"foo",
|
||||
u"bar",
|
||||
{"en", status},
|
||||
ignorables,
|
||||
"en"};
|
||||
AffixTokenMatcherWarehouse warehouse(&affixSetupData);
|
||||
|
||||
static const struct TestCase {
|
||||
bool exactMatch;
|
||||
|
@ -269,8 +274,7 @@ void NumberParserTest::testAffixPatternMatcher() {
|
|||
{true, u"+-%", 3, u"+-%"},
|
||||
{false, u"ab c", 5, u"a bc"},
|
||||
{true, u"abc", 3, u"abc"},
|
||||
{false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"}
|
||||
};
|
||||
{false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"}};
|
||||
|
||||
for (auto& cas : cases) {
|
||||
UnicodeString affixPattern(cas.affixPattern);
|
||||
|
|
|
@ -9,6 +9,8 @@ import com.ibm.icu.text.UnicodeSet;
|
|||
* A mutable class allowing for a String with a variable offset and length. The charAt, length, and
|
||||
* subSequence methods all operate relative to the fixed offset into the String.
|
||||
*
|
||||
* TODO: Make sure that this operates only on code point boundaries.
|
||||
*
|
||||
* @author sffc
|
||||
*/
|
||||
public class StringSegment implements CharSequence {
|
||||
|
|
Loading…
Add table
Reference in a new issue