ICU-13574 Trying to get std::move operator to work on AffixMatcherWarehouse. No luck yet.

X-SVN-Rev: 40895
This commit is contained in:
Shane Carr 2018-02-10 15:49:02 +00:00
parent afbb37febd
commit 7b1857d0f3
7 changed files with 200 additions and 125 deletions

View file

@ -17,6 +17,42 @@ using namespace icu::number;
using namespace icu::number::impl;
namespace {
/**
 * Reports whether an optional AffixPatternMatcher corresponds to a pattern string.
 *
 * A null matcher corresponds only to a bogus pattern string; a non-null matcher corresponds
 * to the pattern string when its own pattern compares equal to it.
 */
static bool matched(const AffixPatternMatcher* affix, const UnicodeString& patternString) {
    if (affix == nullptr) {
        // No matcher: the only acceptable counterpart is a bogus (absent) string.
        return patternString.isBogus();
    }
    return affix->getPattern() == patternString;
}
/**
 * Returns the length of the pattern held by the given AffixPatternMatcher,
 * treating a null matcher as having length 0.
 */
static int32_t length(const AffixPatternMatcher* matcher) {
    if (matcher != nullptr) {
        return matcher->getPattern().length();
    }
    return 0;
}
/**
 * Null-tolerant equality for AffixPatternMatcher pointers, similar to Java Objects.equals():
 * two null pointers are equal, a null and a non-null pointer are not, and two non-null
 * pointers are compared through AffixPatternMatcher::operator==.
 */
static bool equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs) {
    const bool lhsMissing = (lhs == nullptr);
    const bool rhsMissing = (rhs == nullptr);
    if (lhsMissing || rhsMissing) {
        // Equal only when both sides are missing.
        return lhsMissing && rhsMissing;
    }
    return *lhs == *rhs;
}
}
AffixPatternMatcherBuilder::AffixPatternMatcherBuilder(const UnicodeString& pattern,
AffixTokenMatcherWarehouse& warehouse,
IgnorablesMatcher* ignorables)
@ -101,6 +137,9 @@ AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode
utils::copyCurrencyCode(this->currencyCode, currencyCode);
}
// Move constructor, defaulted here in the implementation file: every member is moved
// individually from src.
// NOTE(review): the destructor of this class deletes batches of code point matchers. If those
// batches are held through raw pointers, a memberwise move leaves src holding the same pointers,
// so both objects would delete them -- confirm against the member declarations.
AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(
AffixTokenMatcherWarehouse&& src) U_NOEXCEPT = default;
AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() {
// Delete the variable number of batches of code point matchers
for (int32_t i = 0; i < codePointNumBatches; i++) {
@ -204,79 +243,10 @@ bool AffixPatternMatcher::operator==(const AffixPatternMatcher& other) const {
}
AffixMatcherWarehouse::AffixMatcherWarehouse(const AffixPatternProvider& patternInfo,
NumberParserImpl& output,
AffixTokenMatcherWarehouse& warehouse,
const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
UErrorCode& status)
: fAffixTokenMatcherWarehouse(std::move(warehouse)) {
if (!isInteresting(patternInfo, ignorables, parseFlags, status)) {
return;
}
// Constructs a warehouse that takes over the given token matcher warehouse.
// Although the argument is an lvalue reference, it is moved from: after this call the caller's
// object is in a moved-from state and must not be used as a source of token matchers.
AffixMatcherWarehouse::AffixMatcherWarehouse(AffixTokenMatcherWarehouse& warehouse)
: fAffixTokenMatcherWarehouse(std::move(warehouse)) {}
// The affixes have interesting characters, or we are in strict mode.
// Use initial capacity of 6, the highest possible number of AffixMatchers.
UnicodeString sb;
bool includeUnpaired = 0 != (parseFlags & PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES);
UNumberSignDisplay signDisplay = (0 != (parseFlags & PARSE_FLAG_PLUS_SIGN_ALLOWED)) ? UNUM_SIGN_ALWAYS
: UNUM_SIGN_NEVER;
int32_t numAffixMatchers = 0;
int32_t numAffixPatternMatchers = 0;
AffixPatternMatcher* posPrefix = nullptr;
AffixPatternMatcher* posSuffix = nullptr;
// Pre-process the affix strings to resolve LDML rules like sign display.
for (int8_t signum = 1; signum >= -1; signum--) {
// Generate Prefix
bool hasPrefix = false;
PatternStringUtils::patternInfoToStringBuilder(
patternInfo, true, signum, signDisplay, StandardPlural::OTHER, false, sb);
fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
sb, warehouse, parseFlags, &hasPrefix, status);
AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
: nullptr;
// Generate Suffix
bool hasSuffix = false;
PatternStringUtils::patternInfoToStringBuilder(
patternInfo, false, signum, signDisplay, StandardPlural::OTHER, false, sb);
fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
sb, warehouse, parseFlags, &hasSuffix, status);
AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
: nullptr;
if (signum == 1) {
posPrefix = prefix;
posSuffix = suffix;
} else if (equals(prefix, posPrefix) && equals(suffix, posSuffix)) {
// Skip adding these matchers (we already have equivalents)
continue;
}
// Flags for setting in the ParsedNumber
int flags = (signum == -1) ? FLAG_NEGATIVE : 0;
// Note: it is indeed possible for posPrefix and posSuffix to both be null.
// We still need to add that matcher for strict mode to work.
fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags};
if (includeUnpaired && prefix != nullptr && suffix != nullptr) {
// The following if statements are designed to prevent adding two identical matchers.
if (signum == 1 || equals(prefix, posPrefix)) {
fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags};
}
if (signum == 1 || equals(suffix, posSuffix)) {
fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags};
}
}
}
// Put the AffixMatchers in order, and then add them to the output.
// TODO
// Collections.sort(matchers, COMPARATOR);
// output.addMatchers(matchers);
}
// Move assignment, defaulted: each member of src is move-assigned individually.
// NOTE(review): AffixMatcher keeps raw AffixPatternMatcher pointers (fPrefix/fSuffix). If those
// point into the source warehouse's own fAffixPatternMatchers storage, a memberwise move does
// not re-target them to this object's storage -- confirm before relying on moved warehouses.
AffixMatcherWarehouse& AffixMatcherWarehouse::operator=(AffixMatcherWarehouse&& src) = default;
bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInfo,
const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
@ -308,14 +278,97 @@ bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInf
return true;
}
bool AffixMatcherWarehouse::equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs) {
if (lhs == nullptr && rhs == nullptr) {
return true;
/**
 * Builds the AffixMatchers described by patternInfo, sorts them with AffixMatcher::compareTo,
 * and registers each of them with output. In Java, this is AffixMatcher#createMatchers().
 *
 * @param patternInfo    Source of the prefix and suffix patterns for each signum.
 * @param output         Receives every generated AffixMatcher through addMatcher().
 * @param tokenWarehouse Token matcher storage; it is moved into the returned warehouse.
 * @param ignorables     Forwarded to isInteresting().
 * @param parseFlags     Parse flags; PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES and
 *                       PARSE_FLAG_PLUS_SIGN_ALLOWED are consulted here.
 * @param status         Forwarded to isInteresting() and AffixPatternMatcher::fromAffixPattern().
 * @return The warehouse that stores the generated matchers, or a default-constructed warehouse
 *         when isInteresting() returns false.
 */
AffixMatcherWarehouse AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patternInfo,
                                                                 MutableMatcherCollection& output,
                                                                 AffixTokenMatcherWarehouse tokenWarehouse,
                                                                 const IgnorablesMatcher& ignorables,
                                                                 parse_flags_t parseFlags,
                                                                 UErrorCode& status) {
    if (!isInteresting(patternInfo, ignorables, parseFlags, status)) {
        return {};
    }

    // The constructor moves tokenWarehouse into warehouse.fAffixTokenMatcherWarehouse. From here
    // on, token matchers must be created in the warehouse's own member: the parameter is in a
    // moved-from state and is destroyed when this function returns.
    AffixMatcherWarehouse warehouse(tokenWarehouse);
    AffixTokenMatcherWarehouse& tokens = warehouse.fAffixTokenMatcherWarehouse;

    // The affixes have interesting characters, or we are in strict mode.
    // The loop below produces at most 2 AffixPatternMatchers and 3 AffixMatchers per signum,
    // i.e. at most 6 and 9 entries in the warehouse's storage.
    UnicodeString sb;
    bool includeUnpaired = 0 != (parseFlags & PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES);
    UNumberSignDisplay signDisplay = (0 != (parseFlags & PARSE_FLAG_PLUS_SIGN_ALLOWED)) ? UNUM_SIGN_ALWAYS
                                                                                         : UNUM_SIGN_NEVER;

    int32_t numAffixMatchers = 0;
    int32_t numAffixPatternMatchers = 0;

    AffixPatternMatcher* posPrefix = nullptr;
    AffixPatternMatcher* posSuffix = nullptr;

    // Pre-process the affix strings to resolve LDML rules like sign display.
    for (int8_t signum = 1; signum >= -1; signum--) {
        // Generate Prefix. The slot is only consumed (index advanced) when a prefix exists.
        bool hasPrefix = false;
        PatternStringUtils::patternInfoToStringBuilder(
                patternInfo, true, signum, signDisplay, StandardPlural::OTHER, false, sb);
        warehouse.fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
                sb, tokens, parseFlags, &hasPrefix, status);
        AffixPatternMatcher* prefix = hasPrefix
                                      ? &warehouse.fAffixPatternMatchers[numAffixPatternMatchers++]
                                      : nullptr;

        // Generate Suffix. Same slot-reuse rule as for the prefix.
        bool hasSuffix = false;
        PatternStringUtils::patternInfoToStringBuilder(
                patternInfo, false, signum, signDisplay, StandardPlural::OTHER, false, sb);
        warehouse.fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
                sb, tokens, parseFlags, &hasSuffix, status);
        AffixPatternMatcher* suffix = hasSuffix
                                      ? &warehouse.fAffixPatternMatchers[numAffixPatternMatchers++]
                                      : nullptr;

        if (signum == 1) {
            posPrefix = prefix;
            posSuffix = suffix;
        } else if (equals(prefix, posPrefix) && equals(suffix, posSuffix)) {
            // Skip adding these matchers (we already have equivalents)
            continue;
        }

        // Flags for setting in the ParsedNumber
        int flags = (signum == -1) ? FLAG_NEGATIVE : 0;

        // Note: it is indeed possible for posPrefix and posSuffix to both be null.
        // We still need to add that matcher for strict mode to work.
        warehouse.fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags};
        if (includeUnpaired && prefix != nullptr && suffix != nullptr) {
            // The following if statements are designed to prevent adding two identical matchers.
            if (signum == 1 || equals(prefix, posPrefix)) {
                warehouse.fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags};
            }
            if (signum == 1 || equals(suffix, posSuffix)) {
                warehouse.fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags};
            }
        }
    }

    // Put the AffixMatchers in order, and then add them to the output.
    // Since there are at most 9 elements, do a simple-to-implement bubble sort.
    bool madeChanges;
    do {
        madeChanges = false;
        for (int32_t i = 1; i < numAffixMatchers; i++) {
            if (warehouse.fAffixMatchers[i - 1].compareTo(warehouse.fAffixMatchers[i]) > 0) {
                madeChanges = true;
                AffixMatcher temp = std::move(warehouse.fAffixMatchers[i - 1]);
                warehouse.fAffixMatchers[i - 1] = std::move(warehouse.fAffixMatchers[i]);
                warehouse.fAffixMatchers[i] = std::move(temp);
            }
        }
    } while (madeChanges);

    for (int32_t i = 0; i < numAffixMatchers; i++) {
        output.addMatcher(warehouse.fAffixMatchers[i]);
    }

    // NOTE(review): the AffixMatchers hold raw pointers into warehouse.fAffixPatternMatchers, and
    // output was handed references to warehouse.fAffixMatchers. Both refer to this local object;
    // whether they remain valid after the by-value return (and any move at the call site)
    // depends on copy elision and on what addMatcher() retains -- confirm.
    return warehouse;
}
@ -390,9 +443,15 @@ void AffixMatcher::postProcess(ParsedNumber& result) const {
}
}
bool AffixMatcher::matched(const AffixPatternMatcher* affix, const UnicodeString& patternString) {
return (affix == nullptr && patternString.isBogus()) ||
(affix != nullptr && affix->getPattern() == patternString);
int8_t AffixMatcher::compareTo(const AffixMatcher& rhs) const {
const AffixMatcher& lhs = *this;
if (length(lhs.fPrefix) != length(rhs.fPrefix)) {
return length(lhs.fPrefix) > length(rhs.fPrefix) ? -1 : 1;
} else if (length(lhs.fSuffix) != length(rhs.fSuffix)) {
return length(lhs.fSuffix) > length(rhs.fSuffix) ? -1 : 1;
} else {
return 0;
}
}

View file

@ -60,7 +60,7 @@ class AffixTokenMatcherWarehouse {
const UnicodeString* currency2, const DecimalFormatSymbols* dfs,
IgnorablesMatcher* ignorables, const Locale* locale);
AffixTokenMatcherWarehouse(AffixTokenMatcherWarehouse&& src) = default;
AffixTokenMatcherWarehouse(AffixTokenMatcherWarehouse&& src) U_NOEXCEPT;
~AffixTokenMatcherWarehouse();
@ -102,7 +102,7 @@ class AffixTokenMatcherWarehouse {
};
class AffixPatternMatcherBuilder : public TokenConsumer {
class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection {
public:
AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse,
IgnorablesMatcher* ignorables);
@ -121,7 +121,7 @@ class AffixPatternMatcherBuilder : public TokenConsumer {
AffixTokenMatcherWarehouse& fWarehouse;
IgnorablesMatcher* fIgnorables;
void addMatcher(NumberParseMatcher& matcher);
void addMatcher(NumberParseMatcher& matcher) override;
};
@ -153,25 +153,18 @@ class AffixMatcher : public NumberParseMatcher, public UMemory {
AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags);
// static void createMatchers() is the constructor for AffixMatcherWarehouse in C++
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
void postProcess(ParsedNumber& result) const override;
const UnicodeSet& getLeadCodePoints() override;
int8_t compareTo(const AffixMatcher& rhs) const;
private:
AffixPatternMatcher* fPrefix;
AffixPatternMatcher* fSuffix;
result_flags_t fFlags;
/**
* Helper method to return whether the given AffixPatternMatcher equals the given pattern string.
* Either both arguments must be null or the pattern string inside the AffixPatternMatcher must equal
* the given pattern string.
*/
static bool matched(const AffixPatternMatcher* affix, const UnicodeString& patternString);
};
@ -182,10 +175,15 @@ class AffixMatcherWarehouse {
public:
AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
// in Java, this is AffixMatcher#createMatchers()
AffixMatcherWarehouse(const AffixPatternProvider& patternInfo, NumberParserImpl& output,
AffixTokenMatcherWarehouse& warehouse, const IgnorablesMatcher& ignorables,
parse_flags_t parseFlags, UErrorCode& status);
AffixMatcherWarehouse(AffixTokenMatcherWarehouse& warehouse);
AffixMatcherWarehouse& operator=(AffixMatcherWarehouse&& src);
static AffixMatcherWarehouse createAffixMatchers(const AffixPatternProvider& patternInfo,
MutableMatcherCollection& output,
AffixTokenMatcherWarehouse tokenWarehouse,
const IgnorablesMatcher& ignorables,
parse_flags_t parseFlags, UErrorCode& status);
private:
// 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix
@ -195,14 +193,10 @@ class AffixMatcherWarehouse {
// Store all the tokens used by the AffixPatternMatchers
AffixTokenMatcherWarehouse fAffixTokenMatcherWarehouse;
friend class AffixMatcher;
static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables,
parse_flags_t parseFlags, UErrorCode& status);
/**
* Helper method to return whether (1) both lhs and rhs are null/invalid, or (2) if they are both
* valid, whether they are equal according to operator==. Similar to Java Objects.equals()
*/
static bool equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs);
};

View file

@ -32,18 +32,27 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
auto* parser = new NumberParserImpl(parseFlags, true);
DecimalFormatSymbols symbols(locale, status);
parser->fLocalMatchers.ignorables = std::move(IgnorablesMatcher(unisets::DEFAULT_IGNORABLES));
parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES};
IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables;
// MatcherFactory factory = new MatcherFactory();
// factory.currency = Currency.getInstance("USD");
// factory.symbols = symbols;
// factory.ignorables = ignorables;
// factory.locale = locale;
// factory.parseFlags = parseFlags;
UnicodeString currency1(u"IU$");
UnicodeString currency2(u"ICU");
ParsedPatternInfo patternInfo;
PatternParser::parseToPatternInfo(patternString, patternInfo, status);
// AffixMatcher.createMatchers(patternInfo, parser, factory, ignorables, parseFlags);
// The following statement sets up the affix matchers.
// AffixMatcherWarehouse warehouse = ;
parser->fLocalMatchers.affixMatcherWarehouse = std::move(AffixMatcherWarehouse::createAffixMatchers(
patternInfo,
*parser,
AffixTokenMatcherWarehouse(
u"USD", &currency1, &currency2, &symbols, &ignorables, &locale),
ignorables,
parseFlags,
status));
Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO);
grouper.setLocaleData(patternInfo, locale);

View file

@ -13,18 +13,19 @@
#include "numparse_scientific.h"
#include "unicode/uniset.h"
#include "numparse_currency.h"
#include "numparse_affixes.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
class NumberParserImpl {
class NumberParserImpl : public MutableMatcherCollection {
public:
~NumberParserImpl();
virtual ~NumberParserImpl();
static NumberParserImpl* createSimpleParser(const Locale& locale, const UnicodeString& patternString,
parse_flags_t parseFlags, UErrorCode& status);
void addMatcher(NumberParseMatcher& matcher);
void addMatcher(NumberParseMatcher& matcher) override;
void freeze();
@ -58,6 +59,7 @@ class NumberParserImpl {
DecimalMatcher decimal;
ScientificMatcher scientific;
CurrencyNamesMatcher currencyNames;
AffixMatcherWarehouse affixMatcherWarehouse;
} fLocalMatchers;
NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);

View file

@ -327,6 +327,17 @@ class NumberParseMatcher {
};
/**
 * Abstract interface for an object to which NumberParseMatchers can be added.
 *
 * Intended for use as an argument type: a function that produces matchers can accept a
 * MutableMatcherCollection& and call addMatcher() without depending on the concrete
 * collection class.
 *
 * NOTE(review): addMatcher() receives the matcher by non-const reference; implementations
 * presumably keep referring to it afterwards, in which case the matcher must outlive the
 * collection -- confirm against the implementing classes.
 */
class MutableMatcherCollection {
public:
// Virtual so that implementations can be destroyed through this interface.
virtual ~MutableMatcherCollection() = default;
// Adds the given matcher to the collection; must be provided by every implementation.
virtual void addMatcher(NumberParseMatcher& matcher) = 0;
};
} // namespace impl
} // namespace numparse
U_NAMESPACE_END

View file

@ -76,10 +76,10 @@ void NumberParserTest::testBasic() {
// {3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
// {3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
// {3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
// {3, u"𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 10, 51423.},
// {3, u"[𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, 51423.},
// {3, u"𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 11, 51423.},
// {3, u"[𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 12, 51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 10, 51423.},
{3, u"[𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, 51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 11, 51423.},
{3, u"[𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 12, 51423.},
// {3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.},
// {3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.},
// {3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.},

View file

@ -29,15 +29,15 @@ public class AffixMatcher implements NumberParseMatcher {
*/
public static final Comparator<AffixMatcher> COMPARATOR = new Comparator<AffixMatcher>() {
@Override
public int compare(AffixMatcher o1, AffixMatcher o2) {
if (length(o1.prefix) != length(o2.prefix)) {
return length(o1.prefix) > length(o2.prefix) ? -1 : 1;
} else if (length(o1.suffix) != length(o2.suffix)) {
return length(o1.suffix) > length(o2.suffix) ? -1 : 1;
} else if (!o1.equals(o2)) {
public int compare(AffixMatcher lhs, AffixMatcher rhs) {
if (length(lhs.prefix) != length(rhs.prefix)) {
return length(lhs.prefix) > length(rhs.prefix) ? -1 : 1;
} else if (length(lhs.suffix) != length(rhs.suffix)) {
return length(lhs.suffix) > length(rhs.suffix) ? -1 : 1;
} else if (!lhs.equals(rhs)) {
// If the prefix and suffix are the same length, arbitrarily break ties.
// We can't return zero unless the elements are equal.
return o1.hashCode() > o2.hashCode() ? -1 : 1;
return lhs.hashCode() > rhs.hashCode() ? -1 : 1;
} else {
return 0;
}