mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 23:10:40 +00:00
ICU-13574 Adding scientific matcher to ICU4C.
X-SVN-Rev: 40880
This commit is contained in:
parent
b8bab89cb5
commit
e91ff603de
6 changed files with 132 additions and 5 deletions
|
@ -109,7 +109,7 @@ number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o \
|
|||
number_padding.o number_patternmodifier.o number_patternstring.o \
|
||||
number_rounding.o number_scientific.o number_stringbuilder.o \
|
||||
numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o \
|
||||
numparse_impl.o numparse_symbols.o numparse_decimal.o
|
||||
numparse_impl.o numparse_symbols.o numparse_decimal.o numparse_scientific.o
|
||||
|
||||
|
||||
## Header files to install
|
||||
|
|
|
@ -57,7 +57,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
|
|||
parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
|
||||
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
|
||||
parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
|
||||
// parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
|
||||
// parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
|
||||
// parser.addMatcher(new RequireNumberMatcher());
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "numparse_types.h"
|
||||
#include "numparse_decimal.h"
|
||||
#include "numparse_symbols.h"
|
||||
#include "numparse_scientific.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace numparse {
|
||||
|
@ -54,6 +55,7 @@ class NumberParserImpl {
|
|||
PermilleMatcher permille;
|
||||
PlusSignMatcher plusSign;
|
||||
DecimalMatcher decimal;
|
||||
ScientificMatcher scientific;
|
||||
} fLocalMatchers;
|
||||
|
||||
NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);
|
||||
|
|
84
icu4c/source/i18n/numparse_scientific.cpp
Normal file
84
icu4c/source/i18n/numparse_scientific.cpp
Normal file
|
@ -0,0 +1,84 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_scientific.h"
|
||||
#include "numparse_unisets.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
|
||||
|
||||
// Builds a matcher for the exponent part of a number: the exponent separator is taken from the
// given symbols (kExponentialSymbol), and the exponent digits are handled by a nested
// DecimalMatcher constructed with PARSE_FLAG_INTEGER_ONLY.
ScientificMatcher::ScientificMatcher(
        const DecimalFormatSymbols& dfs,
        const Grouper& grouper)
        : fExponentSeparatorString(
                  dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY) {
}
|
||||
|
||||
// Tries to match an exponent (separator, optional sign, digits) at the start of the segment.
//
// @param segment The remaining input. On a successful exponent match its offset is advanced past
//                the exponent; in every other case the offset is left exactly where it was on entry.
// @param result  The number parsed so far. FLAG_HAS_EXPONENT is set when at least one exponent
//                digit is consumed; the exponent digits themselves are applied by fExponentMatcher.
// @param status  Passed through to the nested exponent digit matcher.
// @return false if the mantissa is missing or the separator does not match; true if the segment
//         ends inside or immediately after the separator (a longer input might still match);
//         otherwise whatever the nested digit matcher returns.
bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    // Only accept scientific notation after the mantissa.
    // Most places use result.hasNumber(), but we need a stronger condition here (i.e., exponent is
    // not well-defined after NaN or infinity).
    if (result.quantity.bogus) {
        return false;
    }

    // Remember where we started so that every failure path can restore the segment completely.
    const int initialOffset = segment.getOffset();

    // First match the scientific separator, and then match another number after it.
    const int separatorOverlap = segment.getCommonPrefixLength(fExponentSeparatorString);
    if (separatorOverlap != fExponentSeparatorString.length()) {
        // Either the segment ended part-way through the separator (partial match), or the
        // separator does not match at all.
        return separatorOverlap == segment.length();
    }

    // Full exponent separator match.
    // If nothing follows the separator, report a possible longer match WITHOUT consuming the
    // separator: no exponent has actually been matched yet.
    if (segment.length() == separatorOverlap) {
        return true;
    }
    segment.adjustOffset(separatorOverlap);

    // Allow a sign, and then try to match digits.
    int8_t exponentSign = 1;
    if (segment.matches(*unisets::get(unisets::MINUS_SIGN))) {
        exponentSign = -1;
        segment.adjustOffsetByCodePoint();
    } else if (segment.matches(*unisets::get(unisets::PLUS_SIGN))) {
        segment.adjustOffsetByCodePoint();
    }

    const int digitsOffset = segment.getOffset();
    const bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
    if (segment.getOffset() != digitsOffset) {
        // At least one exponent digit was matched.
        result.flags |= FLAG_HAS_EXPONENT;
    } else {
        // No exponent digits were matched; un-match the exponent separator AND any sign that was
        // consumed after it, so the segment is back at its original position.
        segment.adjustOffset(initialOffset - digitsOffset);
    }
    return digitsReturnValue;
}
|
||||
|
||||
// Returns a newly allocated set of the code points that can begin an exponent for this matcher.
// If the first code point of the exponent separator is in the shared SCIENTIFIC_LEAD set, a copy of
// that set is returned; otherwise a frozen single-element set holding just that code point is returned.
// NOTE(review): the result is heap-allocated on every call; presumably the caller takes ownership — confirm.
const UnicodeSet* ScientificMatcher::getLeadCodePoints() const {
    const UChar32 firstCp = fExponentSeparatorString.char32At(0);
    const UnicodeSet* scientificLeads = unisets::get(unisets::SCIENTIFIC_LEAD);

    if (!scientificLeads->contains(firstCp)) {
        // Locale-specific separator that is not covered by the shared set.
        UnicodeSet* singleton = new UnicodeSet();
        singleton->add(firstCp);
        singleton->freeze();
        return singleton;
    }

    return new UnicodeSet(*scientificLeads);
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT */
|
41
icu4c/source/i18n/numparse_scientific.h
Normal file
41
icu4c/source/i18n/numparse_scientific.h
Normal file
|
@ -0,0 +1,41 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
#ifndef __NUMPARSE_SCIENTIFIC_H__
|
||||
#define __NUMPARSE_SCIENTIFIC_H__
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_decimal.h"
|
||||
#include "unicode/numberformatter.h"
|
||||
|
||||
using icu::number::impl::Grouper;
|
||||
|
||||
U_NAMESPACE_BEGIN namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
|
||||
class ScientificMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
ScientificMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper);
|
||||
|
||||
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
||||
const UnicodeSet* getLeadCodePoints() const override;
|
||||
|
||||
private:
|
||||
UnicodeString fExponentSeparatorString;
|
||||
DecimalMatcher fExponentMatcher;
|
||||
};
|
||||
|
||||
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //__NUMPARSE_SCIENTIFIC_H__
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT */
|
|
@ -85,9 +85,9 @@ void NumberParserTest::testBasic() {
|
|||
// {3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
|
||||
// {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
|
||||
// {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
|
||||
// {3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
|
||||
// {3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
|
||||
// {3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
|
||||
{3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
|
||||
{3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
|
||||
{3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
|
||||
// {7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
|
||||
// {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
|
||||
// {3, u"📺1.23", u"📺0;📻0", 6, 1.23},
|
||||
|
|
Loading…
Add table
Reference in a new issue