mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 15:05:53 +00:00
ICU-1076 implement toPattern
X-SVN-Rev: 5379
This commit is contained in:
parent
3fe96feba6
commit
7864dddc43
4 changed files with 69 additions and 13 deletions
|
@ -7,6 +7,7 @@
|
|||
*/
|
||||
|
||||
#include "quant.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
Quantifier::Quantifier(UnicodeMatcher *adopted,
|
||||
uint32_t minCount, uint32_t maxCount) {
|
||||
|
@ -61,12 +62,48 @@ UMatchDegree Quantifier::matches(const Replaceable& text,
|
|||
return U_MISMATCH;
|
||||
}
|
||||
|
||||
static const int32_t POW10[] = {1, 10, 100, 1000, 10000, 100000, 1000000,
|
||||
10000000, 100000000, 1000000000};
|
||||
|
||||
void Quantifier::appendNumber(UnicodeString& result, int32_t n) {
|
||||
// assert(n >= 0);
|
||||
// assert(n < 1e10);
|
||||
UBool show = FALSE; // TRUE if we should display digits
|
||||
for (int32_t p=9; p>=0; --p) {
|
||||
int32_t d = n / POW10[p];
|
||||
n -= d * POW10[p];
|
||||
if (d != 0 || p == 0) {
|
||||
show = TRUE;
|
||||
}
|
||||
if (show) {
|
||||
result.append((UChar)(48+d));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement UnicodeMatcher
|
||||
*/
|
||||
UnicodeString& Quantifier::toPattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const {
|
||||
// TODO finish this
|
||||
matcher->toPattern(result, escapeUnprintable);
|
||||
if (minCount == 0) {
|
||||
if (maxCount == 1) {
|
||||
return result.append((UChar)63); /*?*/
|
||||
} else if (maxCount == MAX) {
|
||||
return result.append((UChar)42); /***/
|
||||
}
|
||||
// else fall through
|
||||
} else if (minCount == 1 && maxCount == MAX) {
|
||||
return result.append((UChar)43); /*+*/
|
||||
}
|
||||
result.append((UChar)123); /*{*/
|
||||
appendNumber(result, minCount);
|
||||
result.append((UChar)44); /*,*/
|
||||
if (maxCount != MAX) {
|
||||
appendNumber(result, maxCount);
|
||||
}
|
||||
result.append((UChar)125); /*}*/
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -14,6 +14,8 @@ class Quantifier : public UnicodeMatcher {
|
|||
|
||||
public:
|
||||
|
||||
enum { MAX = 0x7FFFFFFF };
|
||||
|
||||
Quantifier(UnicodeMatcher *adopted,
|
||||
uint32_t minCount, uint32_t maxCount);
|
||||
|
||||
|
@ -47,6 +49,8 @@ class Quantifier : public UnicodeMatcher {
|
|||
|
||||
private:
|
||||
|
||||
static void appendNumber(UnicodeString& result, int32_t n);
|
||||
|
||||
UnicodeMatcher* matcher; // owned
|
||||
|
||||
uint32_t minCount;
|
||||
|
|
|
@ -8,23 +8,21 @@
|
|||
|
||||
#include "strmatch.h"
|
||||
#include "rbt_data.h"
|
||||
#include "rbt_rule.h"
|
||||
|
||||
// Construct a matcher over a slice of theString.
//   theString  source text; extractBetween(start, limit, pattern) copies
//              the selected range into this object's pattern
//   start      start index handed to extractBetween
//   limit      limit index handed to extractBetween
//   isSeg      stored in isSegment; when set, toPattern() wraps its
//              output in parentheses
//   theData    rule data; kept by reference, so it must outlive this
//              object
StringMatcher::StringMatcher(const UnicodeString& theString,
                             int32_t start,
                             int32_t limit,
                             UBool isSeg,
                             const TransliterationRuleData& theData) :
    data(theData),
    isSegment(isSeg) {
    theString.extractBetween(start, limit, pattern);
}
|
||||
|
||||
// Construct a matcher whose pattern is a copy of the whole of theString.
//   theString  text copied into pattern
//   theData    rule data; kept by reference, so it must outlive this
//              object
// This overload takes no segment flag, so isSegment is set to FALSE
// explicitly rather than left indeterminate; toPattern() reads it.
StringMatcher::StringMatcher(const UnicodeString& theString,
                             const TransliterationRuleData& theData) :
    pattern(theString),
    data(theData),
    isSegment(FALSE) {
}
|
||||
|
||||
// Copy constructor: copies pattern and isSegment from o and binds data
// to the same TransliterationRuleData object that o refers to.
// The initializers are listed in member declaration order (pattern,
// data, isSegment), which is the order in which they actually run.
StringMatcher::StringMatcher(const StringMatcher& o) :
    pattern(o.pattern),
    data(o.data),
    isSegment(o.isSegment) {
}
|
||||
|
||||
|
@ -107,9 +105,26 @@ UMatchDegree StringMatcher::matches(const Replaceable& text,
|
|||
*/
|
||||
UnicodeString& StringMatcher::toPattern(UnicodeString& result,
                                        UBool escapeUnprintable) const {
    // str is scratch space for the pattern of each nested matcher;
    // quoteBuf is the quoting buffer threaded through every
    // appendToRule call and flushed at the end.
    UnicodeString str, quoteBuf;
    if (isSegment) {
        result.append((UChar)40); /*(*/
    }
    for (int32_t i=0; i<pattern.length(); ++i) {
        UChar keyChar = pattern.charAt(i);
        // A non-null lookup means keyChar stands in for a matcher whose
        // own pattern must be emitted in its place.
        const UnicodeMatcher* m = data.lookup(keyChar);
        if (m == 0) {
            TransliterationRule::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);
        } else {
            // Clear the scratch string first: a toPattern implementation
            // that appends to its argument would otherwise carry the
            // previous matcher's pattern into this one.
            str.truncate(0);
            TransliterationRule::appendToRule(result, m->toPattern(str, escapeUnprintable),
                                              TRUE, escapeUnprintable, quoteBuf);
        }
    }
    // Flush quoteBuf out to result.  For a segment the same call is
    // handed the closing ')' so that it lands after any flushed text
    // and is emitted exactly once; -1 is passed when there is no
    // character to add.
    // NOTE(review): relies on appendToRule writing a literal character
    // after flushing quoteBuf -- confirm against its definition.
    TransliterationRule::appendToRule(result, (UChar32)(isSegment?41/*)*/:-1),
                                      TRUE, escapeUnprintable, quoteBuf);
    return result;
}
|
||||
|
||||
|
|
|
@ -23,9 +23,7 @@ class StringMatcher : public UnicodeMatcher {
|
|||
StringMatcher(const UnicodeString& string,
|
||||
int32_t start,
|
||||
int32_t limit,
|
||||
const TransliterationRuleData& data);
|
||||
|
||||
StringMatcher(const UnicodeString& string,
|
||||
UBool isSegment,
|
||||
const TransliterationRuleData& data);
|
||||
|
||||
StringMatcher(const StringMatcher& o);
|
||||
|
@ -64,6 +62,8 @@ class StringMatcher : public UnicodeMatcher {
|
|||
UnicodeString pattern;
|
||||
|
||||
const TransliterationRuleData& data;
|
||||
|
||||
UBool isSegment;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue