mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-20250 make UnicodeSet(intprop=value) faster
- fastpath for UnicodeSet.add(new last range)
- fewer UnicodeSet memory allocations: initial internal list array, exponential array growth, allocate strings list/set only when first one is added
- faster CodePointTrie.getRange(): fewer calls to filter function
- revert UnicodeSet(intprop=value) from trie ranges to range starts + lookup
- cache per-int-prop range starts: fewer lookups
This commit is contained in:
parent
be7c5dbcb0
commit
98f9170004
16 changed files with 755 additions and 614 deletions
|
@ -23,6 +23,9 @@
|
|||
#include "umutex.h"
|
||||
#include "uprops.h"
|
||||
|
||||
using icu::LocalPointer;
|
||||
using icu::Normalizer2Factory;
|
||||
using icu::Normalizer2Impl;
|
||||
using icu::UInitOnce;
|
||||
using icu::UnicodeSet;
|
||||
|
||||
|
@ -30,11 +33,13 @@ namespace {
|
|||
|
||||
UBool U_CALLCONV characterproperties_cleanup();
|
||||
|
||||
constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;
|
||||
|
||||
struct Inclusion {
|
||||
UnicodeSet *fSet;
|
||||
UInitOnce fInitOnce;
|
||||
};
|
||||
Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions()
|
||||
Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()
|
||||
|
||||
UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};
|
||||
|
||||
|
@ -80,35 +85,22 @@ UBool U_CALLCONV characterproperties_cleanup() {
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
Reduce excessive reallocation, and make it easier to detect initialization problems.
|
||||
Usually you don't see smaller sets than this for Unicode 5.0.
|
||||
*/
|
||||
constexpr int32_t DEFAULT_INCLUSION_CAPACITY = 3072;
|
||||
|
||||
void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCode &errorCode) {
|
||||
void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
|
||||
// This function is invoked only via umtx_initOnce().
|
||||
// This function is a friend of class UnicodeSet.
|
||||
|
||||
U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
|
||||
if (src == UPROPS_SRC_NONE) {
|
||||
errorCode = U_INTERNAL_PROGRAM_ERROR;
|
||||
return;
|
||||
}
|
||||
UnicodeSet * &incl = gInclusions[src].fSet;
|
||||
U_ASSERT(incl == nullptr);
|
||||
U_ASSERT(gInclusions[src].fSet == nullptr);
|
||||
|
||||
incl = new UnicodeSet();
|
||||
if (incl == nullptr) {
|
||||
LocalPointer<UnicodeSet> incl(new UnicodeSet());
|
||||
if (incl.isNull()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
USetAdder sa = {
|
||||
(USet *)incl,
|
||||
(USet *)incl.getAlias(),
|
||||
_set_add,
|
||||
_set_addRange,
|
||||
_set_addString,
|
||||
|
@ -116,7 +108,6 @@ void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCo
|
|||
nullptr // don't need removeRange()
|
||||
};
|
||||
|
||||
incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, errorCode);
|
||||
switch(src) {
|
||||
case UPROPS_SRC_CHAR:
|
||||
uchar_addPropertyStarts(&sa, &errorCode);
|
||||
|
@ -183,12 +174,15 @@ void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCo
|
|||
}
|
||||
|
||||
if (U_FAILURE(errorCode)) {
|
||||
delete incl;
|
||||
incl = nullptr;
|
||||
return;
|
||||
}
|
||||
// Compact for caching
|
||||
if (incl->isBogus()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
// Compact for caching.
|
||||
incl->compact();
|
||||
gInclusions[src].fSet = incl.orphan();
|
||||
ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
|
||||
}
|
||||
|
||||
|
@ -199,15 +193,66 @@ const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorC
|
|||
return nullptr;
|
||||
}
|
||||
Inclusion &i = gInclusions[src];
|
||||
umtx_initOnce(i.fInitOnce, &CharacterProperties::initInclusion, src, errorCode);
|
||||
umtx_initOnce(i.fInitOnce, &initInclusion, src, errorCode);
|
||||
return i.fSet;
|
||||
}
|
||||
|
||||
void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
|
||||
// This function is invoked only via umtx_initOnce().
|
||||
U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
|
||||
int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
|
||||
U_ASSERT(gInclusions[inclIndex].fSet == nullptr);
|
||||
UPropertySource src = uprops_getSource(prop);
|
||||
const UnicodeSet *incl = getInclusionsForSource(src, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
LocalPointer<UnicodeSet> intPropIncl(new UnicodeSet(0, 0));
|
||||
if (intPropIncl.isNull()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
int32_t numRanges = incl->getRangeCount();
|
||||
int32_t prevValue = 0;
|
||||
for (int32_t i = 0; i < numRanges; ++i) {
|
||||
UChar32 rangeEnd = incl->getRangeEnd(i);
|
||||
for (UChar32 c = incl->getRangeStart(i); c <= rangeEnd; ++c) {
|
||||
// TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
|
||||
int32_t value = u_getIntPropertyValue(c, prop);
|
||||
if (value != prevValue) {
|
||||
intPropIncl->add(c);
|
||||
prevValue = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (intPropIncl->isBogus()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
// Compact for caching.
|
||||
intPropIncl->compact();
|
||||
gInclusions[inclIndex].fSet = intPropIncl.orphan();
|
||||
ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
const UnicodeSet *CharacterProperties::getInclusionsForProperty(
|
||||
UProperty prop, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return nullptr; }
|
||||
UPropertySource src = uprops_getSource(prop);
|
||||
return getInclusionsForSource(src, errorCode);
|
||||
if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
|
||||
int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
|
||||
Inclusion &i = gInclusions[inclIndex];
|
||||
umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode);
|
||||
return i.fSet;
|
||||
} else {
|
||||
UPropertySource src = uprops_getSource(prop);
|
||||
return getInclusionsForSource(src, errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -216,7 +261,7 @@ namespace {
|
|||
|
||||
UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return nullptr; }
|
||||
icu::LocalPointer<UnicodeSet> set(new UnicodeSet());
|
||||
LocalPointer<UnicodeSet> set(new UnicodeSet());
|
||||
if (set.isNull()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
|
|
|
@ -280,7 +280,7 @@ UChar32 getRange(const void *t, UChar32 start,
|
|||
int32_t prevI3Block = -1;
|
||||
int32_t prevBlock = -1;
|
||||
UChar32 c = start;
|
||||
uint32_t value;
|
||||
uint32_t trieValue, value;
|
||||
bool haveValue = false;
|
||||
do {
|
||||
int32_t i3Block;
|
||||
|
@ -319,6 +319,7 @@ UChar32 getRange(const void *t, UChar32 start,
|
|||
return c - 1;
|
||||
}
|
||||
} else {
|
||||
trieValue = trie->nullValue;
|
||||
value = nullValue;
|
||||
if (pValue != nullptr) { *pValue = nullValue; }
|
||||
haveValue = true;
|
||||
|
@ -357,6 +358,7 @@ UChar32 getRange(const void *t, UChar32 start,
|
|||
return c - 1;
|
||||
}
|
||||
} else {
|
||||
trieValue = trie->nullValue;
|
||||
value = nullValue;
|
||||
if (pValue != nullptr) { *pValue = nullValue; }
|
||||
haveValue = true;
|
||||
|
@ -364,23 +366,32 @@ UChar32 getRange(const void *t, UChar32 start,
|
|||
c = (c + dataBlockLength) & ~dataMask;
|
||||
} else {
|
||||
int32_t di = block + (c & dataMask);
|
||||
uint32_t value2 = getValue(trie->data, valueWidth, di);
|
||||
value2 = maybeFilterValue(value2, trie->nullValue, nullValue,
|
||||
filter, context);
|
||||
uint32_t trieValue2 = getValue(trie->data, valueWidth, di);
|
||||
if (haveValue) {
|
||||
if (value2 != value) {
|
||||
return c - 1;
|
||||
if (trieValue2 != trieValue) {
|
||||
if (filter == nullptr ||
|
||||
maybeFilterValue(trieValue2, trie->nullValue, nullValue,
|
||||
filter, context) != value) {
|
||||
return c - 1;
|
||||
}
|
||||
trieValue = trieValue2; // may or may not help
|
||||
}
|
||||
} else {
|
||||
value = value2;
|
||||
trieValue = trieValue2;
|
||||
value = maybeFilterValue(trieValue2, trie->nullValue, nullValue,
|
||||
filter, context);
|
||||
if (pValue != nullptr) { *pValue = value; }
|
||||
haveValue = true;
|
||||
}
|
||||
while ((++c & dataMask) != 0) {
|
||||
if (maybeFilterValue(getValue(trie->data, valueWidth, ++di),
|
||||
trie->nullValue, nullValue,
|
||||
filter, context) != value) {
|
||||
return c - 1;
|
||||
trieValue2 = getValue(trie->data, valueWidth, ++di);
|
||||
if (trieValue2 != trieValue) {
|
||||
if (filter == nullptr ||
|
||||
maybeFilterValue(trieValue2, trie->nullValue, nullValue,
|
||||
filter, context) != value) {
|
||||
return c - 1;
|
||||
}
|
||||
trieValue = trieValue2; // may or may not help
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -309,41 +309,56 @@ UChar32 MutableCodePointTrie::getRange(
|
|||
uint32_t nullValue = initialValue;
|
||||
if (filter != nullptr) { nullValue = filter(context, nullValue); }
|
||||
UChar32 c = start;
|
||||
uint32_t value;
|
||||
uint32_t trieValue, value;
|
||||
bool haveValue = false;
|
||||
int32_t i = c >> UCPTRIE_SHIFT_3;
|
||||
do {
|
||||
if (flags[i] == ALL_SAME) {
|
||||
uint32_t value2 = maybeFilterValue(index[i], initialValue, nullValue,
|
||||
filter, context);
|
||||
uint32_t trieValue2 = index[i];
|
||||
if (haveValue) {
|
||||
if (value2 != value) {
|
||||
return c - 1;
|
||||
if (trieValue2 != trieValue) {
|
||||
if (filter == nullptr ||
|
||||
maybeFilterValue(trieValue2, initialValue, nullValue,
|
||||
filter, context) != value) {
|
||||
return c - 1;
|
||||
}
|
||||
trieValue = trieValue2; // may or may not help
|
||||
}
|
||||
} else {
|
||||
value = value2;
|
||||
trieValue = trieValue2;
|
||||
value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
|
||||
if (pValue != nullptr) { *pValue = value; }
|
||||
haveValue = true;
|
||||
}
|
||||
c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
|
||||
} else /* MIXED */ {
|
||||
int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
|
||||
uint32_t value2 = maybeFilterValue(data[di], initialValue, nullValue,
|
||||
filter, context);
|
||||
uint32_t trieValue2 = data[di];
|
||||
if (haveValue) {
|
||||
if (value2 != value) {
|
||||
return c - 1;
|
||||
if (trieValue2 != trieValue) {
|
||||
if (filter == nullptr ||
|
||||
maybeFilterValue(trieValue2, initialValue, nullValue,
|
||||
filter, context) != value) {
|
||||
return c - 1;
|
||||
}
|
||||
trieValue = trieValue2; // may or may not help
|
||||
}
|
||||
} else {
|
||||
value = value2;
|
||||
trieValue = trieValue2;
|
||||
value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
|
||||
if (pValue != nullptr) { *pValue = value; }
|
||||
haveValue = true;
|
||||
}
|
||||
while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
|
||||
if (maybeFilterValue(data[++di], initialValue, nullValue,
|
||||
filter, context) != value) {
|
||||
return c - 1;
|
||||
trieValue2 = data[++di];
|
||||
if (trieValue2 != trieValue) {
|
||||
if (filter == nullptr ||
|
||||
maybeFilterValue(trieValue2, initialValue, nullValue,
|
||||
filter, context) != value) {
|
||||
return c - 1;
|
||||
}
|
||||
}
|
||||
trieValue = trieValue2; // may or may not help
|
||||
}
|
||||
}
|
||||
++i;
|
||||
|
|
|
@ -27,7 +27,6 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
// Forward Declarations.
|
||||
class BMPSet;
|
||||
class CharacterProperties;
|
||||
class ParsePosition;
|
||||
class RBBIRuleScanner;
|
||||
class SymbolTable;
|
||||
|
@ -276,14 +275,23 @@ class RuleCharacterIterator;
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
|
||||
private:
|
||||
/**
|
||||
* Enough for sets with few ranges.
|
||||
* For example, White_Space has 10 ranges, list length 21.
|
||||
*/
|
||||
static constexpr int32_t INITIAL_CAPACITY = 25;
|
||||
// fFlags constant
|
||||
static constexpr uint8_t kIsBogus = 1; // This set is bogus (i.e. not valid)
|
||||
|
||||
int32_t len; // length of list used; 0 <= len <= capacity
|
||||
int32_t capacity; // capacity of list
|
||||
UChar32* list; // MUST be terminated with HIGH
|
||||
BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
|
||||
UChar32* buffer; // internal buffer, may be NULL
|
||||
int32_t bufferCapacity; // capacity of buffer
|
||||
int32_t patLen;
|
||||
UChar32* list = stackList; // MUST be terminated with HIGH
|
||||
int32_t capacity = INITIAL_CAPACITY; // capacity of list
|
||||
int32_t len = 1; // length of list used; 1 <= len <= capacity
|
||||
uint8_t fFlags = 0; // Bit flag (see constants above)
|
||||
|
||||
BMPSet *bmpSet = nullptr; // The set is frozen iff either bmpSet or stringSpan is not NULL.
|
||||
UChar32* buffer = nullptr; // internal buffer, may be NULL
|
||||
int32_t bufferCapacity = 0; // capacity of buffer
|
||||
|
||||
/**
|
||||
* The pattern representation of this set. This may not be the
|
||||
|
@ -294,15 +302,19 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
|
|||
* indicating that toPattern() must generate a pattern
|
||||
* representation from the inversion list.
|
||||
*/
|
||||
char16_t *pat;
|
||||
UVector* strings; // maintained in sorted order
|
||||
UnicodeSetStringSpan *stringSpan;
|
||||
char16_t *pat = nullptr;
|
||||
int32_t patLen = 0;
|
||||
|
||||
UVector* strings = nullptr; // maintained in sorted order
|
||||
UnicodeSetStringSpan *stringSpan = nullptr;
|
||||
|
||||
/**
|
||||
* Initial list array.
|
||||
* Avoids some heap allocations, and list is never nullptr.
|
||||
* Increases the object size a bit.
|
||||
*/
|
||||
UChar32 stackList[INITIAL_CAPACITY];
|
||||
|
||||
private:
|
||||
enum { // constants
|
||||
kIsBogus = 1 // This set is bogus (i.e. not valid)
|
||||
};
|
||||
uint8_t fFlags; // Bit flag (see constants above)
|
||||
public:
|
||||
/**
|
||||
* Determine if this object contains a valid set.
|
||||
|
@ -1480,8 +1492,6 @@ private:
|
|||
|
||||
friend class USetAccess;
|
||||
|
||||
int32_t getStringCount() const;
|
||||
|
||||
const UnicodeString* getString(int32_t index) const;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
@ -1528,13 +1538,18 @@ private:
|
|||
// Implementation: Utility methods
|
||||
//----------------------------------------------------------------
|
||||
|
||||
void ensureCapacity(int32_t newLen, UErrorCode& ec);
|
||||
static int32_t nextCapacity(int32_t minCapacity);
|
||||
|
||||
void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
|
||||
bool ensureCapacity(int32_t newLen);
|
||||
|
||||
bool ensureBufferCapacity(int32_t newLen);
|
||||
|
||||
void swapBuffers(void);
|
||||
|
||||
UBool allocateStrings(UErrorCode &status);
|
||||
UBool hasStrings() const;
|
||||
int32_t stringsSize() const;
|
||||
UBool stringsContains(const UnicodeString &s) const;
|
||||
|
||||
UnicodeString& _toPattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const;
|
||||
|
@ -1614,7 +1629,6 @@ private:
|
|||
UnicodeString& rebuiltPat,
|
||||
UErrorCode& ec);
|
||||
|
||||
friend class CharacterProperties;
|
||||
static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
|
||||
|
||||
/**
|
||||
|
@ -1646,7 +1660,10 @@ private:
|
|||
/**
|
||||
* Set the new pattern to cache.
|
||||
*/
|
||||
void setPattern(const UnicodeString& newPat);
|
||||
void setPattern(const UnicodeString& newPat) {
|
||||
setPattern(newPat.getBuffer(), newPat.length());
|
||||
}
|
||||
void setPattern(const char16_t *newPat, int32_t newPatLen);
|
||||
/**
|
||||
* Release existing cached pattern.
|
||||
*/
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "unicode/parsepos.h"
|
||||
#include "unicode/symtable.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ruleiter.h"
|
||||
|
@ -53,11 +54,8 @@
|
|||
// LOW <= all valid values. ZERO for codepoints
|
||||
#define UNICODESET_LOW 0x000000
|
||||
|
||||
// initial storage. Must be >= 0
|
||||
#define START_EXTRA 16
|
||||
|
||||
// extra amount for growth. Must be >= 0
|
||||
#define GROW_EXTRA START_EXTRA
|
||||
/** Max list [0, 1, 2, ..., max code point, HIGH] */
|
||||
constexpr int32_t MAX_LENGTH = UNICODESET_HIGH + 1;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -137,6 +135,18 @@ static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
|
|||
return a.compare(b);
|
||||
}
|
||||
|
||||
UBool UnicodeSet::hasStrings() const {
|
||||
return strings != nullptr && !strings->isEmpty();
|
||||
}
|
||||
|
||||
int32_t UnicodeSet::stringsSize() const {
|
||||
return strings == nullptr ? 0 : strings->size();
|
||||
}
|
||||
|
||||
UBool UnicodeSet::stringsContains(const UnicodeString &s) const {
|
||||
return strings != nullptr && strings->contains((void*) &s);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Constructors &c
|
||||
//----------------------------------------------------------------
|
||||
|
@ -144,24 +154,8 @@ static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
|
|||
/**
|
||||
* Constructs an empty set.
|
||||
*/
|
||||
UnicodeSet::UnicodeSet() :
|
||||
len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
|
||||
fFlags(0)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
allocateStrings(status);
|
||||
if (U_FAILURE(status)) {
|
||||
setToBogus(); // If memory allocation failed, set to bogus state.
|
||||
return;
|
||||
}
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
if(list!=NULL){
|
||||
list[0] = UNICODESET_HIGH;
|
||||
} else { // If memory allocation failed, set to bogus state.
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
UnicodeSet::UnicodeSet() {
|
||||
list[0] = UNICODESET_HIGH;
|
||||
_dbgct(this);
|
||||
}
|
||||
|
||||
|
@ -172,89 +166,39 @@ UnicodeSet::UnicodeSet() :
|
|||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
*/
|
||||
UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
|
||||
len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
|
||||
fFlags(0)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
allocateStrings(status);
|
||||
if (U_FAILURE(status)) {
|
||||
setToBogus(); // If memory allocation failed, set to bogus state.
|
||||
return;
|
||||
}
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
if(list!=NULL){
|
||||
list[0] = UNICODESET_HIGH;
|
||||
complement(start, end);
|
||||
} else { // If memory allocation failed, set to bogus state.
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) {
|
||||
list[0] = UNICODESET_HIGH;
|
||||
add(start, end);
|
||||
_dbgct(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a set that is identical to the given UnicodeSet.
|
||||
*/
|
||||
UnicodeSet::UnicodeSet(const UnicodeSet& o) :
|
||||
UnicodeFilter(o),
|
||||
len(0), capacity(o.isFrozen() ? o.len : o.len + GROW_EXTRA), list(0),
|
||||
bmpSet(0),
|
||||
buffer(0), bufferCapacity(0),
|
||||
patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
|
||||
fFlags(0)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
allocateStrings(status);
|
||||
if (U_FAILURE(status)) {
|
||||
setToBogus(); // If memory allocation failed, set to bogus state.
|
||||
return;
|
||||
}
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
if(list!=NULL){
|
||||
*this = o;
|
||||
} else { // If memory allocation failed, set to bogus state.
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
UnicodeSet::UnicodeSet(const UnicodeSet& o) : UnicodeFilter(o) {
|
||||
*this = o;
|
||||
_dbgct(this);
|
||||
}
|
||||
|
||||
// Copy-construct as thawed.
|
||||
UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) :
|
||||
UnicodeFilter(o),
|
||||
len(0), capacity(o.len + GROW_EXTRA), list(0),
|
||||
bmpSet(0),
|
||||
buffer(0), bufferCapacity(0),
|
||||
patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
|
||||
fFlags(0)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
allocateStrings(status);
|
||||
if (U_FAILURE(status)) {
|
||||
setToBogus(); // If memory allocation failed, set to bogus state.
|
||||
return;
|
||||
}
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
if(list!=NULL){
|
||||
UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UnicodeFilter(o) {
|
||||
if (ensureCapacity(o.len)) {
|
||||
// *this = o except for bmpSet and stringSpan
|
||||
len = o.len;
|
||||
uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
|
||||
if (strings != NULL && o.strings != NULL) {
|
||||
strings->assign(*o.strings, cloneUnicodeString, status);
|
||||
} else { // Invalid strings.
|
||||
setToBogus();
|
||||
return;
|
||||
if (o.hasStrings()) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if (!allocateStrings(status) ||
|
||||
(strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (o.pat) {
|
||||
setPattern(UnicodeString(o.pat, o.patLen));
|
||||
setPattern(o.pat, o.patLen);
|
||||
}
|
||||
} else { // If memory allocation failed, set to bogus state.
|
||||
setToBogus();
|
||||
return;
|
||||
_dbgct(this);
|
||||
}
|
||||
_dbgct(this);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -262,9 +206,11 @@ UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) :
|
|||
*/
|
||||
UnicodeSet::~UnicodeSet() {
|
||||
_dbgdt(this); // first!
|
||||
uprv_free(list);
|
||||
if (list != stackList) {
|
||||
uprv_free(list);
|
||||
}
|
||||
delete bmpSet;
|
||||
if (buffer) {
|
||||
if (buffer != stackList) {
|
||||
uprv_free(buffer);
|
||||
}
|
||||
delete strings;
|
||||
|
@ -290,32 +236,30 @@ UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) {
|
|||
setToBogus();
|
||||
return *this;
|
||||
}
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
ensureCapacity(o.len, ec);
|
||||
if (U_FAILURE(ec)) {
|
||||
if (!ensureCapacity(o.len)) {
|
||||
// ensureCapacity will mark the UnicodeSet as Bogus if OOM failure happens.
|
||||
return *this;
|
||||
}
|
||||
len = o.len;
|
||||
uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
|
||||
if (o.bmpSet == NULL || asThawed) {
|
||||
bmpSet = NULL;
|
||||
} else {
|
||||
if (o.bmpSet != nullptr && !asThawed) {
|
||||
bmpSet = new BMPSet(*o.bmpSet, list, len);
|
||||
if (bmpSet == NULL) { // Check for memory allocation error.
|
||||
setToBogus();
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
if (strings != NULL && o.strings != NULL) {
|
||||
strings->assign(*o.strings, cloneUnicodeString, ec);
|
||||
} else { // Invalid strings.
|
||||
setToBogus();
|
||||
return *this;
|
||||
if (o.hasStrings()) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if ((strings == nullptr && !allocateStrings(status)) ||
|
||||
(strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
|
||||
setToBogus();
|
||||
return *this;
|
||||
}
|
||||
} else if (hasStrings()) {
|
||||
strings->removeAllElements();
|
||||
}
|
||||
if (o.stringSpan == NULL || asThawed) {
|
||||
stringSpan = NULL;
|
||||
} else {
|
||||
if (o.stringSpan != nullptr && !asThawed) {
|
||||
stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
|
||||
if (stringSpan == NULL) { // Check for memory allocation error.
|
||||
setToBogus();
|
||||
|
@ -324,7 +268,7 @@ UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) {
|
|||
}
|
||||
releasePattern();
|
||||
if (o.pat) {
|
||||
setPattern(UnicodeString(o.pat, o.patLen));
|
||||
setPattern(o.pat, o.patLen);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
@ -357,7 +301,8 @@ UBool UnicodeSet::operator==(const UnicodeSet& o) const {
|
|||
for (int32_t i = 0; i < len; ++i) {
|
||||
if (list[i] != o.list[i]) return FALSE;
|
||||
}
|
||||
if (*strings != *o.strings) return FALSE;
|
||||
if (hasStrings() != o.hasStrings()) { return FALSE; }
|
||||
if (hasStrings() && *strings != *o.strings) return FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
@ -393,7 +338,7 @@ int32_t UnicodeSet::size(void) const {
|
|||
for (int32_t i = 0; i < count; ++i) {
|
||||
n += getRangeEnd(i) - getRangeStart(i) + 1;
|
||||
}
|
||||
return n + strings->size();
|
||||
return n + stringsSize();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -402,7 +347,7 @@ int32_t UnicodeSet::size(void) const {
|
|||
* @return <tt>true</tt> if this set contains no elements.
|
||||
*/
|
||||
UBool UnicodeSet::isEmpty(void) const {
|
||||
return len == 1 && strings->size() == 0;
|
||||
return len == 1 && !hasStrings();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -502,7 +447,7 @@ UBool UnicodeSet::contains(const UnicodeString& s) const {
|
|||
if (s.length() == 0) return FALSE;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
return strings->contains((void*) &s);
|
||||
return stringsContains(s);
|
||||
} else {
|
||||
return contains((UChar32) cp);
|
||||
}
|
||||
|
@ -524,8 +469,7 @@ UBool UnicodeSet::containsAll(const UnicodeSet& c) const {
|
|||
return FALSE;
|
||||
}
|
||||
}
|
||||
if (!strings->containsAll(*c.strings)) return FALSE;
|
||||
return TRUE;
|
||||
return !c.hasStrings() || (strings != nullptr && strings->containsAll(*c.strings));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -571,8 +515,7 @@ UBool UnicodeSet::containsNone(const UnicodeSet& c) const {
|
|||
return FALSE;
|
||||
}
|
||||
}
|
||||
if (!strings->containsNone(*c.strings)) return FALSE;
|
||||
return TRUE;
|
||||
return strings == nullptr || !c.hasStrings() || strings->containsNone(*c.strings);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -613,7 +556,7 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
|
|||
return TRUE;
|
||||
}
|
||||
}
|
||||
if (strings->size() != 0) {
|
||||
if (hasStrings()) {
|
||||
for (i=0; i<strings->size(); ++i) {
|
||||
const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
|
||||
//if (s.length() == 0) {
|
||||
|
@ -648,7 +591,7 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
|
|||
return U_MISMATCH;
|
||||
}
|
||||
} else {
|
||||
if (strings->size() != 0) { // try strings first
|
||||
if (hasStrings()) { // try strings first
|
||||
|
||||
// might separate forward and backward loops later
|
||||
// for now they are combined
|
||||
|
@ -849,7 +792,39 @@ UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
|
|||
*/
|
||||
UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) {
|
||||
if (pinCodePoint(start) < pinCodePoint(end)) {
|
||||
UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
|
||||
UChar32 limit = end + 1;
|
||||
// Fast path for adding a new range after the last one.
|
||||
// Odd list length: [..., lastStart, lastLimit, HIGH]
|
||||
if ((len & 1) != 0) {
|
||||
// If the list is empty, set lastLimit low enough to not be adjacent to 0.
|
||||
UChar32 lastLimit = len == 1 ? -2 : list[len - 2];
|
||||
if (lastLimit <= start && !isFrozen() && !isBogus()) {
|
||||
if (lastLimit == start) {
|
||||
// Extend the last range.
|
||||
list[len - 2] = limit;
|
||||
if (limit == UNICODESET_HIGH) {
|
||||
--len;
|
||||
}
|
||||
} else {
|
||||
list[len - 1] = start;
|
||||
if (limit < UNICODESET_HIGH) {
|
||||
if (ensureCapacity(len + 2)) {
|
||||
list[len++] = limit;
|
||||
list[len++] = UNICODESET_HIGH;
|
||||
}
|
||||
} else { // limit == UNICODESET_HIGH
|
||||
if (ensureCapacity(len + 1)) {
|
||||
list[len++] = UNICODESET_HIGH;
|
||||
}
|
||||
}
|
||||
}
|
||||
releasePattern();
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
// This is slow. Could be much faster using findCodePoint(start)
|
||||
// and modifying the list, dealing with adjacent & overlapping ranges.
|
||||
UChar32 range[3] = { start, limit, UNICODESET_HIGH };
|
||||
add(range, 2, 0);
|
||||
} else if (start == end) {
|
||||
add(start);
|
||||
|
@ -918,9 +893,7 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
|
|||
list[i] = c;
|
||||
// if we touched the HIGH mark, then add a new one
|
||||
if (c == (UNICODESET_HIGH - 1)) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ensureCapacity(len+1, status);
|
||||
if (U_FAILURE(status)) {
|
||||
if (!ensureCapacity(len+1)) {
|
||||
// ensureCapacity will mark the object as Bogus if OOM failure happens.
|
||||
return *this;
|
||||
}
|
||||
|
@ -964,21 +937,13 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
|
|||
// ^
|
||||
// list[i]
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ensureCapacity(len+2, status);
|
||||
if (U_FAILURE(status)) {
|
||||
if (!ensureCapacity(len+2)) {
|
||||
// ensureCapacity will mark the object as Bogus if OOM failure happens.
|
||||
return *this;
|
||||
}
|
||||
|
||||
//for (int32_t k=len-1; k>=i; --k) {
|
||||
// list[k+2] = list[k];
|
||||
//}
|
||||
UChar32* src = list + len;
|
||||
UChar32* dst = src + 2;
|
||||
UChar32* srclimit = list + i;
|
||||
while (src > srclimit) *(--dst) = *(--src);
|
||||
|
||||
UChar32 *p = list + i;
|
||||
uprv_memmove(p + 2, p, (len - i) * sizeof(*p));
|
||||
list[i] = c;
|
||||
list[i+1] = c+1;
|
||||
len += 2;
|
||||
|
@ -1014,7 +979,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
|
|||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (!strings->contains((void*) &s)) {
|
||||
if (!stringsContains(s)) {
|
||||
_add(s);
|
||||
releasePattern();
|
||||
}
|
||||
|
@ -1033,12 +998,16 @@ void UnicodeSet::_add(const UnicodeString& s) {
|
|||
if (isFrozen() || isBogus()) {
|
||||
return;
|
||||
}
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
if (strings == nullptr && !allocateStrings(ec)) {
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
UnicodeString* t = new UnicodeString(s);
|
||||
if (t == NULL) { // Check for memory allocation error.
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
strings->sortedInsert(t, compareUnicodeString, ec);
|
||||
if (U_FAILURE(ec)) {
|
||||
setToBogus();
|
||||
|
@ -1121,7 +1090,10 @@ UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) {
|
|||
}
|
||||
|
||||
UnicodeSet& UnicodeSet::removeAllStrings() {
|
||||
strings->removeAllElements();
|
||||
if (!isFrozen() && hasStrings()) {
|
||||
strings->removeAllElements();
|
||||
releasePattern();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -1217,8 +1189,9 @@ UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
|
|||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
strings->removeElement((void*) &s);
|
||||
releasePattern();
|
||||
if (strings != nullptr && strings->removeElement((void*) &s)) {
|
||||
releasePattern();
|
||||
}
|
||||
} else {
|
||||
remove((UChar32)cp, (UChar32)cp);
|
||||
}
|
||||
|
@ -1260,24 +1233,17 @@ UnicodeSet& UnicodeSet::complement(void) {
|
|||
if (isFrozen() || isBogus()) {
|
||||
return *this;
|
||||
}
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if (list[0] == UNICODESET_LOW) {
|
||||
ensureBufferCapacity(len-1, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return *this;
|
||||
}
|
||||
uprv_memcpy(buffer, list + 1, (size_t)(len-1)*sizeof(UChar32));
|
||||
uprv_memmove(list, list + 1, (size_t)(len-1)*sizeof(UChar32));
|
||||
--len;
|
||||
} else {
|
||||
ensureBufferCapacity(len+1, status);
|
||||
if (U_FAILURE(status)) {
|
||||
if (!ensureCapacity(len+1)) {
|
||||
return *this;
|
||||
}
|
||||
uprv_memcpy(buffer + 1, list, (size_t)len*sizeof(UChar32));
|
||||
buffer[0] = UNICODESET_LOW;
|
||||
uprv_memmove(list + 1, list, (size_t)len*sizeof(UChar32));
|
||||
list[0] = UNICODESET_LOW;
|
||||
++len;
|
||||
}
|
||||
swapBuffers();
|
||||
releasePattern();
|
||||
return *this;
|
||||
}
|
||||
|
@ -1294,7 +1260,7 @@ UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
|
|||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (strings->contains((void*) &s)) {
|
||||
if (stringsContains(s)) {
|
||||
strings->removeElement((void*) &s);
|
||||
} else {
|
||||
_add(s);
|
||||
|
@ -1325,7 +1291,7 @@ UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) {
|
|||
if ( c.strings!=NULL ) {
|
||||
for (int32_t i=0; i<c.strings->size(); ++i) {
|
||||
const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i);
|
||||
if (!strings->contains((void*) s)) {
|
||||
if (!stringsContains(*s)) {
|
||||
_add(*s);
|
||||
}
|
||||
}
|
||||
|
@ -1347,7 +1313,13 @@ UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
|
|||
return *this;
|
||||
}
|
||||
retain(c.list, c.len, 0);
|
||||
strings->retainAll(*c.strings);
|
||||
if (hasStrings()) {
|
||||
if (!c.hasStrings()) {
|
||||
strings->removeAllElements();
|
||||
} else {
|
||||
strings->retainAll(*c.strings);
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -1365,7 +1337,9 @@ UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
|
|||
return *this;
|
||||
}
|
||||
retain(c.list, c.len, 2);
|
||||
strings->removeAll(*c.strings);
|
||||
if (hasStrings() && c.hasStrings()) {
|
||||
strings->removeAll(*c.strings);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -1383,10 +1357,12 @@ UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
|
|||
}
|
||||
exclusiveOr(c.list, c.len, 0);
|
||||
|
||||
for (int32_t i=0; i<c.strings->size(); ++i) {
|
||||
void* e = c.strings->elementAt(i);
|
||||
if (!strings->removeElement(e)) {
|
||||
_add(*(const UnicodeString*)e);
|
||||
if (c.strings != nullptr) {
|
||||
for (int32_t i=0; i<c.strings->size(); ++i) {
|
||||
void* e = c.strings->elementAt(i);
|
||||
if (strings == nullptr || !strings->removeElement(e)) {
|
||||
_add(*(const UnicodeString*)e);
|
||||
}
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
|
@ -1400,18 +1376,14 @@ UnicodeSet& UnicodeSet::clear(void) {
|
|||
if (isFrozen()) {
|
||||
return *this;
|
||||
}
|
||||
if (list != NULL) {
|
||||
list[0] = UNICODESET_HIGH;
|
||||
}
|
||||
list[0] = UNICODESET_HIGH;
|
||||
len = 1;
|
||||
releasePattern();
|
||||
if (strings != NULL) {
|
||||
strings->removeAllElements();
|
||||
}
|
||||
if (list != NULL && strings != NULL) {
|
||||
// Remove bogus
|
||||
fFlags = 0;
|
||||
}
|
||||
// Remove bogus
|
||||
fFlags = 0;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -1445,10 +1417,6 @@ UChar32 UnicodeSet::getRangeEnd(int32_t index) const {
|
|||
return list[index*2 + 1] - 1;
|
||||
}
|
||||
|
||||
int32_t UnicodeSet::getStringCount() const {
|
||||
return strings->size();
|
||||
}
|
||||
|
||||
/**
 * Returns the string stored at the given position of this set's string list.
 *
 * The string list is allocated lazily and released again by compact() when
 * it is empty, so it can legitimately be absent. Guard against that here
 * instead of dereferencing a null list.
 *
 * @param index zero-based position in the string list
 * @return the string at that position (owned by this set), or nullptr when
 *         this set has no string list
 */
const UnicodeString* UnicodeSet::getString(int32_t index) const {
    if (strings == nullptr) {
        // No strings were ever added (or compact() dropped the empty list).
        return nullptr;
    }
    return (const UnicodeString*) strings->elementAt(index);
}
|
||||
|
@ -1462,22 +1430,32 @@ UnicodeSet& UnicodeSet::compact() {
|
|||
return *this;
|
||||
}
|
||||
// Delete buffer first to defragment memory less.
|
||||
if (buffer != NULL) {
|
||||
if (buffer != stackList) {
|
||||
uprv_free(buffer);
|
||||
buffer = NULL;
|
||||
bufferCapacity = 0;
|
||||
}
|
||||
if (len < capacity) {
|
||||
// Make the capacity equal to len or 1.
|
||||
// We don't want to realloc of 0 size.
|
||||
int32_t newCapacity = len + (len == 0);
|
||||
UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * newCapacity);
|
||||
if (list == stackList) {
|
||||
// pass
|
||||
} else if (len <= INITIAL_CAPACITY) {
|
||||
uprv_memcpy(stackList, list, len * sizeof(UChar32));
|
||||
uprv_free(list);
|
||||
list = stackList;
|
||||
capacity = INITIAL_CAPACITY;
|
||||
} else if ((len + 7) < capacity) {
|
||||
// If we have more than a little unused capacity, shrink it to len.
|
||||
UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * len);
|
||||
if (temp) {
|
||||
list = temp;
|
||||
capacity = newCapacity;
|
||||
capacity = len;
|
||||
}
|
||||
// else what the heck happened?! We allocated less memory!
|
||||
// Oh well. We'll keep our original array.
|
||||
}
|
||||
if (strings != nullptr && strings->isEmpty()) {
|
||||
delete strings;
|
||||
strings = nullptr;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -1488,10 +1466,8 @@ UnicodeSet& UnicodeSet::compact() {
|
|||
/**
|
||||
* Deserialize constructor.
|
||||
*/
|
||||
UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization, UErrorCode &ec)
|
||||
: len(1), capacity(1+START_EXTRA), list(0), bmpSet(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
|
||||
fFlags(0) {
|
||||
UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization,
|
||||
UErrorCode &ec) {
|
||||
|
||||
if(U_FAILURE(ec)) {
|
||||
setToBogus();
|
||||
|
@ -1506,24 +1482,15 @@ UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization se
|
|||
return;
|
||||
}
|
||||
|
||||
allocateStrings(ec);
|
||||
if (U_FAILURE(ec)) {
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
|
||||
// bmp?
|
||||
int32_t headerSize = ((data[0]&0x8000)) ?2:1;
|
||||
int32_t bmpLength = (headerSize==1)?data[0]:data[1];
|
||||
|
||||
len = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength;
|
||||
int32_t newLength = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength;
|
||||
#ifdef DEBUG_SERIALIZE
|
||||
printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,len, data[0],data[1],data[2],data[3]);
|
||||
printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,newLength, data[0],data[1],data[2],data[3]);
|
||||
#endif
|
||||
capacity = len+1;
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
if(!list || U_FAILURE(ec)) {
|
||||
setToBogus();
|
||||
if(!ensureCapacity(newLength + 1)) { // +1 for HIGH
|
||||
return;
|
||||
}
|
||||
// copy bmp
|
||||
|
@ -1535,15 +1502,18 @@ UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization se
|
|||
#endif
|
||||
}
|
||||
// copy smp
|
||||
for(i=bmpLength;i<len;i++) {
|
||||
for(i=bmpLength;i<newLength;i++) {
|
||||
list[i] = ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+0] << 16) +
|
||||
((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+1]);
|
||||
#ifdef DEBUG_SERIALIZE
|
||||
printf("<<32@%d+[%d] %lX\n", headerSize+bmpLength+i, i, list[i]);
|
||||
#endif
|
||||
}
|
||||
// terminator
|
||||
list[len++]=UNICODESET_HIGH;
|
||||
U_ASSERT(i == newLength);
|
||||
if (i == 0 || list[i - 1] != UNICODESET_HIGH) {
|
||||
list[i++] = UNICODESET_HIGH;
|
||||
}
|
||||
len = i;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1664,33 +1634,65 @@ UBool UnicodeSet::allocateStrings(UErrorCode &status) {
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
void UnicodeSet::ensureCapacity(int32_t newLen, UErrorCode& ec) {
|
||||
if (newLen <= capacity) {
|
||||
return;
|
||||
int32_t UnicodeSet::nextCapacity(int32_t minCapacity) {
|
||||
// Grow exponentially to reduce the frequency of allocations.
|
||||
if (minCapacity < INITIAL_CAPACITY) {
|
||||
return minCapacity + INITIAL_CAPACITY;
|
||||
} else if (minCapacity <= 2500) {
|
||||
return 5 * minCapacity;
|
||||
} else {
|
||||
int32_t newCapacity = 2 * minCapacity;
|
||||
if (newCapacity > MAX_LENGTH) {
|
||||
newCapacity = MAX_LENGTH;
|
||||
}
|
||||
return newCapacity;
|
||||
}
|
||||
UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * (newLen + GROW_EXTRA));
|
||||
if (temp == NULL) {
|
||||
ec = U_MEMORY_ALLOCATION_ERROR;
|
||||
setToBogus(); // set the object to bogus state if an OOM failure occurred.
|
||||
return;
|
||||
}
|
||||
list = temp;
|
||||
capacity = newLen + GROW_EXTRA;
|
||||
// else we keep the original contents on the memory failure.
|
||||
}
|
||||
|
||||
void UnicodeSet::ensureBufferCapacity(int32_t newLen, UErrorCode& ec) {
|
||||
if (buffer != NULL && newLen <= bufferCapacity)
|
||||
return;
|
||||
UChar32* temp = (UChar32*) uprv_realloc(buffer, sizeof(UChar32) * (newLen + GROW_EXTRA));
|
||||
bool UnicodeSet::ensureCapacity(int32_t newLen) {
|
||||
if (newLen > MAX_LENGTH) {
|
||||
newLen = MAX_LENGTH;
|
||||
}
|
||||
if (newLen <= capacity) {
|
||||
return true;
|
||||
}
|
||||
int32_t newCapacity = nextCapacity(newLen);
|
||||
UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32));
|
||||
if (temp == NULL) {
|
||||
setToBogus(); // set the object to bogus state if an OOM failure occurred.
|
||||
return false;
|
||||
}
|
||||
// Copy only the actual contents.
|
||||
uprv_memcpy(temp, list, len * sizeof(UChar32));
|
||||
if (list != stackList) {
|
||||
uprv_free(list);
|
||||
}
|
||||
list = temp;
|
||||
capacity = newCapacity;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool UnicodeSet::ensureBufferCapacity(int32_t newLen) {
|
||||
if (newLen > MAX_LENGTH) {
|
||||
newLen = MAX_LENGTH;
|
||||
}
|
||||
if (newLen <= bufferCapacity) {
|
||||
return true;
|
||||
}
|
||||
int32_t newCapacity = nextCapacity(newLen);
|
||||
UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32));
|
||||
if (temp == NULL) {
|
||||
ec = U_MEMORY_ALLOCATION_ERROR;
|
||||
setToBogus();
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
// The buffer has no contents to be copied.
|
||||
// It is always filled from scratch after this call.
|
||||
if (buffer != stackList) {
|
||||
uprv_free(buffer);
|
||||
}
|
||||
buffer = temp;
|
||||
bufferCapacity = newLen + GROW_EXTRA;
|
||||
// else we keep the original contents on the memory failure.
|
||||
bufferCapacity = newCapacity;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1727,9 +1729,7 @@ void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t pola
|
|||
if (isFrozen() || isBogus()) {
|
||||
return;
|
||||
}
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ensureBufferCapacity(len + otherLen, status);
|
||||
if (U_FAILURE(status)) {
|
||||
if (!ensureBufferCapacity(len + otherLen)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1777,9 +1777,7 @@ void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
|
|||
if (isFrozen() || isBogus() || other==NULL) {
|
||||
return;
|
||||
}
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ensureBufferCapacity(len + otherLen, status);
|
||||
if (U_FAILURE(status)) {
|
||||
if (!ensureBufferCapacity(len + otherLen)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1890,9 +1888,7 @@ void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity)
|
|||
if (isFrozen() || isBogus()) {
|
||||
return;
|
||||
}
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ensureBufferCapacity(len + otherLen, status);
|
||||
if (U_FAILURE(status)) {
|
||||
if (!ensureBufferCapacity(len + otherLen)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2138,12 +2134,14 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|||
}
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i<strings->size(); ++i) {
|
||||
result.append(OPEN_BRACE);
|
||||
_appendToPat(result,
|
||||
*(const UnicodeString*) strings->elementAt(i),
|
||||
escapeUnprintable);
|
||||
result.append(CLOSE_BRACE);
|
||||
if (strings != nullptr) {
|
||||
for (int32_t i = 0; i<strings->size(); ++i) {
|
||||
result.append(OPEN_BRACE);
|
||||
_appendToPat(result,
|
||||
*(const UnicodeString*) strings->elementAt(i),
|
||||
escapeUnprintable);
|
||||
result.append(CLOSE_BRACE);
|
||||
}
|
||||
}
|
||||
return result.append(SET_CLOSE);
|
||||
}
|
||||
|
@ -2162,13 +2160,12 @@ void UnicodeSet::releasePattern() {
|
|||
/**
|
||||
* Set the new pattern to cache.
|
||||
*/
|
||||
void UnicodeSet::setPattern(const UnicodeString& newPat) {
|
||||
void UnicodeSet::setPattern(const char16_t *newPat, int32_t newPatLen) {
|
||||
releasePattern();
|
||||
int32_t newPatLen = newPat.length();
|
||||
pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar));
|
||||
if (pat) {
|
||||
patLen = newPatLen;
|
||||
newPat.extractBetween(0, patLen, pat);
|
||||
u_memcpy(pat, newPat, patLen);
|
||||
pat[patLen] = 0;
|
||||
}
|
||||
// else we don't care if malloc failed. This was just a nice cache.
|
||||
|
@ -2177,30 +2174,15 @@ void UnicodeSet::setPattern(const UnicodeString& newPat) {
|
|||
|
||||
UnicodeFunctor *UnicodeSet::freeze() {
|
||||
if(!isFrozen() && !isBogus()) {
|
||||
// Do most of what compact() does before freezing because
|
||||
// compact() will not work when the set is frozen.
|
||||
// Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA).
|
||||
|
||||
// Delete buffer first to defragment memory less.
|
||||
if (buffer != NULL) {
|
||||
uprv_free(buffer);
|
||||
buffer = NULL;
|
||||
}
|
||||
if (capacity > (len + GROW_EXTRA)) {
|
||||
// Make the capacity equal to len or 1.
|
||||
// We don't want to realloc of 0 size.
|
||||
capacity = len + (len == 0);
|
||||
list = (UChar32*) uprv_realloc(list, sizeof(UChar32) * capacity);
|
||||
if (list == NULL) { // Check for memory allocation error.
|
||||
setToBogus();
|
||||
return this;
|
||||
}
|
||||
}
|
||||
compact();
|
||||
|
||||
// Optimize contains() and span() and similar functions.
|
||||
if (!strings->isEmpty()) {
|
||||
if (hasStrings()) {
|
||||
stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
|
||||
if (stringSpan != NULL && !stringSpan->needsStringSpanUTF16()) {
|
||||
if (stringSpan == nullptr) {
|
||||
setToBogus();
|
||||
return this;
|
||||
} else if (!stringSpan->needsStringSpanUTF16()) {
|
||||
// All strings are irrelevant for span() etc. because
|
||||
// all of each string's code points are contained in this set.
|
||||
// Do not check needsStringSpanUTF8() because UTF-8 has at most as
|
||||
|
@ -2233,7 +2215,7 @@ int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanC
|
|||
}
|
||||
if(stringSpan!=NULL) {
|
||||
return stringSpan->span(s, length, spanCondition);
|
||||
} else if(!strings->isEmpty()) {
|
||||
} else if(hasStrings()) {
|
||||
uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
|
||||
UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED :
|
||||
UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
|
||||
|
@ -2270,7 +2252,7 @@ int32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition s
|
|||
}
|
||||
if(stringSpan!=NULL) {
|
||||
return stringSpan->spanBack(s, length, spanCondition);
|
||||
} else if(!strings->isEmpty()) {
|
||||
} else if(hasStrings()) {
|
||||
uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
|
||||
UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED :
|
||||
UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
|
||||
|
@ -2308,7 +2290,7 @@ int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition sp
|
|||
}
|
||||
if(stringSpan!=NULL) {
|
||||
return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition);
|
||||
} else if(!strings->isEmpty()) {
|
||||
} else if(hasStrings()) {
|
||||
uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
|
||||
UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED :
|
||||
UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
|
||||
|
@ -2346,7 +2328,7 @@ int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanConditio
|
|||
}
|
||||
if(stringSpan!=NULL) {
|
||||
return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition);
|
||||
} else if(!strings->isEmpty()) {
|
||||
} else if(hasStrings()) {
|
||||
uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
|
||||
UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED :
|
||||
UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
|
||||
|
|
|
@ -31,10 +31,6 @@
|
|||
#include "util.h"
|
||||
#include "uvector.h"
|
||||
|
||||
// initial storage. Must be >= 0
|
||||
// *** same as in uniset.cpp ! ***
|
||||
#define START_EXTRA 16
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// TODO memory debugging provided inside uniset.cpp
|
||||
|
@ -49,42 +45,16 @@ U_NAMESPACE_BEGIN
|
|||
UnicodeSet::UnicodeSet(const UnicodeString& pattern,
|
||||
uint32_t options,
|
||||
const SymbolTable* symbols,
|
||||
UErrorCode& status) :
|
||||
len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
|
||||
fFlags(0)
|
||||
{
|
||||
if(U_SUCCESS(status)){
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
/* test for NULL */
|
||||
if(list == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}else{
|
||||
allocateStrings(status);
|
||||
applyPattern(pattern, options, symbols, status);
|
||||
}
|
||||
}
|
||||
UErrorCode& status) {
|
||||
applyPattern(pattern, options, symbols, status);
|
||||
_dbgct(this);
|
||||
}
|
||||
|
||||
UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
|
||||
uint32_t options,
|
||||
const SymbolTable* symbols,
|
||||
UErrorCode& status) :
|
||||
len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
|
||||
fFlags(0)
|
||||
{
|
||||
if(U_SUCCESS(status)){
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
/* test for NULL */
|
||||
if(list == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}else{
|
||||
allocateStrings(status);
|
||||
applyPattern(pattern, pos, options, symbols, status);
|
||||
}
|
||||
}
|
||||
UErrorCode& status) {
|
||||
applyPattern(pattern, pos, options, symbols, status);
|
||||
_dbgct(this);
|
||||
}
|
||||
|
||||
|
@ -199,7 +169,7 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
|
|||
// start with input set to guarantee inclusion
|
||||
// USET_CASE: remove strings because the strings will actually be reduced (folded);
|
||||
// therefore, start with no strings and add only those needed
|
||||
if (attribute & USET_CASE_INSENSITIVE) {
|
||||
if ((attribute & USET_CASE_INSENSITIVE) && foldSet.hasStrings()) {
|
||||
foldSet.strings->removeAllElements();
|
||||
}
|
||||
|
||||
|
@ -234,7 +204,7 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (strings != NULL && strings->size() > 0) {
|
||||
if (hasStrings()) {
|
||||
if (attribute & USET_CASE_INSENSITIVE) {
|
||||
for (int32_t j=0; j<strings->size(); ++j) {
|
||||
str = *(const UnicodeString *) strings->elementAt(j);
|
||||
|
|
|
@ -47,10 +47,6 @@
|
|||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
// initial storage. Must be >= 0
|
||||
// *** same as in uniset.cpp ! ***
|
||||
#define START_EXTRA 16
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
// Used #define to reduce private static exports and memory access time.
|
||||
#define SET_OPEN ((UChar)0x005B) /*[*/
|
||||
|
@ -185,21 +181,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
|
|||
* @param pattern a string specifying what characters are in the set
|
||||
*/
|
||||
UnicodeSet::UnicodeSet(const UnicodeString& pattern,
|
||||
UErrorCode& status) :
|
||||
len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
|
||||
fFlags(0)
|
||||
{
|
||||
if(U_SUCCESS(status)){
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
/* test for NULL */
|
||||
if(list == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}else{
|
||||
allocateStrings(status);
|
||||
applyPattern(pattern, status);
|
||||
}
|
||||
}
|
||||
UErrorCode& status) {
|
||||
applyPattern(pattern, status);
|
||||
_dbgct(this);
|
||||
}
|
||||
|
||||
|
@ -713,6 +696,11 @@ static UBool numericValueFilter(UChar32 ch, void* context) {
|
|||
return u_getNumericValue(ch) == *(double*)context;
|
||||
}
|
||||
|
||||
/**
 * Filter callback: tests a code point's general category against a bit mask.
 *
 * @param ch      the code point to test
 * @param context points to an int32_t holding the general-category mask
 * @return true if U_GET_GC_MASK(ch) has at least one bit in common with the mask
 */
static UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
    const int32_t mask = *static_cast<const int32_t*>(context);
    const UBool inMask = (U_GET_GC_MASK(ch) & mask) != 0;
    return inMask;
}
|
||||
|
||||
static UBool versionFilter(UChar32 ch, void* context) {
|
||||
static const UVersionInfo none = { 0, 0, 0, 0 };
|
||||
UVersionInfo v;
|
||||
|
@ -721,6 +709,16 @@ static UBool versionFilter(UChar32 ch, void* context) {
|
|||
return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
UProperty prop;
|
||||
int32_t value;
|
||||
} IntPropertyContext;
|
||||
|
||||
static UBool intPropertyFilter(UChar32 ch, void* context) {
|
||||
IntPropertyContext* c = (IntPropertyContext*)context;
|
||||
return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value;
|
||||
}
|
||||
|
||||
/**
 * Filter callback: tests a code point with uscript_hasScript().
 *
 * @param ch      the code point to test
 * @param context points to the UScriptCode to look for
 * @return the result of uscript_hasScript(ch, script)
 */
static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
    const UScriptCode script = *static_cast<const UScriptCode*>(context);
    return uscript_hasScript(ch, script);
}
|
||||
|
@ -781,43 +779,6 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
|
|||
|
||||
namespace {
|
||||
|
||||
/** Maps map values to 1 if the mask contains their value'th bit, all others to 0. */
|
||||
uint32_t U_CALLCONV generalCategoryMaskFilter(const void *context, uint32_t value) {
|
||||
uint32_t mask = *(const uint32_t *)context;
|
||||
value = U_MASK(value) & mask;
|
||||
if (value != 0) { value = 1; }
|
||||
return value;
|
||||
}
|
||||
|
||||
/** Maps one map value to 1, all others to 0. */
|
||||
uint32_t U_CALLCONV intValueFilter(const void *context, uint32_t value) {
|
||||
uint32_t v = *(const uint32_t *)context;
|
||||
return value == v ? 1 : 0;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void UnicodeSet::applyIntPropertyValue(const UCPMap *map,
|
||||
UCPMapValueFilter *filter, const void *context,
|
||||
UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
clear();
|
||||
UChar32 start = 0, end;
|
||||
uint32_t value;
|
||||
while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
|
||||
filter, context, &value)) >= 0) {
|
||||
if (value != 0) {
|
||||
add(start, end);
|
||||
}
|
||||
start = end + 1;
|
||||
}
|
||||
if (isBogus()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
|
||||
/* Note: we use ' ' in compiler code page */
|
||||
int32_t j = 0;
|
||||
|
@ -845,11 +806,10 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
|
|||
|
||||
UnicodeSet&
|
||||
UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
|
||||
if (U_FAILURE(ec)) { return *this; }
|
||||
// All of the following check isFrozen() before modifying this set.
|
||||
if (U_FAILURE(ec) || isFrozen()) { return *this; }
|
||||
if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
|
||||
const UCPMap *map = u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY, &ec);
|
||||
applyIntPropertyValue(map, generalCategoryMaskFilter, &value, ec);
|
||||
const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
|
||||
applyFilter(generalCategoryMaskFilter, &value, inclusions, ec);
|
||||
} else if (prop == UCHAR_SCRIPT_EXTENSIONS) {
|
||||
const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
|
||||
UScriptCode script = (UScriptCode)value;
|
||||
|
@ -866,14 +826,11 @@ UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec)
|
|||
clear();
|
||||
}
|
||||
} else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
|
||||
const UCPMap *map = u_getIntPropertyMap(prop, &ec);
|
||||
applyIntPropertyValue(map, intValueFilter, &value, ec);
|
||||
const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
|
||||
IntPropertyContext c = {prop, value};
|
||||
applyFilter(intPropertyFilter, &c, inclusions, ec);
|
||||
} else {
|
||||
// This code used to always call getInclusions(property source)
|
||||
// which sets an error for an unsupported property.
|
||||
ec = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
// Otherwise we would just clear() this set because
|
||||
// getIntPropertyValue(c, prop) returns 0 for all code points.
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
|
|
@ -462,7 +462,6 @@ class UnicodeSet;
|
|||
class CharacterProperties {
|
||||
public:
|
||||
CharacterProperties() = delete;
|
||||
static void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode);
|
||||
static const UnicodeSet *getInclusionsForProperty(UProperty prop, UErrorCode &errorCode);
|
||||
};
|
||||
|
||||
|
|
|
@ -249,7 +249,7 @@ class USetAccess /* not : public UObject because all methods are static */ {
|
|||
public:
|
||||
/* Try to have the compiler inline these*/
|
||||
inline static int32_t getStringCount(const UnicodeSet& set) {
|
||||
return set.getStringCount();
|
||||
return set.stringsSize();
|
||||
}
|
||||
inline static const UnicodeString* getString(const UnicodeSet& set,
|
||||
int32_t i) {
|
||||
|
|
|
@ -116,7 +116,7 @@ void UnicodeSetIterator::reset() {
|
|||
stringCount = 0;
|
||||
} else {
|
||||
endRange = set->getRangeCount() - 1;
|
||||
stringCount = set->strings->size();
|
||||
stringCount = set->stringsSize();
|
||||
}
|
||||
range = 0;
|
||||
endElement = -1;
|
||||
|
|
|
@ -606,12 +606,7 @@ ConversionTest::TestGetUnicodeSet2() {
|
|||
// First try to see if we have different sets because ucnv_getUnicodeSet()
|
||||
// added strings: The above conversion method does not tell us what strings might be convertible.
|
||||
// Remove strings from the set and compare again.
|
||||
// Unfortunately, there are no good, direct set methods for finding out whether there are strings
|
||||
// in the set, nor for enumerating or removing just them.
|
||||
// Intersect all code points with the set. The intersection will not contain strings.
|
||||
UnicodeSet temp(0, 0x10ffff);
|
||||
temp.retainAll(set);
|
||||
set=temp;
|
||||
set.removeAllStrings();
|
||||
}
|
||||
if(set!=expected) {
|
||||
UnicodeSet diffSet;
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
|
@ -9,13 +11,16 @@ import com.ibm.icu.text.UnicodeSet;
|
|||
* but below class CharacterProperties and class UnicodeSet.
|
||||
*/
|
||||
public final class CharacterPropertiesImpl {
|
||||
private static final int NUM_INCLUSIONS = UCharacterProperty.SRC_COUNT +
|
||||
UProperty.INT_LIMIT - UProperty.INT_START;
|
||||
|
||||
/**
|
||||
* A set of all characters _except_ the second through last characters of
|
||||
* certain ranges. These ranges are ranges of characters whose
|
||||
* properties are all exactly alike, e.g. CJK Ideographs from
|
||||
* U+4E00 to U+9FA5.
|
||||
*/
|
||||
private static final UnicodeSet inclusions[] = new UnicodeSet[UCharacterProperty.SRC_COUNT];
|
||||
private static final UnicodeSet inclusions[] = new UnicodeSet[NUM_INCLUSIONS];
|
||||
|
||||
/** For {@link UnicodeSet#setDefaultXSymbolTable}. */
|
||||
public static synchronized void clear() {
|
||||
|
@ -24,7 +29,7 @@ public final class CharacterPropertiesImpl {
|
|||
}
|
||||
}
|
||||
|
||||
private static synchronized UnicodeSet getInclusionsForSource(int src) {
|
||||
private static UnicodeSet getInclusionsForSource(int src) {
|
||||
if (inclusions[src] == null) {
|
||||
UnicodeSet incl = new UnicodeSet();
|
||||
switch(src) {
|
||||
|
@ -71,16 +76,48 @@ public final class CharacterPropertiesImpl {
|
|||
// We do not freeze() the set because we only iterate over it,
|
||||
// rather than testing contains(),
|
||||
// so the extra time and memory to optimize that are not necessary.
|
||||
inclusions[src] = incl;
|
||||
inclusions[src] = incl.compact();
|
||||
}
|
||||
return inclusions[src];
|
||||
}
|
||||
|
||||
private static UnicodeSet getIntPropInclusions(int prop) {
|
||||
assert(UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT);
|
||||
int inclIndex = UCharacterProperty.SRC_COUNT + prop - UProperty.INT_START;
|
||||
if (inclusions[inclIndex] != null) {
|
||||
return inclusions[inclIndex];
|
||||
}
|
||||
int src = UCharacterProperty.INSTANCE.getSource(prop);
|
||||
UnicodeSet incl = getInclusionsForSource(src);
|
||||
|
||||
UnicodeSet intPropIncl = new UnicodeSet(0, 0);
|
||||
int numRanges = incl.getRangeCount();
|
||||
int prevValue = 0;
|
||||
for (int i = 0; i < numRanges; ++i) {
|
||||
int rangeEnd = incl.getRangeEnd(i);
|
||||
for (int c = incl.getRangeStart(i); c <= rangeEnd; ++c) {
|
||||
// TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
|
||||
int value = UCharacter.getIntPropertyValue(c, prop);
|
||||
if (value != prevValue) {
|
||||
intPropIncl.add(c);
|
||||
prevValue = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compact for caching.
|
||||
return inclusions[inclIndex] = intPropIncl.compact();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a mutable UnicodeSet -- do not modify!
|
||||
*/
|
||||
public static UnicodeSet getInclusionsForProperty(int prop) {
|
||||
int src = UCharacterProperty.INSTANCE.getSource(prop);
|
||||
return getInclusionsForSource(src);
|
||||
public static synchronized UnicodeSet getInclusionsForProperty(int prop) {
|
||||
if (UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT) {
|
||||
return getIntPropInclusions(prop);
|
||||
} else {
|
||||
int src = UCharacterProperty.INSTANCE.getSource(prop);
|
||||
return getInclusionsForSource(src);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,10 +11,12 @@ package com.ibm.icu.text;
|
|||
import java.io.IOException;
|
||||
import java.text.ParsePosition;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.impl.BMPSet;
|
||||
|
@ -32,7 +34,6 @@ import com.ibm.icu.lang.CharacterProperties;
|
|||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.util.CodePointMap;
|
||||
import com.ibm.icu.util.Freezable;
|
||||
import com.ibm.icu.util.ICUUncheckedIOException;
|
||||
import com.ibm.icu.util.OutputInt;
|
||||
|
@ -288,6 +289,8 @@ import com.ibm.icu.util.VersionInfo;
|
|||
* @see UnicodeSetSpanner
|
||||
*/
|
||||
public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Comparable<UnicodeSet>, Freezable<UnicodeSet> {
|
||||
private static final SortedSet<String> EMPTY_STRINGS =
|
||||
Collections.unmodifiableSortedSet(new TreeSet<String>());
|
||||
|
||||
/**
|
||||
* Constant for the empty set.
|
||||
|
@ -306,6 +309,15 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
|
||||
// 110000 for codepoints
|
||||
|
||||
/**
|
||||
* Enough for sets with few ranges.
|
||||
* For example, White_Space has 10 ranges, list length 21.
|
||||
*/
|
||||
private static final int INITIAL_CAPACITY = 25;
|
||||
|
||||
/** Max list [0, 1, 2, ..., max code point, HIGH] */
|
||||
private static final int MAX_LENGTH = HIGH + 1;
|
||||
|
||||
/**
|
||||
* Minimum value that can be stored in a UnicodeSet.
|
||||
* @stable ICU 2.0
|
||||
|
@ -323,9 +335,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
private int[] rangeList; // internal buffer
|
||||
private int[] buffer; // internal buffer
|
||||
|
||||
// NOTE: normally the field should be of type SortedSet; but that is missing a public clone!!
|
||||
// is not private so that UnicodeSetIterator can get access
|
||||
TreeSet<String> strings = new TreeSet<>();
|
||||
SortedSet<String> strings = EMPTY_STRINGS;
|
||||
|
||||
/**
|
||||
* The pattern representation of this set. This may not be the
|
||||
|
@ -338,9 +349,6 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
*/
|
||||
private String pat = null;
|
||||
|
||||
private static final int START_EXTRA = 16; // initial storage. Must be >= 0
|
||||
private static final int GROW_EXTRA = START_EXTRA; // extra amount for growth. Must be >= 0
|
||||
|
||||
// Special property set IDs
|
||||
private static final String ANY_ID = "ANY"; // [\u0000-\U0010FFFF]
|
||||
private static final String ASCII_ID = "ASCII"; // [\u0000-\u007F]
|
||||
|
@ -357,8 +365,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet() {
|
||||
list = new int[1 + START_EXTRA];
|
||||
list[len++] = HIGH;
|
||||
list = new int[INITIAL_CAPACITY];
|
||||
list[0] = HIGH;
|
||||
len = 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -379,7 +388,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
*/
|
||||
public UnicodeSet(int start, int end) {
|
||||
this();
|
||||
complement(start, end);
|
||||
add(start, end);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -400,18 +409,16 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
int last = -1; // used to ensure that the results are monotonically increasing.
|
||||
int i = 0;
|
||||
while (i < pairs.length) {
|
||||
// start of pair
|
||||
int start = pairs[i];
|
||||
if (last >= start) {
|
||||
throw new IllegalArgumentException("Must be monotonically increasing.");
|
||||
}
|
||||
list[i++] = last = start;
|
||||
// end of pair
|
||||
int end = pairs[i] + 1;
|
||||
if (last >= end) {
|
||||
list[i++] = start;
|
||||
int limit = pairs[i] + 1;
|
||||
if (start >= limit) {
|
||||
throw new IllegalArgumentException("Must be monotonically increasing.");
|
||||
}
|
||||
list[i++] = last = end;
|
||||
list[i++] = last = limit;
|
||||
}
|
||||
list[i] = HIGH; // terminate
|
||||
}
|
||||
|
@ -504,10 +511,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
if (isFrozen()) {
|
||||
return this;
|
||||
}
|
||||
UnicodeSet result = new UnicodeSet(this);
|
||||
result.bmpSet = this.bmpSet;
|
||||
result.stringSpan = this.stringSpan;
|
||||
return result;
|
||||
return new UnicodeSet(this);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -533,10 +537,14 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
*/
|
||||
public UnicodeSet set(UnicodeSet other) {
|
||||
checkFrozen();
|
||||
list = other.list.clone();
|
||||
list = Arrays.copyOf(other.list, other.len);
|
||||
len = other.len;
|
||||
pat = other.pat;
|
||||
strings = new TreeSet<>(other.strings);
|
||||
if (other.hasStrings()) {
|
||||
strings = new TreeSet<>(other.strings);
|
||||
} else {
|
||||
strings = EMPTY_STRINGS;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -809,7 +817,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
}
|
||||
|
||||
if (includeStrings && strings.size() > 0) {
|
||||
if (includeStrings && hasStrings()) {
|
||||
for (String s : strings) {
|
||||
result.append('{');
|
||||
_appendToPat(result, s, escapeUnprintable);
|
||||
|
@ -823,6 +831,10 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
}
|
||||
|
||||
boolean hasStrings() {
|
||||
return !strings.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of elements in this set (its cardinality)
|
||||
* Note that the elements of a set may include both individual
|
||||
|
@ -847,7 +859,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
return len == 1 && strings.size() == 0;
|
||||
return len == 1 && !hasStrings();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -878,7 +890,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
return true;
|
||||
}
|
||||
}
|
||||
if (strings.size() != 0) {
|
||||
if (hasStrings()) {
|
||||
for (String s : strings) {
|
||||
//if (s.length() == 0) {
|
||||
// // Empty strings match everything
|
||||
|
@ -915,7 +927,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
return U_MISMATCH;
|
||||
}
|
||||
} else {
|
||||
if (strings.size() != 0) { // try strings first
|
||||
if (hasStrings()) { // try strings first
|
||||
|
||||
// might separate forward and backward loops later
|
||||
// for now they are combined
|
||||
|
@ -1033,7 +1045,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
public int matchesAt(CharSequence text, int offset) {
|
||||
int lastLen = -1;
|
||||
strings:
|
||||
if (strings.size() != 0) {
|
||||
if (hasStrings()) {
|
||||
char firstChar = text.charAt(offset);
|
||||
String trial = null;
|
||||
// find the first string starting with firstChar
|
||||
|
@ -1190,6 +1202,37 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
|
||||
}
|
||||
if (start < end) {
|
||||
int limit = end + 1;
|
||||
// Fast path for adding a new range after the last one.
|
||||
// Odd list length: [..., lastStart, lastLimit, HIGH]
|
||||
if ((len & 1) != 0) {
|
||||
// If the list is empty, set lastLimit low enough to not be adjacent to 0.
|
||||
int lastLimit = len == 1 ? -2 : list[len - 2];
|
||||
if (lastLimit <= start) {
|
||||
checkFrozen();
|
||||
if (lastLimit == start) {
|
||||
// Extend the last range.
|
||||
list[len - 2] = limit;
|
||||
if (limit == HIGH) {
|
||||
--len;
|
||||
}
|
||||
} else {
|
||||
list[len - 1] = start;
|
||||
if (limit < HIGH) {
|
||||
ensureCapacity(len + 2);
|
||||
list[len++] = limit;
|
||||
list[len++] = HIGH;
|
||||
} else { // limit == HIGH
|
||||
ensureCapacity(len + 1);
|
||||
list[len++] = HIGH;
|
||||
}
|
||||
}
|
||||
pat = null;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
// This is slow. Could be much faster using findCodePoint(start)
|
||||
// and modifying the list, dealing with adjacent & overlapping ranges.
|
||||
add(range(start, end), 2, 0);
|
||||
} else if (start == end) {
|
||||
add(start);
|
||||
|
@ -1298,7 +1341,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
// NOTE: This has no measurable impact on performance,
|
||||
// but it might help in some usage patterns.
|
||||
if (len+2 > list.length) {
|
||||
int[] temp = new int[len + 2 + GROW_EXTRA];
|
||||
int[] temp = new int[nextCapacity(len + 2)];
|
||||
if (i != 0) System.arraycopy(list, 0, temp, 0, i);
|
||||
System.arraycopy(list, i, temp, i+2, len-i);
|
||||
list = temp;
|
||||
|
@ -1329,14 +1372,24 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
checkFrozen();
|
||||
int cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
strings.add(s.toString());
|
||||
pat = null;
|
||||
String str = s.toString();
|
||||
if (!strings.contains(str)) {
|
||||
addString(str);
|
||||
pat = null;
|
||||
}
|
||||
} else {
|
||||
add_unchecked(cp, cp);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
private void addString(CharSequence s) {
|
||||
if (strings == EMPTY_STRINGS) {
|
||||
strings = new TreeSet<>();
|
||||
}
|
||||
strings.add(s.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility for getting code point from single code point CharSequence.
|
||||
* See the public UTF16.getSingleCodePoint()
|
||||
|
@ -1416,7 +1469,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
*/
|
||||
public final UnicodeSet removeAllStrings() {
|
||||
checkFrozen();
|
||||
if (strings.size() != 0) {
|
||||
if (hasStrings()) {
|
||||
strings.clear();
|
||||
pat = null;
|
||||
}
|
||||
|
@ -1494,16 +1547,16 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
public final UnicodeSet retain(CharSequence cs) {
|
||||
|
||||
int cp = getSingleCP(cs);
|
||||
if (cp < 0) {
|
||||
checkFrozen();
|
||||
String s = cs.toString();
|
||||
boolean isIn = strings.contains(s);
|
||||
if (isIn && size() == 1) {
|
||||
return this;
|
||||
}
|
||||
clear();
|
||||
strings.add(s);
|
||||
addString(s);
|
||||
pat = null;
|
||||
} else {
|
||||
retain(cp, cp);
|
||||
|
@ -1560,8 +1613,12 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
public final UnicodeSet remove(CharSequence s) {
|
||||
int cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
strings.remove(s.toString());
|
||||
pat = null;
|
||||
checkFrozen();
|
||||
String str = s.toString();
|
||||
if (strings.contains(str)) {
|
||||
strings.remove(str);
|
||||
pat = null;
|
||||
}
|
||||
} else {
|
||||
remove(cp, cp);
|
||||
}
|
||||
|
@ -1642,7 +1699,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
if (strings.contains(s2)) {
|
||||
strings.remove(s2);
|
||||
} else {
|
||||
strings.add(s2);
|
||||
addString(s2);
|
||||
}
|
||||
pat = null;
|
||||
} else {
|
||||
|
@ -1975,7 +2032,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
if (!contains(cp)) {
|
||||
if (strings.size() == 0) {
|
||||
if (!hasStrings()) {
|
||||
return false;
|
||||
}
|
||||
return containsAll(s, 0);
|
||||
|
@ -2015,7 +2072,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
*/
|
||||
@Deprecated
|
||||
public String getRegexEquivalent() {
|
||||
if (strings.size() == 0) {
|
||||
if (!hasStrings()) {
|
||||
return toString();
|
||||
}
|
||||
StringBuilder result = new StringBuilder("(?:");
|
||||
|
@ -2189,7 +2246,13 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
public UnicodeSet addAll(UnicodeSet c) {
|
||||
checkFrozen();
|
||||
add(c.list, c.len, 0);
|
||||
strings.addAll(c.strings);
|
||||
if (c.hasStrings()) {
|
||||
if (strings == EMPTY_STRINGS) {
|
||||
strings = new TreeSet<>(c.strings);
|
||||
} else {
|
||||
strings.addAll(c.strings);
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -2206,7 +2269,13 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
public UnicodeSet retainAll(UnicodeSet c) {
|
||||
checkFrozen();
|
||||
retain(c.list, c.len, 0);
|
||||
strings.retainAll(c.strings);
|
||||
if (hasStrings()) {
|
||||
if (!c.hasStrings()) {
|
||||
strings.clear();
|
||||
} else {
|
||||
strings.retainAll(c.strings);
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -2223,7 +2292,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
public UnicodeSet removeAll(UnicodeSet c) {
|
||||
checkFrozen();
|
||||
retain(c.list, c.len, 2);
|
||||
strings.removeAll(c.strings);
|
||||
if (hasStrings() && c.hasStrings()) {
|
||||
strings.removeAll(c.strings);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -2239,7 +2310,13 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
public UnicodeSet complementAll(UnicodeSet c) {
|
||||
checkFrozen();
|
||||
xor(c.list, c.len, 0);
|
||||
SortedSetRelation.doOperation(strings, SortedSetRelation.COMPLEMENTALL, c.strings);
|
||||
if (c.hasStrings()) {
|
||||
if (strings == EMPTY_STRINGS) {
|
||||
strings = new TreeSet<>(c.strings);
|
||||
} else {
|
||||
SortedSetRelation.doOperation(strings, SortedSetRelation.COMPLEMENTALL, c.strings);
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -2253,7 +2330,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
list[0] = HIGH;
|
||||
len = 1;
|
||||
pat = null;
|
||||
strings.clear();
|
||||
if (hasStrings()) {
|
||||
strings.clear();
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -2301,13 +2380,15 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
*/
|
||||
public UnicodeSet compact() {
|
||||
checkFrozen();
|
||||
if (len != list.length) {
|
||||
int[] temp = new int[len];
|
||||
System.arraycopy(list, 0, temp, 0, len);
|
||||
list = temp;
|
||||
if ((len + 7) < list.length) {
|
||||
// If we have more than a little unused capacity, shrink it to len.
|
||||
list = Arrays.copyOf(list, len);
|
||||
}
|
||||
rangeList = null;
|
||||
buffer = null;
|
||||
if (strings != EMPTY_STRINGS && strings.isEmpty()) {
|
||||
strings = EMPTY_STRINGS;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -2733,6 +2814,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
if (lastSingle != Integer.MAX_VALUE && curSingle != Integer.MAX_VALUE) {
|
||||
add(lastSingle,curSingle);
|
||||
} else {
|
||||
if (strings == EMPTY_STRINGS) {
|
||||
strings = new TreeSet<>();
|
||||
}
|
||||
try {
|
||||
StringRange.expand(lastString, curString, true, strings);
|
||||
} catch (Exception e) {
|
||||
|
@ -2919,16 +3003,42 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
// Implementation: Utility methods
|
||||
//----------------------------------------------------------------
|
||||
|
||||
private int nextCapacity(int minCapacity) {
|
||||
// Grow exponentially to reduce the frequency of allocations.
|
||||
if (minCapacity < INITIAL_CAPACITY) {
|
||||
return minCapacity + INITIAL_CAPACITY;
|
||||
} else if (minCapacity <= 2500) {
|
||||
return 5 * minCapacity;
|
||||
} else {
|
||||
int newCapacity = 2 * minCapacity;
|
||||
if (newCapacity > MAX_LENGTH) {
|
||||
newCapacity = MAX_LENGTH;
|
||||
}
|
||||
return newCapacity;
|
||||
}
|
||||
}
|
||||
|
||||
private void ensureCapacity(int newLen) {
|
||||
if (newLen > MAX_LENGTH) {
|
||||
newLen = MAX_LENGTH;
|
||||
}
|
||||
if (newLen <= list.length) return;
|
||||
int[] temp = new int[newLen + GROW_EXTRA];
|
||||
int newCapacity = nextCapacity(newLen);
|
||||
int[] temp = new int[newCapacity];
|
||||
// Copy only the actual contents.
|
||||
System.arraycopy(list, 0, temp, 0, len);
|
||||
list = temp;
|
||||
}
|
||||
|
||||
private void ensureBufferCapacity(int newLen) {
|
||||
if (newLen > MAX_LENGTH) {
|
||||
newLen = MAX_LENGTH;
|
||||
}
|
||||
if (buffer != null && newLen <= buffer.length) return;
|
||||
buffer = new int[newLen + GROW_EXTRA];
|
||||
int newCapacity = nextCapacity(newLen);
|
||||
buffer = new int[newCapacity];
|
||||
// The buffer has no contents to be copied.
|
||||
// It is always filled from scratch after this call.
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3186,6 +3296,28 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
}
|
||||
|
||||
private static final class GeneralCategoryMaskFilter implements Filter {
|
||||
int mask;
|
||||
GeneralCategoryMaskFilter(int mask) { this.mask = mask; }
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return ((1 << UCharacter.getType(ch)) & mask) != 0;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class IntPropertyFilter implements Filter {
|
||||
int prop;
|
||||
int value;
|
||||
IntPropertyFilter(int prop, int value) {
|
||||
this.prop = prop;
|
||||
this.value = value;
|
||||
}
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return UCharacter.getIntPropertyValue(ch, prop) == value;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class ScriptExtensionsFilter implements Filter {
|
||||
int script;
|
||||
ScriptExtensionsFilter(int script) { this.script = script; }
|
||||
|
@ -3254,38 +3386,6 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
}
|
||||
|
||||
/** Maps map values to 1 if the mask contains their value'th bit, all others to 0. */
|
||||
private static final class GeneralCategoryMaskFilter implements CodePointMap.ValueFilter {
|
||||
int mask;
|
||||
GeneralCategoryMaskFilter(int mask) { this.mask = mask; }
|
||||
@Override
|
||||
public int apply(int value) {
|
||||
value = (1 << value) & mask;
|
||||
if (value != 0) { value = 1; }
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
/** Maps one map value to 1, all others to 0. */
|
||||
private static final class IntValueFilter implements CodePointMap.ValueFilter {
|
||||
int v;
|
||||
IntValueFilter(int value) { v = value; }
|
||||
@Override
|
||||
public int apply(int value) { return value == v ? 1 : 0; }
|
||||
}
|
||||
|
||||
private void applyIntPropertyValue(CodePointMap map, CodePointMap.ValueFilter filter) {
|
||||
clear();
|
||||
CodePointMap.Range range = new CodePointMap.Range();
|
||||
for (int start = 0; map.getRange(start, filter, range);) {
|
||||
int end = range.getEnd();
|
||||
if (range.getValue() != 0) {
|
||||
add_unchecked(start, end);
|
||||
}
|
||||
start = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove leading and trailing Pattern_White_Space and compress
|
||||
* internal Pattern_White_Space to a single space character.
|
||||
|
@ -3340,8 +3440,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
public UnicodeSet applyIntPropertyValue(int prop, int value) {
|
||||
// All of the following include checkFrozen() before modifying this set.
|
||||
if (prop == UProperty.GENERAL_CATEGORY_MASK) {
|
||||
CodePointMap map = CharacterProperties.getIntPropertyMap(UProperty.GENERAL_CATEGORY);
|
||||
applyIntPropertyValue(map, new GeneralCategoryMaskFilter(value));
|
||||
UnicodeSet inclusions = CharacterPropertiesImpl.getInclusionsForProperty(prop);
|
||||
applyFilter(new GeneralCategoryMaskFilter(value), inclusions);
|
||||
} else if (prop == UProperty.SCRIPT_EXTENSIONS) {
|
||||
UnicodeSet inclusions = CharacterPropertiesImpl.getInclusionsForProperty(prop);
|
||||
applyFilter(new ScriptExtensionsFilter(value), inclusions);
|
||||
|
@ -3355,14 +3455,10 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
clear();
|
||||
}
|
||||
} else if (UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT) {
|
||||
CodePointMap map = CharacterProperties.getIntPropertyMap(prop);
|
||||
applyIntPropertyValue(map, new IntValueFilter(value));
|
||||
UnicodeSet inclusions = CharacterPropertiesImpl.getInclusionsForProperty(prop);
|
||||
applyFilter(new IntPropertyFilter(prop, value), inclusions);
|
||||
} else {
|
||||
// This code used to always call getInclusions(property source)
|
||||
// which throws an exception for an unsupported property.
|
||||
throw new IllegalArgumentException("unsupported property " + prop);
|
||||
// Otherwise we would just clear() this set because
|
||||
// getIntPropertyValue(c, prop) returns 0 for all code points.
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
@ -3825,7 +3921,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
// start with input set to guarantee inclusion
|
||||
// CASE: remove strings because the strings will actually be reduced (folded);
|
||||
// therefore, start with no strings and add only those needed
|
||||
if((attribute & CASE) != 0) {
|
||||
if((attribute & CASE) != 0 && foldSet.hasStrings()) {
|
||||
foldSet.strings.clear();
|
||||
}
|
||||
|
||||
|
@ -3860,7 +3956,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
}
|
||||
}
|
||||
if (!strings.isEmpty()) {
|
||||
if (hasStrings()) {
|
||||
if ((attribute & CASE) != 0) {
|
||||
for (String s : strings) {
|
||||
String str = UCharacter.foldCase(s, 0);
|
||||
|
@ -3970,25 +4066,10 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
@Override
|
||||
public UnicodeSet freeze() {
|
||||
if (!isFrozen()) {
|
||||
// Do most of what compact() does before freezing because
|
||||
// compact() will not work when the set is frozen.
|
||||
// Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA).
|
||||
|
||||
// Delete buffer first to defragment memory less.
|
||||
buffer = null;
|
||||
if (list.length > (len + GROW_EXTRA)) {
|
||||
// Make the capacity equal to len or 1.
|
||||
// We don't want to realloc of 0 size.
|
||||
int capacity = (len == 0) ? 1 : len;
|
||||
int[] oldList = list;
|
||||
list = new int[capacity];
|
||||
for (int i = capacity; i-- > 0;) {
|
||||
list[i] = oldList[i];
|
||||
}
|
||||
}
|
||||
compact();
|
||||
|
||||
// Optimize contains() and span() and similar functions.
|
||||
if (!strings.isEmpty()) {
|
||||
if (hasStrings()) {
|
||||
stringSpan = new UnicodeSetStringSpan(this, new ArrayList<>(strings), UnicodeSetStringSpan.ALL);
|
||||
}
|
||||
if (stringSpan == null || !stringSpan.needsStringSpanUTF16()) {
|
||||
|
@ -4040,7 +4121,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
if (stringSpan != null) {
|
||||
return stringSpan.span(s, start, spanCondition);
|
||||
} else if (!strings.isEmpty()) {
|
||||
} else if (hasStrings()) {
|
||||
int which = spanCondition == SpanCondition.NOT_CONTAINED ? UnicodeSetStringSpan.FWD_UTF16_NOT_CONTAINED
|
||||
: UnicodeSetStringSpan.FWD_UTF16_CONTAINED;
|
||||
UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<>(strings), which);
|
||||
|
@ -4077,7 +4158,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
return stringSpan.spanAndCount(s, start, spanCondition, outCount);
|
||||
} else if (bmpSet != null) {
|
||||
return bmpSet.span(s, start, spanCondition, outCount);
|
||||
} else if (!strings.isEmpty()) {
|
||||
} else if (hasStrings()) {
|
||||
int which = spanCondition == SpanCondition.NOT_CONTAINED ? UnicodeSetStringSpan.FWD_UTF16_NOT_CONTAINED
|
||||
: UnicodeSetStringSpan.FWD_UTF16_CONTAINED;
|
||||
which |= UnicodeSetStringSpan.WITH_COUNT;
|
||||
|
@ -4145,7 +4226,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
if (stringSpan != null) {
|
||||
return stringSpan.spanBack(s, fromIndex, spanCondition);
|
||||
} else if (!strings.isEmpty()) {
|
||||
} else if (hasStrings()) {
|
||||
int which = (spanCondition == SpanCondition.NOT_CONTAINED)
|
||||
? UnicodeSetStringSpan.BACK_UTF16_NOT_CONTAINED
|
||||
: UnicodeSetStringSpan.BACK_UTF16_CONTAINED;
|
||||
|
@ -4311,7 +4392,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
private int item;
|
||||
private int current;
|
||||
private int limit;
|
||||
private TreeSet<String> sourceStrings;
|
||||
private SortedSet<String> sourceStrings;
|
||||
private Iterator<String> stringIterator;
|
||||
private char[] buffer;
|
||||
|
||||
|
@ -4500,12 +4581,12 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
if (0 != (result = list[i] - o.list[i])) {
|
||||
// if either list ran out, compare to the last string
|
||||
if (list[i] == HIGH) {
|
||||
if (strings.isEmpty()) return 1;
|
||||
if (!hasStrings()) return 1;
|
||||
String item = strings.first();
|
||||
return compare(item, o.list[i]);
|
||||
}
|
||||
if (o.list[i] == HIGH) {
|
||||
if (o.strings.isEmpty()) return -1;
|
||||
if (!o.hasStrings()) return -1;
|
||||
String item = o.strings.first();
|
||||
int compareResult = compare(item, list[i]);
|
||||
return compareResult > 0 ? -1 : compareResult < 0 ? 1 : 0; // Reverse the order.
|
||||
|
@ -4638,7 +4719,11 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
* @stable ICU 4.4
|
||||
*/
|
||||
public Collection<String> strings() {
|
||||
return Collections.unmodifiableSortedSet(strings);
|
||||
if (hasStrings()) {
|
||||
return Collections.unmodifiableSortedSet(strings);
|
||||
} else {
|
||||
return EMPTY_STRINGS;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -34,13 +34,13 @@ import java.util.Iterator;
|
|||
* }
|
||||
* }
|
||||
* </pre>
|
||||
* <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
|
||||
* <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
|
||||
* Do not alter the UnicodeSet while iterating.
|
||||
* @author M. Davis
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public class UnicodeSetIterator {
|
||||
|
||||
|
||||
/**
|
||||
* Value of <tt>codepoint</tt> if the iterator points to a string.
|
||||
* If <tt>codepoint == IS_STRING</tt>, then examine
|
||||
|
@ -48,7 +48,7 @@ public class UnicodeSetIterator {
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
public static int IS_STRING = -1;
|
||||
|
||||
|
||||
/**
|
||||
* Current code point, or the special value <tt>IS_STRING</tt>, if
|
||||
* the iterator points to a string.
|
||||
|
@ -83,7 +83,7 @@ public class UnicodeSetIterator {
|
|||
public UnicodeSetIterator(UnicodeSet set) {
|
||||
reset(set);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create an iterator over nothing. <tt>next()</tt> and
|
||||
* <tt>nextRange()</tt> return false. This is a convenience
|
||||
|
@ -93,14 +93,14 @@ public class UnicodeSetIterator {
|
|||
public UnicodeSetIterator() {
|
||||
reset(new UnicodeSet());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the next element in the set, either a single code point
|
||||
* or a string. If there are no more elements in the set, return
|
||||
* false. If <tt>codepoint == IS_STRING</tt>, the value is a
|
||||
* string in the <tt>string</tt> field. Otherwise the value is a
|
||||
* single code point in the <tt>codepoint</tt> field.
|
||||
*
|
||||
*
|
||||
* <p>The order of iteration is all code points in sorted order,
|
||||
* followed by all strings in sorted order. <tt>codepointEnd</tt> is
|
||||
* undefined after calling this method. <tt>string</tt> is
|
||||
|
@ -108,7 +108,7 @@ public class UnicodeSetIterator {
|
|||
* calls to <tt>next()</tt> and <tt>nextRange()</tt> without
|
||||
* calling <tt>reset()</tt> between them. The results of doing so
|
||||
* are undefined.
|
||||
* <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
|
||||
* <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
|
||||
* Do not alter the UnicodeSet while iterating.
|
||||
* @return true if there was another element in the set and this
|
||||
* object contains the element.
|
||||
|
@ -124,9 +124,9 @@ public class UnicodeSetIterator {
|
|||
codepoint = codepointEnd = nextElement++;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// stringIterator == null iff there are no string elements remaining
|
||||
|
||||
|
||||
if (stringIterator == null) {
|
||||
return false;
|
||||
}
|
||||
|
@ -137,7 +137,7 @@ public class UnicodeSetIterator {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the next element in the set, either a code point range
|
||||
* or a string. If there are no more elements in the set, return
|
||||
|
@ -145,7 +145,7 @@ public class UnicodeSetIterator {
|
|||
* string in the <tt>string</tt> field. Otherwise the value is a
|
||||
* range of one or more code points from <tt>codepoint</tt> to
|
||||
* <tt>codepointEnd</tt> inclusive.
|
||||
*
|
||||
*
|
||||
* <p>The order of iteration is all code point ranges in sorted
|
||||
* order, followed by all strings in sorted order. Ranges are
|
||||
* disjoint and non-contiguous. <tt>string</tt> is undefined
|
||||
|
@ -172,9 +172,9 @@ public class UnicodeSetIterator {
|
|||
nextElement = endElement+1;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// stringIterator == null iff there are no string elements remaining
|
||||
|
||||
|
||||
if (stringIterator == null) {
|
||||
return false;
|
||||
}
|
||||
|
@ -185,7 +185,7 @@ public class UnicodeSetIterator {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets this iterator to visit the elements of the given set and
|
||||
* resets it to the start of that set. The iterator is valid only
|
||||
|
@ -197,7 +197,7 @@ public class UnicodeSetIterator {
|
|||
set = uset;
|
||||
reset();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Resets this iterator to the start of the set.
|
||||
* @stable ICU 2.0
|
||||
|
@ -206,19 +206,17 @@ public class UnicodeSetIterator {
|
|||
endRange = set.getRangeCount() - 1;
|
||||
range = 0;
|
||||
endElement = -1;
|
||||
nextElement = 0;
|
||||
nextElement = 0;
|
||||
if (endRange >= 0) {
|
||||
loadRange(range);
|
||||
}
|
||||
stringIterator = null;
|
||||
if (set.strings != null) {
|
||||
if (set.hasStrings()) {
|
||||
stringIterator = set.strings.iterator();
|
||||
if (!stringIterator.hasNext()) {
|
||||
stringIterator = null;
|
||||
}
|
||||
} else {
|
||||
stringIterator = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the current string from the iterator. Only use after calling next(), not nextRange().
|
||||
* @stable ICU 4.0
|
||||
|
@ -229,13 +227,13 @@ public class UnicodeSetIterator {
|
|||
}
|
||||
return string;
|
||||
}
|
||||
|
||||
|
||||
// ======================= PRIVATES ===========================
|
||||
|
||||
|
||||
private UnicodeSet set;
|
||||
private int endRange = 0;
|
||||
private int range = 0;
|
||||
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
|
@ -244,7 +242,7 @@ public class UnicodeSetIterator {
|
|||
public UnicodeSet getSet() {
|
||||
return set;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
|
@ -258,7 +256,7 @@ public class UnicodeSetIterator {
|
|||
@Deprecated
|
||||
protected int nextElement;
|
||||
private Iterator<String> stringIterator = null;
|
||||
|
||||
|
||||
/**
|
||||
* Invariant: stringIterator is null when there are no (more) strings remaining
|
||||
*/
|
||||
|
|
|
@ -353,7 +353,8 @@ public abstract class CodePointTrie extends CodePointMap {
|
|||
int prevI3Block = -1;
|
||||
int prevBlock = -1;
|
||||
int c = start;
|
||||
int value = 0; // Initialize to make compiler happy. Real value when haveValue is true.
|
||||
// Initialize to make compiler happy. Real value when haveValue is true.
|
||||
int trieValue = 0, value = 0;
|
||||
boolean haveValue = false;
|
||||
do {
|
||||
int i3Block;
|
||||
|
@ -391,6 +392,7 @@ public abstract class CodePointTrie extends CodePointMap {
|
|||
return true;
|
||||
}
|
||||
} else {
|
||||
trieValue = this.nullValue;
|
||||
value = nullValue;
|
||||
haveValue = true;
|
||||
}
|
||||
|
@ -429,29 +431,39 @@ public abstract class CodePointTrie extends CodePointMap {
|
|||
return true;
|
||||
}
|
||||
} else {
|
||||
trieValue = this.nullValue;
|
||||
value = nullValue;
|
||||
haveValue = true;
|
||||
}
|
||||
c = (c + dataBlockLength) & ~dataMask;
|
||||
} else {
|
||||
int di = block + (c & dataMask);
|
||||
int value2 = data.getFromIndex(di);
|
||||
value2 = maybeFilterValue(value2, this.nullValue, nullValue, filter);
|
||||
int trieValue2 = data.getFromIndex(di);
|
||||
if (haveValue) {
|
||||
if (value2 != value) {
|
||||
range.set(start, c - 1, value);
|
||||
return true;
|
||||
if (trieValue2 != trieValue) {
|
||||
if (filter == null ||
|
||||
maybeFilterValue(trieValue2, this.nullValue, nullValue,
|
||||
filter) != value) {
|
||||
range.set(start, c - 1, value);
|
||||
return true;
|
||||
}
|
||||
trieValue = trieValue2; // may or may not help
|
||||
}
|
||||
} else {
|
||||
value = value2;
|
||||
trieValue = trieValue2;
|
||||
value = maybeFilterValue(trieValue2, this.nullValue, nullValue, filter);
|
||||
haveValue = true;
|
||||
}
|
||||
while ((++c & dataMask) != 0) {
|
||||
if (maybeFilterValue(data.getFromIndex(++di),
|
||||
this.nullValue, nullValue,
|
||||
filter) != value) {
|
||||
range.set(start, c - 1, value);
|
||||
return true;
|
||||
trieValue2 = data.getFromIndex(++di);
|
||||
if (trieValue2 != trieValue) {
|
||||
if (filter == null ||
|
||||
maybeFilterValue(trieValue2, this.nullValue, nullValue,
|
||||
filter) != value) {
|
||||
range.set(start, c - 1, value);
|
||||
return true;
|
||||
}
|
||||
trieValue = trieValue2; // may or may not help
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -178,39 +178,57 @@ public final class MutableCodePointTrie extends CodePointMap implements Cloneabl
|
|||
int nullValue = initialValue;
|
||||
if (filter != null) { nullValue = filter.apply(nullValue); }
|
||||
int c = start;
|
||||
int value = 0; // Initialize to make compiler happy. Real value when haveValue is true.
|
||||
// Initialize to make compiler happy. Real value when haveValue is true.
|
||||
int trieValue = 0, value = 0;
|
||||
boolean haveValue = false;
|
||||
int i = c >> CodePointTrie.SHIFT_3;
|
||||
do {
|
||||
if (flags[i] == ALL_SAME) {
|
||||
int value2 = maybeFilterValue(index[i], initialValue, nullValue, filter);
|
||||
int trieValue2 = index[i];
|
||||
if (haveValue) {
|
||||
if (value2 != value) {
|
||||
range.set(start, c - 1, value);
|
||||
return true;
|
||||
if (trieValue2 != trieValue) {
|
||||
if (filter == null ||
|
||||
maybeFilterValue(trieValue2, initialValue, nullValue,
|
||||
filter) != value) {
|
||||
range.set(start, c - 1, value);
|
||||
return true;
|
||||
}
|
||||
trieValue = trieValue2; // may or may not help
|
||||
}
|
||||
} else {
|
||||
value = value2;
|
||||
trieValue = trieValue2;
|
||||
value = maybeFilterValue(trieValue2, initialValue, nullValue, filter);
|
||||
haveValue = true;
|
||||
}
|
||||
c = (c + CodePointTrie.SMALL_DATA_BLOCK_LENGTH) & ~CodePointTrie.SMALL_DATA_MASK;
|
||||
} else /* MIXED */ {
|
||||
int di = index[i] + (c & CodePointTrie.SMALL_DATA_MASK);
|
||||
int value2 = maybeFilterValue(data[di], initialValue, nullValue, filter);
|
||||
int trieValue2 = data[di];
|
||||
if (haveValue) {
|
||||
if (value2 != value) {
|
||||
range.set(start, c - 1, value);
|
||||
return true;
|
||||
if (trieValue2 != trieValue) {
|
||||
if (filter == null ||
|
||||
maybeFilterValue(trieValue2, initialValue, nullValue,
|
||||
filter) != value) {
|
||||
range.set(start, c - 1, value);
|
||||
return true;
|
||||
}
|
||||
trieValue = trieValue2; // may or may not help
|
||||
}
|
||||
} else {
|
||||
value = value2;
|
||||
trieValue = trieValue2;
|
||||
value = maybeFilterValue(trieValue2, initialValue, nullValue, filter);
|
||||
haveValue = true;
|
||||
}
|
||||
while ((++c & CodePointTrie.SMALL_DATA_MASK) != 0) {
|
||||
if (maybeFilterValue(data[++di], initialValue, nullValue,
|
||||
filter) != value) {
|
||||
range.set(start, c - 1, value);
|
||||
return true;
|
||||
trieValue2 = data[++di];
|
||||
if (trieValue2 != trieValue) {
|
||||
if (filter == null ||
|
||||
maybeFilterValue(trieValue2, initialValue, nullValue,
|
||||
filter) != value) {
|
||||
range.set(start, c - 1, value);
|
||||
return true;
|
||||
}
|
||||
trieValue = trieValue2; // may or may not help
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue