mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-176 UTF-16 support with CharacterIterator; new functions for more efficient iteration
X-SVN-Rev: 1117
This commit is contained in:
parent
2b2af0bbc5
commit
7c2d19d828
6 changed files with 553 additions and 372 deletions
|
@ -8,7 +8,5 @@
|
|||
|
||||
#include "unicode/chariter.h"
|
||||
|
||||
const UChar CharacterIterator::DONE = 0xffff;
|
||||
|
||||
CharacterIterator::~CharacterIterator()
|
||||
{}
|
||||
|
|
|
@ -20,188 +20,94 @@
|
|||
UClassID StringCharacterIterator::fgClassID = 0;
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator()
|
||||
: CharacterIterator(),
|
||||
text(),
|
||||
pos(0),
|
||||
begin(0),
|
||||
end(0)
|
||||
: UCharCharacterIterator(),
|
||||
text()
|
||||
{
|
||||
// NEVER DEFAULT CONSTRUCT!
|
||||
}
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator(const UnicodeString& text)
|
||||
: CharacterIterator(),
|
||||
text(text),
|
||||
pos(0),
|
||||
begin(0),
|
||||
end(text.length())
|
||||
{}
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator(const UnicodeString& text,
|
||||
UTextOffset pos)
|
||||
: CharacterIterator(),
|
||||
text(text),
|
||||
pos(pos),
|
||||
begin(0),
|
||||
end(text.length())
|
||||
: UCharCharacterIterator(text.fArray, text.length()),
|
||||
text(text)
|
||||
{
|
||||
// the Java code checks the parameters and throws exceptions we've
|
||||
// decided to punt on this for the time being because changing this
|
||||
// constructor to accept an error code is an API change with
|
||||
// significant impact
|
||||
// we had set the input parameter's array, now we need to set our copy's array
|
||||
UCharCharacterIterator::text = this->text.fArray;
|
||||
}
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator(const UnicodeString& text,
|
||||
UTextOffset begin,
|
||||
UTextOffset end,
|
||||
UTextOffset pos)
|
||||
: CharacterIterator(),
|
||||
text(text),
|
||||
pos(pos),
|
||||
begin(begin),
|
||||
end(end)
|
||||
StringCharacterIterator::StringCharacterIterator(const UnicodeString& text,
|
||||
UTextOffset pos)
|
||||
: UCharCharacterIterator(text.fArray, text.length(), pos),
|
||||
text(text)
|
||||
{
|
||||
// the Java code checks the parameters and throws exceptions we've
|
||||
// decided to punt on this for the time being because changing this
|
||||
// constructor to accept an error code is an API change with
|
||||
// significant impact
|
||||
// we had set the input parameter's array, now we need to set our copy's array
|
||||
UCharCharacterIterator::text = this->text.fArray;
|
||||
}
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator(const UnicodeString& text,
|
||||
UTextOffset begin,
|
||||
UTextOffset end,
|
||||
UTextOffset pos)
|
||||
: UCharCharacterIterator(text.fArray, text.length(), begin, end, pos),
|
||||
text(text)
|
||||
{
|
||||
// we had set the input parameter's array, now we need to set our copy's array
|
||||
UCharCharacterIterator::text = this->text.fArray;
|
||||
}
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& that)
|
||||
: CharacterIterator(that),
|
||||
text(that.text),
|
||||
pos(that.pos),
|
||||
begin(that.begin),
|
||||
end(that.end)
|
||||
: UCharCharacterIterator(that),
|
||||
text(that.text)
|
||||
{
|
||||
// we had set the input parameter's array, now we need to set our copy's array
|
||||
UCharCharacterIterator::text = this->text.fArray;
|
||||
}
|
||||
|
||||
StringCharacterIterator::~StringCharacterIterator()
|
||||
{}
|
||||
StringCharacterIterator::~StringCharacterIterator() {
|
||||
}
|
||||
|
||||
StringCharacterIterator&
|
||||
StringCharacterIterator::operator=(const StringCharacterIterator& that)
|
||||
{
|
||||
text = that.text;
|
||||
pos = that.pos;
|
||||
begin = that.begin;
|
||||
end = that.end;
|
||||
return *this;
|
||||
StringCharacterIterator::operator=(const StringCharacterIterator& that) {
|
||||
UCharCharacterIterator::operator=(that);
|
||||
text = that.text;
|
||||
// we had set the input parameter's array, now we need to set our copy's array
|
||||
UCharCharacterIterator::text = this->text.fArray;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool_t
|
||||
StringCharacterIterator::operator==(const CharacterIterator& that) const
|
||||
{
|
||||
if (this == &that)
|
||||
return TRUE;
|
||||
|
||||
if (getDynamicClassID() != that.getDynamicClassID())
|
||||
return FALSE;
|
||||
StringCharacterIterator::operator==(const CharacterIterator& that) const {
|
||||
if (this == &that) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
StringCharacterIterator& realThat = (StringCharacterIterator&)that;
|
||||
// do not call UCharCharacterIterator::operator==()
|
||||
// because that checks for array pointer equality
|
||||
// while we compare UnicodeString objects
|
||||
|
||||
return text == realThat.text
|
||||
&& pos == realThat.pos
|
||||
&& begin == realThat.begin
|
||||
&& end == realThat.end;
|
||||
}
|
||||
if (getDynamicClassID() != that.getDynamicClassID()) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
int32_t
|
||||
StringCharacterIterator::hashCode() const
|
||||
{
|
||||
return text.hashCode() ^ pos ^ begin ^ end;
|
||||
StringCharacterIterator& realThat = (StringCharacterIterator&)that;
|
||||
|
||||
return text == realThat.text
|
||||
&& pos == realThat.pos
|
||||
&& begin == realThat.begin
|
||||
&& end == realThat.end;
|
||||
}
|
||||
|
||||
CharacterIterator*
|
||||
StringCharacterIterator::clone() const
|
||||
{
|
||||
return new StringCharacterIterator(*this);
|
||||
}
|
||||
|
||||
UChar
|
||||
StringCharacterIterator::first()
|
||||
{
|
||||
pos = begin;
|
||||
return text.charAt(pos);
|
||||
}
|
||||
|
||||
UChar
|
||||
StringCharacterIterator::last()
|
||||
{
|
||||
pos = end - 1;
|
||||
return text.charAt(pos);
|
||||
}
|
||||
|
||||
UChar
|
||||
StringCharacterIterator::setIndex(UTextOffset pos)
|
||||
{
|
||||
// should check "pos" here and return an error code, but changing
|
||||
// this function would have significant impact across TIFC, so we
|
||||
// decided to hold off
|
||||
this->pos = pos;
|
||||
return text.charAt(pos);
|
||||
}
|
||||
|
||||
UChar
|
||||
StringCharacterIterator::current() const
|
||||
{
|
||||
if (pos >= begin && pos < end)
|
||||
return text.charAt(pos);
|
||||
else
|
||||
return CharacterIterator::DONE;
|
||||
}
|
||||
|
||||
UChar
|
||||
StringCharacterIterator::next()
|
||||
{
|
||||
if(pos < end - 1) {
|
||||
return text.charAt(++pos);
|
||||
}
|
||||
else {
|
||||
pos = end;
|
||||
return CharacterIterator::DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UChar
|
||||
StringCharacterIterator::previous()
|
||||
{
|
||||
if (pos > begin)
|
||||
return text.charAt(--pos);
|
||||
else
|
||||
return DONE;
|
||||
}
|
||||
|
||||
UTextOffset
|
||||
StringCharacterIterator::startIndex() const
|
||||
{
|
||||
return begin;
|
||||
}
|
||||
|
||||
UTextOffset
|
||||
StringCharacterIterator::endIndex() const
|
||||
{
|
||||
return end;
|
||||
}
|
||||
|
||||
UTextOffset
|
||||
StringCharacterIterator::getIndex() const
|
||||
{
|
||||
return pos;
|
||||
StringCharacterIterator::clone() const {
|
||||
return new StringCharacterIterator(*this);
|
||||
}
|
||||
|
||||
void
|
||||
StringCharacterIterator::setText(const UnicodeString& newText)
|
||||
{
|
||||
StringCharacterIterator::setText(const UnicodeString& newText) {
|
||||
text = newText;
|
||||
begin = 0;
|
||||
end = newText.length();
|
||||
pos = begin;
|
||||
UCharCharacterIterator::setText(text.fArray, text.length());
|
||||
}
|
||||
|
||||
void
|
||||
StringCharacterIterator::getText(UnicodeString& result)
|
||||
{
|
||||
result = text;
|
||||
StringCharacterIterator::getText(UnicodeString& result) {
|
||||
result = text;
|
||||
}
|
||||
|
||||
|
|
|
@ -6,20 +6,89 @@
|
|||
*/
|
||||
|
||||
#include "unicode/uchriter.h"
|
||||
#include "uhash.h"
|
||||
|
||||
UCharCharacterIterator::UCharCharacterIterator()
|
||||
: CharacterIterator(),
|
||||
text(0),
|
||||
textLength(0),
|
||||
pos(0),
|
||||
begin(0),
|
||||
end(0)
|
||||
{
|
||||
// never default construct!
|
||||
}
|
||||
|
||||
UCharCharacterIterator::UCharCharacterIterator(const UChar* text,
|
||||
int32_t textLength)
|
||||
int32_t textLength)
|
||||
: CharacterIterator(),
|
||||
text(text),
|
||||
textLength(textLength),
|
||||
pos(0),
|
||||
begin(0),
|
||||
end(textLength)
|
||||
{
|
||||
if(text == 0 || textLength < 0) {
|
||||
textLength = end = 0;
|
||||
}
|
||||
}
|
||||
|
||||
UCharCharacterIterator::UCharCharacterIterator(const UChar* text,
|
||||
int32_t textLength,
|
||||
UTextOffset pos)
|
||||
: CharacterIterator(),
|
||||
text(text),
|
||||
textLength(textLength),
|
||||
pos(pos),
|
||||
begin(0),
|
||||
end(textLength)
|
||||
{
|
||||
if(text == 0 || textLength < 0) {
|
||||
textLength = end = 0;
|
||||
}
|
||||
if(pos < 0) {
|
||||
pos = 0;
|
||||
} else if(pos > end) {
|
||||
pos = end;
|
||||
}
|
||||
}
|
||||
|
||||
UCharCharacterIterator::UCharCharacterIterator(const UChar* text,
|
||||
int32_t textLength,
|
||||
UTextOffset begin,
|
||||
UTextOffset end,
|
||||
UTextOffset pos)
|
||||
: CharacterIterator(),
|
||||
text(text),
|
||||
textLength(textLength),
|
||||
pos(pos),
|
||||
begin(begin),
|
||||
end(end)
|
||||
{
|
||||
if(text == 0 || textLength < 0) {
|
||||
textLength = 0;
|
||||
}
|
||||
if(begin < 0) {
|
||||
begin = 0;
|
||||
} else if(begin > textLength) {
|
||||
begin = textLength;
|
||||
}
|
||||
if(end < begin) {
|
||||
end = begin;
|
||||
} else if(end > textLength) {
|
||||
end = textLength;
|
||||
}
|
||||
if(pos < begin) {
|
||||
pos = begin;
|
||||
} else if(pos > end) {
|
||||
pos = end;
|
||||
}
|
||||
}
|
||||
|
||||
UCharCharacterIterator::UCharCharacterIterator(const UCharCharacterIterator& that)
|
||||
: CharacterIterator(that),
|
||||
text(that.text),
|
||||
textLength(that.textLength),
|
||||
pos(that.pos),
|
||||
begin(that.begin),
|
||||
end(that.end)
|
||||
|
@ -27,134 +96,261 @@ UCharCharacterIterator::UCharCharacterIterator(const UCharCharacterIterator& tha
|
|||
}
|
||||
|
||||
UCharCharacterIterator&
|
||||
UCharCharacterIterator::operator=(const UCharCharacterIterator& that)
|
||||
{
|
||||
UCharCharacterIterator::operator=(const UCharCharacterIterator& that) {
|
||||
text = that.text;
|
||||
textLength = that.textLength;
|
||||
pos = that.pos;
|
||||
begin = that.begin;
|
||||
end = that.end;
|
||||
return *this;
|
||||
}
|
||||
|
||||
UCharCharacterIterator::~UCharCharacterIterator()
|
||||
{}
|
||||
UCharCharacterIterator::~UCharCharacterIterator() {
|
||||
}
|
||||
|
||||
bool_t
|
||||
UCharCharacterIterator::operator==(const CharacterIterator& that) const
|
||||
{
|
||||
if (this == &that)
|
||||
UCharCharacterIterator::operator==(const CharacterIterator& that) const {
|
||||
if (this == &that) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
if (getDynamicClassID() != that.getDynamicClassID())
|
||||
if (getDynamicClassID() != that.getDynamicClassID()) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
UCharCharacterIterator& realThat = (UCharCharacterIterator&)that;
|
||||
|
||||
return text == realThat.text
|
||||
&& textLength == realThat.textLength
|
||||
&& pos == realThat.pos
|
||||
&& begin == realThat.begin
|
||||
&& end == realThat.end;
|
||||
}
|
||||
|
||||
int32_t
|
||||
UCharCharacterIterator::hashCode() const
|
||||
{
|
||||
return pos ^ begin ^ end;
|
||||
UCharCharacterIterator::hashCode() const {
|
||||
return uhash_hashUCharsN(text, textLength) ^ pos ^ begin ^ end;
|
||||
}
|
||||
|
||||
CharacterIterator*
|
||||
UCharCharacterIterator::clone() const
|
||||
{
|
||||
UCharCharacterIterator::clone() const {
|
||||
return new UCharCharacterIterator(*this);
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::first()
|
||||
{
|
||||
UCharCharacterIterator::first() {
|
||||
pos = begin;
|
||||
return text[pos];
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::last()
|
||||
{
|
||||
pos = end - 1;
|
||||
return text[pos];
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::setIndex(UTextOffset pos)
|
||||
{
|
||||
// should check "pos" here and return an error code, but changing this
|
||||
// function would have significant impact across TIFC, so we decided to hold off
|
||||
this->pos = pos;
|
||||
return text[pos];
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::current() const
|
||||
{
|
||||
if (pos >= begin && pos < end)
|
||||
if(pos < end) {
|
||||
return text[pos];
|
||||
else
|
||||
return CharacterIterator::DONE;
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::next()
|
||||
{
|
||||
if (pos < end - 1)
|
||||
{
|
||||
pos += 1;
|
||||
return text[pos];
|
||||
}
|
||||
else
|
||||
{
|
||||
pos = end;
|
||||
return CharacterIterator::DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::previous()
|
||||
{
|
||||
if (pos > begin)
|
||||
return text[--pos];
|
||||
else
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UTextOffset
|
||||
UCharCharacterIterator::startIndex() const
|
||||
{
|
||||
UCharCharacterIterator::setToStart() {
|
||||
return pos = begin;
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::last() {
|
||||
pos = end;
|
||||
if(pos > begin) {
|
||||
return text[--pos];
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UTextOffset
|
||||
UCharCharacterIterator::setToEnd() {
|
||||
return pos = end;
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::setIndex(UTextOffset pos) {
|
||||
if(pos < begin) {
|
||||
pos = begin;
|
||||
} else if(pos > end) {
|
||||
pos = end;
|
||||
}
|
||||
this->pos = pos;
|
||||
if(pos < end) {
|
||||
return text[pos];
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::current() const {
|
||||
if (pos >= begin && pos < end) {
|
||||
return text[pos];
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::next() {
|
||||
if (pos + 1 < end) {
|
||||
return text[++pos];
|
||||
} else {
|
||||
/* make current() return DONE */
|
||||
pos = end;
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::nextPostInc() {
|
||||
if (pos < end) {
|
||||
return text[pos++];
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
bool_t
|
||||
UCharCharacterIterator::hasNext() {
|
||||
return pos < end ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
UChar
|
||||
UCharCharacterIterator::previous() {
|
||||
if (pos > begin) {
|
||||
return text[--pos];
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
bool_t
|
||||
UCharCharacterIterator::hasPrevious() {
|
||||
return pos > begin ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
UChar32
|
||||
UCharCharacterIterator::first32() {
|
||||
pos = begin;
|
||||
if(pos < end) {
|
||||
UTextOffset i = pos;
|
||||
UChar32 c;
|
||||
UTF_NEXT_CHAR(text, i, end, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UChar32
|
||||
UCharCharacterIterator::last32() {
|
||||
pos = end;
|
||||
if(pos > begin) {
|
||||
UChar32 c;
|
||||
UTF_PREV_CHAR(text, begin, pos, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UChar32
|
||||
UCharCharacterIterator::setIndex32(UTextOffset pos) {
|
||||
if(pos < begin) {
|
||||
pos = begin;
|
||||
} else if(pos > end) {
|
||||
pos = end;
|
||||
}
|
||||
if(pos < end) {
|
||||
UTF_SET_CHAR_START(text, begin, pos);
|
||||
UTextOffset i = this->pos = pos;
|
||||
UChar32 c;
|
||||
UTF_NEXT_CHAR(text, i, end, c);
|
||||
return c;
|
||||
} else {
|
||||
this->pos = pos;
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UChar32
|
||||
UCharCharacterIterator::current32() const {
|
||||
if (pos >= begin && pos < end) {
|
||||
UChar32 c;
|
||||
UTF_GET_CHAR(text, begin, pos, end, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UChar32
|
||||
UCharCharacterIterator::next32() {
|
||||
if (pos < end) {
|
||||
UTF_FWD_1(text, pos, end);
|
||||
if(pos < end) {
|
||||
UTextOffset i = pos;
|
||||
UChar32 c;
|
||||
UTF_NEXT_CHAR(text, i, end, c);
|
||||
return c;
|
||||
}
|
||||
}
|
||||
/* make current() return DONE */
|
||||
pos = end;
|
||||
return DONE;
|
||||
}
|
||||
|
||||
UChar32
|
||||
UCharCharacterIterator::next32PostInc() {
|
||||
if (pos < end) {
|
||||
UChar32 c;
|
||||
UTF_NEXT_CHAR(text, pos, end, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UChar32
|
||||
UCharCharacterIterator::previous32() {
|
||||
if (pos > begin) {
|
||||
UChar32 c;
|
||||
UTF_PREV_CHAR(text, begin, pos, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
UTextOffset
|
||||
UCharCharacterIterator::startIndex() const {
|
||||
return begin;
|
||||
}
|
||||
|
||||
UTextOffset
|
||||
UCharCharacterIterator::endIndex() const
|
||||
{
|
||||
UCharCharacterIterator::endIndex() const {
|
||||
return end;
|
||||
}
|
||||
|
||||
UTextOffset
|
||||
UCharCharacterIterator::getIndex() const
|
||||
{
|
||||
UCharCharacterIterator::getIndex() const {
|
||||
return pos;
|
||||
}
|
||||
|
||||
void UCharCharacterIterator::setText(const UChar* newText,
|
||||
int32_t newTextLength)
|
||||
{
|
||||
int32_t newTextLength) {
|
||||
text = newText;
|
||||
begin = 0;
|
||||
end = newTextLength;
|
||||
pos = begin;
|
||||
if(newText == 0 || newTextLength < 0) {
|
||||
newTextLength = 0;
|
||||
}
|
||||
end = textLength = newTextLength;
|
||||
pos = begin = 0;
|
||||
}
|
||||
|
||||
void
|
||||
UCharCharacterIterator::getText(UnicodeString& result)
|
||||
{
|
||||
result = UnicodeString(text, end);
|
||||
UCharCharacterIterator::getText(UnicodeString& result) {
|
||||
result = UnicodeString(text, textLength);
|
||||
}
|
||||
|
||||
char UCharCharacterIterator::fgClassID = 0;
|
||||
|
|
|
@ -88,7 +88,7 @@ public:
|
|||
/**
|
||||
* Value returned by most of CharacterIterator's functions
|
||||
* when the iterator has reached the limits of its iteration. */
|
||||
static const UChar DONE;
|
||||
enum { DONE = 0xffff };
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
|
@ -128,49 +128,120 @@ public:
|
|||
virtual int32_t hashCode(void) const = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first character in its
|
||||
* iteration range, and returns that character,
|
||||
* Sets the iterator to refer to the first code unit in its
|
||||
* iteration range, and returns that code unit,
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar first(void) = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last character in its
|
||||
* iteration range, and returns that character.
|
||||
* Sets the iterator to refer to the first code point in its
|
||||
* iteration range, and returns that code point,
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 first32(void) = 0;
|
||||
|
||||
virtual UTextOffset setToStart() = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last code unit in its
|
||||
* iteration range, and returns that code unit.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar last(void) = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th character
|
||||
* Sets the iterator to refer to the last code point in its
|
||||
* iteration range, and returns that code point.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 last32(void) = 0;
|
||||
|
||||
virtual UTextOffset setToEnd() = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th code unit
|
||||
* in the text-storage object the iterator refers to, and
|
||||
* returns that character.
|
||||
* returns that code unit.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar setIndex(UTextOffset position) = 0;
|
||||
|
||||
/**
|
||||
* Returns the character the iterator currently refers to.
|
||||
* Sets the iterator to refer to the beginning of the code point
|
||||
* that contains the "position"-th code unit
|
||||
* in the text-storage object the iterator refers to, and
|
||||
* returns that code point.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 setIndex32(UTextOffset position) = 0;
|
||||
|
||||
/**
|
||||
* Returns the code unit the iterator currently refers to.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar current(void) const = 0;
|
||||
|
||||
/**
|
||||
* Advances to the next character in the iteration range
|
||||
* (toward last()), and returns that character. If there are
|
||||
* no more characters to return, returns DONE.
|
||||
* Returns the code point the iterator currently refers to.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 current32(void) const = 0;
|
||||
|
||||
/**
|
||||
* Advances to the next code unit in the iteration range
|
||||
* (toward last()), and returns that code unit. If there are
|
||||
* no more code units to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar next(void) = 0;
|
||||
|
||||
/**
|
||||
* Advances to the previous character in the iteration rance
|
||||
* (toward first()), and returns that character. If there are
|
||||
* no more characters to return, returns DONE.
|
||||
* Gets the current code unit for returning and advances to the next code unit
|
||||
* in the iteration range
|
||||
* (toward last()). If there are
|
||||
* no more code units to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar nextPostInc(void) = 0;
|
||||
|
||||
/**
|
||||
* Advances to the next code point in the iteration range
|
||||
* (toward last()), and returns that code point. If there are
|
||||
* no more code points to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 next32(void) = 0;
|
||||
|
||||
/**
|
||||
* Gets the current code point for returning and advances to the next code point
|
||||
* in the iteration range
|
||||
* (toward last()). If there are
|
||||
* no more code points to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 next32PostInc(void) = 0;
|
||||
|
||||
virtual bool_t hasNext() = 0;
|
||||
|
||||
/**
|
||||
* Advances to the previous code unit in the iteration range
|
||||
* (toward first()), and returns that code unit. If there are
|
||||
* no more code units to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar previous(void) = 0;
|
||||
|
||||
/**
|
||||
* Advances to the previous code point in the iteration range
|
||||
* (toward first()), and returns that code point. If there are
|
||||
* no more code points to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 previous32(void) = 0;
|
||||
|
||||
virtual bool_t hasPrevious() = 0;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying text-storage
|
||||
* object of the character returned by first(). Since it's
|
||||
|
@ -221,6 +292,3 @@ protected:
|
|||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -20,18 +20,22 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/uchriter.h"
|
||||
|
||||
/**
|
||||
* A concrete subclass of CharacterIterator that iterates over the
|
||||
* characters in a UnicodeString. It's possible not only to create an
|
||||
* characters (code units or code points) in a UnicodeString.
|
||||
* It's possible not only to create an
|
||||
* iterator that iterates over an entire UnicodeString, but also to
|
||||
* create only that iterates over only a subrange of a UnicodeString
|
||||
* create one that iterates over only a subrange of a UnicodeString
|
||||
* (iterators over different subranges of the same UnicodeString don't
|
||||
* compare equal). */
|
||||
class U_COMMON_API StringCharacterIterator : public CharacterIterator {
|
||||
* compare equal).
|
||||
*/
|
||||
class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator {
|
||||
public:
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The UnicodeString object is copied.
|
||||
* The iteration range is the whole string, and the starting position is 0.
|
||||
* @stable
|
||||
*/
|
||||
|
@ -49,8 +53,9 @@ public:
|
|||
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The iteration range begins with the character specified by
|
||||
* "begin" and ends with the character BEFORE the character specfied
|
||||
* The UnicodeString object is copied.
|
||||
* The iteration range begins with the code unit specified by
|
||||
* "begin" and ends with the code unit BEFORE the code unit specified
|
||||
* by "end". The starting position is specified by "pos". If
|
||||
* "begin" and "end" don't form a valid range on "text" (i.e., begin
|
||||
* >= end or either is negative or greater than text.size()), or
|
||||
|
@ -67,6 +72,7 @@ public:
|
|||
* Copy constructor. The new iterator iterates over the same range
|
||||
* of the same string as "that", and its initial position is the
|
||||
* same as "that"'s current position.
|
||||
* The UnicodeString object in "that" is copied.
|
||||
* @stable
|
||||
*/
|
||||
StringCharacterIterator(const StringCharacterIterator& that);
|
||||
|
@ -78,7 +84,7 @@ public:
|
|||
virtual ~StringCharacterIterator();
|
||||
|
||||
/**
|
||||
* Assignment operator. *this is altered to iterate over the sane
|
||||
* Assignment operator. *this is altered to iterate over the same
|
||||
* range of the same string as "that", and refers to the same
|
||||
* character within that string as "that" does.
|
||||
* @stable
|
||||
|
@ -93,12 +99,6 @@ public:
|
|||
*/
|
||||
virtual bool_t operator==(const CharacterIterator& that) const;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator.
|
||||
* @stable
|
||||
*/
|
||||
virtual int32_t hashCode(void) const;
|
||||
|
||||
/**
|
||||
* Returns a new StringCharacterIterator referring to the same
|
||||
* character in the same range of the same string as this one. The
|
||||
|
@ -107,79 +107,12 @@ public:
|
|||
*/
|
||||
virtual CharacterIterator* clone(void) const;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first character in its
|
||||
* iteration range, and returns that character,
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar first(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last character in its iteration
|
||||
* range, and returns that character.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar last(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th character in the
|
||||
* UnicodeString the iterator refers to, and returns that character.
|
||||
* If the index is outside the iterator's iteration range, the
|
||||
* behavior of the iterator is undefined.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar setIndex(UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Returns the character the iterator currently refers to.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar current(void) const;
|
||||
|
||||
/**
|
||||
* Advances to the next character in the iteration range (toward
|
||||
* last()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar next(void);
|
||||
|
||||
/**
|
||||
* Advances to the previous character in the iteration rance (toward
|
||||
* first()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar previous(void);
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the first character in this
|
||||
* iterator's iteration range.
|
||||
* @stable
|
||||
*/
|
||||
virtual UTextOffset startIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the character immediately BEYOND the
|
||||
* last character in this iterator's iteration range.
|
||||
* @stable
|
||||
*/
|
||||
virtual UTextOffset endIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying UnicodeString of the
|
||||
* character the iterator currently refers to (i.e., the character
|
||||
* returned by current()).
|
||||
* @stable
|
||||
*/
|
||||
virtual UTextOffset getIndex(void) const;
|
||||
|
||||
/**
|
||||
* Sets the iterator to iterate over the provided string.
|
||||
* @draft
|
||||
*/
|
||||
virtual void setText(const UnicodeString& newText);
|
||||
|
||||
void setText(const UnicodeString& newText);
|
||||
|
||||
/**
|
||||
* Copies the UnicodeString under iteration into the UnicodeString
|
||||
* referred to by "result". Even if this iterator iterates across
|
||||
|
@ -203,19 +136,13 @@ public:
|
|||
static UClassID getStaticClassID(void)
|
||||
{ return (UClassID)(&fgClassID); }
|
||||
|
||||
private:
|
||||
protected:
|
||||
StringCharacterIterator();
|
||||
void setText(const UChar* newText, int32_t newTextLength);
|
||||
|
||||
UnicodeString text;
|
||||
UTextOffset pos;
|
||||
UTextOffset begin;
|
||||
UTextOffset end;
|
||||
|
||||
static UClassID fgClassID;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -14,21 +14,53 @@
|
|||
|
||||
/**
|
||||
* A concrete subclass of CharacterIterator that iterates over the
|
||||
* characters in a UnicodeString. It's possible not only to create an
|
||||
* iterator that iterates over an entire UnicodeString, but also to
|
||||
* create only that iterates over only a subrange of a UnicodeString
|
||||
* (iterators over different subranges of the same UnicodeString don't
|
||||
* compare equal). */
|
||||
* characters (code units or code points) in a UChar array.
|
||||
* It's possible not only to create an
|
||||
* iterator that iterates over an entire UChar array, but also to
|
||||
* create one that iterates over only a subrange of a UChar array
|
||||
* (iterators over different subranges of the same UChar array don't
|
||||
* compare equal).
|
||||
*/
|
||||
class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
|
||||
public:
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The iteration range is the whole string, and the starting
|
||||
* position is 0.
|
||||
* Create an iterator over the UChar array referred to by "text".
|
||||
* The iteration range is 0 to <code>len-1</code>.
|
||||
* text is only aliased, not adopted (the
|
||||
* destructor will not delete it).
|
||||
* @stable
|
||||
*/
|
||||
UCharCharacterIterator(const UChar* text, int32_t len);
|
||||
|
||||
/**
|
||||
* Create an iterator over the UChar array referred to by "text".
|
||||
* The iteration range is 0 to <code>len-1</code>.
|
||||
* text is only aliased, not adopted (the
|
||||
* destructor will not delete it).
|
||||
* The starting
|
||||
* position is specified by "pos". If "pos" is outside the valid
|
||||
* iteration range, the behavior of this object is undefined.
|
||||
* @stable
|
||||
*/
|
||||
UCharCharacterIterator(const UChar* text, int32_t len,
|
||||
UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Create an iterator over the UChar array referred to by "text".
|
||||
* The iteration range is <code>begin</code> to <code>end-1</code>.
|
||||
* text is only aliased, not adopted (the
|
||||
* destructor will not delete it).
|
||||
* The starting
|
||||
* position is specified by "pos". If begin and end do not
|
||||
* form a valid iteration range or "pos" is outside the valid
|
||||
* iteration range, the behavior of this object is undefined.
|
||||
* @stable
|
||||
*/
|
||||
UCharCharacterIterator(const UChar* text, int32_t len,
|
||||
UTextOffset begin,
|
||||
UTextOffset end,
|
||||
UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Copy constructor. The new iterator iterates over the same range
|
||||
* of the same string as "that", and its initial position is the
|
||||
|
@ -66,7 +98,7 @@ public:
|
|||
virtual int32_t hashCode(void) const;
|
||||
|
||||
/**
|
||||
* Returns a new StringCharacterIterator referring to the same
|
||||
* Returns a new UCharCharacterIterator referring to the same
|
||||
* character in the same range of the same string as this one. The
|
||||
* caller must delete the new iterator.
|
||||
* @stable
|
||||
|
@ -74,22 +106,40 @@ public:
|
|||
virtual CharacterIterator* clone(void) const;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first character in its
|
||||
* iteration range, and returns that character.
|
||||
* Sets the iterator to refer to the first code unit in its
|
||||
* iteration range, and returns that code unit.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar first(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last character in its iteration
|
||||
* range, and returns that character.
|
||||
* Sets the iterator to refer to the first code point in its
|
||||
* iteration range, and returns that code point.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 first32(void);
|
||||
|
||||
virtual UTextOffset setToStart();
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last code unit in its iteration
|
||||
* range, and returns that code unit.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar last(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th character in the
|
||||
* UnicodeString the iterator refers to, and returns that character.
|
||||
* Sets the iterator to refer to the last code point in its iteration
|
||||
* range, and returns that code point.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 last32(void);
|
||||
|
||||
virtual UTextOffset setToEnd();
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th code unit in the
|
||||
* UChar array the iterator refers to, and returns that code unit.
|
||||
* If the index is outside the iterator's iteration range, the
|
||||
* behavior of the iterator is undefined.
|
||||
* @draft
|
||||
|
@ -97,44 +147,83 @@ public:
|
|||
virtual UChar setIndex(UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Returns the character the iterator currently refers to.
|
||||
* Sets the iterator to refer to the "position"-th code point in the
|
||||
* UChar array the iterator refers to, and returns that code point.
|
||||
* If the index is outside the iterator's iteration range, the
|
||||
* behavior of the iterator is undefined.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 setIndex32(UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Returns the code unit the iterator currently refers to.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar current(void) const;
|
||||
|
||||
/**
|
||||
* Advances to the next character in the iteration range (toward
|
||||
* last()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE.
|
||||
* Returns the code point the iterator currently refers to.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 current32(void) const;
|
||||
|
||||
/**
|
||||
* Advances to the next code unit in the iteration range (toward
|
||||
* last()), and returns that code unit. If there are no more
|
||||
* code units to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar next(void);
|
||||
|
||||
virtual UChar nextPostInc(void);
|
||||
|
||||
/**
|
||||
* Advances to the previous character in the iteration range (toward
|
||||
* first()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE.
|
||||
* Advances to the next code point in the iteration range (toward
|
||||
* last()), and returns that code point. If there are no more
|
||||
* code points to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 next32(void);
|
||||
|
||||
virtual UChar32 next32PostInc(void);
|
||||
|
||||
virtual bool_t hasNext();
|
||||
|
||||
/**
|
||||
* Advances to the previous code unit in the iteration range (toward
|
||||
* first()), and returns that code unit. If there are no more
|
||||
* code units to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar previous(void);
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the first character in this
|
||||
* Advances to the previous code point in the iteration range (toward
|
||||
* first()), and returns that code point. If there are no more
|
||||
* code points to return, returns DONE.
|
||||
* @draft
|
||||
*/
|
||||
virtual UChar32 previous32(void);
|
||||
|
||||
virtual bool_t hasPrevious();
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the first code unit in this
|
||||
* iterator's iteration range.
|
||||
* @stable
|
||||
*/
|
||||
virtual UTextOffset startIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the character immediately BEYOND the
|
||||
* last character in this iterator's iteration range.
|
||||
* Returns the numeric index of the code unit immediately BEYOND the
|
||||
* last code unit in this iterator's iteration range.
|
||||
* @stable
|
||||
*/
|
||||
virtual UTextOffset endIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying UnicodeString of the
|
||||
* character the iterator currently refers to (i.e., the character
|
||||
* Returns the numeric index in the underlying UChar array of the
|
||||
* code unit the iterator currently refers to (i.e., the code unit
|
||||
* returned by current()).
|
||||
* @stable
|
||||
*/
|
||||
|
@ -144,11 +233,10 @@ public:
|
|||
* Sets the iterator to iterate over a new range of text
|
||||
* @draft
|
||||
*/
|
||||
virtual void setText(const UChar* newText,
|
||||
int32_t newTextLength);
|
||||
void setText(const UChar* newText, int32_t newTextLength);
|
||||
|
||||
/**
|
||||
* Copies the UnicodeString under iteration into the UnicodeString
|
||||
* Copies the UChar array under iteration into the UnicodeString
|
||||
* referred to by "result". Even if this iterator iterates across
|
||||
* only a part of this string, the whole string is copied. @param
|
||||
* result Receives a copy of the text under iteration.
|
||||
|
@ -170,10 +258,11 @@ public:
|
|||
static UClassID getStaticClassID(void)
|
||||
{ return (UClassID)(&fgClassID); }
|
||||
|
||||
private:
|
||||
protected:
|
||||
UCharCharacterIterator();
|
||||
|
||||
const UChar* text;
|
||||
int32_t textLength; // need this for correct getText() and hashCode()
|
||||
UTextOffset pos;
|
||||
UTextOffset begin;
|
||||
UTextOffset end;
|
||||
|
@ -182,6 +271,3 @@ private:
|
|||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue