ICU-176 UTF-16 support with CharacterIterator; new functions for more efficient iteration

X-SVN-Rev: 1117
This commit is contained in:
Markus Scherer 2000-04-12 19:36:30 +00:00
parent 2b2af0bbc5
commit 7c2d19d828
6 changed files with 553 additions and 372 deletions

View file

@ -8,7 +8,5 @@
#include "unicode/chariter.h"
const UChar CharacterIterator::DONE = 0xffff;
CharacterIterator::~CharacterIterator()
{}

View file

@ -20,188 +20,94 @@
UClassID StringCharacterIterator::fgClassID = 0;
StringCharacterIterator::StringCharacterIterator()
: CharacterIterator(),
text(),
pos(0),
begin(0),
end(0)
: UCharCharacterIterator(),
text()
{
// NEVER DEFAULT CONSTRUCT!
}
StringCharacterIterator::StringCharacterIterator(const UnicodeString& text)
: CharacterIterator(),
text(text),
pos(0),
begin(0),
end(text.length())
{}
StringCharacterIterator::StringCharacterIterator(const UnicodeString& text,
UTextOffset pos)
: CharacterIterator(),
text(text),
pos(pos),
begin(0),
end(text.length())
: UCharCharacterIterator(text.fArray, text.length()),
text(text)
{
// the Java code checks the parameters and throws exceptions we've
// decided to punt on this for the time being because changing this
// constructor to accept an error code is an API change with
// significant impact
// we had set the input parameter's array, now we need to set our copy's array
UCharCharacterIterator::text = this->text.fArray;
}
StringCharacterIterator::StringCharacterIterator(const UnicodeString& text,
UTextOffset begin,
UTextOffset end,
UTextOffset pos)
: CharacterIterator(),
text(text),
pos(pos),
begin(begin),
end(end)
StringCharacterIterator::StringCharacterIterator(const UnicodeString& text,
UTextOffset pos)
: UCharCharacterIterator(text.fArray, text.length(), pos),
text(text)
{
// the Java code checks the parameters and throws exceptions we've
// decided to punt on this for the time being because changing this
// constructor to accept an error code is an API change with
// significant impact
// we had set the input parameter's array, now we need to set our copy's array
UCharCharacterIterator::text = this->text.fArray;
}
// Constructs an iterator over a private copy of "text". The begin, end and
// pos arguments are passed through unchanged to the UCharCharacterIterator
// constructor, which receives the caller's string array and length.
StringCharacterIterator::StringCharacterIterator(const UnicodeString& text,
UTextOffset begin,
UTextOffset end,
UTextOffset pos)
// The base class is set up from the caller's string (text.fArray); the
// member "text" is then copy-constructed from the same argument.
: UCharCharacterIterator(text.fArray, text.length(), begin, end, pos),
text(text)
{
// we had set the input parameter's array, now we need to set our copy's array
// (inside this constructor a bare "text" names the parameter, hence this->text)
UCharCharacterIterator::text = this->text.fArray;
}
StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& that)
: CharacterIterator(that),
text(that.text),
pos(that.pos),
begin(that.begin),
end(that.end)
: UCharCharacterIterator(that),
text(that.text)
{
// we had set the input parameter's array, now we need to set our copy's array
UCharCharacterIterator::text = this->text.fArray;
}
StringCharacterIterator::~StringCharacterIterator()
{}
StringCharacterIterator::~StringCharacterIterator() {
}
StringCharacterIterator&
StringCharacterIterator::operator=(const StringCharacterIterator& that)
{
text = that.text;
pos = that.pos;
begin = that.begin;
end = that.end;
return *this;
StringCharacterIterator::operator=(const StringCharacterIterator& that) {
UCharCharacterIterator::operator=(that);
text = that.text;
// we had set the input parameter's array, now we need to set our copy's array
UCharCharacterIterator::text = this->text.fArray;
return *this;
}
bool_t
StringCharacterIterator::operator==(const CharacterIterator& that) const
{
if (this == &that)
return TRUE;
if (getDynamicClassID() != that.getDynamicClassID())
return FALSE;
StringCharacterIterator::operator==(const CharacterIterator& that) const {
if (this == &that) {
return TRUE;
}
StringCharacterIterator& realThat = (StringCharacterIterator&)that;
// do not call UCharCharacterIterator::operator==()
// because that checks for array pointer equality
// while we compare UnicodeString objects
return text == realThat.text
&& pos == realThat.pos
&& begin == realThat.begin
&& end == realThat.end;
}
if (getDynamicClassID() != that.getDynamicClassID()) {
return FALSE;
}
int32_t
StringCharacterIterator::hashCode() const
{
return text.hashCode() ^ pos ^ begin ^ end;
StringCharacterIterator& realThat = (StringCharacterIterator&)that;
return text == realThat.text
&& pos == realThat.pos
&& begin == realThat.begin
&& end == realThat.end;
}
CharacterIterator*
StringCharacterIterator::clone() const
{
return new StringCharacterIterator(*this);
}
UChar
StringCharacterIterator::first()
{
pos = begin;
return text.charAt(pos);
}
UChar
StringCharacterIterator::last()
{
pos = end - 1;
return text.charAt(pos);
}
UChar
StringCharacterIterator::setIndex(UTextOffset pos)
{
// should check "pos" here and return an error code, but changing
// this function would have significant impact across TIFC, so we
// decided to hold off
this->pos = pos;
return text.charAt(pos);
}
UChar
StringCharacterIterator::current() const
{
if (pos >= begin && pos < end)
return text.charAt(pos);
else
return CharacterIterator::DONE;
}
UChar
StringCharacterIterator::next()
{
if(pos < end - 1) {
return text.charAt(++pos);
}
else {
pos = end;
return CharacterIterator::DONE;
}
}
UChar
StringCharacterIterator::previous()
{
if (pos > begin)
return text.charAt(--pos);
else
return DONE;
}
UTextOffset
StringCharacterIterator::startIndex() const
{
return begin;
}
UTextOffset
StringCharacterIterator::endIndex() const
{
return end;
}
UTextOffset
StringCharacterIterator::getIndex() const
{
return pos;
StringCharacterIterator::clone() const {
return new StringCharacterIterator(*this);
}
void
StringCharacterIterator::setText(const UnicodeString& newText)
{
StringCharacterIterator::setText(const UnicodeString& newText) {
text = newText;
begin = 0;
end = newText.length();
pos = begin;
UCharCharacterIterator::setText(text.fArray, text.length());
}
void
StringCharacterIterator::getText(UnicodeString& result)
{
result = text;
StringCharacterIterator::getText(UnicodeString& result) {
result = text;
}

View file

@ -6,20 +6,89 @@
*/
#include "unicode/uchriter.h"
#include "uhash.h"
// Default constructor: no text (null pointer, zero length) and an empty
// iteration range with the position at 0.
UCharCharacterIterator::UCharCharacterIterator()
  : CharacterIterator(),
    text(0), textLength(0),
    pos(0), begin(0), end(0)
{
    // never default construct!
}
// Creates an iterator over the UChar array "text" of "textLength" code units.
// The iteration range is the whole array and the starting position is 0.
// A null "text" or a negative "textLength" yields an empty iterator
// (stored length 0, end 0).
UCharCharacterIterator::UCharCharacterIterator(const UChar* text,
                                               int32_t textLength)
  : CharacterIterator(),
    text(text),
    textLength(textLength),
    pos(0),
    begin(0),
    end(textLength)
{
    if(text == 0 || textLength < 0) {
        // The parameter "textLength" shadows the member of the same name, so
        // the member has to be addressed through this-> for the reset to
        // stick; "end" is not shadowed here and names the member directly.
        this->textLength = end = 0;
    }
}
// Creates an iterator over the UChar array "text" of "textLength" code units
// with the starting position "pos". The iteration range is the whole array.
// A null "text" or a negative "textLength" yields an empty iterator, and
// "pos" is pinned into [0, end].
UCharCharacterIterator::UCharCharacterIterator(const UChar* text,
                                               int32_t textLength,
                                               UTextOffset pos)
  : CharacterIterator(),
    text(text),
    textLength(textLength),
    pos(pos),
    begin(0),
    end(textLength)
{
    // The parameters "textLength" and "pos" shadow the members of the same
    // names; every correction below goes through this-> so that it changes
    // the object and not just the local argument copy. "end" is not shadowed
    // and always names the member.
    if(text == 0 || textLength < 0) {
        this->textLength = end = 0;
    }
    if(pos < 0) {
        this->pos = 0;
    } else if(pos > end) {
        this->pos = end;
    }
}
// Creates an iterator over the UChar array "text" of "textLength" code units,
// restricted to the range [begin, end) and starting at "pos".
// Out-of-range arguments are pinned: a null "text" or negative "textLength"
// gives length 0, "begin" is pinned into [0, textLength], "end" into
// [begin, textLength] and "pos" into [begin, end].
UCharCharacterIterator::UCharCharacterIterator(const UChar* text,
                                               int32_t textLength,
                                               UTextOffset begin,
                                               UTextOffset end,
                                               UTextOffset pos)
  : CharacterIterator(),
    text(text),
    textLength(textLength),
    pos(pos),
    begin(begin),
    end(end)
{
    // Every name used below is a parameter that shadows the member of the
    // same name. The arguments are normalized first and then written back to
    // the members; without the write-back the members would keep the raw,
    // unchecked values from the initializer list.
    if(text == 0 || textLength < 0) {
        textLength = 0;
    }
    if(begin < 0) {
        begin = 0;
    } else if(begin > textLength) {
        begin = textLength;
    }
    if(end < begin) {
        end = begin;
    } else if(end > textLength) {
        end = textLength;
    }
    if(pos < begin) {
        pos = begin;
    } else if(pos > end) {
        pos = end;
    }

    this->textLength = textLength;
    this->begin = begin;
    this->end = end;
    this->pos = pos;
}
UCharCharacterIterator::UCharCharacterIterator(const UCharCharacterIterator& that)
: CharacterIterator(that),
text(that.text),
textLength(that.textLength),
pos(that.pos),
begin(that.begin),
end(that.end)
@ -27,134 +96,261 @@ UCharCharacterIterator::UCharCharacterIterator(const UCharCharacterIterator& tha
}
UCharCharacterIterator&
UCharCharacterIterator::operator=(const UCharCharacterIterator& that)
{
UCharCharacterIterator::operator=(const UCharCharacterIterator& that) {
text = that.text;
textLength = that.textLength;
pos = that.pos;
begin = that.begin;
end = that.end;
return *this;
}
UCharCharacterIterator::~UCharCharacterIterator()
{}
UCharCharacterIterator::~UCharCharacterIterator() {
}
bool_t
UCharCharacterIterator::operator==(const CharacterIterator& that) const
{
if (this == &that)
UCharCharacterIterator::operator==(const CharacterIterator& that) const {
if (this == &that) {
return TRUE;
}
if (getDynamicClassID() != that.getDynamicClassID())
if (getDynamicClassID() != that.getDynamicClassID()) {
return FALSE;
}
UCharCharacterIterator& realThat = (UCharCharacterIterator&)that;
return text == realThat.text
&& textLength == realThat.textLength
&& pos == realThat.pos
&& begin == realThat.begin
&& end == realThat.end;
}
int32_t
UCharCharacterIterator::hashCode() const
{
return pos ^ begin ^ end;
UCharCharacterIterator::hashCode() const {
return uhash_hashUCharsN(text, textLength) ^ pos ^ begin ^ end;
}
CharacterIterator*
UCharCharacterIterator::clone() const
{
UCharCharacterIterator::clone() const {
return new UCharCharacterIterator(*this);
}
UChar
UCharCharacterIterator::first()
{
UCharCharacterIterator::first() {
pos = begin;
return text[pos];
}
UChar
UCharCharacterIterator::last()
{
pos = end - 1;
return text[pos];
}
UChar
UCharCharacterIterator::setIndex(UTextOffset pos)
{
// should check "pos" here and return an error code, but changing this
// function would have significant impact across TIFC, so we decided to hold off
this->pos = pos;
return text[pos];
}
UChar
UCharCharacterIterator::current() const
{
if (pos >= begin && pos < end)
if(pos < end) {
return text[pos];
else
return CharacterIterator::DONE;
}
UChar
UCharCharacterIterator::next()
{
if (pos < end - 1)
{
pos += 1;
return text[pos];
}
else
{
pos = end;
return CharacterIterator::DONE;
}
}
UChar
UCharCharacterIterator::previous()
{
if (pos > begin)
return text[--pos];
else
} else {
return DONE;
}
}
// Moves the position to the start of the iteration range and returns the
// new position.
UTextOffset
UCharCharacterIterator::setToStart() {
    pos = begin;
    return pos;
}
// Positions the iterator on the last code unit of the iteration range and
// returns it. For an empty range the position is left at end and DONE is
// returned.
UChar
UCharCharacterIterator::last() {
    pos = end;
    if(pos <= begin) {
        return DONE;
    }
    --pos;
    return text[pos];
}
// Moves the position to the end of the iteration range (one past the last
// code unit) and returns the new position.
UTextOffset
UCharCharacterIterator::setToEnd() {
    pos = end;
    return pos;
}
// Sets the position to "pos", pinned into [begin, end], and returns the code
// unit at the new position, or DONE when the new position is end.
UChar
UCharCharacterIterator::setIndex(UTextOffset pos) {
    // "pos" is the argument here; the member is this->pos.
    if(pos < begin) {
        pos = begin;
    } else if(pos > end) {
        pos = end;
    }
    this->pos = pos;

    if(pos >= end) {
        return DONE;
    }
    return text[pos];
}
// Returns the code unit at the current position, or DONE when the position
// lies outside [begin, end).
UChar
UCharCharacterIterator::current() const {
    if(pos < begin || pos >= end) {
        return DONE;
    }
    return text[pos];
}
// Pre-increment step: advances by one code unit and returns the code unit at
// the new position. If that would reach or pass end, the position is set to
// end and DONE is returned.
UChar
UCharCharacterIterator::next() {
    if(pos + 1 >= end) {
        /* make current() return DONE */
        pos = end;
        return DONE;
    }
    ++pos;
    return text[pos];
}
// Post-increment step: returns the code unit at the current position and
// then advances by one. Returns DONE, without moving, when the position is
// already at or past end.
UChar
UCharCharacterIterator::nextPostInc() {
    if(pos >= end) {
        return DONE;
    }
    const UChar unit = text[pos];
    ++pos;
    return unit;
}
// Returns TRUE while the position is before end, FALSE otherwise.
bool_t
UCharCharacterIterator::hasNext() {
    if(pos < end) {
        return TRUE;
    }
    return FALSE;
}
// Steps back by one code unit and returns the code unit at the new position.
// Returns DONE, without moving, when the position is already at begin.
UChar
UCharCharacterIterator::previous() {
    if(pos <= begin) {
        return DONE;
    }
    --pos;
    return text[pos];
}
// Returns TRUE while the position is after begin, FALSE otherwise.
bool_t
UCharCharacterIterator::hasPrevious() {
    if(pos > begin) {
        return TRUE;
    }
    return FALSE;
}
// Moves the position to begin and returns the code point found there, or
// DONE when the iteration range is empty.
UChar32
UCharCharacterIterator::first32() {
    pos = begin;
    if(pos >= end) {
        return DONE;
    }
    // Decode through a scratch index so that pos itself is not handed to the
    // macro and stays at begin.
    UTextOffset scratch = pos;
    UChar32 codePoint;
    UTF_NEXT_CHAR(text, scratch, end, codePoint);
    return codePoint;
}
// Moves the position to end and reads the code point before it via
// UTF_PREV_CHAR, which is handed pos directly (presumably leaving pos at the
// start of that code point -- confirm against the macro definition).
// Returns DONE, with the position at end, when the range is empty.
UChar32
UCharCharacterIterator::last32() {
    pos = end;
    if(pos <= begin) {
        return DONE;
    }
    UChar32 codePoint;
    UTF_PREV_CHAR(text, begin, pos, codePoint);
    return codePoint;
}
// Sets the position from "pos", pinned into [begin, end]. When the pinned
// value is before end it is first adjusted with UTF_SET_CHAR_START, and the
// code point read from the adjusted position is returned; otherwise the
// position becomes end and DONE is returned.
UChar32
UCharCharacterIterator::setIndex32(UTextOffset pos) {
    // "pos" is the argument here; the member is this->pos.
    if(pos < begin) {
        pos = begin;
    } else if(pos > end) {
        pos = end;
    }

    if(pos >= end) {
        this->pos = pos;
        return DONE;
    }

    UTF_SET_CHAR_START(text, begin, pos);
    this->pos = pos;
    // Decode through a scratch index so the stored position is not handed to
    // the macro.
    UTextOffset scratch = pos;
    UChar32 codePoint;
    UTF_NEXT_CHAR(text, scratch, end, codePoint);
    return codePoint;
}
// Returns the code point read by UTF_GET_CHAR at the current position, or
// DONE when the position lies outside [begin, end).
UChar32
UCharCharacterIterator::current32() const {
    if(pos < begin || pos >= end) {
        return DONE;
    }
    UChar32 codePoint;
    UTF_GET_CHAR(text, begin, pos, end, codePoint);
    return codePoint;
}
// Pre-increment step for code points: moves forward with UTF_FWD_1 and
// returns the code point at the new position. When there is nothing left to
// step onto, the position is set to end and DONE is returned.
UChar32
UCharCharacterIterator::next32() {
    if(pos >= end) {
        /* make current() return DONE */
        pos = end;
        return DONE;
    }

    UTF_FWD_1(text, pos, end);
    if(pos >= end) {
        /* make current() return DONE */
        pos = end;
        return DONE;
    }

    // Decode through a scratch index so that pos is not handed to the macro.
    UTextOffset scratch = pos;
    UChar32 codePoint;
    UTF_NEXT_CHAR(text, scratch, end, codePoint);
    return codePoint;
}
// Post-increment step for code points: reads the code point at the current
// position with UTF_NEXT_CHAR, which is handed pos directly (presumably
// advancing it past the code point -- confirm against the macro definition).
// Returns DONE, without touching pos, when the position is at or past end.
UChar32
UCharCharacterIterator::next32PostInc() {
    if(pos >= end) {
        return DONE;
    }
    UChar32 codePoint;
    UTF_NEXT_CHAR(text, pos, end, codePoint);
    return codePoint;
}
// Backward step for code points: reads the code point before the current
// position with UTF_PREV_CHAR, which is handed pos directly (presumably
// moving it back to the start of that code point -- confirm against the
// macro definition). Returns DONE, without touching pos, when the position
// is already at begin.
UChar32
UCharCharacterIterator::previous32() {
    if(pos <= begin) {
        return DONE;
    }
    UChar32 codePoint;
    UTF_PREV_CHAR(text, begin, pos, codePoint);
    return codePoint;
}
// Returns the index at which the iteration range starts.
UTextOffset
UCharCharacterIterator::startIndex() const {
    return this->begin;
}
UTextOffset
UCharCharacterIterator::endIndex() const
{
UCharCharacterIterator::endIndex() const {
return end;
}
UTextOffset
UCharCharacterIterator::getIndex() const
{
UCharCharacterIterator::getIndex() const {
return pos;
}
void UCharCharacterIterator::setText(const UChar* newText,
int32_t newTextLength)
{
int32_t newTextLength) {
text = newText;
begin = 0;
end = newTextLength;
pos = begin;
if(newText == 0 || newTextLength < 0) {
newTextLength = 0;
}
end = textLength = newTextLength;
pos = begin = 0;
}
void
UCharCharacterIterator::getText(UnicodeString& result)
{
result = UnicodeString(text, end);
UCharCharacterIterator::getText(UnicodeString& result) {
result = UnicodeString(text, textLength);
}
char UCharCharacterIterator::fgClassID = 0;

View file

@ -88,7 +88,7 @@ public:
/**
* Value returned by most of CharacterIterator's functions
* when the iterator has reached the limits of its iteration. */
static const UChar DONE;
enum { DONE = 0xffff };
/**
* Destructor.
@ -128,49 +128,120 @@ public:
virtual int32_t hashCode(void) const = 0;
/**
* Sets the iterator to refer to the first character in its
* iteration range, and returns that character,
* Sets the iterator to refer to the first code unit in its
* iteration range, and returns that code unit,
* @draft
*/
virtual UChar first(void) = 0;
/**
* Sets the iterator to refer to the last character in its
* iteration range, and returns that character.
* Sets the iterator to refer to the first code point in its
* iteration range, and returns that code point,
* @draft
*/
virtual UChar32 first32(void) = 0;
virtual UTextOffset setToStart() = 0;
/**
* Sets the iterator to refer to the last code unit in its
* iteration range, and returns that code unit.
* @draft
*/
virtual UChar last(void) = 0;
/**
* Sets the iterator to refer to the "position"-th character
* Sets the iterator to refer to the last code point in its
* iteration range, and returns that code point.
* @draft
*/
virtual UChar32 last32(void) = 0;
virtual UTextOffset setToEnd() = 0;
/**
* Sets the iterator to refer to the "position"-th code unit
* in the text-storage object the iterator refers to, and
* returns that character.
* returns that code unit.
* @draft
*/
virtual UChar setIndex(UTextOffset position) = 0;
/**
* Returns the character the iterator currently refers to.
* Sets the iterator to refer to the beginning of the code point
* that contains the "position"-th code unit
* in the text-storage object the iterator refers to, and
* returns that code point.
* @draft
*/
virtual UChar32 setIndex32(UTextOffset position) = 0;
/**
* Returns the code unit the iterator currently refers to.
* @draft
*/
virtual UChar current(void) const = 0;
/**
* Advances to the next character in the iteration range
* (toward last()), and returns that character. If there are
* no more characters to return, returns DONE.
* Returns the code point the iterator currently refers to.
* @draft
*/
virtual UChar32 current32(void) const = 0;
/**
* Advances to the next code unit in the iteration range
* (toward last()), and returns that code unit. If there are
* no more code units to return, returns DONE.
* @draft
*/
virtual UChar next(void) = 0;
/**
* Advances to the previous character in the iteration rance
* (toward first()), and returns that character. If there are
* no more characters to return, returns DONE.
* Gets the current code unit for returning and advances to the next code unit
* in the iteration range
* (toward last()). If there are
* no more code units to return, returns DONE.
* @draft
*/
virtual UChar nextPostInc(void) = 0;
/**
* Advances to the next code point in the iteration range
* (toward last()), and returns that code point. If there are
* no more code points to return, returns DONE.
* @draft
*/
virtual UChar32 next32(void) = 0;
/**
* Gets the current code point for returning and advances to the next code point
* in the iteration range
* (toward last()). If there are
* no more code points to return, returns DONE.
* @draft
*/
virtual UChar32 next32PostInc(void) = 0;
virtual bool_t hasNext() = 0;
/**
* Advances to the previous code unit in the iteration range
* (toward first()), and returns that code unit. If there are
* no more code units to return, returns DONE.
* @draft
*/
virtual UChar previous(void) = 0;
/**
* Advances to the previous code point in the iteration range
* (toward first()), and returns that code point. If there are
* no more code points to return, returns DONE.
* @draft
*/
virtual UChar32 previous32(void) = 0;
virtual bool_t hasPrevious() = 0;
/**
* Returns the numeric index in the underlying text-storage
* object of the character returned by first(). Since it's
@ -221,6 +292,3 @@ protected:
};
#endif

View file

@ -20,18 +20,22 @@
#include "unicode/utypes.h"
#include "unicode/chariter.h"
#include "unicode/uchriter.h"
/**
* A concrete subclass of CharacterIterator that iterates over the
* characters in a UnicodeString. It's possible not only to create an
* characters (code units or code points) in a UnicodeString.
* It's possible not only to create an
* iterator that iterates over an entire UnicodeString, but also to
* create only that iterates over only a subrange of a UnicodeString
* create one that iterates over only a subrange of a UnicodeString
* (iterators over different subranges of the same UnicodeString don't
* compare equal). */
class U_COMMON_API StringCharacterIterator : public CharacterIterator {
* compare equal).
*/
class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator {
public:
/**
* Create an iterator over the UnicodeString referred to by "text".
* The UnicodeString object is copied.
* The iteration range is the whole string, and the starting position is 0.
* @stable
*/
@ -49,8 +53,9 @@ public:
/**
* Create an iterator over the UnicodeString referred to by "text".
* The iteration range begins with the character specified by
* "begin" and ends with the character BEFORE the character specfied
* The UnicodeString object is copied.
* The iteration range begins with the code unit specified by
* "begin" and ends with the code unit BEFORE the code unit specified
* by "end". The starting position is specified by "pos". If
* "begin" and "end" don't form a valid range on "text" (i.e., begin
* >= end or either is negative or greater than text.size()), or
@ -67,6 +72,7 @@ public:
* Copy constructor. The new iterator iterates over the same range
* of the same string as "that", and its initial position is the
* same as "that"'s current position.
* The UnicodeString object in "that" is copied.
* @stable
*/
StringCharacterIterator(const StringCharacterIterator& that);
@ -78,7 +84,7 @@ public:
virtual ~StringCharacterIterator();
/**
* Assignment operator. *this is altered to iterate over the sane
* Assignment operator. *this is altered to iterate over the same
* range of the same string as "that", and refers to the same
* character within that string as "that" does.
* @stable
@ -93,12 +99,6 @@ public:
*/
virtual bool_t operator==(const CharacterIterator& that) const;
/**
* Generates a hash code for this iterator.
* @stable
*/
virtual int32_t hashCode(void) const;
/**
* Returns a new StringCharacterIterator referring to the same
* character in the same range of the same string as this one. The
@ -107,79 +107,12 @@ public:
*/
virtual CharacterIterator* clone(void) const;
/**
* Sets the iterator to refer to the first character in its
* iteration range, and returns that character,
* @draft
*/
virtual UChar first(void);
/**
* Sets the iterator to refer to the last character in its iteration
* range, and returns that character.
* @draft
*/
virtual UChar last(void);
/**
* Sets the iterator to refer to the "position"-th character in the
* UnicodeString the iterator refers to, and returns that character.
* If the index is outside the iterator's iteration range, the
* behavior of the iterator is undefined.
* @draft
*/
virtual UChar setIndex(UTextOffset pos);
/**
* Returns the character the iterator currently refers to.
* @draft
*/
virtual UChar current(void) const;
/**
* Advances to the next character in the iteration range (toward
* last()), and returns that character. If there are no more
* characters to return, returns DONE.
* @draft
*/
virtual UChar next(void);
/**
* Advances to the previous character in the iteration rance (toward
* first()), and returns that character. If there are no more
* characters to return, returns DONE.
* @draft
*/
virtual UChar previous(void);
/**
* Returns the numeric index of the first character in this
* iterator's iteration range.
* @stable
*/
virtual UTextOffset startIndex(void) const;
/**
* Returns the numeric index of the character immediately BEYOND the
* last character in this iterator's iteration range.
* @stable
*/
virtual UTextOffset endIndex(void) const;
/**
* Returns the numeric index in the underlying UnicodeString of the
* character the iterator currently refers to (i.e., the character
* returned by current()).
* @stable
*/
virtual UTextOffset getIndex(void) const;
/**
* Sets the iterator to iterate over the provided string.
* @draft
*/
virtual void setText(const UnicodeString& newText);
void setText(const UnicodeString& newText);
/**
* Copies the UnicodeString under iteration into the UnicodeString
* referred to by "result". Even if this iterator iterates across
@ -203,19 +136,13 @@ public:
static UClassID getStaticClassID(void)
{ return (UClassID)(&fgClassID); }
private:
protected:
StringCharacterIterator();
void setText(const UChar* newText, int32_t newTextLength);
UnicodeString text;
UTextOffset pos;
UTextOffset begin;
UTextOffset end;
static UClassID fgClassID;
};
#endif

View file

@ -14,21 +14,53 @@
/**
* A concrete subclass of CharacterIterator that iterates over the
* characters in a UnicodeString. It's possible not only to create an
* iterator that iterates over an entire UnicodeString, but also to
* create only that iterates over only a subrange of a UnicodeString
* (iterators over different subranges of the same UnicodeString don't
* compare equal). */
* characters (code units or code points) in a UChar array.
* It's possible not only to create an
* iterator that iterates over an entire UChar array, but also to
* create one that iterates over only a subrange of a UChar array
* (iterators over different subranges of the same UChar array don't
* compare equal).
*/
class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
public:
/**
* Create an iterator over the UnicodeString referred to by "text".
* The iteration range is the whole string, and the starting
* position is 0.
* Create an iterator over the UChar array referred to by "text".
* The iteration range is 0 to <code>len-1</code>.
* text is only aliased, not adopted (the
* destructor will not delete it).
* @stable
*/
UCharCharacterIterator(const UChar* text, int32_t len);
/**
* Create an iterator over the UChar array referred to by "text".
* The iteration range is 0 to <code>len-1</code>.
* text is only aliased, not adopted (the
* destructor will not delete it).
* The starting
* position is specified by "pos". If "pos" is outside the valid
* iteration range, the behavior of this object is undefined.
* @stable
*/
UCharCharacterIterator(const UChar* text, int32_t len,
UTextOffset pos);
/**
* Create an iterator over the UChar array referred to by "text".
* The iteration range is <code>begin</code> to <code>end-1</code>.
* text is only aliased, not adopted (the
* destructor will not delete it).
* The starting
* position is specified by "pos". If begin and end do not
* form a valid iteration range or "pos" is outside the valid
* iteration range, the behavior of this object is undefined.
* @stable
*/
UCharCharacterIterator(const UChar* text, int32_t len,
UTextOffset begin,
UTextOffset end,
UTextOffset pos);
/**
* Copy constructor. The new iterator iterates over the same range
* of the same string as "that", and its initial position is the
@ -66,7 +98,7 @@ public:
virtual int32_t hashCode(void) const;
/**
* Returns a new StringCharacterIterator referring to the same
* Returns a new UCharCharacterIterator referring to the same
* character in the same range of the same string as this one. The
* caller must delete the new iterator.
* @stable
@ -74,22 +106,40 @@ public:
virtual CharacterIterator* clone(void) const;
/**
* Sets the iterator to refer to the first character in its
* iteration range, and returns that character,
* Sets the iterator to refer to the first code unit in its
* iteration range, and returns that code unit,
* @draft
*/
virtual UChar first(void);
/**
* Sets the iterator to refer to the last character in its iteration
* range, and returns that character.
* Sets the iterator to refer to the first code point in its
* iteration range, and returns that code point,
* @draft
*/
virtual UChar32 first32(void);
virtual UTextOffset setToStart();
/**
* Sets the iterator to refer to the last code unit in its iteration
* range, and returns that code unit.
* @draft
*/
virtual UChar last(void);
/**
* Sets the iterator to refer to the "position"-th character in the
* UnicodeString the iterator refers to, and returns that character.
* Sets the iterator to refer to the last code point in its iteration
* range, and returns that code point.
* @draft
*/
virtual UChar32 last32(void);
virtual UTextOffset setToEnd();
/**
* Sets the iterator to refer to the "position"-th code unit in the
* UChar array the iterator refers to, and returns that code unit.
* If the index is outside the iterator's iteration range, the
* behavior of the iterator is undefined.
* @draft
@ -97,44 +147,83 @@ public:
virtual UChar setIndex(UTextOffset pos);
/**
* Returns the character the iterator currently refers to.
* Sets the iterator to refer to the beginning of the code point that
* contains the "position"-th code unit in the UChar array the iterator
* refers to, and returns that code point.
* If the index is outside the iterator's iteration range, the
* behavior of the iterator is undefined.
* @draft
*/
virtual UChar32 setIndex32(UTextOffset pos);
/**
* Returns the code unit the iterator currently refers to.
* @draft
*/
virtual UChar current(void) const;
/**
* Advances to the next character in the iteration range (toward
* last()), and returns that character. If there are no more
* characters to return, returns DONE.
* Returns the code point the iterator currently refers to.
* @draft
*/
virtual UChar32 current32(void) const;
/**
* Advances to the next code unit in the iteration range (toward
* last()), and returns that code unit. If there are no more
* code units to return, returns DONE.
* @draft
*/
virtual UChar next(void);
virtual UChar nextPostInc(void);
/**
* Advances to the previous character in the iteration rance (toward
* first()), and returns that character. If there are no more
* characters to return, returns DONE.
* Advances to the next code point in the iteration range (toward
* last()), and returns that code point. If there are no more
* code points to return, returns DONE.
* @draft
*/
virtual UChar32 next32(void);
virtual UChar32 next32PostInc(void);
virtual bool_t hasNext();
/**
* Advances to the previous code unit in the iteration range (toward
* first()), and returns that code unit. If there are no more
* code units to return, returns DONE.
* @draft
*/
virtual UChar previous(void);
/**
* Returns the numeric index of the first character in this
* Advances to the previous code point in the iteration range (toward
* first()), and returns that code point. If there are no more
* code points to return, returns DONE.
* @draft
*/
virtual UChar32 previous32(void);
virtual bool_t hasPrevious();
/**
* Returns the numeric index of the first code unit in this
* iterator's iteration range.
* @stable
*/
virtual UTextOffset startIndex(void) const;
/**
* Returns the numeric index of the character immediately BEYOND the
* last character in this iterator's iteration range.
* Returns the numeric index of the code unit immediately BEYOND the
* last code unit in this iterator's iteration range.
* @stable
*/
virtual UTextOffset endIndex(void) const;
/**
* Returns the numeric index in the underlying UnicodeString of the
* character the iterator currently refers to (i.e., the character
* Returns the numeric index in the underlying UChar array of the
* code unit the iterator currently refers to (i.e., the code unit
* returned by current()).
* @stable
*/
@ -144,11 +233,10 @@ public:
* Sets the iterator to iterate over a new range of text
* @draft
*/
virtual void setText(const UChar* newText,
int32_t newTextLength);
void setText(const UChar* newText, int32_t newTextLength);
/**
* Copies the UnicodeString under iteration into the UnicodeString
* Copies the UChar array under iteration into the UnicodeString
* referred to by "result". Even if this iterator iterates across
* only a part of this string, the whole string is copied. @param
* result Receives a copy of the text under iteration.
@ -170,10 +258,11 @@ public:
static UClassID getStaticClassID(void)
{ return (UClassID)(&fgClassID); }
private:
protected:
UCharCharacterIterator();
const UChar* text;
int32_t textLength; // need this for correct getText() and hashCode()
UTextOffset pos;
UTextOffset begin;
UTextOffset end;
@ -182,6 +271,3 @@ private:
};
#endif