ICU-2397 iterator improvements

X-SVN-Rev: 10894
This commit is contained in:
Markus Scherer 2003-01-22 21:59:26 +00:00
parent 2153f35323
commit 4dea6082dd
3 changed files with 92 additions and 50 deletions

View file

@ -172,11 +172,7 @@ stringIteratorPrevious(UCharIterator *iter) {
static uint32_t U_CALLCONV
stringIteratorGetState(const UCharIterator *iter) {
if(iter==NULL) {
return 0xffffffff; /* invalid */
} else {
return (uint32_t)iter->index;
}
return (uint32_t)iter->index;
}
static void U_CALLCONV
@ -432,11 +428,7 @@ characterIteratorPrevious(UCharIterator *iter) {
static uint32_t U_CALLCONV
characterIteratorGetState(const UCharIterator *iter) {
if(iter==NULL || iter->context==NULL) {
return 0xffffffff; /* invalid */
} else {
return ((CharacterIterator *)(iter->context))->getIndex();
}
return ((CharacterIterator *)(iter->context))->getIndex();
}
static void U_CALLCONV
@ -701,9 +693,22 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
break;
case UITER_LIMIT:
case UITER_LENGTH:
pos=utf8IteratorGetIndex(iter, UITER_LENGTH)+delta;
havePos=TRUE;
/* even if the UTF-16 index was unknown, we know it now: iter->index>=0 here */
if(iter->length>=0) {
pos=iter->length+delta;
havePos=TRUE;
} else {
/* pin to the end, avoid counting the length */
iter->index=-1;
iter->start=iter->limit;
iter->reservedField=0;
if(delta>=0) {
return UITER_UNKNOWN_INDEX;
} else {
/* the current UTF-16 index is unknown, use only delta */
pos=0;
havePos=FALSE;
}
}
break;
default:
return -1; /* Error */
@ -743,7 +748,7 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
} else {
/* move relative to unknown UTF-16 index */
if(delta==0) {
return UITER_MOVE_UNKNOWN_INDEX; /* nothing to do */
return UITER_UNKNOWN_INDEX; /* nothing to do */
} else if(-delta>=iter->start) {
/* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
iter->index=iter->start=iter->reservedField=0;
@ -753,7 +758,7 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
iter->index=iter->length; /* may or may not be <0 (unknown) */
iter->start=iter->limit;
iter->reservedField=0;
return iter->index>=0 ? iter->index : UITER_MOVE_UNKNOWN_INDEX;
return iter->index>=0 ? iter->index : UITER_UNKNOWN_INDEX;
}
}
@ -828,7 +833,7 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
return iter->index=i; /* reached the beginning */
} else {
/* we still don't know the UTF-16 index */
return UITER_MOVE_UNKNOWN_INDEX;
return UITER_UNKNOWN_INDEX;
}
}
}
@ -940,15 +945,11 @@ utf8IteratorPrevious(UCharIterator *iter) {
static uint32_t U_CALLCONV
utf8IteratorGetState(const UCharIterator *iter) {
if(iter==NULL) {
return 1; /* invalid */
} else {
uint32_t state=(uint32_t)(iter->start<<1);
if(iter->reservedField!=0) {
state|=1;
}
return state;
uint32_t state=(uint32_t)(iter->start<<1);
if(iter->reservedField!=0) {
state|=1;
}
return state;
}
static void U_CALLCONV
@ -957,6 +958,8 @@ utf8IteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode
/* do nothing */
} else if(iter==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
} else if(state==utf8IteratorGetState(iter)) {
/* setting to the current state: no-op */
} else {
int32_t index=(int32_t)(state>>1); /* UTF-8 index */
state&=1; /* 1 if in surrogate pair, must be index>=4 */
@ -1086,7 +1089,7 @@ uiter_previous32(UCharIterator *iter) {
U_CAPI uint32_t U_EXPORT2
uiter_getState(const UCharIterator *iter) {
if(iter==NULL || iter->getState==NULL) {
return 0xffffffff;
return UITER_NO_STATE;
} else {
return iter->getState(iter);
}

View file

@ -55,18 +55,33 @@ enum {
/**
* Constant value that may be returned by UCharIteratorMove
* indicating that the final UTF-16 index is not known, but that the move succeeded.
* This can occur after a setState() when the current UTF-16 index is not known
* and a move relative to the current index is requested.
* This can occur when moving relative to limit or length, or
* when moving relative to the current index after a setState()
* when the current UTF-16 index is not known.
*
* It would be very inefficient to have to count from the beginning of the text
* just to get the current index after moving relative to it.
* just to get the current/limit/length index after moving relative to it.
* The actual index can be determined with getIndex(UITER_CURRENT)
* which will count the UChars if necessary.
*
* @draft ICU 2.6
*/
UITER_MOVE_UNKNOWN_INDEX=-2
UITER_UNKNOWN_INDEX=-2
};
/**
* Constant for UCharIterator getState() indicating an error or
* an unknown state.
* Returned by uiter_getState()/UCharIteratorGetState
* when an error occurs.
* Also, some UCharIterator implementations may not be able to return
* a valid state for each position. This will be clearly documented
* for each such iterator (none of the public ones here).
*
* @draft ICU 2.6
*/
#define UITER_NO_STATE ((uint32_t)0xffffffff)
/**
* Function type declaration for UCharIterator.getIndex().
*
@ -103,22 +118,23 @@ UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
* because an iterator implementation may have to count the rest of the
* UChars if the native storage is not UTF-16.
*
* When moving relative to the current position after setState() was called,
* move() may return UITER_MOVE_UNKNOWN_INDEX (-2) to avoid an inefficient
* When moving relative to the limit or length, or
* relative to the current position after setState() was called,
* move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
* determination of the actual UTF-16 index.
* The actual index can be determined with getIndex(UITER_CURRENT)
* which will count the UChars if necessary.
* See UITER_MOVE_UNKNOWN_INDEX for details.
* See UITER_UNKNOWN_INDEX for details.
*
* @param iter the UCharIterator structure ("this pointer")
* @param delta can be positive, zero, or negative
* @param origin move relative to the 0, start, limit, length, or current index
* @return the new index, or U_SENTINEL on an error condition,
* or UITER_MOVE_UNKNOWN_INDEX when the index is not known.
* or UITER_UNKNOWN_INDEX when the index is not known.
*
* @see UCharIteratorOrigin
* @see UCharIterator
* @see UITER_MOVE_UNKNOWN_INDEX
* @see UITER_UNKNOWN_INDEX
* @draft ICU 2.1
*/
typedef int32_t U_CALLCONV
@ -242,11 +258,16 @@ UCharIteratorReserved(UCharIterator *iter, int32_t something);
* the correct text contents and move relative to the current position
* without performance degradation.
*
* Some UCharIterator implementations may not be able to return
* a valid state for each position, in which case they return UITER_NO_STATE instead.
* This will be clearly documented for each such iterator (none of the public ones here).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the state word
*
* @see UCharIterator
* @see UCharIteratorSetState
* @see UITER_NO_STATE
* @draft ICU 2.6
*/
typedef uint32_t U_CALLCONV
@ -495,13 +516,19 @@ uiter_previous32(UCharIterator *iter);
/**
* Get the "state" of the iterator in the form of a single 32-bit word.
* This is a convenience function that calls iter->getState(iter)
* if iter->getState is not NULL; if it is NULL, then 0xffffffff is returned.
* if iter->getState is not NULL;
* if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
*
* Some UCharIterator implementations may not be able to return
* a valid state for each position, in which case they return UITER_NO_STATE instead.
* This will be clearly documented for each such iterator (none of the public ones here).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the state word
*
* @see UCharIterator
* @see UCharIteratorGetState
* @see UITER_NO_STATE
* @draft ICU 2.6
*/
U_CAPI uint32_t U_EXPORT2
@ -596,7 +623,8 @@ uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
* (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
*
* getState() cannot also encode the UTF-16 index in the state value.
* move() after setState() may return UITER_MOVE_UNKNOWN_INDEX.
* move(relative to limit or length), or
* move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
*
* @param iter UCharIterator structure to be set for iteration
* @param s UTF-8 string to iterate over

View file

@ -365,9 +365,22 @@ lenient8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin ori
break;
case UITER_LIMIT:
case UITER_LENGTH:
pos=lenient8IteratorGetIndex(iter, UITER_LENGTH)+delta;
havePos=TRUE;
/* even if the UTF-16 index was unknown, we know it now: iter->index>=0 here */
if(iter->length>=0) {
pos=iter->length+delta;
havePos=TRUE;
} else {
/* pin to the end, avoid counting the length */
iter->index=-1;
iter->start=iter->limit;
iter->reservedField=0;
if(delta>=0) {
return UITER_UNKNOWN_INDEX;
} else {
/* the current UTF-16 index is unknown, use only delta */
pos=0;
havePos=FALSE;
}
}
break;
default:
return -1; /* Error */
@ -407,7 +420,7 @@ lenient8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin ori
} else {
/* move relative to unknown UTF-16 index */
if(delta==0) {
return UITER_MOVE_UNKNOWN_INDEX; /* nothing to do */
return UITER_UNKNOWN_INDEX; /* nothing to do */
} else if(-delta>=iter->start) {
/* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
iter->index=iter->start=iter->reservedField=0;
@ -417,7 +430,7 @@ lenient8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin ori
iter->index=iter->length; /* may or may not be <0 (unknown) */
iter->start=iter->limit;
iter->reservedField=0;
return iter->index>=0 ? iter->index : UITER_MOVE_UNKNOWN_INDEX;
return iter->index>=0 ? iter->index : UITER_UNKNOWN_INDEX;
}
}
@ -492,7 +505,7 @@ lenient8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin ori
return iter->index=i; /* reached the beginning */
} else {
/* we still don't know the UTF-16 index */
return UITER_MOVE_UNKNOWN_INDEX;
return UITER_UNKNOWN_INDEX;
}
}
}
@ -604,15 +617,11 @@ lenient8IteratorPrevious(UCharIterator *iter) {
static uint32_t U_CALLCONV
lenient8IteratorGetState(const UCharIterator *iter) {
if(iter==NULL) {
return 1; /* invalid */
} else {
uint32_t state=(uint32_t)(iter->start<<1);
if(iter->reservedField!=0) {
state|=1;
}
return state;
uint32_t state=(uint32_t)(iter->start<<1);
if(iter->reservedField!=0) {
state|=1;
}
return state;
}
static void U_CALLCONV
@ -621,6 +630,8 @@ lenient8IteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pError
/* do nothing */
} else if(iter==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
} else if(state==lenient8IteratorGetState(iter)) {
/* setting to the current state: no-op */
} else {
int32_t index=(int32_t)(state>>1); /* UTF-8 index */
state&=1; /* 1 if in surrogate pair, must be index>=4 */