mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-20 20:19:32 +00:00
ICU-2397 iterator improvements
X-SVN-Rev: 10894
This commit is contained in:
parent
2153f35323
commit
4dea6082dd
3 changed files with 92 additions and 50 deletions
|
@ -172,11 +172,7 @@ stringIteratorPrevious(UCharIterator *iter) {
|
|||
|
||||
static uint32_t U_CALLCONV
|
||||
stringIteratorGetState(const UCharIterator *iter) {
|
||||
if(iter==NULL) {
|
||||
return 0xffffffff; /* invalid */
|
||||
} else {
|
||||
return (uint32_t)iter->index;
|
||||
}
|
||||
return (uint32_t)iter->index;
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
|
@ -432,11 +428,7 @@ characterIteratorPrevious(UCharIterator *iter) {
|
|||
|
||||
static uint32_t U_CALLCONV
|
||||
characterIteratorGetState(const UCharIterator *iter) {
|
||||
if(iter==NULL || iter->context==NULL) {
|
||||
return 0xffffffff; /* invalid */
|
||||
} else {
|
||||
return ((CharacterIterator *)(iter->context))->getIndex();
|
||||
}
|
||||
return ((CharacterIterator *)(iter->context))->getIndex();
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
|
@ -701,9 +693,22 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
|
|||
break;
|
||||
case UITER_LIMIT:
|
||||
case UITER_LENGTH:
|
||||
pos=utf8IteratorGetIndex(iter, UITER_LENGTH)+delta;
|
||||
havePos=TRUE;
|
||||
/* even if the UTF-16 index was unknown, we know it now: iter->index>=0 here */
|
||||
if(iter->length>=0) {
|
||||
pos=iter->length+delta;
|
||||
havePos=TRUE;
|
||||
} else {
|
||||
/* pin to the end, avoid counting the length */
|
||||
iter->index=-1;
|
||||
iter->start=iter->limit;
|
||||
iter->reservedField=0;
|
||||
if(delta>=0) {
|
||||
return UITER_UNKNOWN_INDEX;
|
||||
} else {
|
||||
/* the current UTF-16 index is unknown, use only delta */
|
||||
pos=0;
|
||||
havePos=FALSE;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -1; /* Error */
|
||||
|
@ -743,7 +748,7 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
|
|||
} else {
|
||||
/* move relative to unknown UTF-16 index */
|
||||
if(delta==0) {
|
||||
return UITER_MOVE_UNKNOWN_INDEX; /* nothing to do */
|
||||
return UITER_UNKNOWN_INDEX; /* nothing to do */
|
||||
} else if(-delta>=iter->start) {
|
||||
/* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
|
||||
iter->index=iter->start=iter->reservedField=0;
|
||||
|
@ -753,7 +758,7 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
|
|||
iter->index=iter->length; /* may or may not be <0 (unknown) */
|
||||
iter->start=iter->limit;
|
||||
iter->reservedField=0;
|
||||
return iter->index>=0 ? iter->index : UITER_MOVE_UNKNOWN_INDEX;
|
||||
return iter->index>=0 ? iter->index : UITER_UNKNOWN_INDEX;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -828,7 +833,7 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
|
|||
return iter->index=i; /* reached the beginning */
|
||||
} else {
|
||||
/* we still don't know the UTF-16 index */
|
||||
return UITER_MOVE_UNKNOWN_INDEX;
|
||||
return UITER_UNKNOWN_INDEX;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -940,15 +945,11 @@ utf8IteratorPrevious(UCharIterator *iter) {
|
|||
|
||||
static uint32_t U_CALLCONV
|
||||
utf8IteratorGetState(const UCharIterator *iter) {
|
||||
if(iter==NULL) {
|
||||
return 1; /* invalid */
|
||||
} else {
|
||||
uint32_t state=(uint32_t)(iter->start<<1);
|
||||
if(iter->reservedField!=0) {
|
||||
state|=1;
|
||||
}
|
||||
return state;
|
||||
uint32_t state=(uint32_t)(iter->start<<1);
|
||||
if(iter->reservedField!=0) {
|
||||
state|=1;
|
||||
}
|
||||
return state;
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
|
@ -957,6 +958,8 @@ utf8IteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode
|
|||
/* do nothing */
|
||||
} else if(iter==NULL) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
} else if(state==utf8IteratorGetState(iter)) {
|
||||
/* setting to the current state: no-op */
|
||||
} else {
|
||||
int32_t index=(int32_t)(state>>1); /* UTF-8 index */
|
||||
state&=1; /* 1 if in surrogate pair, must be index>=4 */
|
||||
|
@ -1086,7 +1089,7 @@ uiter_previous32(UCharIterator *iter) {
|
|||
U_CAPI uint32_t U_EXPORT2
|
||||
uiter_getState(const UCharIterator *iter) {
|
||||
if(iter==NULL || iter->getState==NULL) {
|
||||
return 0xffffffff;
|
||||
return UITER_NO_STATE;
|
||||
} else {
|
||||
return iter->getState(iter);
|
||||
}
|
||||
|
|
|
@ -55,18 +55,33 @@ enum {
|
|||
/**
|
||||
* Constant value that may be returned by UCharIteratorMove
|
||||
* indicating that the final UTF-16 index is not known, but that the move succeeded.
|
||||
* This can occur after a setState() when the current UTF-16 index is not known
|
||||
* and a move relative to the current index is requested.
|
||||
* This can occur when moving relative to limit or length, or
|
||||
* when moving relative to the current index after a setState()
|
||||
* when the current UTF-16 index is not known.
|
||||
*
|
||||
* It would be very inefficient to have to count from the beginning of the text
|
||||
* just to get the current index after moving relative to it.
|
||||
* just to get the current/limit/length index after moving relative to it.
|
||||
* The actual index can be determined with getIndex(UITER_CURRENT)
|
||||
* which will count the UChars if necessary.
|
||||
*
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
UITER_MOVE_UNKNOWN_INDEX=-2
|
||||
UITER_UNKNOWN_INDEX=-2
|
||||
};
|
||||
|
||||
/**
|
||||
* Constant for UCharIterator getState() indicating an error or
|
||||
* an unknown state.
|
||||
* Returned by uiter_getState()/UCharIteratorGetState
|
||||
* when an error occurs.
|
||||
* Also, some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position. This will be clearly documented
|
||||
* for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
#define UITER_NO_STATE ((uint32_t)0xffffffff)
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.getIndex().
|
||||
*
|
||||
|
@ -103,22 +118,23 @@ UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
|
|||
* because an iterator implementation may have to count the rest of the
|
||||
* UChars if the native storage is not UTF-16.
|
||||
*
|
||||
* When moving relative to the current position after setState() was called,
|
||||
* move() may return UITER_MOVE_UNKNOWN_INDEX (-2) to avoid an inefficient
|
||||
* When moving relative to the limit or length, or
|
||||
* relative to the current position after setState() was called,
|
||||
* move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
|
||||
* determination of the actual UTF-16 index.
|
||||
* The actual index can be determined with getIndex(UITER_CURRENT)
|
||||
* which will count the UChars if necessary.
|
||||
* See UITER_MOVE_UNKNOWN_INDEX for details.
|
||||
* See UITER_UNKNOWN_INDEX for details.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param delta can be positive, zero, or negative
|
||||
* @param origin move relative to the 0, start, limit, length, or current index
|
||||
* @return the new index, or U_SENTINEL on an error condition,
|
||||
* or UITER_MOVE_UNKNOWN_INDEX when the index is not known.
|
||||
* or UITER_UNKNOWN_INDEX when the index is not known.
|
||||
*
|
||||
* @see UCharIteratorOrigin
|
||||
* @see UCharIterator
|
||||
* @see UITER_MOVE_UNKNOWN_INDEX
|
||||
* @see UITER_UNKNOWN_INDEX
|
||||
* @draft ICU 2.1
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
|
@ -242,11 +258,16 @@ UCharIteratorReserved(UCharIterator *iter, int32_t something);
|
|||
* the correct text contents and move relative to the current position
|
||||
* without performance degradation.
|
||||
*
|
||||
* Some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position, in which case they return UITER_NO_STATE instead.
|
||||
* This will be clearly documented for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the state word
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorSetState
|
||||
* @see UITER_NO_STATE
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
typedef uint32_t U_CALLCONV
|
||||
|
@ -495,13 +516,19 @@ uiter_previous32(UCharIterator *iter);
|
|||
/**
|
||||
* Get the "state" of the iterator in the form of a single 32-bit word.
|
||||
* This is a convenience function that calls iter->getState(iter)
|
||||
* if iter->getState is not NULL; if it is NULL, then 0xffffffff is returned.
|
||||
* if iter->getState is not NULL;
|
||||
* if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
|
||||
*
|
||||
* Some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position, in which case they return UITER_NO_STATE instead.
|
||||
* This will be clearly documented for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the state word
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorGetState
|
||||
* @see UITER_NO_STATE
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
|
@ -596,7 +623,8 @@ uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
|
|||
* (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
|
||||
*
|
||||
* getState() cannot also encode the UTF-16 index in the state value.
|
||||
* move() after setState() may return UITER_MOVE_UNKNOWN_INDEX.
|
||||
* move(relative to limit or length), or
|
||||
* move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param s UTF-8 string to iterate over
|
||||
|
|
|
@ -365,9 +365,22 @@ lenient8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin ori
|
|||
break;
|
||||
case UITER_LIMIT:
|
||||
case UITER_LENGTH:
|
||||
pos=lenient8IteratorGetIndex(iter, UITER_LENGTH)+delta;
|
||||
havePos=TRUE;
|
||||
/* even if the UTF-16 index was unknown, we know it now: iter->index>=0 here */
|
||||
if(iter->length>=0) {
|
||||
pos=iter->length+delta;
|
||||
havePos=TRUE;
|
||||
} else {
|
||||
/* pin to the end, avoid counting the length */
|
||||
iter->index=-1;
|
||||
iter->start=iter->limit;
|
||||
iter->reservedField=0;
|
||||
if(delta>=0) {
|
||||
return UITER_UNKNOWN_INDEX;
|
||||
} else {
|
||||
/* the current UTF-16 index is unknown, use only delta */
|
||||
pos=0;
|
||||
havePos=FALSE;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -1; /* Error */
|
||||
|
@ -407,7 +420,7 @@ lenient8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin ori
|
|||
} else {
|
||||
/* move relative to unknown UTF-16 index */
|
||||
if(delta==0) {
|
||||
return UITER_MOVE_UNKNOWN_INDEX; /* nothing to do */
|
||||
return UITER_UNKNOWN_INDEX; /* nothing to do */
|
||||
} else if(-delta>=iter->start) {
|
||||
/* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
|
||||
iter->index=iter->start=iter->reservedField=0;
|
||||
|
@ -417,7 +430,7 @@ lenient8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin ori
|
|||
iter->index=iter->length; /* may or may not be <0 (unknown) */
|
||||
iter->start=iter->limit;
|
||||
iter->reservedField=0;
|
||||
return iter->index>=0 ? iter->index : UITER_MOVE_UNKNOWN_INDEX;
|
||||
return iter->index>=0 ? iter->index : UITER_UNKNOWN_INDEX;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -492,7 +505,7 @@ lenient8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin ori
|
|||
return iter->index=i; /* reached the beginning */
|
||||
} else {
|
||||
/* we still don't know the UTF-16 index */
|
||||
return UITER_MOVE_UNKNOWN_INDEX;
|
||||
return UITER_UNKNOWN_INDEX;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -604,15 +617,11 @@ lenient8IteratorPrevious(UCharIterator *iter) {
|
|||
|
||||
static uint32_t U_CALLCONV
|
||||
lenient8IteratorGetState(const UCharIterator *iter) {
|
||||
if(iter==NULL) {
|
||||
return 1; /* invalid */
|
||||
} else {
|
||||
uint32_t state=(uint32_t)(iter->start<<1);
|
||||
if(iter->reservedField!=0) {
|
||||
state|=1;
|
||||
}
|
||||
return state;
|
||||
uint32_t state=(uint32_t)(iter->start<<1);
|
||||
if(iter->reservedField!=0) {
|
||||
state|=1;
|
||||
}
|
||||
return state;
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
|
@ -621,6 +630,8 @@ lenient8IteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pError
|
|||
/* do nothing */
|
||||
} else if(iter==NULL) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
} else if(state==lenient8IteratorGetState(iter)) {
|
||||
/* setting to the current state: no-op */
|
||||
} else {
|
||||
int32_t index=(int32_t)(state>>1); /* UTF-8 index */
|
||||
state&=1; /* 1 if in surrogate pair, must be index>=4 */
|
||||
|
|
Loading…
Add table
Reference in a new issue