mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-170 fix and improve lastIndexOf() and improve indexOf()
X-SVN-Rev: 284
This commit is contained in:
parent
b70728c7bd
commit
5f5e9bfece
2 changed files with 116 additions and 81 deletions
|
@ -193,7 +193,9 @@ UnicodeString::UnicodeString(const char *codepageData,
|
|||
fHashCode(kEmptyHashCode),
|
||||
fBogus(FALSE)
|
||||
{
|
||||
doCodepageCreate(codepageData, dataLength, codepage);
|
||||
if(codepageData != 0) {
|
||||
doCodepageCreate(codepageData, dataLength, codepage);
|
||||
}
|
||||
}
|
||||
|
||||
//========================================
|
||||
|
@ -370,6 +372,47 @@ UnicodeString::doExtract(UTextOffset start,
|
|||
}
|
||||
|
||||
|
||||
// Find the first occurrence of the pattern srcChars[srcStart, srcStart + srcLength)
// that lies completely inside the window [start, start + length) of this string.
// Returns the index of the match, or -1 if there is none, if this string is
// bogus, or if the pattern is missing, empty, or has a negative start offset.
UTextOffset
UnicodeString::indexOf(const UChar *srcChars,
               UTextOffset srcStart,
               int32_t srcLength,
               UTextOffset start,
               int32_t length) const
{
  // reject unusable arguments up front
  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength <= 0) {
    return -1;
  }

  // bring the search window within bounds
  pinIndices(start, length);

  // the first pattern unit is checked directly below;
  // only the remaining srcLength-1 units go through compare()
  int32_t tailLength = srcLength - 1;

  // number of positions where a complete match could still begin
  const int32_t candidates = length - tailLength;
  if(candidates <= 0) {
    return -1;
  }

  const UChar *text = getArrayStart();
  const UChar first = srcChars[srcStart];
  UTextOffset tailStart = srcStart + 1;
  const UTextOffset end = start + candidates;

  for(UTextOffset pos = start; pos < end; ++pos) {
    if(text[pos] != first) {
      continue;
    }
    // first unit matches: accept if the pattern has no tail or the tail is equal
    if(tailLength == 0 ||
       compare(pos + 1, tailLength, srcChars, tailStart, tailLength) == 0) {
      return pos;
    }
  }

  return -1;
}
|
||||
|
||||
UTextOffset
|
||||
UnicodeString::doIndexOf(UChar c,
|
||||
UTextOffset start,
|
||||
|
@ -377,15 +420,63 @@ UnicodeString::doIndexOf(UChar c,
|
|||
{
|
||||
// pin indices
|
||||
pinIndices(start, length);
|
||||
if(length == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// find the first occurrence of c
|
||||
const UChar *begin = getArrayStart() + start;
|
||||
const UChar *limit = begin + length;
|
||||
|
||||
while(begin < limit && *begin != c)
|
||||
++begin;
|
||||
do {
|
||||
if(*begin == c) {
|
||||
return begin - getArrayStart();
|
||||
}
|
||||
} while(++begin < limit);
|
||||
|
||||
return (begin == limit ? -1 : begin - getArrayStart());
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Find the last occurrence of the pattern srcChars[srcStart, srcStart + srcLength)
// that lies completely inside the window [start, start + length) of this string.
// Returns the index of the match, or -1 if there is none, if this string is
// bogus, or if the pattern is missing, empty, or has a negative start offset.
UTextOffset
UnicodeString::lastIndexOf(const UChar *srcChars,
               UTextOffset srcStart,
               int32_t srcLength,
               UTextOffset start,
               int32_t length) const
{
  // reject unusable arguments up front
  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength <= 0) {
    return -1;
  }

  // bring the search window within bounds
  pinIndices(start, length);

  // the first pattern unit is checked directly below;
  // only the remaining srcLength-1 units go through compare()
  int32_t tailLength = srcLength - 1;

  // number of positions where a complete match could still begin
  const int32_t candidates = length - tailLength;
  if(candidates <= 0) {
    return -1;
  }

  const UChar *text = getArrayStart();
  const UChar first = srcChars[srcStart];
  UTextOffset tailStart = srcStart + 1;

  // walk backwards from the last possible match start down to start
  UTextOffset pos = start + candidates;
  while(pos > start) {
    --pos;
    if(text[pos] == first &&
       (tailLength == 0 ||
        compare(pos + 1, tailLength, srcChars, tailStart, tailLength) == 0)) {
      return pos;
    }
  }

  return -1;
}
|
||||
|
||||
UTextOffset
|
||||
|
@ -395,14 +486,20 @@ UnicodeString::doLastIndexOf(UChar c,
|
|||
{
|
||||
// pin indices
|
||||
pinIndices(start, length);
|
||||
if(length == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
const UChar *begin = getArrayStart() + start + length;
|
||||
const UChar *limit = begin - length;
|
||||
const UChar *begin = getArrayStart() + start;
|
||||
const UChar *limit = begin + length;
|
||||
|
||||
while(begin > limit && *begin != c)
|
||||
--begin;
|
||||
do {
|
||||
if(*--limit == c) {
|
||||
return limit - getArrayStart();
|
||||
}
|
||||
} while(limit > begin);
|
||||
|
||||
return (begin == limit ? -1 : begin - getArrayStart());
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1706,20 +1706,12 @@ UnicodeString::indexOf(const UnicodeString& srcText,
|
|||
UTextOffset start,
|
||||
int32_t length) const
|
||||
{
|
||||
UTextOffset pos = 0, myStart = start;
|
||||
int32_t myLength = length, limit = start + length;
|
||||
|
||||
// cache for speed
|
||||
UChar ch = srcText.charAt(srcStart);
|
||||
|
||||
while( (pos = indexOf(ch, myStart, myLength)) != -1 && myLength > 0) {
|
||||
if(compare(pos, srcLength, srcText, srcStart, srcLength) == 0)
|
||||
return pos;
|
||||
|
||||
myStart = pos + 1;
|
||||
myLength = limit - myStart;
|
||||
if(!srcText.isBogus()) {
|
||||
srcText.pinIndices(srcStart, srcLength);
|
||||
if(srcLength > 0) {
|
||||
return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, length);
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -1736,30 +1728,6 @@ UnicodeString::indexOf(const UChar *srcChars,
|
|||
int32_t length) const
|
||||
{ return indexOf(srcChars, 0, srcLength, start, length); }
|
||||
|
||||
// Find the first occurrence of the pattern srcChars[srcStart, srcStart + srcLength)
// that lies completely inside the window [start, start + length) of this string.
// Returns the index of the match, or -1 if there is none.
//
// Compared with the previous inline version this
//  - returns -1 for a bogus string, a null pattern pointer, a negative
//    srcStart or an empty pattern instead of reading srcChars[srcStart];
//  - pins start/length before computing the window end, so start + length
//    cannot overflow;
//  - only considers match positions where the whole pattern fits in the window.
inline UTextOffset
UnicodeString::indexOf(const UChar *srcChars,
               UTextOffset srcStart,
               int32_t srcLength,
               UTextOffset start,
               int32_t length) const
{
  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength <= 0) {
    return -1;
  }

  // bring the search window within bounds
  pinIndices(start, length);

  // number of positions where a complete match could still begin
  const int32_t candidates = length - (srcLength - 1);
  if(candidates <= 0) {
    return -1;
  }

  // one past the last possible match start
  const UTextOffset limit = start + candidates;

  // cache for speed
  const UChar ch = srcChars[srcStart];

  UTextOffset myStart = start;
  UTextOffset pos = -1;

  // test the remaining window size before searching it
  while(myStart < limit && (pos = indexOf(ch, myStart, limit - myStart)) != -1) {
    if(compare(pos, srcLength, srcChars, srcStart, srcLength) == 0) {
      return pos;
    }

    // continue right after the failed candidate
    myStart = pos + 1;
  }

  return -1;
}
|
||||
|
||||
// Convenience overload: look for c over the whole string
// by delegating to doIndexOf() with offset 0 and length fLength.
inline UTextOffset
UnicodeString::indexOf(UChar c) const
{
  return doIndexOf(c, 0, fLength);
}
|
||||
|
@ -1797,19 +1765,12 @@ UnicodeString::lastIndexOf(const UnicodeString& srcText,
|
|||
UTextOffset start,
|
||||
int32_t length) const
|
||||
{
|
||||
UTextOffset pos = 0, myStart = start;
|
||||
int32_t myLength = length, limit = start + length;
|
||||
|
||||
// cache for speed
|
||||
UChar ch = srcText.charAt(srcStart);
|
||||
|
||||
while( (pos = lastIndexOf(ch, myStart, myLength)) != -1 && myLength > 0) {
|
||||
if(compare(pos, srcLength, srcText, srcStart, srcLength) == 0)
|
||||
return pos;
|
||||
|
||||
myLength = pos - myStart - 1;
|
||||
if(!srcText.isBogus()) {
|
||||
srcText.pinIndices(srcStart, srcLength);
|
||||
if(srcLength > 0) {
|
||||
return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, length);
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -1826,29 +1787,6 @@ UnicodeString::lastIndexOf(const UChar *srcChars,
|
|||
int32_t length) const
|
||||
{ return lastIndexOf(srcChars, 0, srcLength, start, length); }
|
||||
|
||||
// Find the last occurrence of the pattern srcChars[srcStart, srcStart + srcLength)
// that lies completely inside the window [start, start + length) of this string.
// Returns the index of the match, or -1 if there is none.
//
// Compared with the previous inline version this
//  - fixes an off-by-one: after a failed candidate at pos the next search
//    must cover [start, pos), i.e. pos - start units; the old
//    "pos - myStart - 1" skipped the unit directly before pos
//    (e.g. the last "aa" in "aaa" was reported at 0 instead of 1);
//  - returns -1 for a bogus string, a null pattern pointer, a negative
//    srcStart or an empty pattern instead of reading srcChars[srcStart];
//  - pins start/length and drops the unused, overflow-prone "limit";
//  - only considers match positions where the whole pattern fits in the window.
inline UTextOffset
UnicodeString::lastIndexOf(const UChar *srcChars,
               UTextOffset srcStart,
               int32_t srcLength,
               UTextOffset start,
               int32_t length) const
{
  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength <= 0) {
    return -1;
  }

  // bring the search window within bounds
  pinIndices(start, length);

  // number of positions where a complete match could still begin
  int32_t remaining = length - (srcLength - 1);
  if(remaining <= 0) {
    return -1;
  }

  // cache for speed
  const UChar ch = srcChars[srcStart];

  UTextOffset pos = -1;

  // test the remaining window size before searching it
  while(remaining > 0 && (pos = lastIndexOf(ch, start, remaining)) != -1) {
    if(compare(pos, srcLength, srcChars, srcStart, srcLength) == 0) {
      return pos;
    }

    // continue strictly before the failed candidate: [start, pos)
    remaining = pos - start;
  }

  return -1;
}
|
||||
|
||||
// Convenience overload: look for the last c over the whole string
// by delegating to doLastIndexOf() with offset 0 and length fLength.
inline UTextOffset
UnicodeString::lastIndexOf(UChar c) const
{
  return doLastIndexOf(c, 0, fLength);
}
|
||||
|
|
Loading…
Add table
Reference in a new issue