ICU-170 fix and improve lastIndexOf() and improve indexOf()

X-SVN-Rev: 284
This commit is contained in:
Markus Scherer 1999-12-03 00:04:14 +00:00
parent b70728c7bd
commit 5f5e9bfece
2 changed files with 116 additions and 81 deletions

View file

@ -193,7 +193,9 @@ UnicodeString::UnicodeString(const char *codepageData,
fHashCode(kEmptyHashCode),
fBogus(FALSE)
{
doCodepageCreate(codepageData, dataLength, codepage);
if(codepageData != 0) {
doCodepageCreate(codepageData, dataLength, codepage);
}
}
//========================================
@ -370,6 +372,47 @@ UnicodeString::doExtract(UTextOffset start,
}
// Forward substring search.
//
// Looks for the pattern srcChars[srcStart, srcStart + srcLength) inside the
// range [start, start + length) of this string (after that range has been
// pinned to the string) and returns the offset of the first match whose
// characters all lie inside that range.
//
// Returns -1 when this string is bogus, when srcChars is null, when
// srcStart is negative, when srcLength is not positive, or when no match
// exists.
UTextOffset
UnicodeString::indexOf(const UChar *srcChars,
                       UTextOffset srcStart,
                       int32_t srcLength,
                       UTextOffset start,
                       int32_t length) const
{
  // a bogus receiver never contains anything
  if(isBogus()) {
    return -1;
  }

  // an unusable pattern can never be found
  if(srcChars == 0 || srcStart < 0 || srcLength <= 0) {
    return -1;
  }

  // get the indices within bounds
  pinIndices(start, length);

  // the pattern is handled as "first char" + "rest";
  // restLength is the number of chars that follow the first one
  const int32_t restLength = srcLength - 1;

  // number of offsets at which the complete pattern still fits into the range
  const int32_t candidates = length - restLength;
  if(candidates <= 0) {
    return -1;
  }

  const UChar *text = getArrayStart();
  const UChar first = srcChars[srcStart];
  const UTextOffset restStart = srcStart + 1;
  const UTextOffset end = start + candidates;

  for(UTextOffset pos = start; pos < end; ++pos) {
    // cheap test on the first char before the rest is compared
    if(text[pos] != first) {
      continue;
    }
    if(restLength == 0 ||
       compare(pos + 1, restLength, srcChars, restStart, restLength) == 0) {
      return pos;
    }
  }

  return -1;
}
UTextOffset
UnicodeString::doIndexOf(UChar c,
UTextOffset start,
@ -377,15 +420,63 @@ UnicodeString::doIndexOf(UChar c,
{
// pin indices
pinIndices(start, length);
if(length == 0) {
return -1;
}
// find the first occurrence of c
const UChar *begin = getArrayStart() + start;
const UChar *limit = begin + length;
while(begin < limit && *begin != c)
++begin;
do {
if(*begin == c) {
return begin - getArrayStart();
}
} while(++begin < limit);
return (begin == limit ? -1 : begin - getArrayStart());
return -1;
}
// Backward substring search.
//
// Looks for the pattern srcChars[srcStart, srcStart + srcLength) inside the
// range [start, start + length) of this string (after that range has been
// pinned to the string) and returns the offset of the last match whose
// characters all lie inside that range.
//
// Returns -1 when this string is bogus, when srcChars is null, when
// srcStart is negative, when srcLength is not positive, or when no match
// exists.
UTextOffset
UnicodeString::lastIndexOf(const UChar *srcChars,
                           UTextOffset srcStart,
                           int32_t srcLength,
                           UTextOffset start,
                           int32_t length) const
{
  // a bogus receiver never contains anything
  if(isBogus()) {
    return -1;
  }

  // an unusable pattern can never be found
  if(srcChars == 0 || srcStart < 0 || srcLength <= 0) {
    return -1;
  }

  // get the indices within bounds
  pinIndices(start, length);

  // the pattern is handled as "first char" + "rest";
  // restLength is the number of chars that follow the first one
  const int32_t restLength = srcLength - 1;

  // number of offsets at which the complete pattern still fits into the range
  const int32_t candidates = length - restLength;
  if(candidates <= 0) {
    return -1;
  }

  const UChar *text = getArrayStart();
  const UChar first = srcChars[srcStart];
  const UTextOffset restStart = srcStart + 1;

  // walk from the last possible match start down to start (inclusive);
  // pos is decremented before it is used, so it begins one past the last candidate
  UTextOffset pos = start + candidates;
  while(pos > start) {
    --pos;
    if(text[pos] == first &&
       (restLength == 0 ||
        compare(pos + 1, restLength, srcChars, restStart, restLength) == 0)) {
      return pos;
    }
  }

  return -1;
}
UTextOffset
@ -395,14 +486,20 @@ UnicodeString::doLastIndexOf(UChar c,
{
// pin indices
pinIndices(start, length);
if(length == 0) {
return -1;
}
const UChar *begin = getArrayStart() + start + length;
const UChar *limit = begin - length;
const UChar *begin = getArrayStart() + start;
const UChar *limit = begin + length;
while(begin > limit && *begin != c)
--begin;
do {
if(*--limit == c) {
return limit - getArrayStart();
}
} while(limit > begin);
return (begin == limit ? -1 : begin - getArrayStart());
return -1;
}

View file

@ -1706,20 +1706,12 @@ UnicodeString::indexOf(const UnicodeString& srcText,
UTextOffset start,
int32_t length) const
{
UTextOffset pos = 0, myStart = start;
int32_t myLength = length, limit = start + length;
// cache for speed
UChar ch = srcText.charAt(srcStart);
while( (pos = indexOf(ch, myStart, myLength)) != -1 && myLength > 0) {
if(compare(pos, srcLength, srcText, srcStart, srcLength) == 0)
return pos;
myStart = pos + 1;
myLength = limit - myStart;
if(!srcText.isBogus()) {
srcText.pinIndices(srcStart, srcLength);
if(srcLength > 0) {
return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, length);
}
}
return -1;
}
@ -1736,30 +1728,6 @@ UnicodeString::indexOf(const UChar *srcChars,
int32_t length) const
{ return indexOf(srcChars, 0, srcLength, start, length); }
// Finds the first occurrence of the pattern
// srcChars[srcStart, srcStart + srcLength) that lies completely inside the
// range [start, start + length) of this string.
//
// Returns the offset of the match, or -1 if there is none, if this string
// is bogus, if srcChars is null, if srcStart is negative, or if srcLength
// is not positive.
//
// Compared with the earlier form of this function:
//  - srcChars is no longer dereferenced when it is null or the pattern is empty;
//  - start/length are pinned before any arithmetic is done with them, so the
//    search limit agrees with the range that the single-char search uses;
//  - a match must fit entirely into the requested range.
inline UTextOffset
UnicodeString::indexOf(const UChar *srcChars,
                       UTextOffset srcStart,
                       int32_t srcLength,
                       UTextOffset start,
                       int32_t length) const
{
  // nothing can be found in a bogus string or for an unusable pattern
  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength <= 0) {
    return -1;
  }

  // get the indices within bounds before computing the limit from them
  pinIndices(start, length);

  // one past the last offset at which the whole pattern still fits
  const UTextOffset limit = start + length - (srcLength - 1);

  // cache for speed
  const UChar ch = srcChars[srcStart];

  while(start < limit) {
    // jump to the next candidate: an occurrence of the first pattern char
    const UTextOffset pos = indexOf(ch, start, limit - start);
    if(pos == -1) {
      break;
    }
    if(compare(pos, srcLength, srcChars, srcStart, srcLength) == 0) {
      return pos;
    }
    // resume right behind the failed candidate
    start = pos + 1;
  }

  return -1;
}
// Searches this string for the code unit c, starting at offset 0 and
// covering fLength code units. The scan is delegated to doIndexOf() and
// its result is returned unchanged.
inline UTextOffset
UnicodeString::indexOf(UChar c) const
{ return doIndexOf(c, 0, fLength); }
@ -1797,19 +1765,12 @@ UnicodeString::lastIndexOf(const UnicodeString& srcText,
UTextOffset start,
int32_t length) const
{
UTextOffset pos = 0, myStart = start;
int32_t myLength = length, limit = start + length;
// cache for speed
UChar ch = srcText.charAt(srcStart);
while( (pos = lastIndexOf(ch, myStart, myLength)) != -1 && myLength > 0) {
if(compare(pos, srcLength, srcText, srcStart, srcLength) == 0)
return pos;
myLength = pos - myStart - 1;
if(!srcText.isBogus()) {
srcText.pinIndices(srcStart, srcLength);
if(srcLength > 0) {
return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, length);
}
}
return -1;
}
@ -1826,29 +1787,6 @@ UnicodeString::lastIndexOf(const UChar *srcChars,
int32_t length) const
{ return lastIndexOf(srcChars, 0, srcLength, start, length); }
// Finds the last occurrence of the pattern
// srcChars[srcStart, srcStart + srcLength) that lies completely inside the
// range [start, start + length) of this string.
//
// Returns the offset of the match, or -1 if there is none, if this string
// is bogus, if srcChars is null, if srcStart is negative, or if srcLength
// is not positive.
//
// Compared with the earlier form of this function:
//  - after a failed candidate at pos the search continues in [start, pos);
//    the earlier "pos - start - 1" length skipped offset pos - 1;
//  - srcChars is no longer dereferenced when it is null or the pattern is empty;
//  - start/length are pinned first and a match must fit into the range;
//  - the unused local "limit" is gone.
//
// NOTE(review): relies on lastIndexOf(UChar, start, length) covering the
// half-open range [start, start + length) - confirm against doLastIndexOf().
inline UTextOffset
UnicodeString::lastIndexOf(const UChar *srcChars,
                           UTextOffset srcStart,
                           int32_t srcLength,
                           UTextOffset start,
                           int32_t length) const
{
  // nothing can be found in a bogus string or for an unusable pattern
  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength <= 0) {
    return -1;
  }

  // get the indices within bounds before doing arithmetic with them
  pinIndices(start, length);

  // from here on, length counts the offsets at which the whole pattern
  // still fits into the range, i.e. the candidate match starts
  length -= srcLength - 1;

  // cache for speed
  const UChar ch = srcChars[srcStart];

  while(length > 0) {
    // jump to the previous candidate: an occurrence of the first pattern char
    const UTextOffset pos = lastIndexOf(ch, start, length);
    if(pos == -1) {
      break;
    }
    if(compare(pos, srcLength, srcChars, srcStart, srcLength) == 0) {
      return pos;
    }
    // keep searching in front of the failed candidate: [start, pos)
    length = pos - start;
  }

  return -1;
}
// Searches this string for the code unit c over the range that starts at
// offset 0 and covers fLength code units. The scan is delegated to
// doLastIndexOf() and its result is returned unchanged.
inline UTextOffset
UnicodeString::lastIndexOf(UChar c) const
{ return doLastIndexOf(c, 0, fLength); }