From 5f5e9bfeced57bb6d4d093fd2a678796270e3635 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 3 Dec 1999 00:04:14 +0000 Subject: [PATCH] ICU-170 fix and improve lastIndexOf() and improve indexOf() X-SVN-Rev: 284 --- icu4c/source/common/unistr.cpp | 115 ++++++++++++++++++++++++++++++--- icu4c/source/common/unistr.h | 82 +++-------------------- 2 files changed, 116 insertions(+), 81 deletions(-) diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index 732a97965db..a0f6f4d1b1c 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -193,7 +193,9 @@ UnicodeString::UnicodeString(const char *codepageData, fHashCode(kEmptyHashCode), fBogus(FALSE) { - doCodepageCreate(codepageData, dataLength, codepage); + if(codepageData != 0) { + doCodepageCreate(codepageData, dataLength, codepage); + } } //======================================== @@ -370,6 +372,47 @@ UnicodeString::doExtract(UTextOffset start, } +UTextOffset +UnicodeString::indexOf(const UChar *srcChars, + UTextOffset srcStart, + int32_t srcLength, + UTextOffset start, + int32_t length) const +{ + if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength <= 0) { + return -1; + } + + // now we will only work with srcLength-1 + --srcLength; + + // get the indices within bounds + pinIndices(start, length); + + // set length for the last possible match start position + // note the --srcLength above + length -= srcLength; + + if(length <= 0) { + return -1; + } + + const UChar *array = getArrayStart(); + UTextOffset limit = start + length; + + // search for the first char, then compare the rest of the string + // increment srcStart here for that, matching the --srcLength above + UChar ch = srcChars[srcStart++]; + + do { + if(array[start] == ch && (srcLength == 0 || compare(start + 1, srcLength, srcChars, srcStart, srcLength) == 0)) { + return start; + } + } while(++start < limit); + + return -1; +} + UTextOffset UnicodeString::doIndexOf(UChar c, UTextOffset start, @@ -377,15 +420,63 @@ UnicodeString::doIndexOf(UChar c, { // pin indices pinIndices(start, length); + if(length == 0) { + return -1; + } // find the first occurrence of c const UChar *begin = getArrayStart() + start; const UChar *limit = begin + length; - while(begin < limit && *begin != c) - ++begin; + do { + if(*begin == c) { + return begin - getArrayStart(); + } + } while(++begin < limit); - return (begin == limit ? -1 : begin - getArrayStart()); + return -1; +} + +UTextOffset +UnicodeString::lastIndexOf(const UChar *srcChars, + UTextOffset srcStart, + int32_t srcLength, + UTextOffset start, + int32_t length) const +{ + if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength <= 0) { + return -1; + } + + // now we will only work with srcLength-1 + --srcLength; + + // get the indices within bounds + pinIndices(start, length); + + // set length for the last possible match start position + // note the --srcLength above + length -= srcLength; + + if(length <= 0) { + return -1; + } + + const UChar *array = getArrayStart(); + UTextOffset pos; + + // search for the first char, then compare the rest of the string + // increment srcStart here for that, matching the --srcLength above + UChar ch = srcChars[srcStart++]; + + pos = start + length; + do { + if(array[--pos] == ch && (srcLength == 0 || compare(pos + 1, srcLength, srcChars, srcStart, srcLength) == 0)) { + return pos; + } + } while(pos > start); + + return -1; } UTextOffset @@ -395,14 +486,20 @@ UnicodeString::doLastIndexOf(UChar c, { // pin indices pinIndices(start, length); + if(length == 0) { + return -1; + } - const UChar *begin = getArrayStart() + start + length; - const UChar *limit = begin - length; + const UChar *begin = getArrayStart() + start; + const UChar *limit = begin + length; - while(begin > limit && *begin != c) - --begin; + do { + if(*--limit == c) { + return limit - getArrayStart(); + } + } while(limit > begin); - return (begin == limit ? -1 : begin - getArrayStart()); + return -1; } diff --git a/icu4c/source/common/unistr.h b/icu4c/source/common/unistr.h index b55b6f5d9e9..bf673996510 100644 --- a/icu4c/source/common/unistr.h +++ b/icu4c/source/common/unistr.h @@ -1706,20 +1706,12 @@ UnicodeString::indexOf(const UnicodeString& srcText, UTextOffset start, int32_t length) const { - UTextOffset pos = 0, myStart = start; - int32_t myLength = length, limit = start + length; - - // cache for speed - UChar ch = srcText.charAt(srcStart); - - while( (pos = indexOf(ch, myStart, myLength)) != -1 && myLength > 0) { - if(compare(pos, srcLength, srcText, srcStart, srcLength) == 0) - return pos; - - myStart = pos + 1; - myLength = limit - myStart; + if(!srcText.isBogus()) { + srcText.pinIndices(srcStart, srcLength); + if(srcLength > 0) { + return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, length); + } } - return -1; } @@ -1736,30 +1728,6 @@ UnicodeString::indexOf(const UChar *srcChars, int32_t length) const { return indexOf(srcChars, 0, srcLength, start, length); } -inline UTextOffset -UnicodeString::indexOf(const UChar *srcChars, - UTextOffset srcStart, - int32_t srcLength, - UTextOffset start, - int32_t length) const -{ - UTextOffset pos = 0, myStart = start; - int32_t myLength = length, limit = start + length; - - // cache for speed - UChar ch = srcChars[srcStart]; - - while( (pos = indexOf(ch, myStart, myLength)) != -1 && myLength > 0) { - if(compare(pos, srcLength, srcChars, srcStart, srcLength) == 0) - return pos; - - myStart = pos + 1; - myLength = limit - myStart; - } - - return -1; -} - inline UTextOffset UnicodeString::indexOf(UChar c) const { return doIndexOf(c, 0, fLength); } @@ -1797,19 +1765,12 @@ UnicodeString::lastIndexOf(const UnicodeString& srcText, UTextOffset start, int32_t length) const { - UTextOffset pos = 0, myStart = start; - int32_t myLength = length, limit = start + length; - - // cache for speed - UChar ch = srcText.charAt(srcStart); - - while( (pos = lastIndexOf(ch, myStart, myLength)) != -1 && myLength > 0) { - if(compare(pos, srcLength, srcText, srcStart, srcLength) == 0) - return pos; - - myLength = pos - myStart - 1; + if(!srcText.isBogus()) { + srcText.pinIndices(srcStart, srcLength); + if(srcLength > 0) { + return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, length); + } } - return -1; } @@ -1826,29 +1787,6 @@ UnicodeString::lastIndexOf(const UChar *srcChars, int32_t length) const { return lastIndexOf(srcChars, 0, srcLength, start, length); } -inline UTextOffset -UnicodeString::lastIndexOf(const UChar *srcChars, - UTextOffset srcStart, - int32_t srcLength, - UTextOffset start, - int32_t length) const -{ - UTextOffset pos = 0, myStart = start; - int32_t myLength = length, limit = start + length; - - // cache for speed - UChar ch = srcChars[srcStart]; - - while( (pos = lastIndexOf(ch, myStart, myLength)) != -1 && myLength > 0) { - if(compare(pos, srcLength, srcChars, srcStart, srcLength) == 0) - return pos; - - myLength = pos - myStart - 1; - } - - return -1; -} - inline UTextOffset UnicodeString::lastIndexOf(UChar c) const { return doLastIndexOf(c, 0, fLength); }