diff --git a/icu4c/source/common/dbbi.cpp b/icu4c/source/common/dbbi.cpp index 2f83f6c3e39..9f4928473f8 100644 --- a/icu4c/source/common/dbbi.cpp +++ b/icu4c/source/common/dbbi.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1999-2004 IBM Corp. All rights reserved. +* Copyright (C) 1999-2005 IBM Corp. All rights reserved. ********************************************************************** * Date Name Description * 12/1/99 rgillam Complete port from Java. @@ -37,7 +37,7 @@ RuleBasedBreakIterator() { DictionaryBasedBreakIterator::DictionaryBasedBreakIterator(UDataMemory* rbbiData, - const char* dictionaryFilename, + const char* dictionaryFilename, UErrorCode& status) : RuleBasedBreakIterator(rbbiData, status) { @@ -143,7 +143,7 @@ DictionaryBasedBreakIterator::previous() reset(); int32_t result = RuleBasedBreakIterator::previous(); if (cachedBreakPositions != NULL) { - for (positionInCache=0; + for (positionInCache=0; cachedBreakPositions[positionInCache] != result; positionInCache++); U_ASSERT(positionInCache < numCachedBreakPositions); @@ -334,7 +334,7 @@ BreakIterator * DictionaryBasedBreakIterator::createBufferClone(void *stackBuff } // - // If user buffer size is zero this is a preflight operation to + // If user buffer size is zero this is a preflight operation to // obtain the needed buffer size, allowing for worst case misalignment. // if (bufferSize == 0) { @@ -367,7 +367,7 @@ BreakIterator * DictionaryBasedBreakIterator::createBufferClone(void *stackBuff } // - // Initialize the clone object. + // Initialize the clone object. // TODO: using an overloaded C++ "operator new" to directly initialize the // copy in the user's buffer would be better, but it doesn't seem // to get along with namespaces. Investigate why. 
@@ -383,7 +383,7 @@ BreakIterator * DictionaryBasedBreakIterator::createBufferClone(void *stackBuff if (status != U_SAFECLONE_ALLOCATED_WARNING) { clone->fBufferClone = TRUE; } - return clone; + return clone; } @@ -405,15 +405,15 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t // that needs to be kept with the word). Seek from the beginning of the // range to the first dictionary character fText->setIndex(startPos); - UChar c = fText->current(); + UChar32 c = fText->current32(); while (isDictionaryChar(c) == FALSE) { - c = fText->next(); + c = fText->next32(); } if (U_FAILURE(status)) { return; // UStack below overwrites the status error codes } - + // initialize. We maintain two stacks: currentBreakPositions contains // the list of break positions that will be returned if we successfully // finish traversing the whole range now. possibleBreakPositions lists @@ -429,9 +429,9 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t // further, this saves us from having to follow each possible path // through the text all the way to the error (hopefully avoiding many // future recursive calls as well). - // there can be only one kind of error in UStack and UVector, so we'll + // there can be only one kind of error in UStack and UVector, so we'll // just let the error fall through - UStack currentBreakPositions(status); + UStack currentBreakPositions(status); UStack possibleBreakPositions(status); UVector wrongBreakPositions(status); @@ -456,8 +456,15 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t return; } // initialize (we always exit the loop with a break statement) - c = fText->current(); + c = fText->current32(); for (;;) { + // The dictionary implementation doesn't do supplementary chars. + // Put them through as an unpaired surrogate, which + // will end any dictionary match in progress. + // With any luck, this dictionary implementation will be retired soon. 
+ if (c>=0x10000) { // supplementary code points start at U+10000, inclusive + c = 0xd800; + } // if we can transition to state "-1" from our current state, we're // on the last character of a legal word. Push that position onto @@ -470,7 +477,7 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t } // look up the new state to transition to in the dictionary - state = fTables->fDictionary->at(state, c); + state = fTables->fDictionary->at(state, (UChar)c); // if the character we're sitting on causes us to transition to // the "end of word" state, then it was a non-dictionary character @@ -515,7 +522,7 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t possibleBreakPositions.peeki())) { possibleBreakPositions.popi(); } - + // if we've used up all possible break-position combinations, there's // an error or an unknown word in the text. In this case, we start // over, treating the farthest character we've reached as the beginning @@ -532,7 +539,8 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t } bestBreakPositions.removeAllElements(); if (farthestEndPoint < endPos) { - fText->setIndex(farthestEndPoint + 1); + fText->setIndex(farthestEndPoint); + fText->next32(); } else { break; @@ -547,7 +555,7 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t return; } } - fText->next(); + fText->next32(); currentBreakPositions.push(fText->getIndex(), status); if (U_FAILURE(status)) { return; @@ -574,7 +582,7 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t // re-sync "c" for the next go-round, and drop out of the loop if // we've made it off the end of the range - c = fText->current(); + c = fText->current32(); if (fText->getIndex() >= endPos) { break; } @@ -583,7 +591,7 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t // if we didn't hit any exceptional conditions on this last iteration, // just advance to the next character and loop else { - c
= fText->next(); + c = fText->next32(); } } diff --git a/icu4c/source/common/rbbi.cpp b/icu4c/source/common/rbbi.cpp index 4a053c2a4c6..d7aec8bccbf 100644 --- a/icu4c/source/common/rbbi.cpp +++ b/icu4c/source/common/rbbi.cpp @@ -1348,6 +1348,29 @@ UBool RuleBasedBreakIterator::isDictionaryChar(UChar32 c) { } +//------------------------------------------------------------------------------- +// +// UText functions +// +//------------------------------------------------------------------------------- +void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) { + // TODO: implement this. Until it is, report the call as unsupported + // instead of silently leaving the iterator on its previous text. + if (U_SUCCESS(status)) { + status = U_UNSUPPORTED_ERROR; + } +} + + +UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const { + // TODO: implement this. Until it is, flag the result as unsupported; + // fillIn is handed back untouched (and may be NULL). + if (U_SUCCESS(status)) { + status = U_UNSUPPORTED_ERROR; + } + return fillIn; +} + U_NAMESPACE_END diff --git a/icu4c/source/common/ubrk.cpp b/icu4c/source/common/ubrk.cpp index 710fd980eb7..7bf3e1066b6 100644 --- a/icu4c/source/common/ubrk.cpp +++ b/icu4c/source/common/ubrk.cpp @@ -1,6 +1,6 @@ /* ***************************************************************************************** -* Copyright (C) 1996-2004, International Business Machines +* Copyright (C) 1996-2005, International Business Machines * Corporation and others. All Rights Reserved.
***************************************************************************************** */ @@ -70,14 +70,16 @@ ubrk_open(UBreakIteratorType type, return 0; } - UCharCharacterIterator *iter = 0; - iter = new UCharCharacterIterator(text, textLength); - if(iter == 0) { - *status = U_MEMORY_ALLOCATION_ERROR; - delete result; - return 0; + if (text != NULL) { + UCharCharacterIterator *iter = 0; + iter = new UCharCharacterIterator(text, textLength); + if(iter == 0) { + *status = U_MEMORY_ALLOCATION_ERROR; + delete result; + return 0; + } + result->adoptText(iter); } - result->adoptText(iter); return (UBreakIterator*)result; } @@ -186,6 +188,19 @@ ubrk_setText(UBreakIterator* bi, } } +U_DRAFT void U_EXPORT2 +ubrk_setUText(UBreakIterator *bi, + UText *text, + UErrorCode *status) +{ + // ICU convention: do nothing when an earlier call has already failed. + if (U_FAILURE(*status)) { + return; + } + BreakIterator *brit = (BreakIterator *)bi; + brit->setText(text, *status); +} + + U_CAPI int32_t U_EXPORT2 ubrk_current(const UBreakIterator *bi) { @@ -273,8 +288,8 @@ ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, U_CAPI const char* U_EXPORT2 -ubrk_getLocaleByType(const UBreakIterator *bi, - ULocDataLocaleType type, +ubrk_getLocaleByType(const UBreakIterator *bi, + ULocDataLocaleType type, UErrorCode* status) { if (bi == NULL) { diff --git a/icu4c/source/common/unicode/brkiter.h b/icu4c/source/common/unicode/brkiter.h index 6bdc3860abf..8fd448efd81 100644 --- a/icu4c/source/common/unicode/brkiter.h +++ b/icu4c/source/common/unicode/brkiter.h @@ -261,6 +261,23 @@ public: */ virtual const CharacterIterator& getText(void) const = 0; + + /** + * Get a UText for the text being analyzed. + * The returned UText is a shallow clone of the UText used internally + * by the break iterator implementation. It can safely be used to + * access the text without impacting any break iterator operations, + * but the underlying text itself must not be altered. + * + * @param fillIn A UText to be filled in. If NULL, a new UText will be + * allocated to hold the result.
+ * @param status Receives any error codes. + * @return The current UText for this break iterator. If an input + * UText was provided, it will always be returned. + * @draft ICU 3.4 + */ + virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0; + /** * Change the text over which this operates. The text boundary is * reset to the start. @@ -270,12 +287,19 @@ public: virtual void setText(const UnicodeString &text) = 0; /** - * Change the text over which this operates. The boundary iteration position is - * reset to the start. + * Reset the break iterator to operate over the text represented by + * the UText. The iterator position is reset to the start. + * + * This function makes a shallow clone of the supplied UText. This means + * that the caller is free to immediately close or otherwise reuse the + * UText that was passed as a parameter, but that the underlying text itself + * must not be altered while being referenced by the break iterator. + * * @param text The UText used to change the text. - * @stable ICU 2.0 + * @param status Receives any error codes. + * @draft ICU 3.4 */ - //virtual void setText(UText &text) = 0; + virtual void setText(UText *text, UErrorCode &status) = 0; /** * Change the text over which this operates. The text boundary is diff --git a/icu4c/source/common/unicode/rbbi.h b/icu4c/source/common/unicode/rbbi.h index 2eacd89b836..d4e206cc2fa 100644 --- a/icu4c/source/common/unicode/rbbi.h +++ b/icu4c/source/common/unicode/rbbi.h @@ -17,10 +17,10 @@ #include "unicode/utypes.h" /** - * \file + * \file * \brief C++ API: Rule Based Break Iterator */ - + #if !UCONFIG_NO_BREAK_ITERATION #include "unicode/brkiter.h" @@ -242,6 +242,22 @@ public: virtual const CharacterIterator& getText(void) const; + /** + * Get a UText for the text being analyzed. + * The returned UText is a shallow clone of the UText used internally + * by the break iterator implementation.
It can safely be used to + * access the text without impacting any break iterator operations, + * but the underlying text itself must not be altered. + * + * @param fillIn A UText to be filled in. If NULL, a new UText will be + * allocated to hold the result. + * @param status Receives any error codes. + * @return The current UText for this break iterator. If an input + * UText was provided, it will always be returned. + * @draft ICU 3.4 + */ + virtual UText *getUText(UText *fillIn, UErrorCode &status) const; + /** * Set the iterator to analyze a new piece of text. This function resets * the current iteration position to the beginning of the text. @@ -259,6 +275,21 @@ public: */ virtual void setText(const UnicodeString& newText); + /** + * Reset the break iterator to operate over the text represented by + * the UText. The iterator position is reset to the start. + * + * This function makes a shallow clone of the supplied UText. This means + * that the caller is free to immediately close or otherwise reuse the + * UText that was passed as a parameter, but that the underlying text itself + * must not be altered while being referenced by the break iterator. + * + * @param text The UText used to change the text. + * @param status Receives any error codes. + * @draft ICU 3.4 + */ + virtual void setText(UText *text, UErrorCode &status); + /** * Sets the current iteration position to the beginning of the text. * (i.e., the CharacterIterator's starting offset). diff --git a/icu4c/source/common/unicode/ubrk.h b/icu4c/source/common/unicode/ubrk.h index 5359ea29ea9..f4e734aa5c4 100644 --- a/icu4c/source/common/unicode/ubrk.h +++ b/icu4c/source/common/unicode/ubrk.h @@ -8,6 +8,7 @@ #include "unicode/utypes.h" #include "unicode/uloc.h" +#include "unicode/utext.h" /** * A text-break iterator.
@@ -392,6 +393,21 @@ ubrk_setText(UBreakIterator* bi, int32_t textLength, UErrorCode* status); + +/** + * Sets an existing iterator to point to a new piece of text + * @param bi The iterator to use + * @param text The text to be set + * @param status The error code + * @draft ICU 3.4 + */ +U_DRAFT void U_EXPORT2 +ubrk_setUText(UBreakIterator* bi, + UText* text, + UErrorCode* status); + + + /** * Determine the most recently-returned text boundary. * diff --git a/icu4c/source/common/unicode/utext.h b/icu4c/source/common/unicode/utext.h index f58e6b8a701..29bc500a8fc 100644 --- a/icu4c/source/common/unicode/utext.h +++ b/icu4c/source/common/unicode/utext.h @@ -329,7 +329,7 @@ utext_isLengthExpensive(const UText *ut); * * This function is roughly equivalent to the the sequence * utext_setIndex(index); - * utext_current(); + * utext_current32(); * (There is a difference if the index is out of bounds by being less than zero) * * @param ut the text to be accessed @@ -354,7 +354,7 @@ utext_char32At(UText *ut, int32_t nativeIndex); * @draft ICU 3.4 */ U_DRAFT UChar32 U_EXPORT2 -utext_current(UText *ut); +utext_current32(UText *ut); /** @@ -750,32 +750,32 @@ enum { * For example, byte indexes into UTF-8 text or UTF-32 indexes into UTF-32 text. * @draft ICU 3.4 */ - UTEXT_PROVIDER_NON_UTF16_INDEXES, + UTEXT_PROVIDER_NON_UTF16_INDEXES = 0, /** * It is potentially time consuming for the provider to determine the length of the text. * @draft ICU 3.4 */ - UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE, + UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, /** * Text chunks remain valid and usable until the text object is modified or * deleted, not just until the next time the access() function is called * (which is the default). * @draft ICU 3.4 */ - UTEXT_PROVIDER_STABLE_CHUNKS, + UTEXT_PROVIDER_STABLE_CHUNKS = 2, /** * The provider supports modifying the text via the replace() and copy() * functions. 
* @see Replaceable * @draft ICU 3.4 */ - UTEXT_PROVIDER_WRITABLE, + UTEXT_PROVIDER_WRITABLE = 3, /** * There is meta data associated with the text. * @see Replaceable::hasMetaData() * @draft ICU 3.4 */ - UTEXT_PROVIDER_HAS_META_DATA + UTEXT_PROVIDER_HAS_META_DATA = 4 }; /** diff --git a/icu4c/source/common/utext.cpp b/icu4c/source/common/utext.cpp index 6751791cbf6..056b908d0e2 100644 --- a/icu4c/source/common/utext.cpp +++ b/icu4c/source/common/utext.cpp @@ -101,7 +101,7 @@ utext_setIndex(UText *ut, int32_t index) { if (index>ut->chunk.nativeStart && index < ut->chunk.nativeLimit) { UChar c = ut->chunk.contents[ut->chunk.offset]; if (U16_TRAIL(c)) { - utext_current(ut); // force index to the start of the curent code point. + utext_current32(ut); // force index to the start of the curent code point. } } } @@ -111,7 +111,7 @@ utext_setIndex(UText *ut, int32_t index) { U_DRAFT UChar32 U_EXPORT2 -utext_current(UText *ut) { +utext_current32(UText *ut) { UChar32 c = U_SENTINEL; if (ut->chunk.offset < ut->chunk.length) { c = ut->chunk.contents[ut->chunk.offset]; @@ -161,7 +161,7 @@ utext_next32(UText *ut) { if (U16_IS_SURROGATE(c)) { // looking at a surrogate. Could be unpaired, need to be careful. // Speed doesn't matter, will be very rare. - c = utext_current(ut); + c = utext_current32(ut); if (U_IS_SUPPLEMENTARY(c)) { offset++; } @@ -192,7 +192,7 @@ utext_previous32(UText *ut) { if (U16_IS_SURROGATE(c)) { // Note that utext_current() will move the chunk offset to the lead surrogate // if we come in referring to trail half of a surrogate pair. - c = utext_current(ut); + c = utext_current32(ut); } prev32_return: @@ -224,7 +224,7 @@ utext_next32From(UText *ut, int32_t index) { // Surrogate code unit. Could be pointing at either half of a pair, or at // an unpaired surrogate. Let utext_current() do the work. Speed doesn't matter. 
chunk->offset = offset; - c = utext_current(ut); + c = utext_current32(ut); if (U_IS_SUPPLEMENTARY(c)) { offset++; } @@ -257,8 +257,8 @@ utext_previous32From(UText *ut, int32_t index) { c = chunk->contents[offset]; chunk->offset = offset; if (U16_IS_SURROGATE(c)) { - c = utext_current(ut); // get supplementary char if not unpaired surrogate, - // and adjust offset to start. + c = utext_current32(ut); // get supplementary char if not unpaired surrogate, + // and adjust offset to start. } prev32return: return c; @@ -911,7 +911,6 @@ U_CDECL_END // //------------------------------------------------------------------------------ -#if 0 // initially commented out to reduce testing /* * TODO: use a flag in RepText to support readonly strings? @@ -922,124 +921,159 @@ U_CDECL_END // to allow for possible trimming for code point boundaries enum { REP_TEXT_CHUNK_SIZE=10 }; -struct RepText : public UText { - /* chunk UChars */ - UChar s[REP_TEXT_CHUNK_SIZE]; +struct ReplExtra { + /* + * Chunk UChars. + * +1 to simplify filling with surrogate pair at the end. + */ + UChar s[REP_TEXT_CHUNK_SIZE+1]; }; + U_CDECL_BEGIN static UText * U_CALLCONV -repTextClone(const UText *t) { - RepText *t2=(RepText *)uprv_malloc(sizeof(RepText)); - if(t2!=NULL) { - *t2=*(const RepText *)t; - t2->context=((const Replaceable *)t->context)->clone(); - if(t2->context==NULL) { - uprv_free(t2); - t2=NULL; - } +repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) { + // First do a generic shallow clone. Does everything needed for the UText struct itself. 
+ dest = noopTextClone(dest, src, deep, status); + + if (deep && U_SUCCESS(*status)) { + const Replaceable *replSrc = (const Replaceable *)src->context; + dest->context = replSrc->clone(); } - return t2; + return dest; } -static int32_t U_CALLCONV -repTextGetProperties(UText *t) { - int32_t props=I32_FLAG(UTEXT_PROVIDER_WRITABLE); - if(((const Replaceable *)((const RepText *)t)->context)->hasMetaData()) { - props|=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA); - } - return props; -} + static int32_t U_CALLCONV -repTextLength(UText *t) { - return ((const Replaceable *)((const RepText *)t)->context)->length(); +repTextLength(UText *ut) { + const Replaceable *replSrc = (const Replaceable *)ut->context; + int32_t len = replSrc->length(); + return len; } -static int32_t U_CALLCONV -repTextAccess(UText *t, int32_t index, UBool forward, UTextChunk *chunk) { - RepText *rt=(RepText *)t; - const Replaceable *rep=(const Replaceable *)rt->context; - int32_t start, limit, length=rep->length(); - int32_t chunkStart, chunkLength, chunkOffset; + +static UBool U_CALLCONV +repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk) { + const Replaceable *rep=(const Replaceable *)ut->context; + int32_t start; // index of the start of the chunk to be loaded + int32_t limit; // index of the end+1 of the chunk to be loaded. + int32_t length=rep->length(); // Full length of the input text (bigger than a chunk) + /* * Compute start/limit boundaries around index, for a segment of text * to be extracted. - * The segment will be trimmed to not include halves of surrogate pairs. + * To allow for the possibility that our user gave an index to the trailing + * half of a surrogate pair, we must request one extra preceding UChar when + * going in the forward direction. This will ensure that the buffer has the + * entire code point at the specified index. 
*/ if(forward) { - if(length<=index) { - return -1; + + if (index>=ut->chunk.nativeStart && indexchunk.nativeLimit) { + // Buffer already contains the requested position. + ut->chunk.offset = index - ut->chunk.nativeStart; + return TRUE; } - limit=index+REP_TEXT_CHUNK_SIZE-1; - if(limit>length) { - limit=length; + if (index>=length && ut->chunk.nativeLimit==length) { + // Request for end of string, and buffer already extends up to it. + // Can't get the data, but don't change the buffer. + ut->chunk.offset = length - ut->chunk.nativeStart; + return FALSE; + } + + if (index<0) { + index = 0; + } + ut->chunk.nativeLimit = index + REP_TEXT_CHUNK_SIZE - 1; + // Going forward, so we want to have the buffer with stuff at and beyond + // the requested index. The -1 gets us one code point before the + // requested index also, to handle the case of the index being on + // a trail surrogate of a surrogate pair. + if(ut->chunk.nativeLimit > length) { + ut->chunk.nativeLimit = length; + } + // unless buffer ran off end, start is index-1. + ut->chunk.nativeStart = ut->chunk.nativeLimit - REP_TEXT_CHUNK_SIZE; + if(ut->chunk.nativeStart < 0) { + ut->chunk.nativeStart = 0; + } + } else { + // Reverse iteration. Fill buffer with data preceding the requested index. + if(index<0) { + index = 0; + } + if (index>ut->chunk.nativeStart && index<=ut->chunk.nativeLimit) { + // Requested position already in buffer. + ut->chunk.offset = index - ut->chunk.nativeStart; + return TRUE; + } + if (index==0 && ut->chunk.nativeStart==0) { + // Request for start, buffer already begins at start. + // No data, but keep the buffer as is. 
+ ut->chunk.offset = 0; + return FALSE; + } + limit = index; + if (limit>length) { + limit = length; } start=limit-REP_TEXT_CHUNK_SIZE; if(start<0) { start=0; } - } else { - if(index<0) { - return -1; - } - start=index-REP_TEXT_CHUNK_SIZE+1; - if(start<0) { - start=0; - } - limit=start+REP_TEXT_CHUNK_SIZE; - if(lengths, 0, REP_TEXT_CHUNK_SIZE); // writable alias - rep->extractBetween(start, limit, buffer); + ReplExtra *ex = (ReplExtra *)ut->pExtra; + // UnicodeString with its buffer a writable alias to the chunk buffer + UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/); + rep->extractBetween(ut->chunk.nativeStart, ut->chunk.nativeLimit, buffer); - chunkStart=0; - chunkLength=limit-start; - chunkOffset=index-start; + ut->chunk.contents = ex->s; + ut->chunk.length = ut->chunk.nativeLimit - ut->chunk.nativeStart; + ut->chunk.offset = index - ut->chunk.nativeStart; - // trim contents for code point boundaries - if(0s[chunkStart])) { - ++chunkStart; - --chunkLength; - ++start; - } - if(limits[chunkStart+chunkLength-1])) { - --chunkLength; - --limit; + // Surrogate pairs from the input text must not span chunk boundaries. + // If end of chunk could be the start of a surrogate, trim it off. + if (ut->chunk.nativeLimit < length && + U16_IS_LEAD(ex->s[ut->chunk.length-1])) { + ut->chunk.length--; + } + + + // if the first UChar in the chunk could be the trailing half of a surrogate pair, + // trim it off. 
+ if(ut->chunk.nativeStart>0 && U16_IS_TRAIL(ex->s[0])) { + ++(ut->chunk.contents); + --(ut->chunk.length); + --(ut->chunk.offset); } // adjust the index/chunkOffset to a code point boundary - U16_SET_CP_START(rt->s, chunkStart, chunkOffset); + U16_SET_CP_START(ut->chunk.contents, 0, ut->chunk.offset); - chunk->contents=rt->s+chunkStart; - chunk->length=chunkLength; - chunk->start=start; - chunk->limit=limit; - chunk->nonUTF16Indexes=FALSE; - return chunkOffset; // chunkOffset corresponding to index + return TRUE; } + + static int32_t U_CALLCONV -repTextExtract(UText *t, +repTextExtract(UText *ut, int32_t start, int32_t limit, UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) { - RepText *rt=(RepText *)t; - const Replaceable *rep=(const Replaceable *)rt->context; + UErrorCode *status) { + const Replaceable *rep=(const Replaceable *)ut->context; int32_t length=rep->length(); - if(U_FAILURE(*pErrorCode)) { + if(U_FAILURE(*status)) { return 0; } if(destCapacity<0 || (dest==NULL && destCapacity>0)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + *status=U_ILLEGAL_ARGUMENT_ERROR; } if(start<0 || start>limit || lengthextractBetween(start, limit, buffer); - return u_terminateUChars(dest, destCapacity, length, pErrorCode); + return u_terminateUChars(dest, destCapacity, length, status); } static int32_t U_CALLCONV -repTextReplace(UText *t, +repTextReplace(UText *ut, int32_t start, int32_t limit, const UChar *src, int32_t length, - UTextChunk *chunk, - UErrorCode *pErrorCode) { - RepText *rt=(RepText *)t; - Replaceable *rep=(Replaceable *)rt->context; + UErrorCode *status) { + Replaceable *rep=(Replaceable *)ut->context; int32_t oldLength; - if(U_FAILURE(*pErrorCode)) { + if(U_FAILURE(*status)) { return 0; } if(src==NULL && length!=0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + *status=U_ILLEGAL_ARGUMENT_ERROR; + return 0; } oldLength=rep->length(); // will subtract from new length if(start<0 || start>limit || oldLengthcontext; + UErrorCode *status) { + Replaceable 
*rep=(Replaceable *)ut->context; int32_t length=rep->length(); - if(U_FAILURE(*pErrorCode)) { + if(U_FAILURE(*status)) { return; } if( start<0 || start>limit || lengthproviderProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE); + if(rep->hasMetaData()) { + ut->providerProperties |=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA); } - *((UText *)rt)=repText; - rt->context=rep; - return rt; + + ut->clone = noopTextClone; + ut->length = repTextLength; + ut->access = repTextAccess; + ut->extract = repTextExtract; + ut->replace = repTextReplace; + ut->copy = repTextCopy; + + ut->context=rep; + return ut; } -U_DRAFT void U_EXPORT2 -utext_closeReplaceable(UText *t) { - if(t!=NULL) { - uprv_free((RepText *)t); - } -} - -U_DRAFT void U_EXPORT2 -utext_resetReplaceable(UText *t, Replaceable *rep, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - if(rep==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - RepText *rt=(RepText *)t; - rt->context=rep; -} U_CDECL_END -#endif diff --git a/icu4c/source/test/cintltst/utexttst.c b/icu4c/source/test/cintltst/utexttst.c index fbbe09e8730..1554b47fad2 100644 --- a/icu4c/source/test/cintltst/utexttst.c +++ b/icu4c/source/test/cintltst/utexttst.c @@ -128,17 +128,17 @@ static void TestAPI(void) { c = utext_char32At(uta, 0); TEST_ASSERT(c==uString[0]); - c = utext_current(uta); + c = utext_current32(uta); TEST_ASSERT(c==uString[0]); c = utext_next32(uta); TEST_ASSERT(c==uString[0]); - c = utext_current(uta); + c = utext_current32(uta); TEST_ASSERT(c==uString[1]); c = utext_previous32(uta); TEST_ASSERT(c==uString[0]); - c = utext_current(uta); + c = utext_current32(uta); TEST_ASSERT(c==uString[0]); c = utext_next32From(uta, 1); @@ -170,7 +170,7 @@ static void TestAPI(void) { utext_setIndex(uta, 0); c = UTEXT_NEXT32(uta); TEST_ASSERT(c==uString[0]); - c = utext_current(uta); + c = utext_current32(uta); TEST_ASSERT(c==uString[1]); c = UTEXT_PREVIOUS32(uta);