ICU-3944 text access, work in progress

X-SVN-Rev: 17988
This commit is contained in:
Andy Heninger 2005-06-23 05:51:28 +00:00
parent 9fc80fe9b2
commit 32b19f04b2
9 changed files with 303 additions and 187 deletions

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1999-2004 IBM Corp. All rights reserved.
* Copyright (C) 1999-2005 IBM Corp. All rights reserved.
**********************************************************************
* Date Name Description
* 12/1/99 rgillam Complete port from Java.
@ -37,7 +37,7 @@ RuleBasedBreakIterator() {
DictionaryBasedBreakIterator::DictionaryBasedBreakIterator(UDataMemory* rbbiData,
const char* dictionaryFilename,
const char* dictionaryFilename,
UErrorCode& status)
: RuleBasedBreakIterator(rbbiData, status)
{
@ -143,7 +143,7 @@ DictionaryBasedBreakIterator::previous()
reset();
int32_t result = RuleBasedBreakIterator::previous();
if (cachedBreakPositions != NULL) {
for (positionInCache=0;
for (positionInCache=0;
cachedBreakPositions[positionInCache] != result;
positionInCache++);
U_ASSERT(positionInCache < numCachedBreakPositions);
@ -334,7 +334,7 @@ BreakIterator * DictionaryBasedBreakIterator::createBufferClone(void *stackBuff
}
//
// If user buffer size is zero this is a preflight operation to
// If user buffer size is zero this is a preflight operation to
// obtain the needed buffer size, allowing for worst case misalignment.
//
if (bufferSize == 0) {
@ -367,7 +367,7 @@ BreakIterator * DictionaryBasedBreakIterator::createBufferClone(void *stackBuff
}
//
// Initialize the clone object.
// Initialize the clone object.
// TODO: using an overloaded C++ "operator new" to directly initialize the
// copy in the user's buffer would be better, but it doesn't seem
// to get along with namespaces. Investigate why.
@ -383,7 +383,7 @@ BreakIterator * DictionaryBasedBreakIterator::createBufferClone(void *stackBuff
if (status != U_SAFECLONE_ALLOCATED_WARNING) {
clone->fBufferClone = TRUE;
}
return clone;
return clone;
}
@ -405,15 +405,15 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t
// that needs to be kept with the word). Seek from the beginning of the
// range to the first dictionary character
fText->setIndex(startPos);
UChar c = fText->current();
UChar32 c = fText->current32();
while (isDictionaryChar(c) == FALSE) {
c = fText->next();
c = fText->next32();
}
if (U_FAILURE(status)) {
return; // UStack below overwrites the status error codes
}
// initialize. We maintain two stacks: currentBreakPositions contains
// the list of break positions that will be returned if we successfully
// finish traversing the whole range now. possibleBreakPositions lists
@ -429,9 +429,9 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t
// further, this saves us from having to follow each possible path
// through the text all the way to the error (hopefully avoiding many
// future recursive calls as well).
// there can be only one kind of error in UStack and UVector, so we'll
// there can be only one kind of error in UStack and UVector, so we'll
// just let the error fall through
UStack currentBreakPositions(status);
UStack currentBreakPositions(status);
UStack possibleBreakPositions(status);
UVector wrongBreakPositions(status);
@ -456,8 +456,15 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t
return;
}
// initialize (we always exit the loop with a break statement)
c = fText->current();
c = fText->current32();
for (;;) {
// The dictionary implementation doesn't do supplementary chars.
// Put them through as an unpaired surrogate, which
// will end any dictionary match in progress.
// With any luck, this dictionary implementation will be retired soon.
//
// Supplementary code points begin at U+10000, so the comparison has to be
// inclusive. With a strict '>' the code point U+10000 itself would slip
// through and be truncated to 0x0000 by the (UChar) cast applied at the
// dictionary lookup below.
if (c>=0x10000) {
c = 0xd800;
}
// if we can transition to state "-1" from our current state, we're
// on the last character of a legal word. Push that position onto
@ -470,7 +477,7 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t
}
// look up the new state to transition to in the dictionary
state = fTables->fDictionary->at(state, c);
state = fTables->fDictionary->at(state, (UChar)c);
// if the character we're sitting on causes us to transition to
// the "end of word" state, then it was a non-dictionary character
@ -515,7 +522,7 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t
possibleBreakPositions.peeki())) {
possibleBreakPositions.popi();
}
// if we've used up all possible break-position combinations, there's
// an error or an unknown word in the text. In this case, we start
// over, treating the farthest character we've reached as the beginning
@ -532,7 +539,8 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t
}
bestBreakPositions.removeAllElements();
if (farthestEndPoint < endPos) {
fText->setIndex(farthestEndPoint + 1);
fText->setIndex(farthestEndPoint);
fText->next32();
}
else {
break;
@ -547,7 +555,7 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t
return;
}
}
fText->next();
fText->next32();
currentBreakPositions.push(fText->getIndex(), status);
if (U_FAILURE(status)) {
return;
@ -574,7 +582,7 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t
// re-sync "c" for the next go-round, and drop out of the loop if
// we've made it off the end of the range
c = fText->current();
c = fText->current32();
if (fText->getIndex() >= endPos) {
break;
}
@ -583,7 +591,7 @@ DictionaryBasedBreakIterator::divideUpDictionaryRange(int32_t startPos, int32_t
// if we didn't hit any exceptional conditions on this last iteration,
// just advance to the next character and loop
else {
c = fText->next();
c = fText->next32();
}
}

View file

@ -1348,6 +1348,21 @@ UBool RuleBasedBreakIterator::isDictionaryChar(UChar32 c) {
}
//-------------------------------------------------------------------------------
//
// UText functions
//
//-------------------------------------------------------------------------------
// Placeholder for setting the text to be analyzed from a UText.
// The body is currently empty: neither `ut` nor `status` is read or written,
// so calling this function leaves the iterator unchanged.
void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
// TODO: implement this.
}
// Placeholder for retrieving the text being analyzed as a UText.
// Currently returns `fillIn` exactly as passed in (a NULL argument yields a
// NULL result) and does not touch `status`; no UText is allocated or filled.
UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const {
// TODO: implement this.
return fillIn;
}
U_NAMESPACE_END

View file

@ -1,6 +1,6 @@
/*
*****************************************************************************************
* Copyright (C) 1996-2004, International Business Machines
* Copyright (C) 1996-2005, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
@ -70,14 +70,16 @@ ubrk_open(UBreakIteratorType type,
return 0;
}
UCharCharacterIterator *iter = 0;
iter = new UCharCharacterIterator(text, textLength);
if(iter == 0) {
*status = U_MEMORY_ALLOCATION_ERROR;
delete result;
return 0;
if (text != NULL) {
UCharCharacterIterator *iter = 0;
iter = new UCharCharacterIterator(text, textLength);
if(iter == 0) {
*status = U_MEMORY_ALLOCATION_ERROR;
delete result;
return 0;
}
result->adoptText(iter);
}
result->adoptText(iter);
return (UBreakIterator*)result;
}
@ -186,6 +188,19 @@ ubrk_setText(UBreakIterator* bi,
}
}
/*
 * C API wrapper: reinterpret the opaque UBreakIterator handle as a C++
 * BreakIterator and forward the UText and the caller's error code to
 * BreakIterator::setText(UText *, UErrorCode &).
 */
U_DRAFT void U_EXPORT2
ubrk_setUText(UBreakIterator *bi,
              UText *text,
              UErrorCode *status)
{
    ((BreakIterator *)bi)->setText(text, *status);
}
U_CAPI int32_t U_EXPORT2
ubrk_current(const UBreakIterator *bi)
{
@ -273,8 +288,8 @@ ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity,
U_CAPI const char* U_EXPORT2
ubrk_getLocaleByType(const UBreakIterator *bi,
ULocDataLocaleType type,
ubrk_getLocaleByType(const UBreakIterator *bi,
ULocDataLocaleType type,
UErrorCode* status)
{
if (bi == NULL) {

View file

@ -261,6 +261,23 @@ public:
*/
virtual const CharacterIterator& getText(void) const = 0;
/**
* Get a UText for the text being analyzed.
* The returned UText is a shallow clone of the UText used internally
* by the break iterator implementation. It can safely be used to
* access the text without impacting any break iterator operations,
* but the underlying text itself must not be altered.
*
* @param fillIn A UText to be filled in. If NULL, a new UText will be
* allocated to hold the result.
* @param status receives any error codes.
* @return The current UText for this break iterator. If an input
* UText was provided, it will always be returned.
* @draft ICU 3.4
*/
virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
/**
* Change the text over which this operates. The text boundary is
* reset to the start.
@ -270,12 +287,19 @@ public:
virtual void setText(const UnicodeString &text) = 0;
/**
* Change the text over which this operates. The boundary iteration position is
* reset to the start.
* Reset the break iterator to operate over the text represented by
* the UText. The iterator position is reset to the start.
*
* This function makes a shallow clone of the supplied UText. This means
* that the caller is free to immediately close or otherwise reuse the
* UText that was passed as a parameter, but that the underlying text itself
* must not be altered while being referenced by the break iterator.
*
* @param text The UText used to change the text.
* @stable ICU 2.0
* @param status receives any error codes.
* @draft ICU 3.4
*/
//virtual void setText(UText &text) = 0;
virtual void setText(UText *text, UErrorCode &status) = 0;
/**
* Change the text over which this operates. The text boundary is

View file

@ -17,10 +17,10 @@
#include "unicode/utypes.h"
/**
* \file
* \file
* \brief C++ API: Rule Based Break Iterator
*/
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
@ -242,6 +242,22 @@ public:
virtual const CharacterIterator& getText(void) const;
/**
* Get a UText for the text being analyzed.
* The returned UText is a shallow clone of the UText used internally
* by the break iterator implementation. It can safely be used to
* access the text without impacting any break iterator operations,
* but the underlying text itself must not be altered.
*
* @param fillIn A UText to be filled in. If NULL, a new UText will be
* allocated to hold the result.
* @param status receives any error codes.
* @return The current UText for this break iterator. If an input
* UText was provided, it will always be returned.
* @draft ICU 3.4
*/
virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
/**
* Set the iterator to analyze a new piece of text. This function resets
* the current iteration position to the beginning of the text.
@ -259,6 +275,21 @@ public:
*/
virtual void setText(const UnicodeString& newText);
/**
* Reset the break iterator to operate over the text represented by
* the UText. The iterator position is reset to the start.
*
* This function makes a shallow clone of the supplied UText. This means
* that the caller is free to immediately close or otherwise reuse the
* UText that was passed as a parameter, but that the underlying text itself
* must not be altered while being referenced by the break iterator.
*
* @param text The UText used to change the text.
* @param status Receives any error codes.
* @draft ICU 3.4
*/
virtual void setText(UText *text, UErrorCode &status);
/**
* Sets the current iteration position to the beginning of the text.
* (i.e., the CharacterIterator's starting offset).

View file

@ -8,6 +8,7 @@
#include "unicode/utypes.h"
#include "unicode/uloc.h"
#include "unicode/utext.h"
/**
* A text-break iterator.
@ -392,6 +393,21 @@ ubrk_setText(UBreakIterator* bi,
int32_t textLength,
UErrorCode* status);
/**
* Sets an existing iterator to point to a new piece of text
* @param bi The iterator to use
* @param text The text to be set
* @param status The error code
* @draft ICU 3.4
*/
U_DRAFT void U_EXPORT2
ubrk_setUText(UBreakIterator* bi,
UText* text,
UErrorCode* status);
/**
* Determine the most recently-returned text boundary.
*

View file

@ -329,7 +329,7 @@ utext_isLengthExpensive(const UText *ut);
*
* This function is roughly equivalent to the sequence
* utext_setIndex(index);
* utext_current();
* utext_current32();
* (There is a difference if the index is out of bounds by being less than zero)
*
* @param ut the text to be accessed
@ -354,7 +354,7 @@ utext_char32At(UText *ut, int32_t nativeIndex);
* @draft ICU 3.4
*/
U_DRAFT UChar32 U_EXPORT2
utext_current(UText *ut);
utext_current32(UText *ut);
/**
@ -750,32 +750,32 @@ enum {
* For example, byte indexes into UTF-8 text or UTF-32 indexes into UTF-32 text.
* @draft ICU 3.4
*/
UTEXT_PROVIDER_NON_UTF16_INDEXES,
UTEXT_PROVIDER_NON_UTF16_INDEXES = 0,
/**
* It is potentially time consuming for the provider to determine the length of the text.
* @draft ICU 3.4
*/
UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE,
UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
/**
* Text chunks remain valid and usable until the text object is modified or
* deleted, not just until the next time the access() function is called
* (which is the default).
* @draft ICU 3.4
*/
UTEXT_PROVIDER_STABLE_CHUNKS,
UTEXT_PROVIDER_STABLE_CHUNKS = 2,
/**
* The provider supports modifying the text via the replace() and copy()
* functions.
* @see Replaceable
* @draft ICU 3.4
*/
UTEXT_PROVIDER_WRITABLE,
UTEXT_PROVIDER_WRITABLE = 3,
/**
* There is meta data associated with the text.
* @see Replaceable::hasMetaData()
* @draft ICU 3.4
*/
UTEXT_PROVIDER_HAS_META_DATA
UTEXT_PROVIDER_HAS_META_DATA = 4
};
/**

View file

@ -101,7 +101,7 @@ utext_setIndex(UText *ut, int32_t index) {
if (index>ut->chunk.nativeStart && index < ut->chunk.nativeLimit) {
UChar c = ut->chunk.contents[ut->chunk.offset];
if (U16_TRAIL(c)) {
utext_current(ut); // force index to the start of the curent code point.
utext_current32(ut); // force index to the start of the curent code point.
}
}
}
@ -111,7 +111,7 @@ utext_setIndex(UText *ut, int32_t index) {
U_DRAFT UChar32 U_EXPORT2
utext_current(UText *ut) {
utext_current32(UText *ut) {
UChar32 c = U_SENTINEL;
if (ut->chunk.offset < ut->chunk.length) {
c = ut->chunk.contents[ut->chunk.offset];
@ -161,7 +161,7 @@ utext_next32(UText *ut) {
if (U16_IS_SURROGATE(c)) {
// looking at a surrogate. Could be unpaired, need to be careful.
// Speed doesn't matter, will be very rare.
c = utext_current(ut);
c = utext_current32(ut);
if (U_IS_SUPPLEMENTARY(c)) {
offset++;
}
@ -192,7 +192,7 @@ utext_previous32(UText *ut) {
if (U16_IS_SURROGATE(c)) {
// Note that utext_current32() will move the chunk offset to the lead surrogate
// if we come in referring to trail half of a surrogate pair.
c = utext_current(ut);
c = utext_current32(ut);
}
prev32_return:
@ -224,7 +224,7 @@ utext_next32From(UText *ut, int32_t index) {
// Surrogate code unit. Could be pointing at either half of a pair, or at
// an unpaired surrogate. Let utext_current32() do the work. Speed doesn't matter.
chunk->offset = offset;
c = utext_current(ut);
c = utext_current32(ut);
if (U_IS_SUPPLEMENTARY(c)) {
offset++;
}
@ -257,8 +257,8 @@ utext_previous32From(UText *ut, int32_t index) {
c = chunk->contents[offset];
chunk->offset = offset;
if (U16_IS_SURROGATE(c)) {
c = utext_current(ut); // get supplementary char if not unpaired surrogate,
// and adjust offset to start.
c = utext_current32(ut); // get supplementary char if not unpaired surrogate,
// and adjust offset to start.
}
prev32return:
return c;
@ -911,7 +911,6 @@ U_CDECL_END
//
//------------------------------------------------------------------------------
#if 0 // initially commented out to reduce testing
/*
* TODO: use a flag in RepText to support readonly strings?
@ -922,124 +921,159 @@ U_CDECL_END
// to allow for possible trimming for code point boundaries
enum { REP_TEXT_CHUNK_SIZE=10 };
struct RepText : public UText {
/* chunk UChars */
UChar s[REP_TEXT_CHUNK_SIZE];
struct ReplExtra {
/*
* Chunk UChars.
* +1 to simplify filling with surrogate pair at the end.
*/
UChar s[REP_TEXT_CHUNK_SIZE+1];
};
U_CDECL_BEGIN
static UText * U_CALLCONV
repTextClone(const UText *t) {
RepText *t2=(RepText *)uprv_malloc(sizeof(RepText));
if(t2!=NULL) {
*t2=*(const RepText *)t;
t2->context=((const Replaceable *)t->context)->clone();
if(t2->context==NULL) {
uprv_free(t2);
t2=NULL;
}
repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
// First do a generic shallow clone. Does everything needed for the UText struct itself.
dest = noopTextClone(dest, src, deep, status);
if (deep && U_SUCCESS(*status)) {
const Replaceable *replSrc = (const Replaceable *)src->context;
dest->context = replSrc->clone();
}
return t2;
return dest;
}
static int32_t U_CALLCONV
repTextGetProperties(UText *t) {
int32_t props=I32_FLAG(UTEXT_PROVIDER_WRITABLE);
if(((const Replaceable *)((const RepText *)t)->context)->hasMetaData()) {
props|=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);
}
return props;
}
static int32_t U_CALLCONV
repTextLength(UText *t) {
return ((const Replaceable *)((const RepText *)t)->context)->length();
repTextLength(UText *ut) {
const Replaceable *replSrc = (const Replaceable *)ut->context;
int32_t len = replSrc->length();
return len;
}
static int32_t U_CALLCONV
repTextAccess(UText *t, int32_t index, UBool forward, UTextChunk *chunk) {
RepText *rt=(RepText *)t;
const Replaceable *rep=(const Replaceable *)rt->context;
int32_t start, limit, length=rep->length();
int32_t chunkStart, chunkLength, chunkOffset;
static UBool U_CALLCONV
repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk) {
const Replaceable *rep=(const Replaceable *)ut->context;
int32_t start; // index of the start of the chunk to be loaded
int32_t limit; // index of the end+1 of the chunk to be loaded.
int32_t length=rep->length(); // Full length of the input text (bigger than a chunk)
/*
* Compute start/limit boundaries around index, for a segment of text
* to be extracted.
* The segment will be trimmed to not include halves of surrogate pairs.
* To allow for the possibility that our user gave an index to the trailing
* half of a surrogate pair, we must request one extra preceding UChar when
* going in the forward direction. This will ensure that the buffer has the
* entire code point at the specified index.
*/
if(forward) {
if(length<=index) {
return -1;
if (index>=ut->chunk.nativeStart && index<ut->chunk.nativeLimit) {
// Buffer already contains the requested position.
ut->chunk.offset = index - ut->chunk.nativeStart;
return TRUE;
}
limit=index+REP_TEXT_CHUNK_SIZE-1;
if(limit>length) {
limit=length;
if (index>=length && ut->chunk.nativeLimit==length) {
// Request for end of string, and buffer already extends up to it.
// Can't get the data, but don't change the buffer.
ut->chunk.offset = length - ut->chunk.nativeStart;
return FALSE;
}
if (index<0) {
index = 0;
}
ut->chunk.nativeLimit = index + REP_TEXT_CHUNK_SIZE - 1;
// Going forward, so we want to have the buffer with stuff at and beyond
// the requested index. The -1 gets us one code point before the
// requested index also, to handle the case of the index being on
// a trail surrogate of a surrogate pair.
if(ut->chunk.nativeLimit > length) {
ut->chunk.nativeLimit = length;
}
// unless buffer ran off end, start is index-1.
ut->chunk.nativeStart = ut->chunk.nativeLimit - REP_TEXT_CHUNK_SIZE;
if(ut->chunk.nativeStart < 0) {
ut->chunk.nativeStart = 0;
}
} else {
// Reverse iteration. Fill buffer with data preceding the requested index.
if(index<0) {
index = 0;
}
if (index>ut->chunk.nativeStart && index<=ut->chunk.nativeLimit) {
// Requested position already in buffer.
ut->chunk.offset = index - ut->chunk.nativeStart;
return TRUE;
}
if (index==0 && ut->chunk.nativeStart==0) {
// Request for start, buffer already begins at start.
// No data, but keep the buffer as is.
ut->chunk.offset = 0;
return FALSE;
}
limit = index;
if (limit>length) {
limit = length;
}
start=limit-REP_TEXT_CHUNK_SIZE;
if(start<0) {
start=0;
}
} else {
if(index<0) {
return -1;
}
start=index-REP_TEXT_CHUNK_SIZE+1;
if(start<0) {
start=0;
}
limit=start+REP_TEXT_CHUNK_SIZE;
if(length<limit) {
limit=length;
}
}
UnicodeString buffer(rt->s, 0, REP_TEXT_CHUNK_SIZE); // writable alias
rep->extractBetween(start, limit, buffer);
ReplExtra *ex = (ReplExtra *)ut->pExtra;
// UnicodeString with its buffer a writable alias to the chunk buffer
UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/);
rep->extractBetween(ut->chunk.nativeStart, ut->chunk.nativeLimit, buffer);
chunkStart=0;
chunkLength=limit-start;
chunkOffset=index-start;
ut->chunk.contents = ex->s;
ut->chunk.length = ut->chunk.nativeLimit - ut->chunk.nativeStart;
ut->chunk.offset = index - ut->chunk.nativeStart;
// trim contents for code point boundaries
if(0<start && U16_IS_TRAIL(rt->s[chunkStart])) {
++chunkStart;
--chunkLength;
++start;
}
if(limit<length && U16_IS_LEAD(rt->s[chunkStart+chunkLength-1])) {
--chunkLength;
--limit;
// Surrogate pairs from the input text must not span chunk boundaries.
// If end of chunk could be the start of a surrogate, trim it off.
if (ut->chunk.nativeLimit < length &&
U16_IS_LEAD(ex->s[ut->chunk.length-1])) {
ut->chunk.length--;
}
// if the first UChar in the chunk could be the trailing half of a surrogate pair,
// trim it off.
if(ut->chunk.nativeStart>0 && U16_IS_TRAIL(ex->s[0])) {
++(ut->chunk.contents);
--(ut->chunk.length);
--(ut->chunk.offset);
}
// adjust the index/chunkOffset to a code point boundary
U16_SET_CP_START(rt->s, chunkStart, chunkOffset);
U16_SET_CP_START(ut->chunk.contents, 0, ut->chunk.offset);
chunk->contents=rt->s+chunkStart;
chunk->length=chunkLength;
chunk->start=start;
chunk->limit=limit;
chunk->nonUTF16Indexes=FALSE;
return chunkOffset; // chunkOffset corresponding to index
return TRUE;
}
static int32_t U_CALLCONV
repTextExtract(UText *t,
repTextExtract(UText *ut,
int32_t start, int32_t limit,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
RepText *rt=(RepText *)t;
const Replaceable *rep=(const Replaceable *)rt->context;
UErrorCode *status) {
const Replaceable *rep=(const Replaceable *)ut->context;
int32_t length=rep->length();
if(U_FAILURE(*pErrorCode)) {
if(U_FAILURE(*status)) {
return 0;
}
if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
*status=U_ILLEGAL_ARGUMENT_ERROR;
}
if(start<0 || start>limit || length<limit) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
*status=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
length=limit-start;
@ -1048,28 +1082,27 @@ repTextExtract(UText *t,
}
UnicodeString buffer(dest, 0, destCapacity); // writable alias
rep->extractBetween(start, limit, buffer);
return u_terminateUChars(dest, destCapacity, length, pErrorCode);
return u_terminateUChars(dest, destCapacity, length, status);
}
static int32_t U_CALLCONV
repTextReplace(UText *t,
repTextReplace(UText *ut,
int32_t start, int32_t limit,
const UChar *src, int32_t length,
UTextChunk *chunk,
UErrorCode *pErrorCode) {
RepText *rt=(RepText *)t;
Replaceable *rep=(Replaceable *)rt->context;
UErrorCode *status) {
Replaceable *rep=(Replaceable *)ut->context;
int32_t oldLength;
if(U_FAILURE(*pErrorCode)) {
if(U_FAILURE(*status)) {
return 0;
}
if(src==NULL && length!=0) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
*status=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
oldLength=rep->length(); // will subtract from new length
if(start<0 || start>limit || oldLength<limit) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
*status=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
// prepare
@ -1082,24 +1115,22 @@ repTextReplace(UText *t,
}
static void U_CALLCONV
repTextCopy(UText *t,
repTextCopy(UText *ut,
int32_t start, int32_t limit,
int32_t destIndex,
UBool move,
UTextChunk *chunk,
UErrorCode *pErrorCode) {
RepText *rt=(RepText *)t;
Replaceable *rep=(Replaceable *)rt->context;
UErrorCode *status) {
Replaceable *rep=(Replaceable *)ut->context;
int32_t length=rep->length();
if(U_FAILURE(*pErrorCode)) {
if(U_FAILURE(*status)) {
return;
}
if( start<0 || start>limit || length<limit ||
destIndex<0 || length<destIndex ||
(start<destIndex && destIndex<limit)
) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
*status=U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
if(move) {
@ -1118,61 +1149,37 @@ repTextCopy(UText *t,
// never invalidate the chunk because we have a copy of the characters
}
static const UText repText={
NULL, NULL, NULL, NULL,
(int32_t)sizeof(UText), 0, 0, 0,
repTextClone,
repTextGetProperties,
repTextLength,
repTextAccess,
repTextExtract,
repTextReplace,
repTextCopy,
NULL, // mapOffsetToNative
NULL // mapIndexToUTF16
};
U_DRAFT UText * U_EXPORT2
utext_openReplaceable(Replaceable *rep, UErrorCode *pErrorCode) {
if(U_FAILURE(*pErrorCode)) {
utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status) {
if(U_FAILURE(*status)) {
return NULL;
}
if(rep==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
*status=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
RepText *rt=(RepText *)uprv_malloc(sizeof(RepText));
if(rt==NULL) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return NULL;
ut = utext_setup(ut, sizeof(ReplExtra), status);
ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
if(rep->hasMetaData()) {
ut->providerProperties |=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);
}
*((UText *)rt)=repText;
rt->context=rep;
return rt;
ut->clone = noopTextClone;
ut->length = repTextLength;
ut->access = repTextAccess;
ut->extract = repTextExtract;
ut->replace = repTextReplace;
ut->copy = repTextCopy;
ut->context=rep;
return ut;
}
U_DRAFT void U_EXPORT2
utext_closeReplaceable(UText *t) {
if(t!=NULL) {
uprv_free((RepText *)t);
}
}
U_DRAFT void U_EXPORT2
utext_resetReplaceable(UText *t, Replaceable *rep, UErrorCode *pErrorCode) {
if(U_FAILURE(*pErrorCode)) {
return;
}
if(rep==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
RepText *rt=(RepText *)t;
rt->context=rep;
}
U_CDECL_END
#endif

View file

@ -128,17 +128,17 @@ static void TestAPI(void) {
c = utext_char32At(uta, 0);
TEST_ASSERT(c==uString[0]);
c = utext_current(uta);
c = utext_current32(uta);
TEST_ASSERT(c==uString[0]);
c = utext_next32(uta);
TEST_ASSERT(c==uString[0]);
c = utext_current(uta);
c = utext_current32(uta);
TEST_ASSERT(c==uString[1]);
c = utext_previous32(uta);
TEST_ASSERT(c==uString[0]);
c = utext_current(uta);
c = utext_current32(uta);
TEST_ASSERT(c==uString[0]);
c = utext_next32From(uta, 1);
@ -170,7 +170,7 @@ static void TestAPI(void) {
utext_setIndex(uta, 0);
c = UTEXT_NEXT32(uta);
TEST_ASSERT(c==uString[0]);
c = utext_current(uta);
c = utext_current32(uta);
TEST_ASSERT(c==uString[1]);
c = UTEXT_PREVIOUS32(uta);