ICU-3944 text access, work in progress

X-SVN-Rev: 18095
This commit is contained in:
Andy Heninger 2005-06-30 05:22:08 +00:00
parent 1ed751762f
commit aa87e3bac7
3 changed files with 569 additions and 130 deletions

View file

@ -536,22 +536,32 @@ resetChunk(UTextChunk *chunk, int32_t index) {
}
}
//
// invalidateChunk Reset a chunk to have no contents, so that the next call
// to access will cause new data to load.
// This is needed when copy/move/replace operate directly on the
// backing text, potentially putting it out of sync with the
// contents in the chunk.
//
static void
invalidateChunk(UTextChunk *chunk) {
    // Zero every position field so the chunk describes an empty range:
    // first the native bounds, then the offset and length within the chunk.
    chunk->nativeStart = 0;
    chunk->nativeLimit = 0;
    chunk->offset      = 0;
    chunk->length      = 0;
}
//------------------------------------------------------------------------------
//
// No-Op UText implementation for illegal input
//
//------------------------------------------------------------------------------
U_CDECL_BEGIN
static UText * U_CALLCONV
//
// Clone. This is a generic copy-the-utext-by-value clone function that can be
// used as-is with some utext types, and as helper by other clones.
//
noopTextClone(UText * dest, const UText * src, UBool /*deep*/, UErrorCode * status) {
static UText * U_CALLCONV
shallowTextClone(UText * dest, const UText * src, UErrorCode * status) {
if (U_FAILURE(*status)) {
return NULL;
}
@ -594,52 +604,9 @@ noopTextClone(UText * dest, const UText * src, UBool /*deep*/, UErrorCode * sta
}
static int32_t U_CALLCONV
noopTextLength(UText * /* t */) {
return 0;
}
static UBool U_CALLCONV
noopTextAccess(UText * /* t */, int32_t /* index */, UBool /* forward*/,
UTextChunk * /* chunk */) {
return FALSE;
}
static int32_t U_CALLCONV
noopTextExtract(UText * /* t */,
int32_t /* start */, int32_t /* limit */,
UChar * /* dest */, int32_t /* destCapacity */,
UErrorCode * /* pErrorCode */) {
return 0;
}
static int32_t U_CALLCONV
noopTextMapOffsetToNative(UText * /* t */, int32_t /* offset */) {
return 0;
}
static int32_t U_CALLCONV
noopTextMapIndexToUTF16(UText * /* t */, int32_t /* index */) {
return 0;
}
U_CDECL_END
static const UText noopText={
UTEXT_INITIALIZER_HEAD,
noopTextClone,
noopTextLength,
noopTextAccess,
noopTextExtract,
NULL, // replace
NULL, // copy
noopTextMapOffsetToNative,
noopTextMapIndexToUTF16,
NULL // close
};
//------------------------------------------------------------------------------
//
@ -859,6 +826,42 @@ utf8TextMapIndexToUTF16(UText *ut, int32_t index) {
return offset;
}
//
//  utf8TextClone   Clone function for UTF-8 UTexts.
//
//      dest    UText to clone into; handed straight to shallowTextClone().
//      src     the UText being cloned.
//      deep    if TRUE, the clone gets its own private copy of the string.
//      status  receives U_MEMORY_ALLOCATION_ERROR if the string copy cannot
//              be allocated.
//
//      Returns the value produced by shallowTextClone().
//
static UText * U_CALLCONV
utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
{
    // First do a generic shallow clone.  Does everything needed for the UText struct itself.
    dest = shallowTextClone(dest, src, status);

    // For deep clones, make a copy of the string.
    //  The copied storage is owned by the newly created clone.
    //  A non-NULL pointer in UText.p is the signal to the close() function to free it.
    if (deep && U_SUCCESS(*status)) {
        int32_t  len     = src->b;     // used here as the length of the source string, in bytes
        char    *copyStr = (char *)uprv_malloc(len+1);
        if (copyStr == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
        } else {
            // Copy exactly len bytes and terminate the copy ourselves.
            // A source string that was opened with an explicit length is not
            // required to be NUL terminated, so reading len+1 bytes from it
            // could run one byte past the end of the caller's buffer.
            uprv_memcpy(copyStr, src->context, len);
            copyStr[len]  = 0;
            dest->context = copyStr;
            dest->p       = copyStr;
        }
    }
    return dest;
}
static void U_CALLCONV
utf8TextClose(UText *ut) {
    // The generic UText framework close does most of the work.
    // The only provider-specific step is to free the string storage recorded
    // in ut->p, which utf8TextClone() sets when a deep clone allocates its
    // own copy of the text.
    uprv_free((char *)ut->p);
    ut->p = NULL;
}
@ -878,12 +881,13 @@ utext_openUTF8(UText *ut, const char *s, int32_t length, UErrorCode *status) {
}
ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_NON_UTF16_INDEXES);
ut->clone = noopTextClone;
ut->clone = utf8TextClone;
ut->nativeLength = utf8TextLength;
ut->access = utf8TextAccess;
ut->extract = utf8TextExtract;
ut->mapOffsetToNative = utf8TextMapOffsetToNative;
ut->mapNativeIndexToUTF16 = utf8TextMapIndexToUTF16;
ut->close = utf8TextClose;
ut->context=s;
if(length>=0) {
@ -902,11 +906,6 @@ U_CDECL_END
/* UText implementation wrapper for Replaceable (read/write) ---------------- */
//------------------------------------------------------------------------------
//
@ -918,10 +917,6 @@ U_CDECL_END
//------------------------------------------------------------------------------
/*
* TODO: use a flag in RepText to support readonly strings?
* -> omit UTEXT_PROVIDER_WRITABLE
*/
// minimum chunk size for this implementation: 3
// to allow for possible trimming for code point boundaries
@ -941,16 +936,32 @@ U_CDECL_BEGIN
static UText * U_CALLCONV
repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
// First do a generic shallow clone. Does everything needed for the UText struct itself.
dest = noopTextClone(dest, src, deep, status);
dest = shallowTextClone(dest, src, status);
// For deep clones, make a copy of the Replaceable.
// The copied Replaceable storage is owned by the newly created UText clone.
// A non-NULL pointer in UText.p is the signal to the close() function to delete
// it.
//
if (deep && U_SUCCESS(*status)) {
const Replaceable *replSrc = (const Replaceable *)src->context;
dest->context = replSrc->clone();
dest->p = dest->context;
}
return dest;
}
static void U_CALLCONV
repTextClose(UText *ut) {
    // The generic UText framework close does most of the work.
    // The only provider-specific step is to delete the Replaceable recorded
    // in ut->p, which repTextClone() sets when a deep clone creates its own
    // copy of the Replaceable.
    delete (Replaceable *)ut->p;
    ut->p = NULL;
}
static int32_t U_CALLCONV
repTextLength(UText *ut) {
@ -963,8 +974,6 @@ repTextLength(UText *ut) {
static UBool U_CALLCONV
repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk* /* chunk*/ ) {
const Replaceable *rep=(const Replaceable *)ut->context;
int32_t start; // index of the start of the chunk to be loaded
int32_t limit; // index of the end+1 of the chunk to be loaded.
int32_t length=rep->length(); // Full length of the input text (bigger than a chunk)
@ -1022,21 +1031,31 @@ repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk* /* chunk*/ )
ut->chunk.offset = 0;
return FALSE;
}
limit = index;
if (limit>length) {
limit = length;
// Figure out the bounds of the chunk to extract for reverse iteration.
// The chunk must not split a surrogate pair, yet it must still contain
// the data we need.
// Fix by requesting a chunk that includes an extra UChar at the end.
// If this turns out to be a lead surrogate, we can lop it off and still have
// the data we wanted.
ut->chunk.nativeStart = index + 1 - REP_TEXT_CHUNK_SIZE;
if (ut->chunk.nativeStart < 0) {
ut->chunk.nativeStart = 0;
}
start=limit-REP_TEXT_CHUNK_SIZE;
if(start<0) {
start=0;
ut->chunk.nativeLimit = index + 1;
if (ut->chunk.nativeLimit > length) {
ut->chunk.nativeLimit = length;
}
}
// Extract the new chunk of text from the Replaceable source.
ReplExtra *ex = (ReplExtra *)ut->pExtra;
// UnicodeString with its buffer a writable alias to the chunk buffer
UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/);
rep->extractBetween(ut->chunk.nativeStart, ut->chunk.nativeLimit, buffer);
ut->chunk.contents = ex->s;
ut->chunk.contents = ex->s;
ut->chunk.length = ut->chunk.nativeLimit - ut->chunk.nativeStart;
ut->chunk.offset = index - ut->chunk.nativeStart;
@ -1045,13 +1064,17 @@ repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk* /* chunk*/ )
if (ut->chunk.nativeLimit < length &&
U16_IS_LEAD(ex->s[ut->chunk.length-1])) {
ut->chunk.length--;
ut->chunk.nativeLimit--;
if (ut->chunk.offset > ut->chunk.length) {
ut->chunk.offset = ut->chunk.length;
}
}
// if the first UChar in the chunk could be the trailing half of a surrogate pair,
// trim it off.
if(ut->chunk.nativeStart>0 && U16_IS_TRAIL(ex->s[0])) {
++(ut->chunk.contents);
++(ut->chunk.nativeStart);
--(ut->chunk.length);
--(ut->chunk.offset);
}
@ -1070,7 +1093,8 @@ repTextExtract(UText *ut,
UChar *dest, int32_t destCapacity,
UErrorCode *status) {
const Replaceable *rep=(const Replaceable *)ut->context;
int32_t length=rep->length();
int32_t length=rep->length();
int32_t lengthToExtract = length;
if(U_FAILURE(*status)) {
return 0;
@ -1084,7 +1108,7 @@ repTextExtract(UText *ut,
}
length=limit-start;
if(length>destCapacity) {
length=destCapacity;
limit = start + destCapacity;
}
UnicodeString buffer(dest, 0, destCapacity); // writable alias
rep->extractBetween(start, limit, buffer);
@ -1107,38 +1131,67 @@ repTextReplace(UText *ut,
return 0;
}
oldLength=rep->length(); // will subtract from new length
if(start<0 || start>limit || oldLength<limit) {
if(start<0 || start>limit ) {
*status=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
// prepare
UnicodeString buffer((UBool)(length<0), src, length); // read-only alias
// replace
rep->handleReplaceBetween(start, limit, buffer);
// post-processing
return rep->length()-oldLength;
// never invalidate the chunk because we have a copy of the characters
if (start > oldLength) {
start = oldLength;
}
if (limit > oldLength) {
limit = oldLength;
}
// Do the actual replace operation using methods of the Replaceable class
UnicodeString replStr((UBool)(length<0), src, length); // read-only alias
rep->handleReplaceBetween(start, limit, replStr);
int32_t newLength = rep->length();
int32_t lengthDelta = newLength - oldLength;
// Is the UText chunk buffer OK?
if (ut->chunk.nativeLimit > start) {
// this replace operation may have impacted the current chunk.
// invalidate it, which will force a reload on the next access.
invalidateChunk(&ut->chunk);
}
// set the iteration position to the end of the newly inserted replacement text.
int32_t newIndexPos = limit + lengthDelta;
repTextAccess(ut, newIndexPos, TRUE, &ut->chunk);
return lengthDelta;
}
static void U_CALLCONV
repTextCopy(UText *ut,
int32_t start, int32_t limit,
int32_t destIndex,
UBool move,
UErrorCode *status) {
int32_t start, int32_t limit,
int32_t destIndex,
UBool move,
UErrorCode *status)
{
Replaceable *rep=(Replaceable *)ut->context;
int32_t length=rep->length();
if(U_FAILURE(*status)) {
return;
}
if( start<0 || start>limit || length<limit ||
destIndex<0 || length<destIndex ||
(start<destIndex && destIndex<limit)
) {
if( start<0 || start>limit || destIndex<0 ||
(start<destIndex && destIndex<limit) )
{
*status=U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
if (destIndex > length) {
destIndex = length;
}
if (limit > length) {
limit = length;
}
if (start > length) {
start = length;
}
if(move) {
// move: copy to destIndex, then replace original with nothing
int32_t segLength=limit-start;
@ -1152,13 +1205,37 @@ repTextCopy(UText *ut,
// copy
rep->copy(start, limit, destIndex);
}
// never invalidate the chunk because we have a copy of the characters
// If the change to the text touched the region in the chunk buffer,
// invalidate the buffer.
int32_t firstAffectedIndex = destIndex;
if (move && start<firstAffectedIndex) {
firstAffectedIndex = start;
}
if (firstAffectedIndex < ut->chunk.nativeLimit) {
// changes may have affected range covered by the chunk
invalidateChunk(&ut->chunk);
}
// Put iteration position at the newly inserted (moved) block,
int32_t nativeIterIndex = destIndex + limit - start;
if (move && destIndex>start) {
// moved a block of text towards the end of the string.
nativeIterIndex = destIndex;
}
// Set position, reload chunk if needed.
repTextAccess(ut, nativeIterIndex, TRUE, &ut->chunk);
}
U_DRAFT UText * U_EXPORT2
utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status) {
utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status)
{
if(U_FAILURE(*status)) {
return NULL;
}
@ -1179,6 +1256,7 @@ utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status) {
ut->extract = repTextExtract;
ut->replace = repTextReplace;
ut->copy = repTextCopy;
ut->close = repTextClose;
ut->context=rep;
return ut;
@ -1195,36 +1273,46 @@ U_CDECL_END
//------------------------------------------------------------------------------
//
// UText implementation for UnicodeString (read/write)
// UText implementation for UnicodeString (read/write) and
// for const UnicodeString (read only)
// (same implementation, only the flags are different)
//
// Use of UText data members:
// context pointer to UnicodeString
// p pointer to UnicodeString IF this UText owns the string
// and it must be deleted on close(). NULL otherwise.
//
//------------------------------------------------------------------------------
U_CDECL_BEGIN
/*
* TODO: use a flag in UText to support readonly strings?
* -> omit UTEXT_PROVIDER_WRITABLE
*/
static UText * U_CALLCONV
unistrTextClone(UText * /* dest */, const UText * /*src*/, UBool /*deep*/, UErrorCode * /*status*/) {
// TODO: fix this.
#if 0
UText *t2=(UText *)uprv_malloc(sizeof(UText));
if(t2!=NULL) {
*t2=*t;
t2->context=((const UnicodeString *)t->context)->clone();
if(t2->context==NULL) {
uprv_free(t2);
t2=NULL;
}
unistrTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
// First do a generic shallow clone. Does everything needed for the UText struct itself.
dest = shallowTextClone(dest, src, status);
// For deep clones, make a copy of the UnicodeString.
// The copied UnicodeString storage is owned by the newly created UText clone.
// A non-NULL pointer in UText.p is the signal to the close() function to delete
// the UnicodeString.
//
if (deep && U_SUCCESS(*status)) {
const UnicodeString *srcString = (const UnicodeString *)src->context;
dest->context = new UnicodeString(*srcString);
dest->p = dest->context;
}
return t2;
#endif
return NULL;
return dest;
}
static void U_CALLCONV
unistrTextClose(UText *ut) {
    // The generic UText framework close does most of the work.
    // The only provider-specific step is to delete the UnicodeString recorded
    // in ut->p, which unistrTextClone() sets when a deep clone creates its
    // own copy of the string.
    delete (UnicodeString *)ut->p;
    ut->p = NULL;
}
@ -1241,6 +1329,7 @@ unistrTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk) {
if (chunk->nativeLimit != length) {
// This chunk is not yet set up. Do it now.
// TODO: probably simplify things to move this into the open operation.
chunk->contents = us->getBuffer();
chunk->length = length;
chunk->nativeStart = 0;
@ -1295,11 +1384,11 @@ unistrTextExtract(UText *t,
}
static int32_t U_CALLCONV
unistrTextReplace(UText *t,
unistrTextReplace(UText *ut,
int32_t start, int32_t limit,
const UChar *src, int32_t length,
UErrorCode *pErrorCode) {
UnicodeString *us=(UnicodeString *)t->context;
UnicodeString *us=(UnicodeString *)ut->context;
int32_t oldLength;
if(U_FAILURE(*pErrorCode)) {
@ -1309,37 +1398,58 @@ unistrTextReplace(UText *t,
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
}
oldLength=us->length(); // will subtract from new length
if(start<0 || start>limit || oldLength<limit) {
if(start<0 || start>limit) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
if (start>oldLength) {
start = oldLength;
}
if (limit>oldLength) {
limit = oldLength;
}
// replace
us->replace(start, limit-start, src, length);
int32_t newLength = us->length();
// TODO: update chunk, set our iteration position.
return us->length()-oldLength;
// Update the chunk description.
ut->chunk.contents = us->getBuffer();
ut->chunk.length = newLength;
ut->chunk.nativeLimit = newLength;
// Set iteration position to the point just following the newly inserted text.
int32_t lengthDelta = newLength - oldLength;
ut->chunk.offset = limit + lengthDelta;
return lengthDelta;
}
static void U_CALLCONV
unistrTextCopy(UText *t,
unistrTextCopy(UText *ut,
int32_t start, int32_t limit,
int32_t destIndex,
UBool move,
UErrorCode *pErrorCode) {
UnicodeString *us=(UnicodeString *)t->context;
UnicodeString *us=(UnicodeString *)ut->context;
int32_t length=us->length();
if(U_FAILURE(*pErrorCode)) {
return;
}
if( start<0 || start>limit || length<limit ||
destIndex<0 || length<destIndex ||
if( start<0 || start>limit || destIndex<0 ||
(start<destIndex && destIndex<limit)
) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
if (limit>length) {
limit = length;
}
if (destIndex>length) {
destIndex = length;
}
if(move) {
// move: copy to destIndex, then replace original with nothing
int32_t segLength=limit-start;
@ -1352,7 +1462,21 @@ unistrTextCopy(UText *t,
// copy
us->copy(start, limit, destIndex);
}
// TODO: update chunk description, set iteration position.
// update chunk description, set iteration position.
ut->chunk.contents = us->getBuffer();
if (move==FALSE) {
// copy operation, string length grows
ut->chunk.length += limit-start;
ut->chunk.nativeLimit = ut->chunk.length;
}
// Iteration position to end of the newly inserted text.
ut->chunk.offset = destIndex+limit-start;
if (move && destIndex>start) { //TODO: backwards? check.
ut->chunk.offset = destIndex;
}
}
U_CDECL_END
@ -1368,6 +1492,7 @@ utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
ut->extract = unistrTextExtract;
ut->replace = unistrTextReplace;
ut->copy = unistrTextCopy;
ut->close = unistrTextClose;
ut->context = s;
ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS)|
@ -1386,6 +1511,7 @@ utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *stat
ut->nativeLength = unistrTextLength;
ut->access = unistrTextAccess;
ut->extract = unistrTextExtract;
ut->close = unistrTextClose;
ut->context = s;
ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
@ -1408,12 +1534,48 @@ U_CDECL_BEGIN
static UText * U_CALLCONV
ucstrTextClone(UText *dest, const UText * src, UBool deep, UErrorCode * status) {
UText *clone = noopTextClone(dest, src, deep, status);
// TODO: fix this.
return clone;
// First do a generic shallow clone.
dest = shallowTextClone(dest, src, status);
// For deep clones, make a copy of the string.
// The copied storage is owned by the newly created clone.
// A non-NULL pointer in UText.p is the signal to the close() function to delete
// it.
//
if (deep && U_SUCCESS(*status)) {
int32_t len = utext_nativeLength(dest);
// The cloned string IS going to be NUL terminated, whether or not the original was.
const UChar *srcStr = (const UChar *)src->context;
UChar *copyStr = (UChar *)uprv_malloc((len+1) * sizeof(UChar));
if (copyStr == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
} else {
int i;
for (i=0; i<len; i++) {
copyStr[i] = srcStr[i];
}
copyStr[len] = 0;
dest->context = copyStr;
dest->p = copyStr;
}
}
return dest;
}
static void U_CALLCONV
ucstrTextClose(UText *ut) {
    // The generic UText framework close does most of the work.
    // The only provider-specific step is to free the UChar string storage
    // recorded in ut->p, which ucstrTextClone() sets when a deep clone
    // allocates its own copy of the text.
    uprv_free((UChar *)ut->p);
    ut->p = NULL;
}
static int32_t U_CALLCONV
ucstrTextLength(UText *ut) {
if (ut->a < 0) {
@ -1575,6 +1737,7 @@ utext_openUChars(UText *ut, const UChar *s, int32_t length, UErrorCode *status)
ut->extract = ucstrTextExtract;
ut->replace = NULL;
ut->copy = NULL;
ut->close = ucstrTextClose;
ut->context = s;
ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
@ -1583,9 +1746,9 @@ utext_openUChars(UText *ut, const UChar *s, int32_t length, UErrorCode *status)
}
ut->a = length;
ut->chunk.contents = s;
ut->chunk.length = length;
ut->chunk.nativeStart = 0;
ut->chunk.nativeLimit = length>=0? length : 0;
ut->chunk.length = ut->chunk.nativeLimit;
ut->chunk.nonUTF16Indexes = FALSE;
}
return ut;

View file

@ -18,16 +18,19 @@
#include <unicode/ustring.h>
#include "utxttest.h"
UBool gFailed = FALSE;
static UBool gFailed = FALSE;
static int gTestNum = 0;
#define TEST_ASSERT(x) \
{if ((x)==FALSE) {errln("Test failure in file %s at line %d\n", __FILE__, __LINE__);\
{if ((x)==FALSE) {errln("Test #%d failure in file %s at line %d\n", gTestNum, __FILE__, __LINE__);\
gFailed = TRUE;\
}}
#define TEST_SUCCESS(status) \
{if (U_FAILURE(status)) {errln("Test failure in file %s at line %d. Error = \"%s\"\n", \
__FILE__, __LINE__, u_errorName(status)); \
{if (U_FAILURE(status)) {errln("Test #%d failure in file %s at line %d. Error = \"%s\"\n", \
gTestNum, __FILE__, __LINE__, u_errorName(status)); \
gFailed = TRUE;\
}}
@ -38,7 +41,6 @@ UTextTest::~UTextTest() {
}
void
UTextTest::runIndexedTest(int32_t index, UBool exec,
const char* &name, char* /*par*/) {
@ -187,6 +189,7 @@ void UTextTest::TestString(const UnicodeString &s) {
ut = utext_openUnicodeString(NULL, &sa, &status);
TEST_SUCCESS(status);
TestAccess(sa, ut, cpCount, cpMap);
TestCMR(sa, ut, cpCount, cpMap, cpMap);
utext_close(ut);
@ -202,7 +205,8 @@ void UTextTest::TestString(const UnicodeString &s) {
status = U_ZERO_ERROR;
ut = utext_openReplaceable(NULL, &sa, &status);
TEST_SUCCESS(status);
// TestAccess(sa, ut, cpCount, cpMap);
TestAccess(sa, ut, cpCount, cpMap);
TestCMR(sa, ut, cpCount, cpMap, cpMap);
utext_close(ut);
@ -239,9 +243,268 @@ void UTextTest::TestString(const UnicodeString &s) {
delete []u8String;
}
// TestCMR test Copy, Move and Replace operations.
// us UnicodeString containing the test text.
// ut UText containing the same test text.
// cpCount number of code points in the test text.
// nativeMap Mapping from code points to native indexes for the UText.
// u16Map Mapping from code points to UTF-16 indexes, for use with the UnicodeString.
//
// This function runs a whole series of operations on each incoming UText.
// The UText is deep-cloned prior to each operation, so that the original UText remains unchanged.
//
void UTextTest::TestCMR(const UnicodeString &us, UText *ut, int cpCount, m *nativeMap, m *u16Map) {
    TEST_ASSERT(utext_isWritable(ut) == TRUE);

    int  srcLengthType;       // Loop variables for selecting the position and length
    int  srcPosType;          //   of the block to operate on within the source text.
    int  destPosType;

    int  srcIndex  = 0;       // Code Point indexes of the block to operate on for
    int  srcLength = 0;       //   a specific test.

    int  destIndex = 0;       // Code point index of the destination for a copy/move test.

    int32_t  nativeStart = 0; // Native unit indexes for a test.
    int32_t  nativeLimit = 0;
    int32_t  nativeDest  = 0;

    int32_t  u16Start    = 0; // UTF-16 indexes for a test.
    int32_t  u16Limit    = 0; //   used when performing the same operation in a Unicode String
    int32_t  u16Dest     = 0;

    // Iterate over a whole series of source index, length and target indexes.
    // This is done with code point indexes; these are translated to native
    //   indexes using nativeMap, and to UTF-16 indexes using u16Map.
    for (srcLengthType=1; srcLengthType<=3; srcLengthType++) {
        switch (srcLengthType) {
            case 1: srcLength = 1; break;
            case 2: srcLength = 5; break;
            case 3: srcLength = cpCount / 3; break;
        }
        for (srcPosType=1; srcPosType<=5; srcPosType++) {
            switch (srcPosType) {
                case 1: srcIndex = 0; break;
                case 2: srcIndex = 1; break;
                case 3: srcIndex = cpCount - srcLength; break;
                case 4: srcIndex = cpCount - srcLength - 1; break;
                case 5: srcIndex = cpCount / 2; break;
            }
            if (srcIndex < 0 || srcIndex + srcLength > cpCount) {
                // filter out bogus test cases -
                //   those with a source range that falls off an edge of the string.
                continue;
            }

            // The source range depends only on srcIndex and srcLength, so translate
            //   it once here.  Both the copy/move tests and the replace tests below
            //   use these values.
            nativeStart = nativeMap[srcIndex].nativeIdx;
            nativeLimit = nativeMap[srcIndex+srcLength].nativeIdx;
            u16Start    = u16Map[srcIndex].nativeIdx;
            u16Limit    = u16Map[srcIndex+srcLength].nativeIdx;

            //
            // Copy and move tests.
            //   iterate over a variety of destination positions.
            //   The loop bound covers all six cases of the switch, including
            //   the two destinations at the very end of the text.
            //
            for (destPosType=1; destPosType<=6; destPosType++) {
                switch (destPosType) {
                    case 1: destIndex = 0; break;
                    case 2: destIndex = 1; break;
                    case 3: destIndex = srcIndex - 1; break;
                    case 4: destIndex = srcIndex + srcLength + 1; break;
                    case 5: destIndex = cpCount-1; break;
                    case 6: destIndex = cpCount; break;
                }
                if (destIndex<0 || destIndex>cpCount) {
                    // filter out bogus test cases.
                    continue;
                }

                nativeDest = nativeMap[destIndex].nativeIdx;
                u16Dest    = u16Map[destIndex].nativeIdx;

                // TestCopyMove() clears gFailed on entry, so the flag has to be
                //   examined after each call; checking only once after both calls
                //   would lose a failure reported by the copy test.
                gFailed = FALSE;
                TestCopyMove(us, ut, FALSE,
                             nativeStart, nativeLimit, nativeDest,
                             u16Start, u16Limit, u16Dest);
                if (gFailed) {
                    return;
                }

                TestCopyMove(us, ut, TRUE,
                             nativeStart, nativeLimit, nativeDest,
                             u16Start, u16Limit, u16Dest);
                if (gFailed) {
                    return;
                }
            }

            //
            //  Replace tests.
            //    Replace the source range with each prefix, of length 0 through 19,
            //    of a fixed replacement string.
            //
            UnicodeString fullRepString("This is an arbitrary string that will be used as replacement text");
            for (int32_t replStrLen=0; replStrLen<20; replStrLen++) {
                UnicodeString repStr(fullRepString, 0, replStrLen);
                TestReplace(us, ut,
                            nativeStart, nativeLimit,
                            u16Start, u16Limit,
                            repStr);
                if (gFailed) {
                    return;
                }
            }
        }
    }
}
//
// TestCopyMove run a single test case for utext_copy.
// Test cases are created in TestCMR and dispatched here for execution.
//
void UTextTest::TestCopyMove(const UnicodeString &us, UText *ut, UBool move,
int32_t nativeStart, int32_t nativeLimit, int32_t nativeDest,
int32_t u16Start, int32_t u16Limit, int32_t u16Dest)
{
UErrorCode status = U_ZERO_ERROR;
UText *targetUT = NULL;
gTestNum++;
gFailed = FALSE;
//
// clone the UText. The test will be run in the cloned copy
// so that we don't alter the original.
//
targetUT = utext_clone(NULL, ut, TRUE, &status);
TEST_SUCCESS(status);
UnicodeString targetUS(us); // And copy the reference string.
// do the test operation first in the reference
targetUS.copy(u16Start, u16Limit, u16Dest);
if (move) {
// delete out the source range.
if (u16Limit < u16Dest) {
targetUS.removeBetween(u16Start, u16Limit);
} else {
int32_t amtCopied = u16Limit - u16Start;
targetUS.removeBetween(u16Start+amtCopied, u16Limit+amtCopied);
}
}
// Do the same operation in the UText under test
utext_copy(targetUT, nativeStart, nativeLimit, nativeDest, move, &status);
if (nativeDest > nativeStart && nativeDest < nativeLimit) {
TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
} else {
TEST_SUCCESS(status);
// Compare the results of the two parallel tests
int32_t usi = 0; // UnicodeString postion, utf-16 index.
int32_t uti = 0; // UText position, native index.
int32_t cpi; // char32 position (code point index)
UChar32 usc; // code point from Unicode String
UChar32 utc; // code point from UText
utext_setNativeIndex(targetUT, 0);
for (cpi=0; ; cpi++) {
usc = targetUS.char32At(usi);
utc = utext_next32(targetUT);
if (utc < 0) {
break;
}
TEST_ASSERT(uti == usi);
TEST_ASSERT(utc == usc);
usi = targetUS.moveIndex32(usi, 1);
uti = utext_getNativeIndex(targetUT);
if (gFailed) {
goto cleanupAndReturn;
}
}
int32_t expectedNativeLength = utext_nativeLength(ut);
if (move == FALSE) {
expectedNativeLength += nativeLimit - nativeStart;
}
uti = utext_getNativeIndex(targetUT);
TEST_ASSERT(uti == expectedNativeLength);
}
cleanupAndReturn:
utext_close(targetUT);
}
//
// TestReplace Test a single Replace operation.
//
void UTextTest::TestReplace(
            const UnicodeString &us,     // reference UnicodeString in which to do the replace
            UText         *ut,                // UnicodeText object under test.
            int32_t       nativeStart,        // Range to be replaced, in UText native units.
            int32_t       nativeLimit,
            int32_t       u16Start,           // Range to be replaced, in UTF-16 units
            int32_t       u16Limit,           //    for use in the reference UnicodeString.
            const UnicodeString &repStr)      // The replacement string
{
    UErrorCode      status   = U_ZERO_ERROR;
    UText          *targetUT = NULL;
    gTestNum++;
    gFailed = FALSE;

    //
    //  clone the target UText.  The test will be run in the cloned copy
    //  so that we don't alter the original.
    //
    targetUT = utext_clone(NULL, ut, TRUE, &status);
    TEST_SUCCESS(status);
    UnicodeString targetUS(us);    // And copy the reference string.

    //
    // Do the replace operation in the Unicode String, to
    //   produce a reference result.
    //
    targetUS.replace(u16Start, u16Limit-u16Start, repStr);

    //
    // Do the replace on the UText under test.
    //   The status is checked as well as the returned length delta, so that an
    //   error reported by utext_replace() is not silently ignored.
    //
    const UChar *rs = repStr.getBuffer();
    int32_t  rsLen = repStr.length();
    int32_t actualDelta = utext_replace(targetUT, nativeStart, nativeLimit, rs, rsLen, &status);
    int32_t expectedDelta = repStr.length() - (nativeLimit - nativeStart);
    TEST_SUCCESS(status);
    TEST_ASSERT(actualDelta == expectedDelta);

    //
    // Compare the results
    //
    int32_t  usi = 0;    // UnicodeString position, utf-16 index.
    int32_t  uti = 0;    // UText position, native index.
    UChar32  usc;        // code point from Unicode String
    UChar32  utc;        // code point from UText
    int32_t  expectedNativeLength = 0;
    utext_setNativeIndex(targetUT, 0);
    for (;;) {
        usc = targetUS.char32At(usi);
        utc = utext_next32(targetUT);
        if (utc < 0) {
            // ran off the end of the UText
            break;
        }
        TEST_ASSERT(uti == usi);
        TEST_ASSERT(utc == usc);
        usi = targetUS.moveIndex32(usi, 1);
        uti = utext_getNativeIndex(targetUT);
        if (gFailed) {
            goto cleanupAndReturn;
        }
    }
    // After iterating off the end, the UText index must equal the original
    //   native length adjusted by the expected change in length.
    expectedNativeLength = utext_nativeLength(ut) + expectedDelta;
    uti = utext_getNativeIndex(targetUT);
    TEST_ASSERT(uti == expectedNativeLength);

cleanupAndReturn:
    utext_close(targetUT);
}
void UTextTest::TestAccess(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
UErrorCode status = U_ZERO_ERROR;
gTestNum++;
//
// Check the length from the UText

View file

@ -38,6 +38,19 @@ private:
void TestString(const UnicodeString &s);
void TestAccess(const UnicodeString &us, UText *ut, int cpCount, m *cpMap);
void TestCMR (const UnicodeString &us, UText *ut, int cpCount, m *nativeMap, m *utf16Map);
void TestCopyMove(const UnicodeString &us, UText *ut, UBool move,
int32_t nativeStart, int32_t nativeLimit, int32_t nativeDest,
int32_t u16Start, int32_t u16Limit, int32_t u16Dest);
void TestReplace(const UnicodeString &us, // reference UnicodeString in which to do the replace
UText *ut, // UnicodeText object under test.
int32_t nativeStart, // Range to be replaced, in UText native units.
int32_t nativeLimit,
int32_t u16Start, // Range to be replaced, in UTF-16 units
int32_t u16Limit, // for use in the reference UnicodeString.
const UnicodeString &repStr); // The replacement string
};