ICU-3944 text access, tests and fixes

X-SVN-Rev: 18190
This commit is contained in:
Andy Heninger 2005-07-11 07:40:18 +00:00
parent 6bf5e3f373
commit 6edb932e50
2 changed files with 106 additions and 16 deletions

View file

@ -239,7 +239,6 @@ U_DRAFT UChar32 U_EXPORT2
utext_previous32From(UText *ut, int32_t index) {
UTextChunk *chunk = &ut->chunk;
UChar32 c = U_SENTINEL;
UChar32 startingChar;
if(index<=chunk->nativeStart || index>chunk->nativeLimit) {
// Requested native index is outside of the current chunk.
@ -250,7 +249,15 @@ utext_previous32From(UText *ut, int32_t index) {
} else if(chunk->nonUTF16Indexes) {
chunk->offset=ut->mapNativeIndexToUTF16(ut, index);
} else {
// This chunk uses UTF-16 indexing. Index into it.
chunk->offset = index - chunk->nativeStart;
// Put offset onto a code point boundary if it isn't there already.
// The adjustment is only attempted when index lies strictly inside the current chunk.
if (index>ut->chunk.nativeStart && index < ut->chunk.nativeLimit) {
    c = chunk->contents[chunk->offset];
    // Use the U16_IS_TRAIL() predicate here. U16_TRAIL() is not a test: it
    // computes the trail surrogate for a supplementary code point and its
    // result is never zero, so using it as a condition is always true.
    if (U16_IS_TRAIL(c)) {
        utext_current32(ut);  // force index to the start of the current code point.
    }
}
}
if (chunk->offset<=0) {
@ -258,16 +265,13 @@ utext_previous32From(UText *ut, int32_t index) {
goto prev32return;
}
// Do the operation assuming that there are no surrogates involved, either
// at the starting position or at the previous position. Fast, common case.
startingChar = chunk->contents[chunk->offset];
(chunk->offset)--;
// Do the operation assuming that there are no surrogates involved. Fast, common case.
chunk->offset--;
c = chunk->contents[chunk->offset];
// Check for surrogates, do the operation over if there are any.
if (U16_IS_SURROGATE(startingChar) || U16_IS_SURROGATE(c)) {
utext_setNativeIndex(ut, index); // setIndex() handles case of initial index on a trail surrogate
c = utext_previous32(ut); // previous32() handles case of previous char being a supplementary.
// Check for the char being a surrogate, get the whole char if it is.
if (U16_IS_SURROGATE(c)) {
c = utext_current32(ut);
}
prev32return:
@ -1104,6 +1108,14 @@ repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk* /* chunk*/ )
const Replaceable *rep=(const Replaceable *)ut->context;
int32_t length=rep->length(); // Full length of the input text (bigger than a chunk)
// clip the requested index to the limits of the text.
if (index<0) {
index = 0;
}
if (index>length) {
index = length;
}
/*
* Compute start/limit boundaries around index, for a segment of text
@ -1127,9 +1139,6 @@ repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk* /* chunk*/ )
return FALSE;
}
if (index<0) {
index = 0;
}
ut->chunk.nativeLimit = index + REP_TEXT_CHUNK_SIZE - 1;
// Going forward, so we want to have the buffer with stuff at and beyond
// the requested index. The -1 gets us one code point before the
@ -1145,9 +1154,6 @@ repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk* /* chunk*/ )
}
} else {
// Reverse iteration. Fill buffer with data preceding the requested index.
if(index<0) {
index = 0;
}
if (index>ut->chunk.nativeStart && index<=ut->chunk.nativeLimit) {
// Requested position already in buffer.
ut->chunk.offset = index - ut->chunk.nativeStart;
@ -1229,10 +1235,27 @@ repTextExtract(UText *ut,
if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
*status=U_ILLEGAL_ARGUMENT_ERROR;
}
if(start<0 || start>limit || length<limit) {
if(start<0 || start>limit) {
*status=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
if (start>length) {
start=length;
}
if (limit>length) {
limit=length;
}
// adjust start, limit if they point to trail half of surrogates
if (start<length && U16_IS_TRAIL(rep->charAt(start)) &&
U_IS_SUPPLEMENTARY(rep->char32At(start))){
start--;
}
if (limit<length && U16_IS_TRAIL(rep->charAt(limit)) &&
U_IS_SUPPLEMENTARY(rep->char32At(limit))){
limit--;
}
length=limit-start;
if(length>destCapacity) {
limit = start + destCapacity;

View file

@ -1024,6 +1024,73 @@ void UTextTest::ErrorTest()
utext_close(ut);
}
{ // Similar test, with UText over Replaceable
// TODO: merge the common parts of these tests.
//
// After unescape() the test string holds four code points in six UTF-16 code units:
//   index 0    : U+1000
//   indexes 1-2: U+11000 (surrogate pair)
//   index 3    : U+2000
//   indexes 4-5: U+22000 (surrogate pair)
// Each table below has eight entries, one per requested native index 0..7.
// Indexes 6 and 7 are at and beyond the end of the text.
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000");
// Index expected from utext_getNativeIndex() after positioning at index i
// with utext_setNativeIndex() or utext_char32At().
int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
// Index expected from utext_getNativeIndex() after utext_next32From(ut, i).
int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6};
// Index expected from utext_getNativeIndex() after utext_previous32From(ut, i).
int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4};
// Code point expected from utext_char32At(ut, i) and utext_next32From(ut, i); -1 where there is no character.
UChar32 c32Map[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
// Code point expected from utext_previous32From(ut, i); -1 where there is no preceding character.
UChar32 pr32Map[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
// Length expected from utext_extract(ut, i, i+1, ...).
int32_t exLen[] = { 1, 0, 2, 1, 0, 2, 0, 0,};
u16str = u16str.unescape();
UErrorCode status = U_ZERO_ERROR;
UText *ut = utext_openReplaceable(NULL, &u16str, &status);
TEST_SUCCESS(status);
// Number of entries in each of the expected-value tables.
int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
int i;
// Check utext_setNativeIndex: the index read back must match startMap.
for (i=0; i<startMapLimit; i++) {
utext_setNativeIndex(ut, i);
int32_t cpIndex = utext_getNativeIndex(ut);
TEST_ASSERT(cpIndex == startMap[i]);
}
// Check char32At
for (i=0; i<startMapLimit; i++) {
UChar32 c32 = utext_char32At(ut, i);
TEST_ASSERT(c32 == c32Map[i]);
int32_t cpIndex = utext_getNativeIndex(ut);
TEST_ASSERT(cpIndex == startMap[i]);
}
// Check utext_next32From
for (i=0; i<startMapLimit; i++) {
UChar32 c32 = utext_next32From(ut, i);
TEST_ASSERT(c32 == c32Map[i]);
int32_t cpIndex = utext_getNativeIndex(ut);
TEST_ASSERT(cpIndex == nextMap[i]);
}
// check utext_previous32From
for (i=0; i<startMapLimit; i++) {
UChar32 c32 = utext_previous32From(ut, i);
TEST_ASSERT(c32 == pr32Map[i]);
int32_t cpIndex = utext_getNativeIndex(ut);
TEST_ASSERT(cpIndex == prevMap[i]);
}
// check Extract
// Extract from i to i+1, which may be zero or one code points,
// depending on whether the indices straddle a cp boundary.
for (i=0; i<startMapLimit; i++) {
// Room for one code point (at most two code units) plus one spare unit.
UChar buf[3];
status = U_ZERO_ERROR;
int32_t extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
TEST_SUCCESS(status);
TEST_ASSERT(extractedLen == exLen[i]);
if (extractedLen > 0) {
// Decode the extracted code units and compare with the expected code point.
UChar32 c32;
U16_GET(buf, 0, 0, extractedLen, c32);
TEST_ASSERT(c32 == c32Map[i]);
}
}
utext_close(ut);
}
}