mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-3944 text access, tests and fixes
X-SVN-Rev: 18190
This commit is contained in:
parent
6bf5e3f373
commit
6edb932e50
2 changed files with 106 additions and 16 deletions
|
@ -239,7 +239,6 @@ U_DRAFT UChar32 U_EXPORT2
|
|||
utext_previous32From(UText *ut, int32_t index) {
|
||||
UTextChunk *chunk = &ut->chunk;
|
||||
UChar32 c = U_SENTINEL;
|
||||
UChar32 startingChar;
|
||||
|
||||
if(index<=chunk->nativeStart || index>chunk->nativeLimit) {
|
||||
// Requested native index is outside of the current chunk.
|
||||
|
@ -250,7 +249,15 @@ utext_previous32From(UText *ut, int32_t index) {
|
|||
} else if(chunk->nonUTF16Indexes) {
|
||||
chunk->offset=ut->mapNativeIndexToUTF16(ut, index);
|
||||
} else {
|
||||
// This chunk uses UTF-16 indexing. Index into it.
|
||||
chunk->offset = index - chunk->nativeStart;
|
||||
// put offset onto a code point boundary if it isn't there already.
|
||||
if (index>ut->chunk.nativeStart && index < ut->chunk.nativeLimit) {
|
||||
c = chunk->contents[chunk->offset];
|
||||
if (U16_TRAIL(c)) {
|
||||
utext_current32(ut); // force index to the start of the current code point.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (chunk->offset<=0) {
|
||||
|
@ -258,16 +265,13 @@ utext_previous32From(UText *ut, int32_t index) {
|
|||
goto prev32return;
|
||||
}
|
||||
|
||||
// Do the operation assuming that there are no surrogates involved, either
|
||||
// at the starting position or at the previous position. Fast, common case.
|
||||
startingChar = chunk->contents[chunk->offset];
|
||||
(chunk->offset)--;
|
||||
// Do the operation assuming that there are no surrogates involved. Fast, common case.
|
||||
chunk->offset--;
|
||||
c = chunk->contents[chunk->offset];
|
||||
|
||||
// Check for surrogates, do the operation over if there are any.
|
||||
if (U16_IS_SURROGATE(startingChar) || U16_IS_SURROGATE(c)) {
|
||||
utext_setNativeIndex(ut, index); // setIndex() handles case of initial index on a trail surrogate
|
||||
c = utext_previous32(ut); // previous32() handles case of previous char being a supplementary.
|
||||
// Check for the char being a surrogate, get the whole char if it is.
|
||||
if (U16_IS_SURROGATE(c)) {
|
||||
c = utext_current32(ut);
|
||||
}
|
||||
|
||||
prev32return:
|
||||
|
@ -1104,6 +1108,14 @@ repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk* /* chunk*/ )
|
|||
const Replaceable *rep=(const Replaceable *)ut->context;
|
||||
int32_t length=rep->length(); // Full length of the input text (bigger than a chunk)
|
||||
|
||||
// clip the requested index to the limits of the text.
|
||||
if (index<0) {
|
||||
index = 0;
|
||||
}
|
||||
if (index>length) {
|
||||
index = length;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Compute start/limit boundaries around index, for a segment of text
|
||||
|
@ -1127,9 +1139,6 @@ repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk* /* chunk*/ )
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
if (index<0) {
|
||||
index = 0;
|
||||
}
|
||||
ut->chunk.nativeLimit = index + REP_TEXT_CHUNK_SIZE - 1;
|
||||
// Going forward, so we want to have the buffer with stuff at and beyond
|
||||
// the requested index. The -1 gets us one code point before the
|
||||
|
@ -1145,9 +1154,6 @@ repTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk* /* chunk*/ )
|
|||
}
|
||||
} else {
|
||||
// Reverse iteration. Fill buffer with data preceding the requested index.
|
||||
if(index<0) {
|
||||
index = 0;
|
||||
}
|
||||
if (index>ut->chunk.nativeStart && index<=ut->chunk.nativeLimit) {
|
||||
// Requested position already in buffer.
|
||||
ut->chunk.offset = index - ut->chunk.nativeStart;
|
||||
|
@ -1229,10 +1235,27 @@ repTextExtract(UText *ut,
|
|||
if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
|
||||
*status=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
if(start<0 || start>limit || length<limit) {
|
||||
if(start<0 || start>limit) {
|
||||
*status=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if (start>length) {
|
||||
start=length;
|
||||
}
|
||||
if (limit>length) {
|
||||
limit=length;
|
||||
}
|
||||
|
||||
// adjust start, limit if they point to trail half of surrogates
|
||||
if (start<length && U16_IS_TRAIL(rep->charAt(start)) &&
|
||||
U_IS_SUPPLEMENTARY(rep->char32At(start))){
|
||||
start--;
|
||||
}
|
||||
if (limit<length && U16_IS_TRAIL(rep->charAt(limit)) &&
|
||||
U_IS_SUPPLEMENTARY(rep->char32At(limit))){
|
||||
limit--;
|
||||
}
|
||||
|
||||
length=limit-start;
|
||||
if(length>destCapacity) {
|
||||
limit = start + destCapacity;
|
||||
|
|
|
@ -1024,6 +1024,73 @@ void UTextTest::ErrorTest()
|
|||
utext_close(ut);
|
||||
}
|
||||
|
||||
{ // Similar test, with UText over Replaceable
|
||||
// TODO: merge the common parts of these tests.
|
||||
|
||||
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000");
|
||||
int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
|
||||
int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6};
|
||||
int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4};
|
||||
UChar32 c32Map[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
|
||||
UChar32 pr32Map[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
|
||||
int32_t exLen[] = { 1, 0, 2, 1, 0, 2, 0, 0,};
|
||||
|
||||
u16str = u16str.unescape();
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UText *ut = utext_openReplaceable(NULL, &u16str, &status);
|
||||
TEST_SUCCESS(status);
|
||||
|
||||
int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
|
||||
int i;
|
||||
for (i=0; i<startMapLimit; i++) {
|
||||
utext_setNativeIndex(ut, i);
|
||||
int32_t cpIndex = utext_getNativeIndex(ut);
|
||||
TEST_ASSERT(cpIndex == startMap[i]);
|
||||
}
|
||||
|
||||
// Check char32At
|
||||
for (i=0; i<startMapLimit; i++) {
|
||||
UChar32 c32 = utext_char32At(ut, i);
|
||||
TEST_ASSERT(c32 == c32Map[i]);
|
||||
int32_t cpIndex = utext_getNativeIndex(ut);
|
||||
TEST_ASSERT(cpIndex == startMap[i]);
|
||||
}
|
||||
|
||||
// Check utext_next32From
|
||||
for (i=0; i<startMapLimit; i++) {
|
||||
UChar32 c32 = utext_next32From(ut, i);
|
||||
TEST_ASSERT(c32 == c32Map[i]);
|
||||
int32_t cpIndex = utext_getNativeIndex(ut);
|
||||
TEST_ASSERT(cpIndex == nextMap[i]);
|
||||
}
|
||||
|
||||
// check utext_previous32From
|
||||
for (i=0; i<startMapLimit; i++) {
|
||||
UChar32 c32 = utext_previous32From(ut, i);
|
||||
TEST_ASSERT(c32 == pr32Map[i]);
|
||||
int32_t cpIndex = utext_getNativeIndex(ut);
|
||||
TEST_ASSERT(cpIndex == prevMap[i]);
|
||||
}
|
||||
|
||||
// check Extract
|
||||
// Extract from i to i+1, which may be zero or one code points,
|
||||
// depending on whether the indices straddle a cp boundary.
|
||||
for (i=0; i<startMapLimit; i++) {
|
||||
UChar buf[3];
|
||||
status = U_ZERO_ERROR;
|
||||
int32_t extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
|
||||
TEST_SUCCESS(status);
|
||||
TEST_ASSERT(extractedLen == exLen[i]);
|
||||
if (extractedLen > 0) {
|
||||
UChar32 c32;
|
||||
U16_GET(buf, 0, 0, extractedLen, c32);
|
||||
TEST_ASSERT(c32 == c32Map[i]);
|
||||
}
|
||||
}
|
||||
|
||||
utext_close(ut);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue