ICU-6677 fix and extend u_strToUTF32() input string validation

X-SVN-Rev: 25443
This commit is contained in:
Markus Scherer 2009-02-19 18:14:25 +00:00
parent 64f350c92a
commit a679ccf60c
2 changed files with 147 additions and 60 deletions

View file

@ -31,7 +31,7 @@
#include "ustr_imp.h"
U_CAPI UChar* U_EXPORT2
u_strFromUTF32(UChar *dest,
u_strFromUTF32(UChar *dest,
int32_t destCapacity,
int32_t *pDestLength,
const UChar32 *src,
@ -47,7 +47,7 @@ u_strFromUTF32(UChar *dest,
if(U_FAILURE(*pErrorCode)){
return NULL;
}
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
@ -64,19 +64,16 @@ u_strFromUTF32(UChar *dest,
++src;
*pDest++ = (UChar)ch;
}
if(ch == 0) {
srcLength = 0;
} else {
/* "complicated" case, get the remaining string length */
const UChar32 *src0 = src;
while(*++src != 0) {}
srcLength = (int32_t)(src - src0);
src = src0;
srcLimit = src;
if(ch != 0) {
/* "complicated" case, find the end of the remaining string */
while(*++srcLimit != 0) {}
}
} else {
srcLimit = src + srcLength;
}
/* convert with length */
srcLimit = src + srcLength;
while(src < srcLimit && pDest < destLimit) {
ch = *src++;
if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
@ -124,74 +121,85 @@ u_strFromUTF32(UChar *dest,
/*
 * u_strToUTF32(): convert the UTF-16 string src into UTF-32 code points
 * written to dest. A srcLength of -1 selects the NUL-terminated code path;
 * values below -1 are rejected by the argument check.
 *
 * NOTE(review): this span is a rendered diff hunk without +/- markers, so
 * lines of two implementations are interleaved and it does not compile as
 * shown. Presumably the lines using pSrc/pSrcLimit and the UTF_IS_* macros
 * are the removed version and the lines using srcLimit and the U16_* macros
 * are the added version -- confirm against the repository.
 *
 * Visible in the U16_* lines: an unpaired surrogate sets *pErrorCode to
 * U_INVALID_CHAR_FOUND and returns NULL, both while writing to dest and
 * while only counting the remaining input. The resulting length is stored
 * in *pDestLength when that pointer is non-NULL, and u_terminateUChar32s()
 * is called with that length before dest is returned.
 */
U_CAPI UChar32* U_EXPORT2
u_strToUTF32(UChar32 *dest,
int32_t destCapacity,
int32_t *pDestLength,
int32_t destCapacity,
int32_t *pDestLength,
const UChar *src,
int32_t srcLength,
UErrorCode *pErrorCode)
{
const UChar* pSrc = src;
const UChar* pSrcLimit;
int32_t reqLength=0;
uint32_t ch=0;
uint32_t *pDest = (uint32_t *)dest;
uint32_t *destLimit = pDest + destCapacity;
UChar ch2=0;
int32_t srcLength,
UErrorCode *pErrorCode) {
const UChar *srcLimit;
UChar32 ch;
UChar ch2;
UChar32 *destLimit;
UChar32 *pDest;
int32_t reqLength;
/* args check */
if(U_FAILURE(*pErrorCode)){
return NULL;
}
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
if(srcLength==-1) {
while((ch=*pSrc)!=0 && pDest!=destLimit) {
++pSrc;
/* no explicit NUL check needed: a NUL code unit fails UTF_IS_TRAIL() anyway */
if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
++pSrc;
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
}
*(pDest++)= ch;
/* output starts empty; reqLength counts code points that are only preflighted */
pDest = dest;
destLimit = dest + destCapacity;
reqLength = 0;
if(srcLength < 0) {
/* simple loop for conversion of a NUL-terminated BMP string */
while((ch=*src) != 0 && !U16_IS_SURROGATE(ch) && pDest < destLimit) {
++src;
*pDest++ = ch;
}
while((ch=*pSrc++)!=0) {
if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
++pSrc;
}
++reqLength;
srcLimit = src;
if(ch != 0) {
/* "complicated" case, find the end of the remaining string */
while(*++srcLimit != 0) {}
}
} else {
pSrcLimit = pSrc+srcLength;
while(pSrc<pSrcLimit && pDest<destLimit) {
ch=*pSrc++;
if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
++pSrc;
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
}
*(pDest++)= ch;
}
while(pSrc!=pSrcLimit) {
ch=*pSrc++;
if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
++pSrc;
}
++reqLength;
}
srcLimit = src + srcLength;
}
reqLength+=(int32_t)(pDest - (uint32_t *)dest);
if(pDestLength){
/* convert with length */
while(src < srcLimit && pDest < destLimit) {
ch = *src++;
if(!U16_IS_SURROGATE(ch)) {
/* write ch below */
} else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
++src;
ch = U16_GET_SUPPLEMENTARY(ch, ch2);
} else {
/* unpaired surrogate */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
*pDest++ = ch;
}
/* preflight the remaining string */
while(src < srcLimit) {
ch = *src++;
if(!U16_IS_SURROGATE(ch)) {
/* ++reqLength below */
} else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(*src)) {
++src;
} else {
/* unpaired surrogate */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
++reqLength;
}
/* total = code points counted while preflighting + code points actually written */
reqLength += (int32_t)(pDest - dest);
if(pDestLength) {
*pDestLength = reqLength;
}
/* Terminate the buffer */
u_terminateUChar32s(dest,destCapacity,reqLength,pErrorCode);
u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);
return dest;
}

View file

@ -31,6 +31,7 @@
void addUCharTransformTest(TestNode** root);
static void Test_strToUTF32(void);
static void Test_strToUTF32_surrogates(void);
static void Test_strFromUTF32(void);
static void Test_strFromUTF32_surrogates(void);
static void Test_UChar_UTF8_API(void);
@ -44,6 +45,7 @@ void
addUCharTransformTest(TestNode** root)
{
addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates");
addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
@ -195,6 +197,83 @@ static void Test_strToUTF32(void){
}
}
/*
 * Test u_strToUTF32() with unpaired surrogates and with a well-formed
 * surrogate pair.
 *
 * surr16 holds, in order: 'A', a lone lead surrogate, 'a', a lone trail
 * surrogate, 'Z', a lead+trail pair, 'z', NUL.
 *  - Starting at offsets 0..3 the input always contains an unpaired
 *    surrogate, so every call must set U_INVALID_CHAR_FOUND.
 *  - Starting at offset 4 the input is 'Z', one pair, 'z', which must
 *    convert to the three code points in expected[].
 * Each case is run both with an explicit length and NUL-terminated (-1),
 * and both as a preflight (capacity 0) and with a real output buffer.
 */
static void Test_strToUTF32_surrogates(void) {
    UErrorCode err = U_ZERO_ERROR;
    UChar32 u32Target[400];
    int32_t len16, u32DestLen;
    int i;

    static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
    static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 };

    /* presumably LENGTHOF yields the element count, i.e. including the terminating 0 */
    len16 = LENGTHOF(surr16);

    /* offsets 0..3: the input contains at least one unpaired surrogate */
    for(i = 0; i < 4; ++i) {
        /* explicit length, preflight with zero capacity */
        err = U_ZERO_ERROR;
        u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err);
        if(err != U_INVALID_CHAR_FOUND) {
            log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
                    (long)i, u_errorName(err));
            return;
        }

        /* explicit length, real output buffer */
        err = U_ZERO_ERROR;
        u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err);
        if(err != U_INVALID_CHAR_FOUND) {
            log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
                    (long)i, u_errorName(err));
            return;
        }

        /* NUL-terminated, preflight with a NULL destination */
        err = U_ZERO_ERROR;
        u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err);
        if(err != U_INVALID_CHAR_FOUND) {
            log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
                    (long)i, u_errorName(err));
            return;
        }

        /* NUL-terminated, real output buffer */
        err = U_ZERO_ERROR;
        u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err);
        if(err != U_INVALID_CHAR_FOUND) {
            log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
                    (long)i, u_errorName(err));
            return;
        }
    }

    /*
     * offset 4: well-formed input. len16-4-1 drops the terminating 0 from the
     * explicit length so that exactly 4 code units (3 code points) are converted.
     */
    err = U_ZERO_ERROR;
    u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err);
    if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
        log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
                u_errorName(err));
        return;
    }

    /* compare all of expected[], including its terminating 0 */
    err = U_ZERO_ERROR;
    u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err);
    if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, sizeof(expected))) {
        log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
                u_errorName(err));
        return;
    }

    err = U_ZERO_ERROR;
    u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err);
    if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
        log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
                u_errorName(err));
        return;
    }

    err = U_ZERO_ERROR;
    u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err);
    if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, sizeof(expected))) {
        log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
                u_errorName(err));
        return;
    }
}
static void Test_strFromUTF32(void){
UErrorCode err = U_ZERO_ERROR;
UChar uTarget[400];