mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-6677 fix and extend u_strToUTF32() input string validation
X-SVN-Rev: 25443
This commit is contained in:
parent
64f350c92a
commit
a679ccf60c
2 changed files with 147 additions and 60 deletions
|
@ -31,7 +31,7 @@
|
|||
#include "ustr_imp.h"
|
||||
|
||||
U_CAPI UChar* U_EXPORT2
|
||||
u_strFromUTF32(UChar *dest,
|
||||
u_strFromUTF32(UChar *dest,
|
||||
int32_t destCapacity,
|
||||
int32_t *pDestLength,
|
||||
const UChar32 *src,
|
||||
|
@ -47,7 +47,7 @@ u_strFromUTF32(UChar *dest,
|
|||
if(U_FAILURE(*pErrorCode)){
|
||||
return NULL;
|
||||
}
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)) {
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -64,19 +64,16 @@ u_strFromUTF32(UChar *dest,
|
|||
++src;
|
||||
*pDest++ = (UChar)ch;
|
||||
}
|
||||
if(ch == 0) {
|
||||
srcLength = 0;
|
||||
} else {
|
||||
/* "complicated" case, get the remaining string length */
|
||||
const UChar32 *src0 = src;
|
||||
while(*++src != 0) {}
|
||||
srcLength = (int32_t)(src - src0);
|
||||
src = src0;
|
||||
srcLimit = src;
|
||||
if(ch != 0) {
|
||||
/* "complicated" case, find the end of the remaining string */
|
||||
while(*++srcLimit != 0) {}
|
||||
}
|
||||
} else {
|
||||
srcLimit = src + srcLength;
|
||||
}
|
||||
|
||||
/* convert with length */
|
||||
srcLimit = src + srcLength;
|
||||
while(src < srcLimit && pDest < destLimit) {
|
||||
ch = *src++;
|
||||
if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
|
||||
|
@ -124,74 +121,85 @@ u_strFromUTF32(UChar *dest,
|
|||
|
||||
/*
 * Convert a UTF-16 string to UTF-32, rejecting unpaired surrogates.
 *
 * dest          Output buffer; may be NULL only when destCapacity is 0
 *               (preflighting).
 * destCapacity  Number of UChar32 units available in dest; must be >= 0.
 * pDestLength   If not NULL, receives the number of code points that the
 *               complete conversion produces (even if they did not all fit).
 * src           UTF-16 input; must not be NULL.
 * srcLength     Number of UChars in src, or -1 if src is NUL-terminated.
 * pErrorCode    In/out ICU error code. Nothing is done if it already
 *               indicates a failure on entry.
 *
 * Returns dest on completion. Returns NULL if *pErrorCode was already a
 * failure, if an argument is invalid (U_ILLEGAL_ARGUMENT_ERROR), or if the
 * input contains an unpaired surrogate (U_INVALID_CHAR_FOUND). In the error
 * cases *pDestLength is not written.
 *
 * Termination and overflow reporting are delegated to u_terminateUChar32s()
 * (presumably it NUL-terminates when there is room and otherwise sets a
 * warning/overflow status -- confirm against its definition).
 */
U_CAPI UChar32* U_EXPORT2
u_strToUTF32(UChar32 *dest,
             int32_t destCapacity,
             int32_t *pDestLength,
             const UChar *src,
             int32_t srcLength,
             UErrorCode *pErrorCode) {
    const UChar *srcLimit;
    UChar32 ch;
    UChar ch2;
    int32_t destLength;     /* code points actually stored in dest */
    int32_t reqLength;      /* code points counted after dest filled up */

    /* args check */
    if(U_FAILURE(*pErrorCode)){
        return NULL;
    }
    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /*
     * Track the output position with an index rather than with
     * dest/destLimit pointers: when preflighting, dest is NULL and
     * arithmetic on a null pointer is not defined by the C standard.
     * With destCapacity==0 the index test fails immediately, so dest is
     * never dereferenced in that case.
     */
    destLength = 0;
    reqLength = 0;

    if(srcLength < 0) {
        /* simple loop for conversion of a NUL-terminated BMP string */
        while((ch=*src) != 0 && !U16_IS_SURROGATE(ch) && destLength < destCapacity) {
            ++src;
            dest[destLength++] = ch;
        }
        srcLimit = src;
        if(ch != 0) {
            /* "complicated" case, find the end of the remaining string */
            while(*++srcLimit != 0) {}
        }
    } else {
        srcLimit = src + srcLength;
    }

    /* convert with length */
    while(src < srcLimit && destLength < destCapacity) {
        ch = *src++;
        if(!U16_IS_SURROGATE(ch)) {
            /* write ch below */
        } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
            ++src;
            ch = U16_GET_SUPPLEMENTARY(ch, ch2);
        } else {
            /* unpaired surrogate */
            *pErrorCode = U_INVALID_CHAR_FOUND;
            return NULL;
        }
        dest[destLength++] = ch;
    }

    /* preflight the remaining string: validate and count, but do not write */
    while(src < srcLimit) {
        ch = *src++;
        if(!U16_IS_SURROGATE(ch)) {
            /* ++reqLength below */
        } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(*src)) {
            /* a surrogate pair counts as a single code point */
            ++src;
        } else {
            /* unpaired surrogate */
            *pErrorCode = U_INVALID_CHAR_FOUND;
            return NULL;
        }
        ++reqLength;
    }

    /* total = what was written + what did not fit */
    reqLength += destLength;
    if(pDestLength) {
        *pDestLength = reqLength;
    }

    /* Terminate the buffer */
    u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);

    return dest;
}
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
void addUCharTransformTest(TestNode** root);
|
||||
|
||||
static void Test_strToUTF32(void);
|
||||
static void Test_strToUTF32_surrogates(void);
|
||||
static void Test_strFromUTF32(void);
|
||||
static void Test_strFromUTF32_surrogates(void);
|
||||
static void Test_UChar_UTF8_API(void);
|
||||
|
@ -44,6 +45,7 @@ void
|
|||
addUCharTransformTest(TestNode** root)
|
||||
{
|
||||
addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
|
||||
addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates");
|
||||
addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
|
||||
addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
|
||||
addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
|
||||
|
@ -195,6 +197,83 @@ static void Test_strToUTF32(void){
|
|||
}
|
||||
}
|
||||
|
||||
/* test unpaired surrogates */
|
||||
static void Test_strToUTF32_surrogates() {
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
UChar32 u32Target[400];
|
||||
int32_t len16, u32DestLen;
|
||||
int i;
|
||||
|
||||
static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
|
||||
static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 };
|
||||
len16 = LENGTHOF(surr16);
|
||||
for(i = 0; i < 4; ++i) {
|
||||
err = U_ZERO_ERROR;
|
||||
u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err);
|
||||
if(err != U_INVALID_CHAR_FOUND) {
|
||||
log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
|
||||
(long)i, u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err);
|
||||
if(err != U_INVALID_CHAR_FOUND) {
|
||||
log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
|
||||
(long)i, u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err);
|
||||
if(err != U_INVALID_CHAR_FOUND) {
|
||||
log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
|
||||
(long)i, u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err);
|
||||
if(err != U_INVALID_CHAR_FOUND) {
|
||||
log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
|
||||
(long)i, u_errorName(err));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err);
|
||||
if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
|
||||
log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
|
||||
u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err);
|
||||
if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
|
||||
log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
|
||||
u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err);
|
||||
if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
|
||||
log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
|
||||
u_errorName(err));
|
||||
return;
|
||||
}
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err);
|
||||
if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
|
||||
log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
|
||||
u_errorName(err));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void Test_strFromUTF32(void){
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
UChar uTarget[400];
|
||||
|
|
Loading…
Add table
Reference in a new issue