ICU-1967 UTF-8 set out-of-band isError

X-SVN-Rev: 9559
This commit is contained in:
Markus Scherer 2002-08-03 00:54:39 +00:00
parent 5ca17eb877
commit 50ffaabba6
3 changed files with 33 additions and 11 deletions

View file

@@ -58,7 +58,7 @@ utf8_countTrailBytes[256];
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
U_CAPI UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict, UBool *pIsError);
U_CAPI int32_t U_EXPORT2
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c);
@@ -209,7 +209,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
(c)=(s)[(i)++]; \
if((c)>=0x80) { \
if(UTF8_IS_LEAD(c)) { \
(c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
(c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict, NULL); \
} else { \
(c)=UTF8_ERROR_VALUE_1; \
} \

View file

@@ -228,7 +228,8 @@ u_strFromUTF8(UChar *dest,
int32_t index = 0;
int32_t reqLength = 0;
uint8_t* pSrc = (uint8_t*) src;
UBool isError;
/* args check */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
return NULL;
@@ -248,10 +249,13 @@ u_strFromUTF8(UChar *dest,
if(ch <=0x7f){
*pDest++=(UChar)ch;
}else{
ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE);
if(ch<=0xFFFF){
ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE, &isError);
if(isError){
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}else if(ch<=0xFFFF){
*(pDest++)=(UChar)ch;
}else if(ch<=0x10ffff){
}else{
*(pDest++)=UTF16_LEAD(ch);
if(pDest<pDestLimit){
*(pDest++)=UTF16_TRAIL(ch);
@@ -259,9 +263,6 @@ u_strFromUTF8(UChar *dest,
reqLength++;
break;
}
}else{
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
}
}
@@ -271,7 +272,11 @@ u_strFromUTF8(UChar *dest,
if(ch <= 0x7f){
reqLength++;
}else{
ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE);
ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE, &isError);
if(isError){
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
reqLength+=UTF_CHAR_LENGTH(ch);
}
}

View file

@@ -83,7 +83,7 @@ utf8_errorValue[6]={
};
U_CAPI UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) {
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict, UBool *pIsError) {
int32_t i=*pi;
uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
if((i)+count<=(length)) {
@@ -118,6 +118,9 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
illegal|=(trail&0xc0)^0x80;
break;
case 0:
if(pIsError!=NULL) {
*pIsError=TRUE;
}
return UTF8_ERROR_VALUE_1;
/* no default branch to optimize switch() - all values are covered */
}
@@ -143,9 +146,20 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
--count;
}
c=utf8_errorValue[errorCount-count];
if(pIsError!=NULL) {
*pIsError=TRUE;
}
} else if((strict) && UTF_IS_UNICODE_NONCHAR(c)) {
/* strict: forbid non-characters like U+fffe */
c=utf8_errorValue[count];
if(pIsError!=NULL) {
*pIsError=TRUE;
}
} else {
/* good result */
if(pIsError!=NULL) {
*pIsError=FALSE;
}
}
} else /* too few bytes left */ {
/* error handling */
@@ -155,6 +169,9 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
++(i);
}
c=utf8_errorValue[i-i0];
if(pIsError!=NULL) {
*pIsError=TRUE;
}
}
*pi=i;
return c;