mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-09 07:22:11 +00:00
ICU-1967 UTF-8 set out-of-band isError
X-SVN-Rev: 9559
This commit is contained in:
parent
5ca17eb877
commit
50ffaabba6
3 changed files with 33 additions and 11 deletions
|
@ -58,7 +58,7 @@ utf8_countTrailBytes[256];
|
|||
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
|
||||
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
|
||||
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict, UBool *pIsError);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c);
|
||||
|
@ -209,7 +209,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
(c)=(s)[(i)++]; \
|
||||
if((c)>=0x80) { \
|
||||
if(UTF8_IS_LEAD(c)) { \
|
||||
(c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
|
||||
(c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict, NULL); \
|
||||
} else { \
|
||||
(c)=UTF8_ERROR_VALUE_1; \
|
||||
} \
|
||||
|
|
|
@ -228,7 +228,8 @@ u_strFromUTF8(UChar *dest,
|
|||
int32_t index = 0;
|
||||
int32_t reqLength = 0;
|
||||
uint8_t* pSrc = (uint8_t*) src;
|
||||
|
||||
UBool isError;
|
||||
|
||||
/* args check */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
|
||||
return NULL;
|
||||
|
@ -248,10 +249,13 @@ u_strFromUTF8(UChar *dest,
|
|||
if(ch <=0x7f){
|
||||
*pDest++=(UChar)ch;
|
||||
}else{
|
||||
ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE);
|
||||
if(ch<=0xFFFF){
|
||||
ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE, &isError);
|
||||
if(isError){
|
||||
*pErrorCode = U_INVALID_CHAR_FOUND;
|
||||
return NULL;
|
||||
}else if(ch<=0xFFFF){
|
||||
*(pDest++)=(UChar)ch;
|
||||
}else if(ch<=0x10ffff){
|
||||
}else{
|
||||
*(pDest++)=UTF16_LEAD(ch);
|
||||
if(pDest<pDestLimit){
|
||||
*(pDest++)=UTF16_TRAIL(ch);
|
||||
|
@ -259,9 +263,6 @@ u_strFromUTF8(UChar *dest,
|
|||
reqLength++;
|
||||
break;
|
||||
}
|
||||
}else{
|
||||
*pErrorCode = U_INVALID_CHAR_FOUND;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -271,7 +272,11 @@ u_strFromUTF8(UChar *dest,
|
|||
if(ch <= 0x7f){
|
||||
reqLength++;
|
||||
}else{
|
||||
ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE);
|
||||
ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE, &isError);
|
||||
if(isError){
|
||||
*pErrorCode = U_INVALID_CHAR_FOUND;
|
||||
return NULL;
|
||||
}
|
||||
reqLength+=UTF_CHAR_LENGTH(ch);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -83,7 +83,7 @@ utf8_errorValue[6]={
|
|||
};
|
||||
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) {
|
||||
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict, UBool *pIsError) {
|
||||
int32_t i=*pi;
|
||||
uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
|
||||
if((i)+count<=(length)) {
|
||||
|
@ -118,6 +118,9 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
|
|||
illegal|=(trail&0xc0)^0x80;
|
||||
break;
|
||||
case 0:
|
||||
if(pIsError!=NULL) {
|
||||
*pIsError=TRUE;
|
||||
}
|
||||
return UTF8_ERROR_VALUE_1;
|
||||
/* no default branch to optimize switch() - all values are covered */
|
||||
}
|
||||
|
@ -143,9 +146,20 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
|
|||
--count;
|
||||
}
|
||||
c=utf8_errorValue[errorCount-count];
|
||||
if(pIsError!=NULL) {
|
||||
*pIsError=TRUE;
|
||||
}
|
||||
} else if((strict) && UTF_IS_UNICODE_NONCHAR(c)) {
|
||||
/* strict: forbid non-characters like U+fffe */
|
||||
c=utf8_errorValue[count];
|
||||
if(pIsError!=NULL) {
|
||||
*pIsError=TRUE;
|
||||
}
|
||||
} else {
|
||||
/* good result */
|
||||
if(pIsError!=NULL) {
|
||||
*pIsError=FALSE;
|
||||
}
|
||||
}
|
||||
} else /* too few bytes left */ {
|
||||
/* error handling */
|
||||
|
@ -155,6 +169,9 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
|
|||
++(i);
|
||||
}
|
||||
c=utf8_errorValue[i-i0];
|
||||
if(pIsError!=NULL) {
|
||||
*pIsError=TRUE;
|
||||
}
|
||||
}
|
||||
*pi=i;
|
||||
return c;
|
||||
|
|
Loading…
Add table
Reference in a new issue