ICU-13467 U8_NEXT() handles all in macro, no function call

X-SVN-Rev: 40698
This commit is contained in:
Markus Scherer 2017-12-05 19:19:34 +00:00
parent 9a542da329
commit 56fd8a74c5

View file

@ -348,29 +348,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @see U8_NEXT_UNSAFE
* @stable ICU 2.4
*/
#define U8_NEXT(s, i, length, c) { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
uint8_t __t1, __t2; \
if( /* handle U+0800..U+FFFF inline */ \
(0xe0<=(c) && (c)<0xf0) && \
(((i)+1)<(length) || (length)<0) && \
U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
(__t2=(s)[(i)+1]-0x80)<=0x3f) { \
(c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
(i)+=2; \
} else if( /* handle U+0080..U+07FF inline */ \
((c)<0xe0 && (c)>=0xc2) && \
((i)!=(length)) && \
(__t1=(s)[i]-0x80)<=0x3f) { \
(c)=(((c)&0x1f)<<6)|__t1; \
++(i); \
} else { \
/* function call for "complicated" and error cases */ \
(c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \
} \
} \
}
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
/**
* Get a code point from a string at a code point boundary offset,
@ -396,26 +374,33 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @see U8_NEXT
* @stable ICU 51
*/
#define U8_NEXT_OR_FFFD(s, i, length, c) { \
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
/** @internal */
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
uint8_t __t1, __t2; \
if( /* handle U+0800..U+FFFF inline */ \
(0xe0<=(c) && (c)<0xf0) && \
(((i)+1)<(length) || (length)<0) && \
U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
(__t2=(s)[(i)+1]-0x80)<=0x3f) { \
(c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
(i)+=2; \
} else if( /* handle U+0080..U+07FF inline */ \
((c)<0xe0 && (c)>=0xc2) && \
((i)!=(length)) && \
(__t1=(s)[i]-0x80)<=0x3f) { \
(c)=(((c)&0x1f)<<6)|__t1; \
++(i); \
uint8_t __t; \
if((i)!=(length) && \
/* fetch/validate/assemble all but last trail byte */ \
((c)>=0xe0 ? \
((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
(__t&=0x3f, 1) \
: /* U+10000..U+10FFFF */ \
((c)-=0xf0)<=4 && \
U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
(__t=(s)[i]-0x80)<=0x3f) && \
/* valid second-to-last trail byte */ \
((c)=((c)<<6)|__t, ++(i)!=(length)) \
: /* U+0080..U+07FF */ \
(c)>=0xc2 && ((c)&=0x1f, 1)) && \
/* last trail byte */ \
(__t=(s)[i]-0x80)<=0x3f && \
((c)=((c)<<6)|__t, ++(i), 1)) { \
} else { \
/* function call for "complicated" and error cases */ \
(c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \
(c)=(sub); /* ill-formed*/ \
} \
} \
}
@ -434,21 +419,22 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @stable ICU 2.4
*/
#define U8_APPEND_UNSAFE(s, i, c) { \
if((uint32_t)(c)<=0x7f) { \
(s)[(i)++]=(uint8_t)(c); \
uint32_t __uc=(c); \
if(__uc<=0x7f) { \
(s)[(i)++]=(uint8_t)__uc; \
} else { \
if((uint32_t)(c)<=0x7ff) { \
(s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
if(__uc<=0x7ff) { \
(s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
} else { \
if((uint32_t)(c)<=0xffff) { \
(s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
if(__uc<=0xffff) { \
(s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
} else { \
(s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
(s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
(s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
} \
(s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
} \
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} \
}
@ -470,17 +456,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @stable ICU 2.4
*/
#define U8_APPEND(s, i, capacity, c, isError) { \
if((uint32_t)(c)<=0x7f) { \
(s)[(i)++]=(uint8_t)(c); \
} else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
(s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
} else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
(s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
(s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
uint32_t __uc=(c); \
if(__uc<=0x7f) { \
(s)[(i)++]=(uint8_t)__uc; \
} else if(__uc<=0x7ff && (i)+1<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
(s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else { \
(i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \
(isError)=TRUE; \
} \
}