mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-176 utf macros get, prev, back take start parameter
X-SVN-Rev: 1116
This commit is contained in:
parent
c117cd37bb
commit
2b2af0bbc5
6 changed files with 140 additions and 128 deletions
|
@ -149,7 +149,7 @@ typedef int32_t UTextOffset;
|
|||
# define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
|
||||
|
||||
# define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
|
||||
# define UTF_GET_CHAR_SAFE(s, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, i, length, c, strict)
|
||||
# define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
|
||||
|
||||
# define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
|
||||
# define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
|
||||
|
@ -164,19 +164,19 @@ typedef int32_t UTextOffset;
|
|||
# define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)
|
||||
|
||||
# define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
|
||||
# define UTF_SET_CHAR_START_SAFE(s, i) UTF16_SET_CHAR_START_SAFE(s, i)
|
||||
# define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)
|
||||
|
||||
# define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
|
||||
# define UTF_PREV_CHAR_SAFE(s, i, c, strict) UTF16_PREV_CHAR_SAFE(s, i, c, strict)
|
||||
# define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
|
||||
|
||||
# define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
|
||||
# define UTF_BACK_1_SAFE(s, i) UTF16_BACK_1_SAFE(s, i)
|
||||
# define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
|
||||
|
||||
# define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
|
||||
# define UTF_BACK_N_SAFE(s, i, n) UTF16_BACK_N_SAFE(s, i, n)
|
||||
# define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
|
||||
|
||||
# define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
|
||||
# define UTF_SET_CHAR_LIMIT_SAFE(s, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, i, length)
|
||||
# define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
|
||||
|
||||
#elif UTF_SIZE==32
|
||||
|
||||
|
@ -192,48 +192,48 @@ typedef int32_t UTextOffset;
|
|||
|
||||
#ifdef UTF_SAFE
|
||||
|
||||
# define UTF_GET_CHAR(s, i, length, c) UTF_GET_CHAR_SAFE(s, i, length, c, FALSE)
|
||||
# define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)
|
||||
|
||||
# define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
|
||||
# define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
|
||||
# define UTF_FWD_1(s, i, length) UTF_FWD_1_SAFE(s, i, length)
|
||||
# define UTF_FWD_N(s, i, length, n) UTF_FWD_N_SAFE(s, i, length, n)
|
||||
# define UTF_SET_CHAR_START(s, i) UTF_SET_CHAR_START_SAFE(s, i)
|
||||
# define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_SAFE(s, start, i)
|
||||
|
||||
# define UTF_PREV_CHAR(s, i, c) UTF_PREV_CHAR_SAFE(s, i, c, FALSE)
|
||||
# define UTF_BACK_1(s, i) UTF_BACK_1_SAFE(s, i)
|
||||
# define UTF_BACK_N(s, i, n) UTF_BACK_N_SAFE(s, i, n)
|
||||
# define UTF_SET_CHAR_LIMIT(s, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, i, length)
|
||||
# define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
|
||||
# define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i)
|
||||
# define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n)
|
||||
# define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
|
||||
|
||||
#elif defined(UTF_STRICT)
|
||||
|
||||
# define UTF_GET_CHAR(s, i, length, c) UTF_GET_CHAR_SAFE(s, i, length, c, TRUE)
|
||||
# define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE)
|
||||
|
||||
# define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE)
|
||||
# define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
|
||||
# define UTF_FWD_1(s, i, length) UTF_FWD_1_SAFE(s, i, length)
|
||||
# define UTF_FWD_N(s, i, length, n) UTF_FWD_N_SAFE(s, i, length, n)
|
||||
# define UTF_SET_CHAR_START(s, i) UTF_SET_CHAR_START_SAFE(s, i)
|
||||
# define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_SAFE(s, start, i)
|
||||
|
||||
# define UTF_PREV_CHAR(s, i, c) UTF_PREV_CHAR_SAFE(s, i, c, TRUE)
|
||||
# define UTF_BACK_1(s, i) UTF_BACK_1_SAFE(s, i)
|
||||
# define UTF_BACK_N(s, i, n) UTF_BACK_N_SAFE(s, i, n)
|
||||
# define UTF_SET_CHAR_LIMIT(s, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, i, length)
|
||||
# define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE)
|
||||
# define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i)
|
||||
# define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n)
|
||||
# define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
|
||||
|
||||
#else /* UTF_UNSAFE */
|
||||
|
||||
# define UTF_GET_CHAR(s, i, length, c) UTF_GET_CHAR_UNSAFE(s, i, c)
|
||||
# define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_UNSAFE(s, i, c)
|
||||
|
||||
# define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_UNSAFE(s, i, c)
|
||||
# define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_UNSAFE(s, i, c)
|
||||
# define UTF_FWD_1(s, i, length) UTF_FWD_1_UNSAFE(s, i)
|
||||
# define UTF_FWD_N(s, i, length, n) UTF_FWD_N_UNSAFE(s, i, n)
|
||||
# define UTF_SET_CHAR_START(s, i) UTF_SET_CHAR_START_UNSAFE(s, i)
|
||||
# define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_UNSAFE(s, i)
|
||||
|
||||
# define UTF_PREV_CHAR(s, i, c) UTF_PREV_CHAR_UNSAFE(s, i, c)
|
||||
# define UTF_BACK_1(s, i) UTF_BACK_1_UNSAFE(s, i)
|
||||
# define UTF_BACK_N(s, i, n) UTF_BACK_N_UNSAFE(s, i, n)
|
||||
# define UTF_SET_CHAR_LIMIT(s, i, length) UTF_SET_CHAR_LIMIT_UNSAFE(s, i)
|
||||
# define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_UNSAFE(s, i, c)
|
||||
# define UTF_BACK_1(s, start, i) UTF_BACK_1_UNSAFE(s, i)
|
||||
# define UTF_BACK_N(s, start, i, n) UTF_BACK_N_UNSAFE(s, i, n)
|
||||
# define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_UNSAFE(s, i)
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -72,7 +72,7 @@
|
|||
} \
|
||||
}
|
||||
|
||||
#define UTF16_GET_CHAR_SAFE(s, i, length, c, strict) { \
|
||||
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
|
||||
(c)=(s)[i]; \
|
||||
if(UTF_IS_SURROGATE(c)) { \
|
||||
uint16_t __c2; \
|
||||
|
@ -85,7 +85,7 @@
|
|||
(c)=UTF_ERROR_VALUE; \
|
||||
} \
|
||||
} else { \
|
||||
if((i)>0 && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
|
||||
if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
|
||||
(c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
|
||||
/* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() */ \
|
||||
} else if(strict) {\
|
||||
|
@ -207,8 +207,8 @@
|
|||
} \
|
||||
}
|
||||
|
||||
#define UTF16_SET_CHAR_START_SAFE(s, i) { \
|
||||
if(UTF_IS_SECOND_SURROGATE((s)[i]) && (i)>0 && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
|
||||
#define UTF16_SET_CHAR_START_SAFE(s, start, i) { \
|
||||
if(UTF_IS_SECOND_SURROGATE((s)[i]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
|
||||
--(i); \
|
||||
} \
|
||||
}
|
||||
|
@ -267,11 +267,11 @@
|
|||
|
||||
/* safe versions with error-checking and optional regularity-checking */
|
||||
|
||||
#define UTF16_PREV_CHAR_SAFE(s, i, c, strict) { \
|
||||
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
|
||||
(c)=(s)[--(i)]; \
|
||||
if(UTF_IS_SECOND_SURROGATE(c)) { \
|
||||
uint16_t __c2; \
|
||||
if((i)>0 && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
|
||||
if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
|
||||
--(i); \
|
||||
(c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
|
||||
/* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() */ \
|
||||
|
@ -286,22 +286,22 @@
|
|||
} \
|
||||
}
|
||||
|
||||
#define UTF16_BACK_1_SAFE(s, i) { \
|
||||
if(UTF_IS_SECOND_SURROGATE((s)[--(i)]) && (i)>0 && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
|
||||
#define UTF16_BACK_1_SAFE(s, start, i) { \
|
||||
if(UTF_IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
|
||||
--(i); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define UTF16_BACK_N_SAFE(s, i, n) { \
|
||||
#define UTF16_BACK_N_SAFE(s, start, i, n) { \
|
||||
UTextOffset __N=(n); \
|
||||
while(__N>0 && (i)>0) { \
|
||||
UTF16_BACK_1_SAFE(s, i); \
|
||||
while(__N>0 && (i)>(start)) { \
|
||||
UTF16_BACK_1_SAFE(s, start, i); \
|
||||
--__N; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define UTF16_SET_CHAR_LIMIT_SAFE(s, i, length) { \
|
||||
if((i)<(length) && UTF_IS_FIRST_SURROGATE((s)[(i)-1]) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
|
||||
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) { \
|
||||
if((start)<(i) && (i)<(length) && UTF_IS_FIRST_SURROGATE((s)[(i)-1]) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
|
||||
++(i); \
|
||||
} \
|
||||
}
|
||||
|
|
|
@ -54,7 +54,7 @@
|
|||
(c)=(s)[i]; \
|
||||
}
|
||||
|
||||
#define UTF32_GET_CHAR_SAFE(s, i, length, c, strict) { \
|
||||
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
|
||||
(c)=(s)[i]; \
|
||||
if(!UTF32_IS_SAFE(c, strict)) { \
|
||||
(c)=UTF_ERROR_VALUE; \
|
||||
|
@ -107,7 +107,7 @@
|
|||
} \
|
||||
}
|
||||
|
||||
#define UTF32_SET_CHAR_START_SAFE(s, i) { \
|
||||
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
|
||||
}
|
||||
|
||||
/* definitions with backward iteration -------------------------------------- */
|
||||
|
@ -127,24 +127,21 @@
|
|||
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
|
||||
}
|
||||
|
||||
#define UTF32_PREV_CHAR_SAFE(s, i, c, strict) { \
|
||||
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
|
||||
(c)=(s)[--(i)]; \
|
||||
if(!UTF32_IS_SAFE(c, strict)) { \
|
||||
(c)=UTF_ERROR_VALUE; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define UTF32_BACK_1_SAFE(s, i) { \
|
||||
if((i)>0) { \
|
||||
--(i); \
|
||||
} \
|
||||
#define UTF32_BACK_1_SAFE(s, start, i) { \
|
||||
--(i); \
|
||||
}
|
||||
|
||||
#define UTF32_BACK_N_SAFE(s, i, n) { \
|
||||
if((i)>=(n)) { \
|
||||
(i)-=(n); \
|
||||
} else { \
|
||||
(i)=0; \
|
||||
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
|
||||
(i)-=(n); \
|
||||
if((i)<(start)) { \
|
||||
(i)=(start); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
|
|
@ -45,10 +45,10 @@ U_CAPI UTextOffset U_EXPORT2
|
|||
utf8_appendCharSafeBody(uint8_t *s, UTextOffset i, UTextOffset length, UChar32 c);
|
||||
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_prevCharSafeBody(const uint8_t *s, UTextOffset *pi, UChar32 c, bool_t strict);
|
||||
utf8_prevCharSafeBody(const uint8_t *s, UTextOffset start, UTextOffset *pi, UChar32 c, bool_t strict);
|
||||
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
utf8_back1SafeBody(const uint8_t *s, UTextOffset i);
|
||||
utf8_back1SafeBody(const uint8_t *s, UTextOffset start, UTextOffset i);
|
||||
|
||||
/*
|
||||
* For the semantics of all of these macros, see utf16.h.
|
||||
|
@ -110,9 +110,9 @@ utf8_back1SafeBody(const uint8_t *s, UTextOffset i);
|
|||
UTF8_NEXT_CHAR_UNSAFE(s, __I, c); \
|
||||
}
|
||||
|
||||
#define UTF8_GET_CHAR_SAFE(s, i, length, c, strict) { \
|
||||
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
|
||||
UTextOffset __I=(UTextOffset)(i); \
|
||||
UTF8_SET_CHAR_START_SAFE(s, __I); \
|
||||
UTF8_SET_CHAR_START_SAFE(s, start, __I); \
|
||||
UTF8_NEXT_CHAR_SAFE(s, __I, length, c, strict); \
|
||||
}
|
||||
|
||||
|
@ -131,7 +131,7 @@ utf8_back1SafeBody(const uint8_t *s, UTextOffset i);
|
|||
*/
|
||||
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
|
||||
(c)=(s)[(i)++]; \
|
||||
if((c)&0x80) { \
|
||||
if((uint8_t)((c)-0xc0)<0x35) { \
|
||||
uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
|
||||
UTF8_MASK_LEAD_BYTE(c, __count); \
|
||||
switch(__count) { \
|
||||
|
@ -185,7 +185,7 @@ utf8_back1SafeBody(const uint8_t *s, UTextOffset i);
|
|||
|
||||
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
|
||||
(c)=(s)[(i)++]; \
|
||||
if((c)&0x80) { \
|
||||
if(UTF8_IS_LEAD(c)) { \
|
||||
(c)=utf8_nextCharSafeBody(s, &(i), (UTextOffset)(length), c, strict); \
|
||||
} \
|
||||
}
|
||||
|
@ -199,17 +199,15 @@ utf8_back1SafeBody(const uint8_t *s, UTextOffset i);
|
|||
}
|
||||
|
||||
#define UTF8_FWD_1_SAFE(s, i, length) { \
|
||||
if((i)<(length)) { \
|
||||
uint8_t __b=(s)[(i)++]; \
|
||||
if(__b&0x80) { \
|
||||
uint8_t __count=UTF8_COUNT_TRAIL_BYTES(__b); \
|
||||
if((i)+__count>(length)) { \
|
||||
__count=(length)-(i); \
|
||||
} \
|
||||
while(__count>0 && UTF8_IS_TRAIL((s)[i])) { \
|
||||
++(i); \
|
||||
--__count; \
|
||||
} \
|
||||
uint8_t __b=(s)[(i)++]; \
|
||||
if(UTF8_IS_LEAD(__b)) { \
|
||||
uint8_t __count=UTF8_COUNT_TRAIL_BYTES(__b); \
|
||||
if((i)+__count>(length)) { \
|
||||
__count=(length)-(i); \
|
||||
} \
|
||||
while(__count>0 && UTF8_IS_TRAIL((s)[i])) { \
|
||||
++(i); \
|
||||
--__count; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
@ -222,9 +220,9 @@ utf8_back1SafeBody(const uint8_t *s, UTextOffset i);
|
|||
} \
|
||||
}
|
||||
|
||||
#define UTF8_SET_CHAR_START_SAFE(s, i) { \
|
||||
if((s)[(i)]&0x80) { \
|
||||
(i)=utf8_back1SafeBody(s, (UTextOffset)(i)); \
|
||||
#define UTF8_SET_CHAR_START_SAFE(s, start, i) { \
|
||||
if(UTF8_IS_TRAIL((s)[(i)])) { \
|
||||
(i)=utf8_back1SafeBody(s, start, (UTextOffset)(i)); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
@ -232,10 +230,10 @@ utf8_back1SafeBody(const uint8_t *s, UTextOffset i);
|
|||
|
||||
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
|
||||
(c)=(s)[--(i)]; \
|
||||
if((c)&0x80) { \
|
||||
if(UTF8_IS_TRAIL(c)) { \
|
||||
uint8_t __b, __count=1, __shift=6; \
|
||||
\
|
||||
/* c must be a trail byte */ \
|
||||
/* c is a trail byte */ \
|
||||
(c)&=0x3f; \
|
||||
for(;;) { \
|
||||
__b=(s)[--(i)]; \
|
||||
|
@ -269,33 +267,35 @@ utf8_back1SafeBody(const uint8_t *s, UTextOffset i);
|
|||
UTF8_FWD_1_UNSAFE(s, i); \
|
||||
}
|
||||
|
||||
#define UTF8_PREV_CHAR_SAFE(s, i, c, strict) { \
|
||||
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
|
||||
(c)=(s)[--(i)]; \
|
||||
if((c)&0x80) { \
|
||||
(c)=utf8_prevCharSafeBody(s, &(i), c, strict); \
|
||||
if(UTF8_IS_TRAIL((c))) { \
|
||||
(c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define UTF8_BACK_1_SAFE(s, i) { \
|
||||
if((i)>0) { \
|
||||
--(i); \
|
||||
if((s)[(i)]&0x80) { \
|
||||
(i)=utf8_back1SafeBody(s, (UTextOffset)(i)); \
|
||||
} \
|
||||
#define UTF8_BACK_1_SAFE(s, start, i) { \
|
||||
if(UTF8_IS_TRAIL((s)[--(i)])) { \
|
||||
(i)=utf8_back1SafeBody(s, start, (UTextOffset)(i)); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define UTF8_BACK_N_SAFE(s, i, n) { \
|
||||
#define UTF8_BACK_N_SAFE(s, start, i, n) { \
|
||||
UTextOffset __N=(n); \
|
||||
while(__N>0 && (i)>0) { \
|
||||
UTF8_BACK_1_SAFE(s, i); \
|
||||
while(__N>0 && (i)>(start)) { \
|
||||
UTF8_BACK_1_SAFE(s, start, i); \
|
||||
--__N; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define UTF8_SET_CHAR_LIMIT_SAFE(s, i, length) { \
|
||||
UTF8_BACK_1_SAFE(s, i); \
|
||||
UTF8_FWD_1_SAFE(s, i, length); \
|
||||
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) { \
|
||||
if((start)<(i) && (i)<(length)) { \
|
||||
UTF8_BACK_1_SAFE(s, start, i); \
|
||||
(i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
|
||||
if((i)>(length)) { \
|
||||
(i)=(length); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1085,7 +1085,7 @@ UnicodeString::trim()
|
|||
if(i <= 0) {
|
||||
break;
|
||||
}
|
||||
UTF_PREV_CHAR(fArray, i, c);
|
||||
UTF_PREV_CHAR(fArray, 0, i, c);
|
||||
if(!(c == 0x20 || Unicode::isWhitespace(c))) {
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -193,69 +193,84 @@ utf8_appendCharSafeBody(uint8_t *s, UTextOffset i, UTextOffset length, UChar32 c
|
|||
}
|
||||
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_prevCharSafeBody(const uint8_t *s, UTextOffset *pi, UChar32 c, bool_t strict) {
|
||||
utf8_prevCharSafeBody(const uint8_t *s, UTextOffset start, UTextOffset *pi, UChar32 c, bool_t strict) {
|
||||
UTextOffset i=*pi;
|
||||
if(UTF8_IS_TRAIL(c)) {
|
||||
uint8_t b, count=1, shift=6;
|
||||
uint8_t b, count=1, shift=6;
|
||||
|
||||
c&=0x3f;
|
||||
while(i>0 && count<6) {
|
||||
b=s[--i];
|
||||
if((uint8_t)(b-0x80)<0x7e) { /* 0x80<=b<0xfe */
|
||||
if(b&0x40) {
|
||||
/* lead byte */
|
||||
uint8_t shouldCount=UTF8_COUNT_TRAIL_BYTES(b);
|
||||
/* extract value bits from the last trail byte */
|
||||
c&=0x3f;
|
||||
|
||||
if(count==shouldCount) {
|
||||
*pi=i;
|
||||
UTF8_MASK_LEAD_BYTE(b, count);
|
||||
c|=(UChar32)b<<shift;
|
||||
if( c>0x10ffff ||
|
||||
(strict) &&
|
||||
(UTF_IS_SURROGATE(c) ||
|
||||
count>=4 || (c)<utf8_minRegular[count] || ((c)&0xfffe)==0xfffe)
|
||||
) {
|
||||
/* irregular sequence */
|
||||
} else {
|
||||
return c;
|
||||
}
|
||||
for(;;) {
|
||||
if(i<=start) {
|
||||
/* no lead byte at all */
|
||||
c=UTF8_ERROR_VALUE_1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* read another previous byte */
|
||||
b=s[--i];
|
||||
if((uint8_t)(b-0x80)<0x7e) { /* 0x80<=b<0xfe */
|
||||
if(b&0x40) {
|
||||
/* lead byte, this will always end the loop */
|
||||
uint8_t shouldCount=UTF8_COUNT_TRAIL_BYTES(b);
|
||||
|
||||
if(count==shouldCount) {
|
||||
/* set the new position */
|
||||
*pi=i;
|
||||
UTF8_MASK_LEAD_BYTE(b, count);
|
||||
c|=(UChar32)b<<shift;
|
||||
if( c>0x10ffff ||
|
||||
(strict) &&
|
||||
(UTF_IS_SURROGATE(c) ||
|
||||
count>=4 || (c)<utf8_minRegular[count] || ((c)&0xfffe)==0xfffe)
|
||||
) {
|
||||
/* irregular sequence */
|
||||
c=utf8_errorValue[count];
|
||||
} else {
|
||||
/* the lead byte does not match the number of trail bytes */
|
||||
/* only set the position to the lead byte if it would
|
||||
include the trail byte that we started with */
|
||||
if(count<shouldCount) {
|
||||
*pi=i;
|
||||
}
|
||||
/* exit with correct c */
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
/* trail byte */
|
||||
c|=(UChar32)(b&0x3f)<<shift;
|
||||
++count;
|
||||
shift+=6;
|
||||
/* the lead byte does not match the number of trail bytes */
|
||||
/* only set the position to the lead byte if it would
|
||||
include the trail byte that we started with */
|
||||
if(count<shouldCount) {
|
||||
*pi=i;
|
||||
c=utf8_errorValue[count];
|
||||
} else {
|
||||
c=UTF8_ERROR_VALUE_1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
} else if(count<5) {
|
||||
/* trail byte */
|
||||
c|=(UChar32)(b&0x3f)<<shift;
|
||||
++count;
|
||||
shift+=6;
|
||||
} else {
|
||||
/* single-byte character precedes trailing bytes */
|
||||
/* more than 5 trail bytes is illegal */
|
||||
c=UTF8_ERROR_VALUE_1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* single-byte character precedes trailing bytes */
|
||||
c=UTF8_ERROR_VALUE_1;
|
||||
break;
|
||||
}
|
||||
/* i==0 or count==6 - no lead byte in legal distance */
|
||||
/* } else { called with single lead byte */
|
||||
}
|
||||
return UTF_ERROR_VALUE;
|
||||
return c;
|
||||
}
|
||||
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
utf8_back1SafeBody(const uint8_t *s, UTextOffset i) {
|
||||
utf8_back1SafeBody(const uint8_t *s, UTextOffset start, UTextOffset i) {
|
||||
/* i had been decremented once before the function call */
|
||||
UTextOffset I=i, Z;
|
||||
uint8_t b;
|
||||
|
||||
/* read at most the 6 bytes s[Z] to s[i], inclusively */
|
||||
if(I>5) {
|
||||
if(I-5>start) {
|
||||
Z=I-5;
|
||||
} else {
|
||||
Z=0;
|
||||
Z=start;
|
||||
}
|
||||
|
||||
/* return I if the sequence starting there is long enough to include i */
|
||||
|
|
Loading…
Add table
Reference in a new issue