mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-4558 fix and clarify utfxx.h API docs for input string indexes for U16_ and U8_ macros
X-SVN-Rev: 19988
This commit is contained in:
parent
d03be86b5f
commit
5d32123d29
3 changed files with 35 additions and 25 deletions
|
@ -63,6 +63,14 @@
|
|||
* malformed sequences can be expressed unambiguously with a distinct subrange
|
||||
* of Unicode code points.)
|
||||
*
|
||||
* The regular "safe" macros require that the initial, passed-in string index
|
||||
* is within bounds. They only check the index when they read more than one
|
||||
* code unit. Callers usually guarantee an in-bounds initial index with a loop like the following:
|
||||
* <pre>while(i<length) {
|
||||
* U16_NEXT(s, i, length, c);
|
||||
* // use c
|
||||
* }</pre>
|
||||
*
|
||||
* When it is safe to assume that text is well-formed UTF-16
|
||||
* (does not contain single, unpaired surrogates), then one can use
|
||||
* U16_..._UNSAFE macros.
|
||||
|
@ -80,6 +88,8 @@
|
|||
* The unsafe UTF-8 macros are entirely implemented inside the macro definitions
|
||||
* and are fast, while the safe UTF-8 macros call functions for all but the
|
||||
* trivial (ASCII) cases.
|
||||
* (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common
|
||||
* characters inline as well.)
|
||||
*
|
||||
* Unlike with UTF-16, malformed sequences cannot be expressed with distinct
|
||||
* code point values (0..U+10ffff). They are indicated with negative values instead.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -179,7 +179,7 @@
|
|||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, start<=i<length
|
||||
* @param i string offset, must be start<=i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_GET_UNSAFE
|
||||
|
@ -243,7 +243,7 @@
|
|||
* will be returned as the code point.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, i<length
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_NEXT_UNSAFE
|
||||
|
@ -292,7 +292,7 @@
|
|||
* then isError is set to TRUE.
|
||||
*
|
||||
* @param s UChar * string buffer (written to by the macro, so not const)
|
||||
* @param i string offset, i<length
|
||||
* @param i string offset, must be i<capacity
|
||||
* @param capacity size of the string buffer
|
||||
* @param c code point to append
|
||||
* @param isError output UBool set to TRUE if an error occurs, otherwise not modified
|
||||
|
@ -332,7 +332,7 @@
|
|||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, i<length
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @see U16_FWD_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
|
@ -370,7 +370,7 @@
|
|||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, i<length
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @param n number of code points to skip
|
||||
* @see U16_FWD_N_UNSAFE
|
||||
|
@ -413,7 +413,7 @@
|
|||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, start<=i
|
||||
* @param i string offset, must be start<=i
|
||||
* @see U16_SET_CP_START_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
@ -468,7 +468,7 @@
|
|||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, start<=i
|
||||
* @param i string offset, must be start<i
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_PREV_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
|
@ -509,7 +509,7 @@
|
|||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, start<=i
|
||||
* @param i string offset, must be start<i
|
||||
* @see U16_BACK_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
@ -549,7 +549,7 @@
|
|||
*
|
||||
* @param s const UChar * string
|
||||
* @param start start of string
|
||||
* @param i string offset, i<length
|
||||
* @param i string offset, must be start<i
|
||||
* @param n number of code points to skip
|
||||
* @see U16_BACK_N_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
|
|
|
@ -181,7 +181,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start starting string offset
|
||||
* @param i string offset, start<=i<length
|
||||
* @param i string offset, must be start<=i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable, set to <0 in case of an error
|
||||
* @see U8_GET_UNSAFE
|
||||
|
@ -243,7 +243,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
* c is set to a negative value.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset, i<length
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable, set to <0 in case of an error
|
||||
* @see U8_NEXT_UNSAFE
|
||||
|
@ -320,25 +320,25 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
* then isError is set to TRUE.
|
||||
*
|
||||
* @param s uint8_t * string buffer (written to by the macro, so not const)
|
||||
* @param i string offset, i<length
|
||||
* @param length size of the string buffer
|
||||
* @param i string offset, must be i<capacity
|
||||
* @param capacity size of the string buffer
|
||||
* @param c code point to append
|
||||
* @param isError output UBool set to TRUE if an error occurs, otherwise not modified
|
||||
* @see U8_APPEND_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_APPEND(s, i, length, c, isError) { \
|
||||
#define U8_APPEND(s, i, capacity, c, isError) { \
|
||||
if((uint32_t)(c)<=0x7f) { \
|
||||
(s)[(i)++]=(uint8_t)(c); \
|
||||
} else if((uint32_t)(c)<=0x7ff && (i)+1<(length)) { \
|
||||
} else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
|
||||
(s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
|
||||
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
|
||||
} else if((uint32_t)(c)<=0xd7ff && (i)+2<(length)) { \
|
||||
} else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
|
||||
(s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
|
||||
(s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
|
||||
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
|
||||
} else { \
|
||||
(i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, &(isError)); \
|
||||
(i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(capacity), c, &(isError)); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
@ -362,7 +362,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset, i<length
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @see U8_FWD_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
|
@ -408,7 +408,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset, i<length
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @param n number of code points to skip
|
||||
* @see U8_FWD_N_UNSAFE
|
||||
|
@ -449,7 +449,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, start<=i
|
||||
* @param i string offset, must be start<=i
|
||||
* @see U8_SET_CP_START_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
@ -517,7 +517,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, start<=i
|
||||
* @param i string offset, must be start<i
|
||||
* @param c output UChar32 variable, set to <0 in case of an error
|
||||
* @see U8_PREV_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
|
@ -556,7 +556,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, start<=i
|
||||
* @param i string offset, must be start<i
|
||||
* @see U8_BACK_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
@ -596,7 +596,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start index of the start of the string
|
||||
* @param i string offset, i<length
|
||||
* @param i string offset, must be start<i
|
||||
* @param n number of code points to skip
|
||||
* @see U8_BACK_N_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
|
@ -637,7 +637,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, start<=i<=length
|
||||
* @param i string offset, must be start<=i<=length
|
||||
* @param length string length
|
||||
* @see U8_SET_CP_LIMIT_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
|
|
Loading…
Add table
Reference in a new issue