mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 22:15:31 +00:00
ICU-3944 text access, work in progress
X-SVN-Rev: 17958
This commit is contained in:
parent
3609db9872
commit
40e2e39792
4 changed files with 780 additions and 506 deletions
|
@ -45,6 +45,63 @@
|
|||
* or system with a unique text storage format can implement a set of
|
||||
* UText provider functions for that format, which will then allow other
|
||||
* ICU services to operate on that format.
|
||||
*
|
||||
*
|
||||
* <em>Iterating over text</em>
|
||||
*
|
||||
* Here is sample code for a forward iteration over the contents of a UText
|
||||
*
|
||||
* \code
|
||||
* UChar32 c;
|
||||
* UText *ut = whatever();
|
||||
*
|
||||
* for (c=utext_next32From(ut, 0); c!=U_SENTINEL; c=utext_next32(ut)) {
|
||||
* // do whatever with the codepoint c here.
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* And here is similar code to iterate in the reverse direction, from the end
|
||||
* of the text towards the beginning.
|
||||
*
|
||||
* \code
|
||||
* UChar32 c;
|
||||
* UText *ut = whatever();
|
||||
* int textLength = utext_length(ut);
|
||||
* for (c=utext_previous32From(ut, textLength); c!=U_SENTINEL; c=utext_previous32(ut)) {
|
||||
* // do whatever with the codepoint c here.
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* <em>Characters and Indexing</em>
|
||||
*
|
||||
* Indexing into text by UText functions is nearly always in terms of the native
|
||||
* indexing of the underlying text storage. The storage format could be utf-8
|
||||
* or utf-32, for example. When coding to the UText access API, no assumptions
|
||||
* can be made regarding the size of characters, or how far an index
|
||||
* may move when iterating between characters.
|
||||
*
|
||||
* All indices supplied to UText functions are pinned to the length of the
|
||||
* text. An out-of-bounds index is not considered to be an error, but is
|
||||
* adjusted to be in the range 0 <= index <= length of input text.
|
||||
*
|
||||
*
|
||||
* When an index position is returned from a UText function, it will be
|
||||
* a native index to the underlying text. In the case of multi-unit characters,
|
||||
* it will always refer to the first position, never to the interior. This
|
||||
* is essentially the same thing as saying that a returned index will always
|
||||
* point to a boundary between characters.
|
||||
*
|
||||
* When a native index is supplied to a UText function, all indices that
|
||||
* refer to any part of a multi-unit character representation are considered
|
||||
* to be equivalent. In the case of multi-unit characters, an incoming index
|
||||
* will be logically normalized to refer to the start of the character.
|
||||
*
|
||||
* It is possible to test whether a native index is on a code point boundary
|
||||
* by doing a utext_setIndex() followed by a utext_getIndex().
|
||||
* If the index returns unchanged, it was on a code point boundary. If
|
||||
* an adjusted index is returned, the original index referred to the
|
||||
* interior of a character.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
@ -102,8 +159,10 @@ utext_close(UText *ut);
|
|||
/**
|
||||
* Open a read-only UText implementation for UTF-8 strings.
|
||||
*
|
||||
* Any invalid utf-8 sequences in the input will appear on the output side
|
||||
* of the UText as Unicode Replacement characters, \uFFFD.
|
||||
* Any invalid utf-8 in the input will be handled in this way:
|
||||
* a sequence of bytes that has the form of a truncated, but otherwise valid,
|
||||
* utf-8 sequence will be replaced by a single unicode replacement character, \uFFFD.
|
||||
* Any other illegal bytes will each be replaced by a \uFFFD.
|
||||
*
|
||||
* @param ut Pointer to a UText struct. If NULL, a new UText will be created.
|
||||
* If non-NULL, must refer to an initialized UText struct, which will then
|
||||
|
@ -119,31 +178,6 @@ utext_close(UText *ut);
|
|||
U_DRAFT UText * U_EXPORT2
|
||||
utext_openUTF8(UText *ut, const uint8_t *s, int32_t length, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Open a read-only UText implementation for a SBCS strings.
|
||||
* The implementation converts 1:1 according to the provided mapping table.
|
||||
* Supplementary code points are not supported.
|
||||
*
|
||||
* @param ut Pointer to a UText struct. If NULL, a new UText will be created.
|
||||
* If non-NULL, must refer to an initialized UText struct, which will then
|
||||
* be reset to reference the specified input string.
|
||||
* @param toU Mapping table for conversion from SBCS to Unicode (BMP only).
|
||||
* The mapping table must be available during the lifetime of the
|
||||
* UText object.
|
||||
* @param s A byte text string
|
||||
* @param length The length of the input string in bytes, or -1 if the string is
|
||||
* zero terminated.
|
||||
* @param status Errors are returned here.
|
||||
* @return A pointer to the UText. If a pre-allocated UText was provided, it
|
||||
* will always be used and returned.
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT UText * U_EXPORT2
|
||||
utext_openSBCS(UText *ut,
|
||||
const UChar toU[256],
|
||||
const char *s, int32_t length,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Open a read-only UText for UChar * string.
|
||||
|
@ -160,12 +194,12 @@ utext_openSBCS(UText *ut,
|
|||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT UText * U_EXPORT2
|
||||
utext_openUChar(UText *ut, const UChar *s, int32_t length, UErrorCode *status);
|
||||
utext_openUChars(UText *ut, const UChar *s, int32_t length, UErrorCode *status);
|
||||
|
||||
|
||||
#ifdef XP_CPLUSPLUS
|
||||
/**
|
||||
* Open a UText for a UnicodeString.
|
||||
* Open a writable UText for a non-const UnicodeString.
|
||||
*
|
||||
* @param t Pointer to a UText struct. If NULL, a new UText will be created.
|
||||
* If non-NULL, must refer to an initialized UText struct, which will then
|
||||
|
@ -181,7 +215,7 @@ utext_openUnicodeString(UText *t, UnicodeString *s, UErrorCode *status);
|
|||
|
||||
|
||||
/**
|
||||
* Open a UText for a const UnicodeString. The resulting UText will not be writeable.
|
||||
* Open a UText for a const UnicodeString. The resulting UText will not be writable.
|
||||
*
|
||||
* @param t Pointer to a UText struct. If NULL, a new UText will be created.
|
||||
* If non-NULL, must refer to an initialized UText struct, which will then
|
||||
|
@ -254,7 +288,7 @@ utext_clone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
|
|||
/**
|
||||
* Get the length of the text. Depending on the characteristics
|
||||
* of the underlying text representation, this may be expensive.
|
||||
* @see utext_lengthIsExpensive()
|
||||
* @see utext_isLengthExpensive()
|
||||
*
|
||||
*
|
||||
* @param ut the text to be accessed.
|
||||
|
@ -269,28 +303,33 @@ utext_length(UText *ut);
|
|||
* Return TRUE if calculating the length of the text could be expensive.
|
||||
* Finding the length of NUL terminated strings is considered to be expensive.
|
||||
*
|
||||
* Note that the value of this function may change
|
||||
* as the result of other operations on a UText.
|
||||
* Once the length of a string has been discovered, it will no longer
|
||||
* be expensive to report it.
|
||||
*
|
||||
* @param ut the text to be accessed.
|
||||
* @return TRUE if determining the length of the text could be time consuming.
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
utext_lengthIsExpensive(const UText *ut);
|
||||
utext_isLengthExpensive(const UText *ut);
|
||||
|
||||
/**
|
||||
* Returns the code point at the requested index,
|
||||
* or U_SENTINEL (-1) if it is out of bounds.
|
||||
* Sets the current iteration position to the specified index.
|
||||
*
|
||||
* If the specified index points to the interior of a multi-unit
|
||||
* character - one of the trail bytes of a utf-8 sequence, for example -
|
||||
* the complete code point will be returned, and the current
|
||||
* iteration position will be left at the start of the code point.
|
||||
* the complete code point will be returned.
|
||||
*
|
||||
* TODO: drop this function as being dangerous? There is no clean way for applications
|
||||
* to increment the index, which is in native units. Likely user error to increment
|
||||
* it by utf-16 units. next32From(index) does same thing, except for where iteration
|
||||
* position is left.
|
||||
* The iteration position will be set to the start of the returned code point.
|
||||
*
|
||||
* This function is roughly equivalent to the sequence
|
||||
* utext_setIndex(index);
|
||||
* utext_current();
|
||||
* (There is a difference if the index is out of bounds by being less than zero)
|
||||
*
|
||||
* @param ut the text to be accessed
|
||||
* @param nativeIndex the native index of the character to be accessed. If the index points
|
||||
* to other than the first unit of a multi-unit character, it will be adjusted
|
||||
|
@ -299,7 +338,7 @@ utext_lengthIsExpensive(const UText *ut);
|
|||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT UChar32 U_EXPORT2
|
||||
utext_char32At(UText *ut, int32_t index);
|
||||
utext_char32At(UText *ut, int32_t nativeIndex);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -309,7 +348,7 @@ utext_char32At(UText *ut, int32_t index);
|
|||
* the input text.
|
||||
*
|
||||
* @param ut the text to be accessed.
|
||||
* @return the Unicode code point at the specified index.
|
||||
* @return the Unicode code point at the current iterator position.
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT UChar32 U_EXPORT2
|
||||
|
@ -358,20 +397,17 @@ utext_previous32(UText *ut);
|
|||
* and return the code point starting at or before that index.
|
||||
* Leave the iteration index at the start of the following code point.
|
||||
*
|
||||
* An inline macro version of this function, UTEXT_NEXT32FROM(),
|
||||
* is available for performance critical use.
|
||||
|
||||
* This function is the most efficient and convenient way to
|
||||
* begin a forward iteration.
|
||||
*
|
||||
* @param ut the text to be accessed.
|
||||
* @param index Iteration index.
|
||||
* @param index Iteration index, in the native units of the text provider.
|
||||
* @return Code point which starts at or before index,
|
||||
* or U_SENTINEL (-1) if it is out of bounds.
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT UChar32 U_EXPORT2
|
||||
utext_next32From(UText *ut, int32_t index);
|
||||
utext_next32From(UText *ut, int32_t nativeIndex);
|
||||
|
||||
|
||||
|
||||
|
@ -380,21 +416,18 @@ utext_next32From(UText *ut, int32_t index);
|
|||
* one specified by the initial index. Leave the iteration position
|
||||
* at the start of the returned code point.
|
||||
*
|
||||
* An inline macro version of this function, UTEXT_PREVIOUS32FROM(),
|
||||
* is available for performance critical use.
|
||||
|
||||
* This function is the most efficient and convenient way to
|
||||
* begin a backwards iteration.
|
||||
*
|
||||
* @param ut the text to be accessed.
|
||||
* @param index Iteration index.
|
||||
* @param index Iteration index in the native units of the text provider.
|
||||
* @return Code point preceding the one at the initial index,
|
||||
* or U_SENTINEL (-1) if it is out of bounds.
|
||||
*
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT UChar32 U_EXPORT2
|
||||
utext_previous32From(UText *ut, int32_t index);
|
||||
utext_previous32From(UText *ut, int32_t nativeIndex);
|
||||
|
||||
/**
|
||||
* Get the current iterator position, which can range from 0 to
|
||||
|
@ -405,33 +438,40 @@ utext_previous32From(UText *ut, int32_t index);
|
|||
* code point boundary
|
||||
*
|
||||
* @param ut the text to be accessed.
|
||||
* @return the current index position, in native units.
|
||||
* @return the current index position, in the native units of the text provider.
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
utext_getIndex(UText *ut);
|
||||
|
||||
/**
|
||||
* Set the current iteration position to the specified index.
|
||||
* Set the current iteration position to the nearest code point
|
||||
* boundary at or preceding the specified index.
|
||||
* The index is in the native units of the original input text.
|
||||
* If the index is out of range, it will be trimmed to be witnin
|
||||
* If the index is out of range, it will be trimmed to be within
|
||||
* the range of the input text.
|
||||
* If the specified index does not fall on a code point boundary in
|
||||
* the input text, it will be adjusted back to do so.
|
||||
* <p/>
|
||||
* It will usually be more efficient to begin an iteration
|
||||
* using the functions utext_next32From() or utext_previous32From()
|
||||
* rather than setIndex().
|
||||
* <p/>
|
||||
* Moving the index position to an adjacent character is best done
|
||||
* with utext_next32(), utext_previous32() or utext_moveIndex().
|
||||
* Attempting to do direct arithmetic on the index position is
|
||||
* complicated by the fact that the size (in native units) of a
|
||||
* character depends on the underlying representation of the character
|
||||
* (utf-8, utf-16, utf-32, arbitrary codepage), and is not
|
||||
* easily knowable.
|
||||
*
|
||||
* @param ut the text to be accessed.
|
||||
* @param index the native unit index of the new iteration position.
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
utext_setIndex(UText *ut, int32_t index);
|
||||
utext_setIndex(UText *ut, int32_t nativeIndex);
|
||||
|
||||
/**
|
||||
* Move the iterator postion by delta code points. The amount to move
|
||||
* Move the iterator position by delta code points. The number of code points
|
||||
* is a signed number; a negative delta will move the iterator backwards,
|
||||
* towards the start of the text.
|
||||
* <p/>
|
||||
|
@ -439,6 +479,10 @@ utext_setIndex(UText *ut, int32_t index);
|
|||
* forward or backward, but no further backward than to 0 and
|
||||
* no further forward than to length().
|
||||
* The resulting index value will be in between 0 and length(), inclusive.
|
||||
* <p/>
|
||||
* Because the index is kept in the native units of the text provider, the
|
||||
* actual numeric amount by which the index moves depends on the
|
||||
* underlying text storage representation of the text provider.
|
||||
*
|
||||
* @param ut the text to be accessed.
|
||||
* @param delta the signed number of code points to move the iteration position.
|
||||
|
@ -467,22 +511,21 @@ utext_moveIndex(UText *ut, int32_t delta);
|
|||
* @param ut the UText from which to extract data.
|
||||
* @param start the native index of the first character to extract.
|
||||
* @param limit the native string index of the position following the last
|
||||
* character to extract.
|
||||
* character to extract. If the specified limit is greater than the length
|
||||
* of the text, the limit will be trimmed back to the text length.
|
||||
* @param dest the UChar (utf-16) buffer into which the extracted text is placed
|
||||
* @param destCapacity The size, in UChars, of the destination buffer. May be zero
|
||||
* for precomputing the required size.
|
||||
* @param status receives any error status.
|
||||
* U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the
|
||||
* buffer was too small. Returns number of UChars for preflighting.
|
||||
* @return Number of UChars in the data. Does not include a trailing NUL.
|
||||
*
|
||||
* TODO: how should invalid source data be handled? Corrupt utf-8, for example.
|
||||
* @return Number of UChars in the data to be extracted. Does not include a trailing NUL.
|
||||
*
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
utext_extract(UText *ut,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t nativeStart, int32_t nativeLimit,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UErrorCode *status);
|
||||
|
||||
|
@ -533,38 +576,6 @@ utext_extract(UText *ut,
|
|||
(ut)->chunk.contents[--((ut)->chunk.offset)] : utext_previous32(ut))
|
||||
|
||||
|
||||
/**
|
||||
* inline version of utext_next32from(), for performance-critical situations.
|
||||
*
|
||||
* Set the iteration index, access the text for forward iteration,
|
||||
* and return the code point starting at or before that index.
|
||||
* Leave the iteration index at the start of the following code point.
|
||||
*
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
#define UTEXT_NEXT32FROM(ut, index) \
|
||||
((index) >= (ut)->chunk.start && \
|
||||
(index) < (ut)->chunk.limit && \
|
||||
!(ut)->chunk.nonUTF16Indexes && \
|
||||
(ut)->chunk.contents[(ut)->chunk.offset=(ut)->chunk.start+(index)] < 0xd800 ? \
|
||||
(ut)->chunk.contents[((ut)->chunk.offset)++] : utext_next32From(ut, index))
|
||||
|
||||
/**
|
||||
* inline version of utext_previous32from(), for performance-critical situations.
|
||||
*
|
||||
* Set the iteration index, and return the code point preceding the
|
||||
* one specified by the initial index. Leave the iteration position
|
||||
* at the start of the returned code point.
|
||||
*
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
#define UTEXT_PREVIOUS32FROM(ut, index) \
|
||||
((index) > (ut)->chunk.start && \
|
||||
(index) <= (ut)->chunk.limit && \
|
||||
!(ut)->chunk.nonUTF16Indexes && \
|
||||
(ut)->chunk.contents[(ut)->chunk.offset=(ut)->chunk.start+(index-1)] < 0xd800 ? \
|
||||
(ut)->chunk.contents[(ut)->chunk.offset] : utext_previous32From(ut, index))
|
||||
|
||||
|
||||
|
||||
/************************************************************************************
|
||||
|
@ -587,7 +598,7 @@ utext_extract(UText *ut,
|
|||
*
|
||||
*/
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
utext_isWriteable(const UText *ut);
|
||||
utext_isWriteble(const UText *ut);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -609,7 +620,7 @@ utext_hasMetaData(const UText *ut);
|
|||
* newly inserted replacement text.
|
||||
*
|
||||
* This function is only available on UText types that support writing,
|
||||
* that is, ones where utext_isWriteable() returns TRUE.
|
||||
* that is, ones where utext_isWritable() returns TRUE.
|
||||
*
|
||||
* When using this function, there should be only a single UText opened onto the
|
||||
* underlying native text string. Behavior after a replace operation
|
||||
|
@ -617,8 +628,8 @@ utext_hasMetaData(const UText *ut);
|
|||
* modified string.
|
||||
*
|
||||
* @param ut the UText representing the text to be operated on.
|
||||
* @param start the native index of the start of the region to be replaced
|
||||
* @param limit the native index of the character following the region to be replaced.
|
||||
* @param nativeStart the native index of the start of the region to be replaced
|
||||
* @param nativeLimit the native index of the character following the region to be replaced.
|
||||
* @param replacementText pointer to the replacement text
|
||||
* @param replacementLength length of the replacement text, or -1 if the text is NUL terminated.
|
||||
* @param status receives any error status. Possible errors include
|
||||
|
@ -631,7 +642,7 @@ utext_hasMetaData(const UText *ut);
|
|||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
utext_replace(UText *ut,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t nativeStart, int32_t nativeLimit,
|
||||
const UChar *replacementText, int32_t replacementLength,
|
||||
UErrorCode *status);
|
||||
|
||||
|
@ -648,25 +659,25 @@ utext_replace(UText *ut,
|
|||
* it does not replace or overwrite any existing text.
|
||||
*
|
||||
* This function is only available on UText types that support writing,
|
||||
* that is, ones where utext_isWriteable() returns TRUE.
|
||||
* that is, ones where utext_isWritable() returns TRUE.
|
||||
*
|
||||
* When using this function, there should be only a single UText opened onto the
|
||||
* underlying native text string. Behavior after a copy operation
|
||||
* on a UText is undefined in any other additional UTexts that refer to the
|
||||
* modified string.
|
||||
*
|
||||
* @param ut The UText representing the text to be operated on.
|
||||
* @param start The native index of the start of the region to be copied or moved
|
||||
* @param limit The native index of the character following the region to be replaced.
|
||||
* @param destIndex The native destination index to which the source substring is copied or moved.
|
||||
* @param move If TRUE, then the substring is moved, not copied/duplicated.
|
||||
* @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
|
||||
* @param ut The UText representing the text to be operated on.
|
||||
* @param nativeStart The native index of the start of the region to be copied or moved
|
||||
* @param nativeLimit The native index of the character following the region to be replaced.
|
||||
* @param destIndex The native destination index to which the source substring is copied or moved.
|
||||
* @param move If TRUE, then the substring is moved, not copied/duplicated.
|
||||
* @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
|
||||
*
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
utext_copy(UText *ut,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t nativeStart, int32_t nativeLimit,
|
||||
int32_t destIndex,
|
||||
UBool move,
|
||||
UErrorCode *status);
|
||||
|
@ -709,10 +720,10 @@ struct UTextChunk {
|
|||
int32_t length;
|
||||
|
||||
/** (Native) text index corresponding to the start of the chunk. */
|
||||
int32_t start;
|
||||
int32_t nativeStart;
|
||||
|
||||
/** (Native) text index corresponding to the end of the chunk (contents+length). */
|
||||
int32_t limit;
|
||||
int32_t nativeLimit;
|
||||
|
||||
/** If TRUE, then non-UTF-16 indexes are used in this chunk. */
|
||||
UBool nonUTF16Indexes;
|
||||
|
@ -739,10 +750,10 @@ enum {
|
|||
*/
|
||||
UTEXT_PROVIDER_NON_UTF16_INDEXES,
|
||||
/**
|
||||
* The provider can return the text length inexpensively.
|
||||
* It is potentially time consuming for the provider to determine the length of the text.
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
UTEXT_PROVIDER_LENGTH_IS_INEXPENSIVE,
|
||||
UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE,
|
||||
/**
|
||||
* Text chunks remain valid and usable until the text object is modified or
|
||||
* deleted, not just until the next time the access() function is called
|
||||
|
@ -799,18 +810,6 @@ enum {
|
|||
typedef UText * U_CALLCONV
|
||||
UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Function type declaration for UText.GetProperties().
|
||||
*
|
||||
* Gets the provider properties for this UText.
|
||||
*
|
||||
* @param ut the UText to get properties for.
|
||||
* @return Provider properties bit field.
|
||||
*
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UTextGetProperties(UText *ut);
|
||||
|
||||
/**
|
||||
* Function type declaration for UText.length().
|
||||
|
@ -821,7 +820,7 @@ UTextGetProperties(UText *ut);
|
|||
* @draft ICU 3.4
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UTextLength(UText *ut);
|
||||
UTextNativeLength(UText *ut);
|
||||
|
||||
/**
|
||||
* Function type declaration for UText.access(). Get the description of the text chunk
|
||||
|
@ -830,22 +829,22 @@ UTextLength(UText *ut);
|
|||
* of bounds, the iteration position will be left at the start or end
|
||||
* of the string, as appropriate.
|
||||
*
|
||||
* @param ut the UText being accessed.
|
||||
* @param index Requested (native) index of the text to be accessed.
|
||||
* @param forward If TRUE, then the returned chunk must contain text
|
||||
* starting from the index, so that start<=index<limit.
|
||||
* If FALSE, then the returned chunk must contain text
|
||||
* before the index, so that start<index<=limit.
|
||||
* @return True if the requested index could be accessed. The chunk
|
||||
* will contain the requested text.
|
||||
* False value if a chunk cannot be accessed
|
||||
* (the requested index is out of bounds).
|
||||
* @param ut the UText being accessed.
|
||||
* @param nativeIndex Requested index of the text to be accessed.
|
||||
* @param forward If TRUE, then the returned chunk must contain text
|
||||
* starting from the index, so that start<=index<limit.
|
||||
* If FALSE, then the returned chunk must contain text
|
||||
* before the index, so that start<index<=limit.
|
||||
* @return True if the requested index could be accessed. The chunk
|
||||
* will contain the requested text.
|
||||
* False value if a chunk cannot be accessed
|
||||
* (the requested index is out of bounds).
|
||||
*
|
||||
* @see UText
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
typedef UBool U_CALLCONV
|
||||
UTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk);
|
||||
UTextAccess(UText *ut, int32_t nativeIndex, UBool forward, UTextChunk *chunk);
|
||||
|
||||
/**
|
||||
* Function type declaration for UText.extract().
|
||||
|
@ -860,23 +859,23 @@ UTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk);
|
|||
* The extracted string will (if you are a user) / must (if you are a text provider)
|
||||
* be NUL-terminated if there is sufficient space in the destination buffer.
|
||||
*
|
||||
* @param ut the UText from which to extract data.
|
||||
* @param start the native index of the first characer to extract.
|
||||
* @param limit the native string index of the position following the last
|
||||
* character to extract.
|
||||
* @param dest the UChar (utf-16) buffer into which the extracted text is placed
|
||||
* @param ut the UText from which to extract data.
|
||||
* @param nativeStart the native index of the first character to extract.
|
||||
* @param nativeLimit the native string index of the position following the last
|
||||
* character to extract.
|
||||
* @param dest the UChar (utf-16) buffer into which the extracted text is placed
|
||||
* @param destCapacity The size, in UChars, of the destination buffer. May be zero
|
||||
* for precomputing the required size.
|
||||
* @param status receives any error status.
|
||||
* If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
|
||||
* preflighting.
|
||||
* for precomputing the required size.
|
||||
* @param status receives any error status.
|
||||
* If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
|
||||
* preflighting.
|
||||
* @return Number of UChars in the data. Does not include a trailing NUL.
|
||||
*
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UTextExtract(UText *ut,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t nativeStart, int32_t nativeLimit,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UErrorCode *status);
|
||||
|
||||
|
@ -897,10 +896,10 @@ UTextExtract(UText *ut,
|
|||
* by the replace operation.
|
||||
*
|
||||
* @param ut the UText representing the text to be operated on.
|
||||
* @param start the native index of the start of the region to be replaced
|
||||
* @param limit the native index of the character following the region to be replaced.
|
||||
* @param nativeStart the index of the start of the region to be replaced
|
||||
* @param nativeLimit the index of the character following the region to be replaced.
|
||||
* @param replacementText pointer to the replacement text
|
||||
* @param replacmentLength length of the replacement text, or -1 if the text is NUL terminated.
|
||||
* @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
|
||||
* @param status receives any error status. Possible errors include
|
||||
* U_NO_WRITE_PERMISSION
|
||||
*
|
||||
|
@ -911,8 +910,8 @@ UTextExtract(UText *ut,
|
|||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UTextReplace(UText *t,
|
||||
int32_t start, int32_t limit,
|
||||
const UChar *src, int32_t length,
|
||||
int32_t nativeStart, int32_t nativeLimit,
|
||||
const UChar *replacementText, int32_t replacmentLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
|
@ -934,57 +933,58 @@ UTextReplace(UText *t,
|
|||
* taking into account any changes to the underlying string's structure caused
|
||||
* by the replace operation.
|
||||
*
|
||||
* @param ut The UText representing the text to be operated on.
|
||||
* @param start The native index of the start of the region to be copied or moved
|
||||
* @param limit The native index of the character following the region to be replaced.
|
||||
* @param destIndex The native destination index to which the source substring is copied or moved.
|
||||
* @param move If TRUE, then the substring is moved, not copied/duplicated.
|
||||
* @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
|
||||
* @param ut The UText representing the text to be operated on.
|
||||
* @param nativeStart The index of the start of the region to be copied or moved
|
||||
* @param nativeLimit The index of the character following the region to be replaced.
|
||||
* @param nativeDest The destination index to which the source substring is copied or moved.
|
||||
* @param move If TRUE, then the substring is moved, not copied/duplicated.
|
||||
* @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
|
||||
*
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
typedef void U_CALLCONV
|
||||
UTextCopy(UText *t,
|
||||
int32_t start, int32_t limit,
|
||||
int32_t destIndex,
|
||||
int32_t nativeStart, int32_t nativeLimit,
|
||||
int32_t nativeDest,
|
||||
UBool move,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Function type declaration for UText.mapOffsetToNative().
|
||||
* Map from a UChar offset within the current text chunk within the UText to
|
||||
* the corresponding native index in the original source text.
|
||||
*
|
||||
* This is required only for text providers that do not use native utf-16 indexes.
|
||||
*
|
||||
* TODO: specify behavior with out-of-bounds offset? Shouldn't ever occur.
|
||||
*
|
||||
* @param ut the UText.
|
||||
* @param chunk The UTextChunk in which to perform a mapping.
|
||||
* TODO: keep this as a separate parameter, or just imply that the function
|
||||
* works on the chunk embedded in the UText?
|
||||
* @param offset UTF-16 offset relative to the current text chunk embedded in the UText
|
||||
* @param offset UTF-16 offset within text chunk
|
||||
* 0<=offset<=chunk->length.
|
||||
* @return Absolute (native) index corresponding to the UTF-16 offset
|
||||
* relative to the current text chunk.
|
||||
* @return Absolute (native) index corresponding to the specified chunk offset.
|
||||
* The returned native index should always be to a code point boundary.
|
||||
*
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UTextMapOffsetToNative(UText *ut, UTextChunk *chunk, int32_t offset);
|
||||
UTextMapOffsetToNative(UText *ut, int32_t offset);
|
||||
|
||||
/**
|
||||
* Function type declaration for UText.mapIndexToUTF16().
|
||||
* This is required only for text providers that do not use native utf-16 indexes.
|
||||
* Map from a native index to a UChar offset within a text chunk
|
||||
*
|
||||
* @param ut The UText containing the text chunk.
|
||||
* @param chunk the text chunk in which the mapping occurs.
|
||||
* TODO: keep this as a separate parameter, or just imply that the function
|
||||
* works on the chunk embedded in the UText?
|
||||
* @param index Absolute (native) text index, chunk->start<=index<=chunk->limit.
|
||||
* @return Chunk-relative UTF-16 offset corresponding to the absolute (native)
|
||||
* index.
|
||||
* This function is required only for text providers that do not use native utf-16 indexes.
|
||||
*
|
||||
* @see UText
|
||||
* @param ut The UText containing the text chunk.
|
||||
* @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit.
|
||||
* @return Chunk-relative UTF-16 offset corresponding to the specified native
|
||||
* index.
|
||||
*
|
||||
* TODO: specify behavior with out-of-bounds index? Shouldn't ever occur.
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UTextMapIndexToUTF16(UText *ut, UTextChunk *chunk, int32_t index);
|
||||
UTextMapIndexToUTF16(UText *ut, int32_t nativeIndex);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -1077,6 +1077,15 @@ struct UText {
|
|||
int32_t a, b, c;
|
||||
|
||||
|
||||
/**
|
||||
* Text provider properties. This set of flags is maintained by the
|
||||
* text provider implementation.
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
int32_t providerProperties;
|
||||
|
||||
|
||||
|
||||
/** descriptor for the text chunk that includes or is adjacent to
|
||||
* the current iteration position.
|
||||
* @draft ICU 3.4
|
||||
|
@ -1084,14 +1093,6 @@ struct UText {
|
|||
UTextChunk chunk;
|
||||
|
||||
|
||||
/**
|
||||
* Text provider properties
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
int32_t providerProperties;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* (public) Function pointer for UTextClone
|
||||
*
|
||||
|
@ -1100,14 +1101,6 @@ struct UText {
|
|||
*/
|
||||
UTextClone *clone;
|
||||
|
||||
/**
|
||||
* (public) function pointer for UTextGetProperties
|
||||
*
|
||||
* @see UTextGetProperties
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
UTextGetProperties *properties;
|
||||
|
||||
/**
|
||||
* (public) function pointer for UTextLength
|
||||
* May be expensive to compute!
|
||||
|
@ -1115,7 +1108,7 @@ struct UText {
|
|||
* @see UTextLength
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
UTextLength *length;
|
||||
UTextNativeLength *length;
|
||||
|
||||
/**
|
||||
* (public) Function pointer for UTextAccess.
|
||||
|
@ -1224,16 +1217,16 @@ enum {
|
|||
* @internal
|
||||
*/
|
||||
#define UTEXT_INITIALZIER_HEAD \
|
||||
NULL, /* context */ \
|
||||
NULL, NULL, NULL, /* p, q, r */ \
|
||||
NULL, /* pExtra */ \
|
||||
0, /* extraSize */ \
|
||||
0, /* flags */ \
|
||||
UTEXT_MAGIC, /* magic */ \
|
||||
sizeof(UText), /* sizeOfStruct */ \
|
||||
0, 0, 0, /* a, b, c */ \
|
||||
UTEXT_CHUNK_INIT, /* UTextChunk */ \
|
||||
-1 /* provderProps */
|
||||
NULL, /* context */ \
|
||||
NULL, NULL, NULL, /* p, q, r */ \
|
||||
NULL, /* pExtra */ \
|
||||
0, /* extraSize */ \
|
||||
0, /* flags */ \
|
||||
UTEXT_MAGIC, /* magic */ \
|
||||
sizeof(UText), /* sizeOfStruct */ \
|
||||
0, 0, 0, /* a, b, c */ \
|
||||
0, /* providerProps */ \
|
||||
UTEXT_CHUNK_INIT /* UTextChunk */
|
||||
|
||||
|
||||
|
||||
|
@ -1247,7 +1240,6 @@ enum {
|
|||
#define UTEXT_INITIALIZER { \
|
||||
UTEXT_INITIALZIER_HEAD, \
|
||||
NULL, /* clone () */ \
|
||||
NULL, /* properties ()*/ \
|
||||
NULL, /* length () */ \
|
||||
NULL, /* access () */ \
|
||||
NULL, /* extract () */ \
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "ustr_imp.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uassert.h"
|
||||
|
||||
|
||||
#define I32_FLAG(bitIndex) ((int32_t)1<<(bitIndex))
|
||||
|
@ -38,7 +39,7 @@ utext_moveIndex(UText *ut, int32_t delta) {
|
|||
UBool retval = TRUE;
|
||||
if(delta>0) {
|
||||
do {
|
||||
if(ut->chunk.offset>=ut->chunk.length && !utext_access(ut, ut->chunk.limit, TRUE)) {
|
||||
if(ut->chunk.offset>=ut->chunk.length && !utext_access(ut, ut->chunk.nativeLimit, TRUE)) {
|
||||
retval = FALSE;
|
||||
break;
|
||||
}
|
||||
|
@ -46,7 +47,7 @@ utext_moveIndex(UText *ut, int32_t delta) {
|
|||
} while(--delta>0);
|
||||
} else if (delta<0) {
|
||||
do {
|
||||
if(ut->chunk.offset<=0 && !utext_access(ut, ut->chunk.start, FALSE)) {
|
||||
if(ut->chunk.offset<=0 && !utext_access(ut, ut->chunk.nativeStart, FALSE)) {
|
||||
retval = FALSE;
|
||||
break;
|
||||
}
|
||||
|
@ -63,12 +64,20 @@ utext_length(UText *ut) {
|
|||
return ut->length(ut);
|
||||
}
|
||||
|
||||
|
||||
//
//  utext_isLengthExpensive
//      Returns TRUE when the UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE bit is set in
//      the provider properties of this UText, FALSE otherwise.
//      Only the flag word is inspected; no provider function is called.
//
U_DRAFT UBool U_EXPORT2
utext_isLengthExpensive(const UText *ut) {
    const int32_t expensiveBit = I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    UBool isExpensive = (ut->providerProperties & expensiveBit) != 0;
    return isExpensive;
}
|
||||
|
||||
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
utext_getIndex(UText *ut) {
|
||||
if(!ut->chunk.nonUTF16Indexes || ut->chunk.offset==0) {
|
||||
return ut->chunk.start+ut->chunk.offset;
|
||||
return ut->chunk.nativeStart+ut->chunk.offset;
|
||||
} else {
|
||||
return ut->mapOffsetToNative(ut, &ut->chunk, ut->chunk.offset);
|
||||
return ut->mapOffsetToNative(ut, ut->chunk.offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -76,23 +85,23 @@ utext_getIndex(UText *ut) {
|
|||
|
||||
U_DRAFT void U_EXPORT2
|
||||
utext_setIndex(UText *ut, int32_t index) {
|
||||
// TODO - revise for keeping index always valid.
|
||||
if(index<ut->chunk.start || ut->chunk.limit<index) {
|
||||
// The desired position is outside of the current chunk. Invalidate it and
|
||||
// leave it to next32() or previous32() to access the text
|
||||
// in the desired direction.
|
||||
if(index<ut->chunk.nativeStart || ut->chunk.nativeLimit<index) {
|
||||
// The desired position is outside of the current chunk.
|
||||
// Access the new position. Assume a forward iteration from here,
|
||||
// which will also be optimum for a single random access.
|
||||
// Reverse iterations may suffer slightly.
|
||||
ut->access(ut, index, TRUE, &ut->chunk);
|
||||
} else if(ut->chunk.nonUTF16Indexes) {
|
||||
ut->chunk.offset=ut->mapIndexToUTF16(ut, &ut->chunk, index);
|
||||
ut->chunk.offset=ut->mapIndexToUTF16(ut, index);
|
||||
} else {
|
||||
ut->chunk.offset=index-ut->chunk.start;
|
||||
ut->chunk.offset=index-ut->chunk.nativeStart;
|
||||
// Our convention is that the index must always be on a code point boundary.
|
||||
// If we are somewhere in the middle of a utf-16 buffer, check that new index
|
||||
// is not in the middle of a surrogate pair.
|
||||
if (index>ut->chunk.start && index < ut->chunk.limit) { // TODO: clean up end-of-chunk / end of input handling. Everywhere.
|
||||
if (index>ut->chunk.nativeStart && index < ut->chunk.nativeLimit) {
|
||||
UChar c = ut->chunk.contents[ut->chunk.offset];
|
||||
if (U16_TRAIL(c)) {
|
||||
utext_current(ut); // force index onto a code point boundary.
|
||||
utext_current(ut); // force index to the start of the curent code point.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -123,6 +132,18 @@ utext_current(UText *ut) {
|
|||
return c;
|
||||
}
|
||||
|
||||
|
||||
//
//  utext_char32At
//      Position the UText at nativeIndex and return the code point found there.
//
//      @param ut           the text being accessed.  Its iteration position is
//                          changed as a side effect (via utext_setIndex).
//      @param nativeIndex  index, in the native units of the text provider.
//      @return             the code point at the index, or U_SENTINEL when the
//                          index is negative or not below the native limit of
//                          the chunk that utext_setIndex() left in place.
//
U_DRAFT UChar32 U_EXPORT2
utext_char32At(UText *ut, int32_t nativeIndex) {
    UChar32 c = U_SENTINEL;
    utext_setIndex(ut, nativeIndex);
    if (nativeIndex >= 0 && nativeIndex < ut->chunk.nativeLimit) {
        c = ut->chunk.contents[ut->chunk.offset];
        if (U16_IS_SURROGATE(c)) {
            // The chunk holds UTF-16, so a single code unit is only half of a
            // supplementary character.  Let utext_current() assemble the full
            // code point instead of handing a lone surrogate back to the caller.
            c = utext_current(ut);
        }
    }
    return c;
}
|
||||
|
||||
|
||||
U_DRAFT UChar32 U_EXPORT2
|
||||
utext_next32(UText *ut) {
|
||||
UTextChunk *chunk = &ut->chunk;
|
||||
|
@ -130,7 +151,7 @@ utext_next32(UText *ut) {
|
|||
UChar32 c = U_SENTINEL;
|
||||
|
||||
if (offset >= chunk->length) {
|
||||
if (ut->access(ut, chunk->limit, TRUE, chunk) == FALSE) {
|
||||
if (ut->access(ut, chunk->nativeLimit, TRUE, chunk) == FALSE) {
|
||||
goto next32_return;
|
||||
}
|
||||
offset = chunk->offset;
|
||||
|
@ -160,7 +181,7 @@ utext_previous32(UText *ut) {
|
|||
UChar32 c = U_SENTINEL;
|
||||
|
||||
if (offset <= 0) {
|
||||
if (ut->access(ut, chunk->start, FALSE, chunk) == FALSE) {
|
||||
if (ut->access(ut, chunk->nativeStart, FALSE, chunk) == FALSE) {
|
||||
goto prev32_return;
|
||||
}
|
||||
offset = chunk->offset;
|
||||
|
@ -186,16 +207,16 @@ utext_next32From(UText *ut, int32_t index) {
|
|||
UTextChunk *chunk = &ut->chunk;
|
||||
UChar32 c = U_SENTINEL;
|
||||
|
||||
if(index<chunk->start || index>=chunk->limit) {
|
||||
if(index<chunk->nativeStart || index>=chunk->nativeLimit) {
|
||||
if(!ut->access(ut, index, TRUE, chunk)) {
|
||||
// no chunk available here
|
||||
goto next32return;
|
||||
}
|
||||
offset = chunk->offset;
|
||||
} else if(chunk->nonUTF16Indexes) {
|
||||
offset=ut->mapIndexToUTF16(ut, chunk, index);
|
||||
offset=ut->mapIndexToUTF16(ut, index);
|
||||
} else {
|
||||
offset = index - chunk->start;
|
||||
offset = index - chunk->nativeStart;
|
||||
}
|
||||
|
||||
c = chunk->contents[offset++];
|
||||
|
@ -220,16 +241,16 @@ utext_previous32From(UText *ut, int32_t index) {
|
|||
UTextChunk *chunk = &ut->chunk;
|
||||
UChar32 c = U_SENTINEL;
|
||||
|
||||
if(index<=chunk->start || index>chunk->limit) {
|
||||
if(index<=chunk->nativeStart || index>chunk->nativeLimit) {
|
||||
if(!ut->access(ut, index, FALSE, chunk)) {
|
||||
// no chunk available here
|
||||
goto prev32return;
|
||||
}
|
||||
offset = chunk->offset;
|
||||
} else if(chunk->nonUTF16Indexes) {
|
||||
offset=ut->mapIndexToUTF16(ut, chunk, index);
|
||||
offset=ut->mapIndexToUTF16(ut, index);
|
||||
} else {
|
||||
offset = index - chunk->start;
|
||||
offset = index - chunk->nativeStart;
|
||||
}
|
||||
|
||||
offset--;
|
||||
|
@ -253,6 +274,66 @@ utext_extract(UText *ut,
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
//
//  utext_isWriteble
//      Returns TRUE when the UTEXT_PROVIDER_WRITABLE bit is set in the
//      provider properties of this UText, FALSE otherwise.
//
U_DRAFT UBool U_EXPORT2
utext_isWriteble(const UText *ut)
{
    const int32_t writableBit = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
    UBool writable = (ut->providerProperties & writableBit) != 0;
    return writable;
}
|
||||
|
||||
|
||||
//
//  utext_hasMetaData
//      Returns TRUE when the UTEXT_PROVIDER_HAS_META_DATA bit is set in the
//      provider properties of this UText, FALSE otherwise.
//
U_DRAFT UBool U_EXPORT2
utext_hasMetaData(const UText *ut)
{
    const int32_t metaDataBit = I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);
    UBool hasMeta = (ut->providerProperties & metaDataBit) != 0;
    return hasMeta;
}
|
||||
|
||||
|
||||
|
||||
//
//  utext_replace
//      Forward a replace request to the text provider.
//
//      Returns 0 without touching the text when *status already indicates a
//      failure, or when the provider properties do not include
//      UTEXT_PROVIDER_WRITABLE (in which case *status is set to
//      U_NO_WRITE_PERMISSION).  Otherwise returns whatever the provider's
//      replace function returns.
//
U_DRAFT int32_t U_EXPORT2
utext_replace(UText *ut,
              int32_t nativeStart, int32_t nativeLimit,
              const UChar *replacementText, int32_t replacementLength,
              UErrorCode *status)
{
    // A pending error from an earlier call takes precedence over everything else.
    if (U_FAILURE(*status)) {
        return 0;
    }

    const UBool writable = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0;
    if (writable) {
        return ut->replace(ut, nativeStart, nativeLimit, replacementText, replacementLength, status);
    }

    // Read-only text: report it and leave the text alone.
    *status = U_NO_WRITE_PERMISSION;
    return 0;
}
|
||||
|
||||
//
//  utext_copy
//      Forward a copy (or move) request to the text provider.
//
//      Does nothing when *status already indicates a failure.  When the
//      provider properties do not include UTEXT_PROVIDER_WRITABLE, sets
//      *status to U_NO_WRITE_PERMISSION and returns without calling the
//      provider.
//
U_DRAFT void U_EXPORT2
utext_copy(UText *ut,
           int32_t nativeStart, int32_t nativeLimit,
           int32_t destIndex,
           UBool move,
           UErrorCode *status)
{
    // A pending error from an earlier call takes precedence over everything else.
    if (U_FAILURE(*status)) {
        return;
    }

    const UBool writable = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0;
    if (!writable) {
        *status = U_NO_WRITE_PERMISSION;
        return;
    }

    ut->copy(ut, nativeStart, nativeLimit, destIndex, move, status);
}
|
||||
|
||||
|
||||
|
||||
//
//  utext_clone
//      Dispatch to the clone function installed in the source UText.
//      All arguments, including the status, are passed through unchanged,
//      and the provider's result is returned as-is.
//
U_DRAFT UText * U_EXPORT2
utext_clone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
    UText *result = src->clone(dest, src, deep, status);
    return result;
}
|
||||
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
utext_compare(UText *ut, const UChar *s, int32_t length, UBool codePointOrder) {
|
||||
int32_t segLength, result;
|
||||
|
@ -285,7 +366,7 @@ utext_compare(UText *ut, const UChar *s, int32_t length, UBool codePointOrder) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
if(!ut->access(ut, ut->chunk.limit, TRUE, &ut->chunk)) {
|
||||
if(!ut->access(ut, ut->chunk.nativeLimit, TRUE, &ut->chunk)) {
|
||||
// the text ends before the string does
|
||||
return -1;
|
||||
}
|
||||
|
@ -321,9 +402,10 @@ enum {
|
|||
//
|
||||
// Extended form of a UText. The purpose is to aid in computing the total size required
|
||||
// when a provider asks for a UText to be allocated with extra storage.
|
||||
//
|
||||
struct ExtendedUText: public UText {
|
||||
void *extension;
|
||||
|
||||
struct ExtendedUText {
|
||||
UText ut;
|
||||
UAlignedMemory extension;
|
||||
};
|
||||
|
||||
static const UText emptyText = UTEXT_INITIALIZER;
|
||||
|
@ -338,14 +420,18 @@ utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) {
|
|||
// We need to heap-allocate storage for the new UText
|
||||
int32_t spaceRequired = sizeof(UText);
|
||||
if (extraSpace > 0) {
|
||||
spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(void *);
|
||||
spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(UAlignedMemory);
|
||||
}
|
||||
ut = (UText *)uprv_malloc(spaceRequired);
|
||||
*ut = emptyText;
|
||||
ut->flags |= UTEXT_HEAP_ALLOCATED;
|
||||
if (spaceRequired>0) {
|
||||
ut->extraSize = spaceRequired;
|
||||
ut->pExtra = &((ExtendedUText *)ut)->extension;
|
||||
if (ut == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
*ut = emptyText;
|
||||
ut->flags |= UTEXT_HEAP_ALLOCATED;
|
||||
if (spaceRequired>0) {
|
||||
ut->extraSize = spaceRequired;
|
||||
ut->pExtra = &((ExtendedUText *)ut)->extension;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// We have been supplied with an already existing UText.
|
||||
|
@ -378,6 +464,9 @@ utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (U_SUCCESS(*status)) {
|
||||
ut->flags |= UTEXT_OPEN;
|
||||
}
|
||||
return ut;
|
||||
}
|
||||
|
||||
|
@ -429,15 +518,15 @@ utext_close(UText *ut) {
|
|||
//
|
||||
static void
|
||||
resetChunk(UTextChunk *chunk, int32_t index) {
|
||||
if (index==chunk->limit) {
|
||||
if (index==chunk->nativeLimit) {
|
||||
chunk->offset = chunk->length;
|
||||
} else if (index==chunk->start) {
|
||||
} else if (index==chunk->nativeStart) {
|
||||
chunk->offset = 0;
|
||||
} else {
|
||||
chunk->length = 0;
|
||||
chunk->start = index;
|
||||
chunk->limit = index;
|
||||
chunk->offset = 0;
|
||||
chunk->length = 0;
|
||||
chunk->nativeStart = index;
|
||||
chunk->nativeLimit = index;
|
||||
chunk->offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -452,16 +541,52 @@ resetChunk(UTextChunk *chunk, int32_t index) {
|
|||
U_CDECL_BEGIN
|
||||
|
||||
static UText * U_CALLCONV
|
||||
noopTextClone(UText * /* dest */, const UText * /*src*/, UBool /*deep*/, UErrorCode * /*status*/) {
|
||||
return NULL; // not supported
|
||||
//
|
||||
// Clone. This is a generic copy-the-utext-by-value clone function that can be
|
||||
// used as-is with some utext types, and as helper by other clones.
|
||||
//
|
||||
noopTextClone(UText * dest, const UText * src, UBool deep, UErrorCode * status) {
|
||||
if (U_FAILURE(*status)) {
|
||||
return NULL;
|
||||
}
|
||||
int32_t srcExtraSize = src->extraSize;
|
||||
|
||||
//
|
||||
// Use the generic text_setup to allocate storage if required.
|
||||
//
|
||||
dest = utext_setup(dest, srcExtraSize, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return dest;
|
||||
}
|
||||
|
||||
//
|
||||
// flags (how the UText was allocated) and the pointer to the
|
||||
// extra storage must retain the values in the cloned utext that
|
||||
// were set up by utext_setup. Save them separately before
|
||||
// copying the whole struct.
|
||||
//
|
||||
void *destExtra = dest->pExtra;
|
||||
int32_t flags = dest->flags;
|
||||
|
||||
|
||||
//
|
||||
// Copy the whole UText struct by value.
|
||||
// Any "Extra" storage is copied also.
|
||||
//
|
||||
int sizeToCopy = src->sizeOfStruct;
|
||||
if (sizeToCopy > dest->sizeOfStruct) {
|
||||
sizeToCopy = dest->sizeOfStruct;
|
||||
}
|
||||
uprv_memcpy(dest, src, sizeToCopy);
|
||||
dest->pExtra = destExtra;
|
||||
dest->flags = flags;
|
||||
if (srcExtraSize > 0) {
|
||||
uprv_memcpy(dest->pExtra, src->pExtra, srcExtraSize);
|
||||
}
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
noopTextGetProperties(UText * /*t*/) {
|
||||
return
|
||||
I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_INEXPENSIVE)|
|
||||
I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
noopTextLength(UText * /* t */) {
|
||||
|
@ -483,12 +608,12 @@ noopTextExtract(UText * /* t */,
|
|||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
noopTextMapOffsetToNative(UText * /* t */, UTextChunk * /* chunk */, int32_t /* offset */) {
|
||||
noopTextMapOffsetToNative(UText * /* t */, int32_t /* offset */) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
noopTextMapIndexToUTF16(UText * /* t */, UTextChunk * /* chunk */, int32_t /* index */) {
|
||||
noopTextMapIndexToUTF16(UText * /* t */, int32_t /* index */) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -498,7 +623,6 @@ U_CDECL_END
|
|||
static const UText noopText={
|
||||
UTEXT_INITIALZIER_HEAD,
|
||||
noopTextClone,
|
||||
noopTextGetProperties,
|
||||
noopTextLength,
|
||||
noopTextAccess,
|
||||
noopTextExtract,
|
||||
|
@ -550,14 +674,6 @@ struct UTF8Extra {
|
|||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
utf8TextGetProperties(UText * /*t*/) {
|
||||
return
|
||||
I32_FLAG(UTEXT_PROVIDER_NON_UTF16_INDEXES)|
|
||||
I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_INEXPENSIVE);
|
||||
// not UTEXT_PROVIDER_STABLE_CHUNKS because chunk-related data is kept
|
||||
// in UTF8Text, so only one at a time can be active
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
utf8TextLength(UText *ut) {
|
||||
|
@ -590,7 +706,7 @@ utf8TextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk) {
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
chunk->start=index;
|
||||
chunk->nativeStart=index;
|
||||
c=s8[index];
|
||||
if(c<=0x7f) {
|
||||
// get a run of ASCII characters.
|
||||
|
@ -621,11 +737,11 @@ utf8TextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk) {
|
|||
map[i]=index;
|
||||
chunk->nonUTF16Indexes=TRUE;
|
||||
}
|
||||
chunk->contents = u16buf;
|
||||
chunk->length = i;
|
||||
chunk->limit = index;
|
||||
ut->q = map;
|
||||
chunk->offset = 0; // chunkOffset corresponding to index
|
||||
chunk->contents = u16buf;
|
||||
chunk->length = i;
|
||||
chunk->nativeLimit = index;
|
||||
ut->q = map;
|
||||
chunk->offset = 0; // chunkOffset corresponding to index
|
||||
return TRUE;
|
||||
} else {
|
||||
// Reverse Access. The chunk buffer must be filled so as to contain the
|
||||
|
@ -635,7 +751,7 @@ utf8TextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk) {
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
chunk->limit=index;
|
||||
chunk->nativeLimit=index;
|
||||
c=s8[index-1];
|
||||
if(c<=0x7f) {
|
||||
// get a chunk of ASCII characters. Don't build the index map
|
||||
|
@ -684,10 +800,10 @@ utf8TextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk) {
|
|||
chunk->nonUTF16Indexes=TRUE;
|
||||
}
|
||||
// Common reverse iteration, for both UTF-16 and non-UTF-16 indexes.
|
||||
chunk->contents = u16buf+i;
|
||||
chunk->length = (UTF8_TEXT_CHUNK_SIZE)-i;
|
||||
chunk->start = index;
|
||||
chunk->offset = chunk->length; // chunkOffset corresponding to index
|
||||
chunk->contents = u16buf+i;
|
||||
chunk->length = (UTF8_TEXT_CHUNK_SIZE)-i;
|
||||
chunk->nativeStart = index;
|
||||
chunk->offset = chunk->length; // chunkOffset corresponding to index
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
@ -717,18 +833,20 @@ utf8TextExtract(UText *ut,
|
|||
|
||||
// Assume nonUTF16Indexes and 0<=offset<=chunk->length
|
||||
static int32_t U_CALLCONV
|
||||
utf8TextMapOffsetToNative(UText *ut, UTextChunk * /* chunk */, int32_t offset) {
|
||||
utf8TextMapOffsetToNative(UText *ut, int32_t offset) {
|
||||
// UText.q points to the index mapping array that is allocated in the extra storage area.
|
||||
U_ASSERT(offset>=0 && offset<=ut->chunk.length);
|
||||
int32_t *map=(int32_t *)(ut->q);
|
||||
return map[offset];
|
||||
}
|
||||
|
||||
// Assume nonUTF16Indexes and chunk->start<=index<=chunk->limit
|
||||
static int32_t U_CALLCONV
|
||||
utf8TextMapIndexToUTF16(UText *ut, UTextChunk * /*chunk */, int32_t index) {
|
||||
utf8TextMapIndexToUTF16(UText *ut, int32_t index) {
|
||||
int32_t *map=(int32_t *)(ut->q);
|
||||
int32_t offset=0;
|
||||
|
||||
U_ASSERT(index>=ut->chunk.nativeStart && index<=ut->chunk.nativeLimit);
|
||||
while(index>map[offset]) {
|
||||
++offset;
|
||||
}
|
||||
|
@ -752,9 +870,9 @@ utext_openUTF8(UText *ut, const uint8_t *s, int32_t length, UErrorCode *status)
|
|||
if (U_FAILURE(*status)) {
|
||||
return ut;
|
||||
}
|
||||
ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_NON_UTF16_INDEXES);
|
||||
|
||||
ut->clone = noopTextClone;
|
||||
ut->properties = utf8TextGetProperties;
|
||||
ut->length = utf8TextLength;
|
||||
ut->access = utf8TextAccess;
|
||||
ut->extract = utf8TextExtract;
|
||||
|
@ -777,190 +895,6 @@ U_CDECL_END
|
|||
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// UText implementation for SBCS strings (read-only)
|
||||
//
|
||||
// Use of UText data members:
|
||||
// context pointer to SBCS string
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
||||
enum { SBCS_TEXT_CHUNK_SIZE=10 };
|
||||
|
||||
struct SBCSText : public UText {
|
||||
/* pointer to SBCS-to-BMP mapping table */
|
||||
const UChar *toU;
|
||||
/* length of UTF-8 string (in bytes) */
|
||||
int32_t length;
|
||||
/* chunk UChars */
|
||||
UChar s[SBCS_TEXT_CHUNK_SIZE];
|
||||
};
|
||||
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
sbcsTextGetProperties(UText * /*t*/) {
|
||||
return
|
||||
I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_INEXPENSIVE);
|
||||
// not UTEXT_PROVIDER_STABLE_CHUNKS because chunk-related data is kept
|
||||
// in SBCSText, so only one at a time can be active
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
sbcsTextLength(UText *t) {
|
||||
return ((SBCSText *)t)->length;
|
||||
}
|
||||
|
||||
static UBool U_CALLCONV
|
||||
sbcsTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk) {
|
||||
SBCSText *ts=(SBCSText *)ut;
|
||||
const uint8_t *s8=(const uint8_t *)ts->context;
|
||||
int32_t i, count, length=ts->length;
|
||||
|
||||
chunk->nonUTF16Indexes=FALSE;
|
||||
|
||||
if(forward) {
|
||||
if(length<=index) {
|
||||
resetChunk(chunk, length);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
count=length-index;
|
||||
if(count>SBCS_TEXT_CHUNK_SIZE) {
|
||||
count=SBCS_TEXT_CHUNK_SIZE;
|
||||
}
|
||||
chunk->start=index;
|
||||
for(i=0; i<count; ++index, ++i) {
|
||||
ts->s[i]=ts->toU[s8[index]];
|
||||
}
|
||||
chunk->contents=ts->s;
|
||||
chunk->length=i;
|
||||
chunk->limit=index;
|
||||
chunk->offset = 0; // chunkOffset corresponding to index
|
||||
return TRUE;
|
||||
} else {
|
||||
if(index<=0) {
|
||||
resetChunk(chunk, 0);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if(index<=SBCS_TEXT_CHUNK_SIZE) {
|
||||
count=index;
|
||||
} else {
|
||||
count=SBCS_TEXT_CHUNK_SIZE;
|
||||
}
|
||||
chunk->limit=index;
|
||||
for(i=count; i>0;) {
|
||||
ts->s[--i]=ts->toU[s8[--index]];
|
||||
}
|
||||
chunk->contents=ts->s;
|
||||
chunk->length=count;
|
||||
chunk->start=index;
|
||||
chunk->offset=count; // chunkOffset corresponding to index
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
sbcsTextExtract(UText *t,
|
||||
int32_t start, int32_t limit,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UErrorCode *pErrorCode) {
|
||||
SBCSText *ts=(SBCSText *)t;
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
if(start<0 || start>limit || ts->length<limit) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
const uint8_t *s8=(const uint8_t *)ts->context+start;
|
||||
UChar *d=dest;
|
||||
const UChar *destLimit;
|
||||
int32_t destLength=limit-start;
|
||||
if(destLength>destCapacity) {
|
||||
destLength=destCapacity;
|
||||
}
|
||||
destLimit=dest+destLength;
|
||||
while(d<destLimit) {
|
||||
*d++=ts->toU[*s8++];
|
||||
}
|
||||
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
|
||||
}
|
||||
|
||||
static const UText sbcsText={
|
||||
UTEXT_INITIALZIER_HEAD,
|
||||
noopTextClone,
|
||||
sbcsTextGetProperties,
|
||||
sbcsTextLength,
|
||||
sbcsTextAccess,
|
||||
sbcsTextExtract,
|
||||
NULL, // replace
|
||||
NULL, // copy
|
||||
NULL, // mapOffsetToNative
|
||||
NULL, // mapIndexToUTF16
|
||||
NULL // close
|
||||
};
|
||||
|
||||
U_DRAFT UText * U_EXPORT2
|
||||
utext_openSBCS(UText * /*ut */,
|
||||
const UChar /* toU*/[256] ,
|
||||
const char *s, int32_t length,
|
||||
UErrorCode *pErrorCode) {
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
if(s==NULL || length<-1) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
SBCSText *ts=(SBCSText *)uprv_malloc(sizeof(SBCSText));
|
||||
if(ts==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
*((UText *)ts)=sbcsText;
|
||||
ts->context=s;
|
||||
if(length>=0) {
|
||||
ts->length=length;
|
||||
} else {
|
||||
ts->length=(int32_t)uprv_strlen(s);
|
||||
}
|
||||
return ts;
|
||||
}
|
||||
|
||||
U_DRAFT void U_EXPORT2
|
||||
utext_closeSBCS(UText *t) {
|
||||
if(t!=NULL) {
|
||||
uprv_free((SBCSText *)t);
|
||||
}
|
||||
}
|
||||
|
||||
U_DRAFT void U_EXPORT2
|
||||
utext_resetSBCS(UText *t, const char *s, int32_t length, UErrorCode *pErrorCode) {
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
if(s==NULL || length<-1) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
SBCSText *ts=(SBCSText *)t;
|
||||
ts->context=s;
|
||||
if(length>=0) {
|
||||
ts->length=length;
|
||||
} else {
|
||||
ts->length=(int32_t)uprv_strlen(s);
|
||||
}
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
/* UText implementation wrapper for Replaceable (read/write) ---------------- */
|
||||
|
||||
|
@ -1283,7 +1217,6 @@ unistrTextClone(UText * /* dest */, const UText * /*src*/, UBool /*deep*/, UErro
|
|||
static int32_t U_CALLCONV
|
||||
unistrTextGetProperties(UText * /*t*/) {
|
||||
return
|
||||
I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_INEXPENSIVE)|
|
||||
I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS)|
|
||||
I32_FLAG(UTEXT_PROVIDER_WRITABLE);
|
||||
}
|
||||
|
@ -1299,13 +1232,13 @@ unistrTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk) {
|
|||
const UnicodeString *us = (const UnicodeString *)ut->context;
|
||||
int32_t length = us->length();
|
||||
|
||||
if (chunk->limit != length) {
|
||||
if (chunk->nativeLimit != length) {
|
||||
// This chunk is not yet set up. Do it now.
|
||||
chunk->contents=us->getBuffer();
|
||||
chunk->length=length;
|
||||
chunk->start=0;
|
||||
chunk->limit=length;
|
||||
chunk->nonUTF16Indexes=FALSE;
|
||||
chunk->contents = us->getBuffer();
|
||||
chunk->length = length;
|
||||
chunk->nativeStart = 0;
|
||||
chunk->nativeLimit = length;
|
||||
chunk->nonUTF16Indexes = FALSE;
|
||||
}
|
||||
|
||||
// pin the requested index to the bounds of the string,
|
||||
|
@ -1423,7 +1356,6 @@ utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
|
|||
ut = utext_setup(ut, 0, status);
|
||||
if (U_SUCCESS(*status)) {
|
||||
ut->clone = unistrTextClone;
|
||||
ut->properties = unistrTextGetProperties;
|
||||
ut->length = unistrTextLength;
|
||||
ut->access = unistrTextAccess;
|
||||
ut->extract = unistrTextExtract;
|
||||
|
@ -1431,6 +1363,205 @@ utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
|
|||
ut->copy = unistrTextCopy;
|
||||
|
||||
ut->context = s;
|
||||
ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS)|
|
||||
I32_FLAG(UTEXT_PROVIDER_WRITABLE);
|
||||
}
|
||||
return ut;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// UText implementation for const UChar * strings
|
||||
//
|
||||
// Use of UText data members:
|
||||
// context pointer to the const UChar * string
|
||||
// a length. -1 if not yet known.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
|
||||
//
//  ucstrTextClone
//      Placeholder clone function for UChar * string text.
//      Cloning is not implemented here: every argument is ignored and NULL
//      is returned.
//      TODO: fix this.
//
static UText * U_CALLCONV
ucstrTextClone(UText *         /* dest   */,
               const UText *   /* src    */,
               UBool           /* deep   */,
               UErrorCode *    /* status */) {
    return NULL;
}
|
||||
|
||||
|
||||
//
//  ucstrTextLength
//      Length function for UChar * string text.
//
//      UText.a caches the length, with a negative value meaning that the
//      string is NUL terminated and its end has not been found yet.  In that
//      case the string is scanned onward from the current chunk limit until
//      the terminator is reached, and the result is recorded in UText.a, the
//      chunk limit and the chunk length.  The LENGTH_IS_EXPENSIVE provider
//      flag is cleared because the answer is now cached.
//
//      The access function is not used for the scan because the current
//      iteration position must not be changed here.
//
static int32_t U_CALLCONV
ucstrTextLength(UText *ut) {
    if (ut->a >= 0) {
        // Length already known.
        return ut->a;
    }

    const UChar *text = (const UChar *)ut->context;
    int32_t end = ut->chunk.nativeLimit;
    while (text[end] != 0) {
        ++end;
    }

    ut->chunk.nativeLimit = end;
    ut->chunk.length      = end;
    ut->a                 = end;
    ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    return end;
}
|
||||
|
||||
|
||||
//
//  ucstrTextAccess
//      Access function for UChar * string text.
//
//      The whole string is exposed as a single chunk starting at native index 0.
//      For NUL terminated strings of unknown length (UText.a < 0) the chunk
//      grows as the string is scanned, at least 32 UChars at a time.
//
//      @param ut       the UText; ut->context points to the UChar string.
//      @param index    requested native index; pinned to the bounds of the text.
//      @param forward  TRUE if the caller will iterate forward from index,
//                      FALSE for backwards.
//      @param chunk    receives the chunk offset corresponding to index.
//      @return         TRUE if there is text in the requested direction:
//                      forward and index is below the chunk limit, or
//                      backward and index is above zero.
//
static UBool U_CALLCONV
ucstrTextAccess(UText *ut, int32_t index, UBool forward, UTextChunk *chunk) {
    const UChar *str = (const UChar *)ut->context;

    // pin the requested index to the bounds of the string,
    //  and set current iteration position.
    if (index<0) {
        index = 0;
    } else if (index < ut->chunk.nativeLimit) {
        // The requested data is within the chunk as it is known so far.
        // There is nothing more that needs to be done within this access function.
    } else if (ut->a >= 0) {
        // We know the length of this string, and the user is requesting something
        // at or beyond the length.  Trim the requested index to the length.
        index = ut->a;
    } else {
        // Null terminated string, length not yet known.
        // Scan down another 32 UChars or to the requested index, whichever is further.
        int32_t scanLimit = ut->chunk.nativeLimit + 32;
        if (scanLimit <= index) {
            // index+1 would overflow for index==INT32_MAX; without this guard
            // the scan loop would be skipped entirely and the surrogate check
            // below could read in front of the string.
            scanLimit = (index < INT32_MAX) ? index+1 : index;
        }

        UBool foundEnd = FALSE;
        for (; ut->chunk.nativeLimit<scanLimit; ut->chunk.nativeLimit++) {
            if (str[ut->chunk.nativeLimit] == 0) {
                // We found the end of the string.  Remember it and trim the index to it.
                ut->a = ut->chunk.nativeLimit;
                if (index > ut->chunk.nativeLimit) {
                    index = ut->chunk.nativeLimit;
                }
                ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
                foundEnd = TRUE;
                break;
            }
        }

        if (!foundEnd) {
            // We scanned through the next batch of UChars without finding the end.
            // The endpoint of a chunk must not be left in the middle of a surrogate pair.
            // If the current end is on a lead surrogate, back the end up by one.
            // It doesn't matter if the end char happens to be an unpaired surrogate,
            //  and it's simpler not to worry about it.
            // NOTE(review): backing up can leave index == nativeLimit, in which case
            //  a forward request reports FALSE although text follows - confirm.
            if (U16_IS_LEAD(str[ut->chunk.nativeLimit-1])) {
                --ut->chunk.nativeLimit;
            }
        }

        // Keep the chunk length in step with the chunk limit in every case.
        // The iteration functions compare the offset against chunk.length, so a
        // stale length makes them call back into access for text that has
        // already been scanned.
        ut->chunk.length = ut->chunk.nativeLimit;
    }

    chunk->offset = index;

    // Check whether request is at the start or end
    UBool retVal = (forward && index<ut->chunk.nativeLimit) || (!forward && index>0);
    return retVal;
}
|
||||
|
||||
|
||||
|
||||
//
//  ucstrTextExtract
//      Extract function for UChar * string text.
//
//      @param ut            the UText; ut->context points to the UChar string,
//                           ut->a holds its length or is negative if unknown.
//      @param start         native index of the first UChar to extract.
//      @param limit         native index just past the last UChar to extract;
//                           trimmed to the string length when that is known.
//      @param dest          destination buffer, may be NULL if destCapacity is 0.
//      @param destCapacity  capacity of dest, in UChars.
//      @param pErrorCode    set to U_ILLEGAL_ARGUMENT_ERROR for a bad destination,
//                           U_INDEX_OUTOFBOUNDS_ERROR for start<0 or start>limit;
//                           also updated by u_terminateUChars().
//      @return              number of UChars in the requested range, which may
//                           exceed destCapacity; 0 on error.
//
static int32_t U_CALLCONV
ucstrTextExtract(UText *ut,
                 int32_t start, int32_t limit,
                 UChar *dest, int32_t destCapacity,
                 UErrorCode *pErrorCode) {

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const UChar *s=(const UChar *)ut->context;
    int32_t strLength=ut->a;
    int32_t si, di;

    // If text is null terminated and we haven't yet scanned down as far as the
    //  requested range, do it now.  Note that this moves the chunk offset.
    if (strLength<0 && limit>=ut->chunk.nativeLimit) {
        ucstrTextAccess(ut, start, TRUE, &ut->chunk);
        // The scan may have discovered the end of the string.  Pick up the
        // length so that the limit is trimmed below; otherwise a start beyond
        // the terminator would be read from directly.
        strLength = ut->a;
    }

    // Raise an error if starting position is outside of the string.
    if(start<0 || start>limit) {
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    if (strLength >= 0 && limit > strLength) {
        // String length is known.  Trim requested limit to be no more than the length
        limit = strLength;
    }

    di = 0;
    for (si=start; si<limit; si++) {
        if (strLength<0 && s[si]==0) {
            // Just hit the end of a null-terminated string.
            ut->a = si;               // set string length for this UText
            ut->chunk.nativeLimit = si;
            ut->chunk.length      = si;
            // The length is cached now, the same as in the length and access functions.
            ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
            break;
        }
        if (di<destCapacity) {
            // only store if there is space.
            dest[di] = s[si];
        } else {
            if (strLength>=0) {
                // We have filled the destination buffer, and the string length is known.
                //  Cut the loop short.  There is no need to scan for string termination.
                //  The result is the size of the requested range, not of the whole string.
                di = limit - start;
                break;
            }
        }
        di++;
    }

    u_terminateUChars(dest, destCapacity, di, pErrorCode);
    return di;
}
|
||||
|
||||
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
|
||||
/**
 * Open a read-only UText over a UChar (UTF-16) string.
 *
 * The string is not copied: the UText keeps the pointer s as its context and
 * as the contents of its single chunk.  The replace and copy provider
 * functions are left NULL.
 *
 * @param ut      passed through to utext_setup(), which supplies the UText
 *                that is initialized and returned.
 * @param s       the string.  May be NULL only when length is 0.
 * @param length  the number of UChars in s, or -1 if s is NUL-terminated.
 *                With -1 the length is not yet known, the provider is flagged
 *                UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE, and the initial chunk is empty.
 * @param status  in/out ICU error code.  Set to U_ILLEGAL_ARGUMENT_ERROR if
 *                length < -1, or if s is NULL while length is not 0.
 * @return        the UText from utext_setup(), or NULL if the arguments were
 *                rejected or status already indicated a failure on entry.
 */
U_DRAFT UText * U_EXPORT2
utext_openUChars(UText *ut, const UChar *s, int32_t length, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return NULL;
    }
    // Validate before utext_setup() so that nothing is set up for a bad request.
    // A NULL string with a non-zero (or unknown) length would be dereferenced
    // on the first text access.
    if (length < -1 || (s == NULL && length != 0)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    ut = utext_setup(ut, 0, status);
    if (U_SUCCESS(*status)) {
        // Provider functions.
        ut->clone    = noopTextClone;
        ut->length   = ucstrTextLength;
        ut->access   = ucstrTextAccess;
        ut->extract  = ucstrTextExtract;
        ut->replace  = NULL;
        ut->copy     = NULL;

        ut->context  = s;
        ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
        if (length==-1) {
            ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
        }
        ut->a = length;     // string length; negative means "NUL-terminated, not yet known"

        // The whole string is exposed as one chunk.  For a NUL-terminated
        // string the chunk starts out empty.
        ut->chunk.contents        = s;
        ut->chunk.nativeStart     = 0;
        ut->chunk.nativeLimit     = length>=0? length : 0;
        ut->chunk.nonUTF16Indexes = FALSE;
    }
    return ut;
}
|
||||
|
|
|
@ -15,8 +15,10 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/utext.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "cintltst.h"
|
||||
|
||||
#include "memory.h"
|
||||
#include "string.h"
|
||||
|
||||
|
||||
static void TestAPI(void);
|
||||
|
@ -45,7 +47,7 @@ addUTextTest(TestNode** root)
|
|||
/*
|
||||
* TestAPI verifies that the UText API is accessible from C programs.
|
||||
* This is not intended to be a complete test of the API functionality. That is
|
||||
* in the C++ intltest program.
|
||||
* in the C++ intltest program.
|
||||
* This test is intended to check that everything can be accessed and built in
|
||||
* a pure C environment.
|
||||
*/
|
||||
|
@ -55,19 +57,193 @@ static void TestAPI(void) {
|
|||
UErrorCode status = U_ZERO_ERROR;
|
||||
UBool gFailed = FALSE;
|
||||
|
||||
UText utLoc = UTEXT_INITIALIZER;
|
||||
const char * cString = "Hello, World";
|
||||
UChar uString[] = {0x41, 0x42, 0x43, 0};
|
||||
uint8_t *utf8String;
|
||||
UText *uta;
|
||||
UText *utb;
|
||||
// Open
|
||||
{
|
||||
UText utLoc = UTEXT_INITIALIZER;
|
||||
const char * cString = "Hello, World";
|
||||
UChar uString[] = {0x41, 0x42, 0x43, 0};
|
||||
uint8_t *utf8String;
|
||||
UText *uta;
|
||||
UText *utb;
|
||||
UChar c;
|
||||
|
||||
utf8String = (uint8_t *)cString;
|
||||
uta = utext_openUTF8(&utLoc, utf8String, -1, &status);
|
||||
TEST_SUCCESS(status);
|
||||
TEST_ASSERT(uta == &utLoc);
|
||||
status = U_ZERO_ERROR;
|
||||
uta = utext_openUChars(NULL, uString, -1, &status);
|
||||
TEST_SUCCESS(status);
|
||||
c = utext_next32(uta);
|
||||
TEST_ASSERT(c == 0x41);
|
||||
utb = utext_close(uta);
|
||||
TEST_ASSERT(utb == NULL);
|
||||
|
||||
uta = utext_close(&utLoc);
|
||||
TEST_ASSERT(uta == &utLoc);
|
||||
utf8String = (uint8_t *)cString;
|
||||
uta = utext_openUTF8(&utLoc, utf8String, -1, &status);
|
||||
TEST_SUCCESS(status);
|
||||
TEST_ASSERT(uta == &utLoc);
|
||||
|
||||
uta = utext_close(&utLoc);
|
||||
TEST_ASSERT(uta == &utLoc);
|
||||
}
|
||||
|
||||
// utext_clone()
|
||||
{
|
||||
UChar uString[] = {0x41, 0x42, 0x43, 0};
|
||||
int len;
|
||||
UText *uta;
|
||||
UText *utb;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
uta = utext_openUChars(NULL, uString, -1, &status);
|
||||
TEST_SUCCESS(status);
|
||||
utb = utext_clone(NULL, uta, FALSE, &status);
|
||||
TEST_SUCCESS(status);
|
||||
TEST_ASSERT(utb != NULL);
|
||||
TEST_ASSERT(utb != uta);
|
||||
len = utext_length(uta);
|
||||
TEST_ASSERT(len == u_strlen(uString));
|
||||
utext_close(uta);
|
||||
utext_close(utb);
|
||||
}
|
||||
|
||||
// basic access functions
|
||||
{
|
||||
UChar uString[] = {0x41, 0x42, 0x43, 0};
|
||||
UText *uta;
|
||||
UChar32 c;
|
||||
int32_t len;
|
||||
UBool b;
|
||||
int32_t i;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
uta = utext_openUChars(NULL, uString, -1, &status);
|
||||
TEST_ASSERT(uta!=NULL);
|
||||
TEST_SUCCESS(status);
|
||||
b = utext_isLengthExpensive(uta);
|
||||
TEST_ASSERT(b==TRUE);
|
||||
len = utext_length(uta);
|
||||
TEST_ASSERT(len == u_strlen(uString));
|
||||
b = utext_isLengthExpensive(uta);
|
||||
TEST_ASSERT(b==FALSE);
|
||||
|
||||
c = utext_char32At(uta, 0);
|
||||
TEST_ASSERT(c==uString[0]);
|
||||
|
||||
c = utext_current(uta);
|
||||
TEST_ASSERT(c==uString[0]);
|
||||
|
||||
c = utext_next32(uta);
|
||||
TEST_ASSERT(c==uString[0]);
|
||||
c = utext_current(uta);
|
||||
TEST_ASSERT(c==uString[1]);
|
||||
|
||||
c = utext_previous32(uta);
|
||||
TEST_ASSERT(c==uString[0]);
|
||||
c = utext_current(uta);
|
||||
TEST_ASSERT(c==uString[0]);
|
||||
|
||||
c = utext_next32From(uta, 1);
|
||||
TEST_ASSERT(c==uString[1]);
|
||||
c = utext_next32From(uta, u_strlen(uString));
|
||||
TEST_ASSERT(c==U_SENTINEL);
|
||||
|
||||
c = utext_previous32From(uta, 2);
|
||||
TEST_ASSERT(c==uString[1]);
|
||||
i = utext_getIndex(uta);
|
||||
TEST_ASSERT(i == 1);
|
||||
|
||||
utext_setIndex(uta, 0);
|
||||
b = utext_moveIndex(uta, 1);
|
||||
TEST_ASSERT(b==TRUE);
|
||||
i = utext_getIndex(uta);
|
||||
TEST_ASSERT(i==1);
|
||||
|
||||
b = utext_moveIndex(uta, u_strlen(uString)-1);
|
||||
TEST_ASSERT(b==TRUE);
|
||||
i = utext_getIndex(uta);
|
||||
TEST_ASSERT(i==u_strlen(uString));
|
||||
|
||||
b = utext_moveIndex(uta, 1);
|
||||
TEST_ASSERT(b==FALSE);
|
||||
i = utext_getIndex(uta);
|
||||
TEST_ASSERT(i==u_strlen(uString));
|
||||
|
||||
utext_setIndex(uta, 0);
|
||||
c = UTEXT_NEXT32(uta);
|
||||
TEST_ASSERT(c==uString[0]);
|
||||
c = utext_current(uta);
|
||||
TEST_ASSERT(c==uString[1]);
|
||||
|
||||
c = UTEXT_PREVIOUS32(uta);
|
||||
TEST_ASSERT(c==uString[0]);
|
||||
c = UTEXT_PREVIOUS32(uta);
|
||||
TEST_ASSERT(c==U_SENTINEL);
|
||||
|
||||
|
||||
utext_close(uta);
|
||||
}
|
||||
|
||||
{
|
||||
//
|
||||
// extract
|
||||
//
|
||||
UText *uta;
|
||||
UChar uString[] = {0x41, 0x42, 0x43, 0};
|
||||
UChar buf[100];
|
||||
int32_t i;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
uta = utext_openUChars(NULL, uString, -1, &status);
|
||||
TEST_SUCCESS(status);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
i = utext_extract(uta, 0, 100, NULL, 0, &status);
|
||||
TEST_ASSERT(status==U_BUFFER_OVERFLOW_ERROR);
|
||||
TEST_ASSERT(i == u_strlen(uString));
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
memset(buf, 0, sizeof(buf));
|
||||
i = utext_extract(uta, 0, 100, buf, 100, &status);
|
||||
TEST_SUCCESS(status);
|
||||
TEST_ASSERT(i == u_strlen(uString));
|
||||
i = u_strcmp(uString, buf);
|
||||
TEST_ASSERT(i == 0);
|
||||
utext_close(uta);
|
||||
}
|
||||
|
||||
{
|
||||
//
|
||||
// Copy, Replace, isWritable
|
||||
// Can't create an editable UText from plain C, so all we
|
||||
// can easily do is check that errors are returned.
|
||||
UText *uta;
|
||||
UChar uString[] = {0x41, 0x42, 0x43, 0};
|
||||
UBool b;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
uta = utext_openUChars(NULL, uString, -1, &status);
|
||||
TEST_SUCCESS(status);
|
||||
|
||||
b = utext_isWriteble(uta);
|
||||
TEST_ASSERT(b == FALSE);
|
||||
|
||||
b = utext_hasMetaData(uta);
|
||||
TEST_ASSERT(b == FALSE);
|
||||
|
||||
utext_replace(uta,
|
||||
0, 1, /* start, limit */
|
||||
uString, -1, /* replacement, replacement length */
|
||||
&status);
|
||||
TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
|
||||
|
||||
|
||||
utext_copy(uta,
|
||||
0, 1, /* start, limit */
|
||||
2, /* destination index */
|
||||
FALSE, /* move flag */
|
||||
&status);
|
||||
TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -285,31 +285,6 @@ void UTextTest::TestAccess(const UnicodeString &us, UText *ut, int cpCount, m *c
|
|||
}
|
||||
}
|
||||
|
||||
cpIndex = 0;
|
||||
for (i=0; i<cpCount; i++) {
|
||||
cpIndex = (cpIndex + 9973) % cpCount;
|
||||
index = cpMap[cpIndex].nativeIdx;
|
||||
expectedC = cpMap[cpIndex].cp;
|
||||
foundC = UTEXT_NEXT32FROM(ut, index);
|
||||
TEST_ASSERT(expectedC == foundC);
|
||||
TEST_ASSERT(expectedIndex == foundIndex);
|
||||
if (gFailed) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
cpIndex = 0;
|
||||
for (i=0; i<cpCount; i++) {
|
||||
cpIndex = (cpIndex + 9973) % cpCount;
|
||||
index = cpMap[cpIndex+1].nativeIdx;
|
||||
expectedC = cpMap[cpIndex].cp;
|
||||
foundC = UTEXT_PREVIOUS32FROM(ut, index);
|
||||
TEST_ASSERT(expectedC == foundC);
|
||||
TEST_ASSERT(expectedIndex == foundIndex);
|
||||
if (gFailed) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// moveIndex(int32_t delta);
|
||||
|
|
Loading…
Add table
Reference in a new issue