diff --git a/icu4c/source/common/unicode/ucnv.h b/icu4c/source/common/unicode/ucnv.h
index 69094ca380f..e819936c481 100644
--- a/icu4c/source/common/unicode/ucnv.h
+++ b/icu4c/source/common/unicode/ucnv.h
@@ -13,6 +13,7 @@
* 04/04/99 helena Fixed internal header inclusion.
* 05/11/00 helena Added setFallback and usesFallback APIs.
* 06/29/2000 helena Major rewrite of the callback APIs.
+ * 12/07/2000 srl Update of documentation
*/
/**
@@ -37,13 +38,18 @@ typedef struct UConverter UConverter;
U_CDECL_BEGIN
-/*maximum length of the converter names */
+/* maximum length of the converter names */
#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
#define UCNV_SI 0x0F /*Shift in for EBDCDIC_STATEFUL and iso2022 states */
#define UCNV_SO 0x0E /*Shift out for EBDCDIC_STATEFUL and iso2022 states */
+/**
+ * Enum for specifying basic types of converters
+ * @see ucnv_getType
+ */
+
typedef enum {
UCNV_UNSUPPORTED_CONVERTER = -1,
UCNV_SBCS = 0,
@@ -78,11 +84,29 @@ typedef enum {
} UConverterType;
+/**
+ * Enum for specifying which platform a converter ID refers to
+ * @see ucnv_getPlatform
+ * @see ucnv_openCCSID
+ */
+
typedef enum {
UCNV_UNKNOWN = -1,
UCNV_IBM = 0
} UConverterPlatform;
+/**
+ * Function pointer for error callback in the codepage to unicode direction.
+ * Called when an error has occurred in conversion to unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codePoints Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @see ucnv_setToUCallBack
+ * @see UConverterToUnicodeArgs
+ */
+
typedef void (*UConverterToUCallback) (
void* context,
UConverterToUnicodeArgs *args,
@@ -91,12 +115,24 @@ typedef void (*UConverterToUCallback) (
UConverterCallbackReason reason,
UErrorCode *);
+/**
+ * Function pointer for error callback in the unicode to codepage direction.
+ * Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in UChars) of the concerned Unicode sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @see ucnv_setFromUCallBack
+ */
+
typedef void (*UConverterFromUCallback) (
void* context,
UConverterFromUnicodeArgs *args,
const UChar* codeUnits,
int32_t length,
- UChar32 codePoint, /* HSYS: why can't just use the macros on the code units? */
+ UChar32 codePoint,
UConverterCallbackReason reason,
UErrorCode *);
@@ -108,7 +144,10 @@ U_CDECL_END
*/
#define UCNV_OPTION_SEP_CHAR ','
-/** String version of UCNV_OPTION_SEP_CHAR. */
+/**
+ * String version of UCNV_OPTION_SEP_CHAR.
+ * @see ucnv_open
+ */
#define UCNV_OPTION_SEP_STRING ","
/**
@@ -117,7 +156,10 @@ U_CDECL_END
*/
#define UCNV_VALUE_SEP_CHAR '='
-/** String version of UCNV_VALUE_SEP_CHAR. */
+/**
+ * String version of UCNV_VALUE_SEP_CHAR.
+ * @see ucnv_open
+ */
#define UCNV_VALUE_SEP_STRING "="
/**
@@ -137,6 +179,7 @@ U_CDECL_END
* @return 0 if the names match, or a negative value if the name1
* lexically precedes name2, or a positive value if the name1
* lexically follows name2.
+ * @draft
*/
U_CAPI int U_EXPORT2
ucnv_compareNames(const char *name1, const char *name2);
@@ -170,52 +213,59 @@ ucnv_compareNames(const char *name1, const char *name2);
* @stable
*/
-U_CAPI
-UConverter* U_EXPORT2 ucnv_open (const char *converterName, UErrorCode * err);
+U_CAPI UConverter* U_EXPORT2
+ucnv_open (const char *converterName, UErrorCode * err);
/**
- * Creates a Unicode converter with the names specified as unicode string. The name should be limited to
- * the ASCII-7 alphanumerics range.
+ * Creates a Unicode converter with the names specified as unicode string.
+ * The name should be limited to the ASCII-7 alphanumerics range.
* The actual name will be resolved with the alias file
* using a case-insensitive string comparison that ignores
* the delimiters '-', '_', and ' ' (dash, underscore, and space).
* E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent.
- * If NULL is passed for the converter name, it will create one with the
- * getDefaultName return value.
- * @param converterName : name of the uconv table in a zero terminated Unicode string
- * @param err outgoing error status U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND
- * @return the created Unicode converter object, or NULL if an error occured
+ * If NULL is passed for the converter name, it will create
+ * one with the ucnv_getDefaultName() return value.
+ * @param converterName : name of the uconv table in a zero terminated
+ * Unicode string
+ * @param err outgoing error status U_MEMORY_ALLOCATION_ERROR,
+ * TABLE_NOT_FOUND
+ * @return the created Unicode converter object, or NULL if an
+ * error occurred
* @see ucnv_open
* @see ucnv_openCCSID
* @see ucnv_close
+ * @see ucnv_getDefaultName
* @stable
*/
-U_CAPI UConverter* U_EXPORT2 ucnv_openU (const UChar * name,
- UErrorCode * err);
-
-
+U_CAPI UConverter* U_EXPORT2
+ucnv_openU (const UChar * name,
+ UErrorCode * err);
/**
- * Creates a UConverter object using a CCSID number.
+ * Creates a UConverter object from a CCSID number and platform pair
*
- * @param codepage : codepage # of the uconv table
- * @param platform : codepage's platform (now only IBM supported)
+ * @param codepage codepage number to create
+ * @param platform the platform in which the codepage number exists
* @param err error status U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND
- * @return the created Unicode converter object, or NULL if and error occured
+ * @return the created Unicode converter object, or NULL if an error
+ * occurred.
* @see ucnv_open
* @see ucnv_openU
* @see ucnv_close
+ * @see UConverterPlatform
* @stable
*/
-U_CAPI UConverter* U_EXPORT2 ucnv_openCCSID (int32_t codepage,
- UConverterPlatform platform,
- UErrorCode * err);
-
+U_CAPI UConverter* U_EXPORT2
+ucnv_openCCSID (int32_t codepage,
+ UConverterPlatform platform,
+ UErrorCode * err);
/**
- * Deletes the unicode converter.
+ * Deletes the unicode converter and releases resources associated
+ * with just this instance.
+ * Does not free up shared converter tables.
*
* @param converter the converter object to be deleted
* @see ucnv_open
@@ -223,18 +273,19 @@ U_CAPI UConverter* U_EXPORT2 ucnv_openCCSID (int32_t codepage,
* @see ucnv_openCCSID
* @stable
*/
-U_CAPI void U_EXPORT2 ucnv_close (UConverter * converter);
-
+U_CAPI void U_EXPORT2
+ucnv_close (UConverter * converter);
/**
* Fills in the output parameter, subChars, with the substitution characters
* as multiple bytes.
*
- * @param converter: the Unicode converter
- * @param subChars: the subsitution characters
- * @param len: on input the capacity of subChars, on output the number of bytes copied to it
- * @param err: the outgoing error status code.
+ * @param converter the Unicode converter
+ * @param subChars the substitution characters
+ * @param len on input the capacity of subChars, on output the number
+ * of bytes copied to it
+ * @param err the outgoing error status code.
* If the substitution character array is too small, an
* U_INDEX_OUTOFBOUNDS_ERROR will be returned.
* @see ucnv_setSubstChars
@@ -242,15 +293,15 @@ U_CAPI void U_EXPORT2 ucnv_close (UConverter * converter);
*/
U_CAPI void U_EXPORT2
- ucnv_getSubstChars (const UConverter * converter,
- char *subChars,
- int8_t * len,
- UErrorCode * err);
+ucnv_getSubstChars (const UConverter * converter,
+ char *subChars,
+ int8_t * len,
+ UErrorCode * err);
/**
* Sets the substitution chars when converting from unicode to a codepage. The
- * substitution is specified as a string of 1-4 bytes, and may contain NULL byte.
- * The fill-in parameter err will get the error status on return.
+ * substitution is specified as a string of 1-4 bytes, and may contain
+ * NULL byte.
* @param converter the Unicode converter
* @param subChars the substitution character byte sequence we want set
* @param len the number of bytes in subChars
@@ -261,61 +312,60 @@ U_CAPI void U_EXPORT2
*/
U_CAPI void U_EXPORT2
- ucnv_setSubstChars (UConverter * converter,
- const char *subChars,
- int8_t len,
- UErrorCode * err);
-
-
+ucnv_setSubstChars (UConverter * converter,
+ const char *subChars,
+ int8_t len,
+ UErrorCode * err);
/**
* Fills in the output parameter, errBytes, with the error characters from the
* last failing conversion.
*
- * @param converter: the Unicode converter
- * @param errBytes: the bytes in error
- * @param len: on input the capacity of errBytes, on output the number of bytes copied to it
- * @param err: the outgoing error status code.
+ * @param converter the Unicode converter
+ * @param errBytes the codepage bytes which were in error
+ * @param len on input the capacity of errBytes, on output the number of
+ * bytes which were copied to it
+ * @param err the error status code.
* If the substitution character array is too small, an
* U_INDEX_OUTOFBOUNDS_ERROR will be returned.
* @stable
*/
U_CAPI void U_EXPORT2
- ucnv_getInvalidChars (const UConverter * converter,
- char *errBytes,
- int8_t * len,
- UErrorCode * err);
-
+ucnv_getInvalidChars (const UConverter * converter,
+ char *errBytes,
+ int8_t * len,
+ UErrorCode * err);
/**
* Fills in the output parameter, errChars, with the error characters from the
* last failing conversion.
*
- * @param converter: the Unicode converter
- * @param errUChars: the UChars in error
- * @param len: on input the capacity of errUChars, on output the number of UChars copied to it
- * @param err: the outgoing error status code.
+ * @param converter the Unicode converter
+ * @param errUChars the UChars which were in error
+ * @param len on input the capacity of errUChars, on output the number of
+ * UChars which were copied to it
+ * @param err the error status code.
* If the substitution character array is too small, an
* U_INDEX_OUTOFBOUNDS_ERROR will be returned.
* @stable
*/
U_CAPI void U_EXPORT2
- ucnv_getInvalidUChars (const UConverter * converter,
- UChar *errUChars,
- int8_t * len,
- UErrorCode * err);
+ucnv_getInvalidUChars (const UConverter * converter,
+ UChar *errUChars,
+ int8_t * len,
+ UErrorCode * err);
/**
* Resets the state of a converter to the default state. This is used
- * in the case of error to restart a conversion from a known default state.
+ * in the case of an error, to restart a conversion from a known default state.
* It will also empty the internal output buffers.
* @param converter the Unicode converter
* @stable
*/
U_CAPI void U_EXPORT2
- ucnv_reset (UConverter * converter);
+ucnv_reset (UConverter * converter);
/**
* Resets the to-Unicode part of a converter state to the default state.
@@ -325,8 +375,9 @@ U_CAPI void U_EXPORT2
* @param converter the Unicode converter
* @draft
*/
+
U_CAPI void
- ucnv_resetToUnicode(UConverter *converter);
+ucnv_resetToUnicode(UConverter *converter);
/**
* Resets the from-Unicode part of a converter state to the default state.
@@ -337,99 +388,104 @@ U_CAPI void
* @draft
*/
U_CAPI void
- ucnv_resetFromUnicode(UConverter *converter);
+ucnv_resetFromUnicode(UConverter *converter);
/**
- * Returns the maximum length of bytes used by a character. This varies between 1 and 4
+ * Returns the maximum length of bytes used by a character. This varies
+ * between 1 and 4
* @param converter the Unicode converter
* @return the maximum number of bytes allowed by this particular converter
* @see ucnv_getMinCharSize
* @stable
*/
U_CAPI int8_t U_EXPORT2
- ucnv_getMaxCharSize (const UConverter * converter);
+ucnv_getMaxCharSize (const UConverter * converter);
/**
- * Returns the minimum byte length for characters in this codepage. This is either
- * 1 or 2 for all supported codepages.
+ * Returns the minimum byte length for characters in this codepage.
+ * This is either 1 or 2 for all supported codepages.
* @param converter the Unicode converter
* @return the minimum number of bytes allowed by this particular converter
* @see ucnv_getMaxCharSize
* @stable
*/
U_CAPI int8_t U_EXPORT2
- ucnv_getMinCharSize (const UConverter * converter);
-
+ucnv_getMinCharSize (const UConverter * converter);
/**
- * Returns the display name of the converter passed in based on the Locale passed in,
- * in the case the locale contains no display name, the internal ASCII name will be
- * filled in.
+ * Returns the display name of the converter passed in based on the Locale
+ * passed in. If the locale contains no display name, the internal ASCII
+ * name will be filled in.
*
* @param converter the Unicode converter.
* @param displayLocale is the specific Locale we want to localised for
* @param displayName user provided buffer to be filled in
* @param displayNameCapacty size of displayName Buffer
- * @param err: outgoing error code.
+ * @param err error status code
* @return displayNameLength number of UChar needed in displayName
* @see ucnv_getName
* @stable
*/
-U_CAPI
- int32_t U_EXPORT2 ucnv_getDisplayName (const UConverter * converter,
- const char *displayLocale,
- UChar * displayName,
- int32_t displayNameCapacity,
- UErrorCode * err);
+U_CAPI int32_t U_EXPORT2
+ucnv_getDisplayName (const UConverter * converter,
+ const char *displayLocale,
+ UChar * displayName,
+ int32_t displayNameCapacity,
+ UErrorCode * err);
/**
- * Gets the name of the converter (zero-terminated).
- * the name will be the internal name of the converter, the lifetime of the returned
- * string will be that of the converter passed to this function.
+ * Gets the internal, canonical name of the converter (zero-terminated).
+ * The lifetime of the returned string will be that of the converter
+ * passed to this function.
* @param converter the Unicode converter
* @param err UErrorCode status
* @return the internal name of the converter
* @see ucnv_getDisplayName
* @stable
*/
-U_CAPI
- const char * U_EXPORT2 ucnv_getName (const UConverter * converter, UErrorCode * err);
+
+U_CAPI const char * U_EXPORT2
+ucnv_getName (const UConverter * converter, UErrorCode * err);
/**
* Gets a codepage number associated with the converter. This is not guaranteed
* to be the one used to create the converter. Some converters do not represent
- * IBM registered codepages and return zero for the codepage number.
- * The error code fill-in parameter indicates if the codepage number is available.
+ * platform registered codepages and return zero for the codepage number.
+ * The error code fill-in parameter indicates if the codepage number
+ * is available.
+ * Does not check if the converter is NULL or if converter's data
+ * table is NULL.
* @param converter the Unicode converter
* @param err the error status code.
- * the converter is NULL or if converter's data table is NULL.
* @return If any error occurrs, -1 will be returned otherwise, the codepage number
* will be returned
* @stable
*/
U_CAPI int32_t U_EXPORT2
- ucnv_getCCSID (const UConverter * converter,
- UErrorCode * err);
+ucnv_getCCSID (const UConverter * converter,
+ UErrorCode * err);
/**
- * Gets a codepage platform associated with the converter. Currently, only IBM is supported
- * The error code fill-in parameter indicates if the codepage number is available.
+ * Gets a codepage platform associated with the converter. Currently,
+ * only UCNV_IBM will be returned.
+ * Does not test if the converter is NULL or if converter's data
+ * table is NULL.
* @param converter the Unicode converter
* @param err the error status code.
- * the converter is NULL or if converter's data table is NULL.
* @return The codepage platform
* @stable
*/
U_CAPI UConverterPlatform U_EXPORT2
- ucnv_getPlatform (const UConverter * converter,
- UErrorCode * err);
+ucnv_getPlatform (const UConverter * converter,
+ UErrorCode * err);
/**
- *Gets the type of conversion associated with the converter
- * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1
- * @param converter: a valid, opened converter
+ * Gets the type of the converter
+ * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022,
+ * EBCDIC_STATEFUL, LATIN_1
+ * @param converter a valid, opened converter
* @return the type of the converter
* @stable
*/
@@ -437,15 +493,17 @@ U_CAPI UConverterType U_EXPORT2
ucnv_getType (const UConverter * converter);
/**
- *Gets the "starter" bytes for the converters of type MBCS
- *will fill in an U_ILLEGAL_ARGUMENT_ERROR if converter passed in
- *is not MBCS.
- *fills in an array of boolean, with the value of the byte as offset to the array.
- *At return, if TRUE is found in at offset 0x20, it means that the byte 0x20 is a starter byte
- *in this converter.
- * @param converter: a valid, opened converter of type MBCS
- * @param starters: an array of size 256 to be filled in
- * @param err: an array of size 256 to be filled in
+ * Gets the "starter" (lead) bytes for converters of type MBCS.
+ * Will fill in an U_ILLEGAL_ARGUMENT_ERROR if converter passed in
+ * is not MBCS. Fills in an array of type UBool, with the value of the byte
+ * as offset to the array. For example, if (starters[0x20] == TRUE) at return,
+ * it means that the byte 0x20 is a starter byte in this converter.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter a valid, opened converter of type MBCS
+ * @param starters an array of size 256 to be filled in
+ * @param err error status, U_ILLEGAL_ARGUMENT_ERROR if the
+ * converter is not a type which can return starters.
* @see ucnv_getType
* @stable
*/
@@ -455,101 +513,127 @@ U_CAPI void U_EXPORT2 ucnv_getStarters(const UConverter* converter,
/**
- * Gets the current calback function used by the converter when illegal or invalid sequence found.
+ * Gets the current callback function used by the converter when an illegal
+ * or invalid codepage sequence is found.
+ * Context pointers are always owned by the caller.
*
* @param converter the unicode converter
- * @param action the callback function pointer
- * @param context the callback function state
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
* @see ucnv_setToUCallBack
* @stable
*/
U_CAPI void U_EXPORT2
- ucnv_getToUCallBack (const UConverter * converter,
- UConverterToUCallback *action,
- void **context);
+ucnv_getToUCallBack (const UConverter * converter,
+ UConverterToUCallback *action,
+ void **context);
/**
- * Gets the current callback function used by the converter when illegal or invalid sequence found.
+ * Gets the current callback function used by the converter when an illegal
+ * or invalid Unicode sequence is found.
+ * Context pointers are always owned by the caller.
*
* @param converter the unicode converter
- * @param action the callback function pointer
- * @param context the callback function state
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
* @see ucnv_setFromUCallBack
* @stable
*/
U_CAPI void U_EXPORT2
- ucnv_getFromUCallBack (const UConverter * converter,
- UConverterFromUCallback *action,
- void **context);
+ucnv_getFromUCallBack (const UConverter * converter,
+ UConverterFromUCallback *action,
+ void **context);
/**
- * Gets the current callback function used by the converter when illegal or invalid sequence found.
+ * Changes the callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
*
* @param converter the unicode converter
- * @param newAction the callback function we want to set.
- * @param newContext the new toUnicode callback function state
- * @param oldAction the previously assigned callback function pointer
- * @param oldContext the new toUnicode callback function state
+ * @param newAction the new callback function
+ * @param newContext the new toUnicode callback context pointer
+ * @param oldAction fillin: returns the old callback function pointer
+ * @param oldContext fillin: returns the old callback's private void* context
* @param err The error code status
* @see ucnv_getToUCallBack
* @stable
*/
U_CAPI void U_EXPORT2
- ucnv_setToUCallBack (UConverter * converter,
- UConverterToUCallback newAction,
- void* newContext,
- UConverterToUCallback *oldAction,
- void** oldContext,
- UErrorCode * err);
+ucnv_setToUCallBack (UConverter * converter,
+ UConverterToUCallback newAction,
+ void* newContext,
+ UConverterToUCallback *oldAction,
+ void** oldContext,
+ UErrorCode * err);
/**
- * Gets the current callback function used by the converter when illegal or invalid sequence found.
+ * Changes the current callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
*
* @param converter the unicode converter
- * @param newAction the callback function we want to set.
- * @param newContext the new fromUnicode callback function state
- * @param oldAction the previously assigned callback function pointer
- * @param oldContext the new fromUnicode callback function state
+ * @param newAction the new callback function
+ * @param newContext the new fromUnicode callback context pointer
+ * @param oldAction fillin: returns the old callback function pointer
+ * @param oldContext fillin: returns the old callback's private void* context
* @param err The error code status
* @see ucnv_getFromUCallBack
* @stable
*/
U_CAPI void U_EXPORT2
- ucnv_setFromUCallBack (UConverter * converter,
- UConverterFromUCallback newAction,
- void *newContext,
- UConverterFromUCallback *oldAction,
- void **oldContext,
- UErrorCode * err);
-
+ucnv_setFromUCallBack (UConverter * converter,
+ UConverterFromUCallback newAction,
+ void *newContext,
+ UConverterFromUCallback *oldAction,
+ void **oldContext,
+ UErrorCode * err);
/**
- * Transcodes an array of unicode characters to an array of codepage characters.
- * The source pointer is an I/O parameter, it starts out pointing where the function is
- * to begin transcoding, and ends up pointing after the first sequence of the bytes
- * that it encounters that are semantically invalid.
- * if ucnv_setToUCallBack is called with an action other than STOP
- * before a call is made to this API, consumed and source should point to the same place
- * (unless target ends with an imcomplete sequence of bytes and flush is FALSE).
- * the target buffer buffer needs to be a least the size of the maximum # of bytes per characters
- * allowed by the target codepage.
+ * Converts an array of unicode characters to an array of codepage
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last UChar consumed.
+ *
+ * Target similarly starts out pointing at the first available byte in the output
+ * buffer, and ends up pointing after the last byte written to the output.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * This is a stateful conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converter's internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * U_BUFFER_OVERFLOW_ERROR, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
* @param converter the Unicode converter
- * @param converter the Unicode converter
- * @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
* codepage characters to. Output : points to after the last codepage character copied
* to target.
- * @param targetLimit the pointer to the end of the target array
- * @param source the source Unicode character array
- * @param sourceLimit the pointer to the end of the source array
+ * @param targetLimit the pointer just after the last byte of the target buffer
+ * @param source I/O parameter, pointer to pointer to the source Unicode character buffer.
+ * @param sourceLimit the pointer just after the last UChar of the source buffer
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
* of allocated cells as target. Will fill in offsets from target to source pointer
* e.g: offsets[3] is equal to 6, it means that the target[3] was a result of transcoding source[6]
- * For output data carried across calls -1 will be placed for offsets.
- * @param flush TRUE if the buffer is the last buffer of the conversion interation
- * and the conversion will finish with this call, FALSE otherwise.
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to TRUE if the current source buffer is the last available
+ * chunk of the source, FALSE otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to TRUE until
+ * the source buffer is consumed.
* @param err the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the
* converter is NULL.
- * U_BUFFER_OVERFLOW_ERROR
will be set if the target is full and there is still input left in the source.
+ * U_BUFFER_OVERFLOW_ERROR
will be set if the target is full and there is
+ * still data to be written to the target.
* @see ucnv_fromUChars
* @see ucnv_convert
* @see ucnv_getMinCharSize
@@ -557,103 +641,130 @@ U_CAPI void U_EXPORT2
* @draft backslash versus Yen sign in shift-JIS
*/
-U_CAPI
- void U_EXPORT2 ucnv_fromUnicode (UConverter * converter,
- char **target,
- const char *targetLimit,
- const UChar ** source,
- const UChar * sourceLimit,
- int32_t* offsets,
- UBool flush,
- UErrorCode * err);
+U_CAPI void U_EXPORT2
+ucnv_fromUnicode (UConverter * converter,
+ char **target,
+ const char *targetLimit,
+ const UChar ** source,
+ const UChar * sourceLimit,
+ int32_t* offsets,
+ UBool flush,
+ UErrorCode * err);
/**
- * Converts an array of codepage characters into an array of unicode characters.
- * The source pointer is an I/O parameter, it starts out pointing at the place
- * to begin translating, and ends up pointing after the first sequence of the bytes
- * that it encounters that are semantically invalid.
- * if ucnv_setFromUCallBack is called with an action other than STOP
- * before a call is made to this API, consumed and source should point to the same place
- * (unless target ends with an imcomplete sequence of bytes and flush is FALSE).
+ * Converts a buffer of codepage bytes into an array of Unicode UChars.
+ * This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last byte of source consumed.
+ *
+ * Target similarly starts out pointing at the first available UChar in the output
+ * buffer, and ends up pointing after the last UChar written to the output.
+ * It does NOT necessarily keep UChar sequences together.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * This is a stateful conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converter's internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * U_BUFFER_OVERFLOW_ERROR, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
* @param converter the Unicode converter
- * @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
- * Unicode characters to. Output : points to after the last UChar copied to target.
- * @param targetLimit the pointer to the end of the target array
- * @param source the source codepage character array
- * @param sourceLimit the pointer to the end of the source array
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ * UChars into. Output : points to after the last UChar copied.
+ * @param targetLimit the pointer just after the end of the target buffer
+ * @param source I/O parameter, pointer to pointer to the source codepage buffer.
+ * @param sourceLimit the pointer to the byte after the end of the source buffer
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
* of allocated cells as target. Will fill in offsets from target to source pointer
* e.g: offsets[3] is equal to 6, it means that the target[3] was a result of transcoding source[6]
- * For output data carried across calls -1 will be placed for offsets.
- * @param flush TRUE if the buffer is the last buffer and the conversion will finish
- * in this call, FALSE otherwise.
- * @param err the error code status U_ILLEGAL_ARGUMENT_ERROR will be returned if the
- * converter is NULL, or if targetLimit and sourceLimit are misaligned.
- * U_BUFFER_OVERFLOW_ERROR
will be set if the target is full and there is still input left in the source.
- * @see ucnv_toUChars
- * @see ucnv_getNextUChar
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to TRUE if the current source buffer is the last available
+ * chunk of the source, FALSE otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to TRUE until
+ * the source buffer is consumed.
+ * @param err the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the
+ * converter is NULL.
+ * U_BUFFER_OVERFLOW_ERROR
will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_toUChars
* @see ucnv_convert
+ * @see ucnv_getMinCharSize
* @see ucnv_setFromUCallBack
+ * @see ucnv_getNextUChar
+ * @draft backslash versus Yen sign in shift-JIS
* @stable
*/
-U_CAPI
- void U_EXPORT2 ucnv_toUnicode (UConverter * converter,
- UChar ** target,
- const UChar * targetLimit,
- const char **source,
- const char *sourceLimit,
- int32_t* offsets,
- UBool flush,
- UErrorCode * err);
+U_CAPI void U_EXPORT2
+ucnv_toUnicode (UConverter * converter,
+ UChar ** target,
+ const UChar * targetLimit,
+ const char **source,
+ const char *sourceLimit,
+ int32_t* offsets,
+ UBool flush,
+ UErrorCode * err);
/**
- * Transcodes the source Unicode string to the target string in a codepage encoding
- * with the specified Unicode converter. For example, if a Unicode to/from JIS
- * converter is specified, the source string in Unicode will be transcoded to JIS
- * encoding. The result will be stored in JIS encoding.
- * if any problems during conversion are encountered it will SUBSTITUTE with the default (initial)
- * substitute characters.
+ * Converts the source Unicode string into the target codepage with the
+ * specified Unicode converter. If any problems during conversion
+ * are encountered, the currently installed fromUnicode callback will be used.
* This function is a more convenient but less efficient version of \Ref{ucnv_fromUnicode}.
+ * targetLength may be 0 if you only want to know the exact number of
+ * target bytes required.
+ * The maximum target buffer size required (barring callbacks) will be
+ * sourceLength*ucnv_getMaxCharSize()
* @param converter the Unicode converter
- * @param source the source Unicode string (zero Terminated)
- * @param target the target string in codepage encoding (not zero-terminated because some
- * codepage do not use '\0' as a string terminator
- * @param targetCapacity Input the number of bytes available in the target buffer
- * @param source the source buffer to convert with
- * @param sourceLength the length of the source buffer. If -1 is passed in as the value,
- * the source buffer is NULL terminated string and whole source buffer will be converted.
+ * @param target the target buffer (not zero-terminated
+ * because the structure of codepages varies. There is
+ * not a reliable way to produce a terminator.)
+ * @param targetCapacity the number of bytes available in the target buffer
+ * @param source the source Unicode string
+ * @param sourceLength the length of the source string. If -1 is passed in as the
+ * value, the source buffer is a zero terminated string and whole source buffer
+ * will be converted.
* @param err the error status code.
- * U_ILLEGAL_ARGUMENT_ERROR is returned if the converter is NULL or the source or target string is empty.
- * U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is still input left in the source.
+ * U_ILLEGAL_ARGUMENT_ERROR is returned if the converter is NULL
+ * or the source or target string is empty.
+ * U_BUFFER_OVERFLOW_ERROR will be set if the target is full and
+ * there is still input left in the source.
* @return number of bytes needed in target, regardless of targetCapacity
* @see ucnv_fromUnicode
* @see ucnv_convert
* @draft backslash versus Yen sign in shift-JIS
*/
-U_CAPI
- int32_t U_EXPORT2 ucnv_fromUChars (const UConverter * converter,
- char *target,
- int32_t targetCapacity,
- const UChar * source,
- int32_t sourceLength,
- UErrorCode * err);
-
-
-
-
+U_CAPI int32_t U_EXPORT2
+ucnv_fromUChars (const UConverter * converter,
+ char *target,
+ int32_t targetCapacity,
+ const UChar * source,
+ int32_t sourceLength,
+ UErrorCode * err);
/**
- * Transcode the source string in codepage encoding to the target string in
- * Unicode encoding. For example, if a Unicode to/from JIS
- * converter is specified, the source string in JIS encoding will be transcoded
- * to Unicode and placed into a provided target buffer.
- * if any problems during conversion are encountered it will SUBSTITUTE with the Unicode REPLACEMENT char
- * We recomment, the size of the target buffer needs to be at least as long as the maximum # of bytes per char
- * in this character set.
- * A zero-terminator will be placed at the end of the target buffer
+ * Converts the source string in codepage encoding into the target string in
+ * Unicode encoding. For example, if a JIS converter is used, the source
+ * string in JIS encoding will be converted into Unicode and placed into
+ * the provided target buffer. If any problems during conversion
+ * are encountered, the currently installed toUnicode callback will be used.
+ * Barring callbacks which may write longer sequences, the target buffer should
+ * be of size 1+(2*(sourceSize / ucnv_getMinCharSize())) because the worst case
+ * is that each source sequence is the minimum size, and that sequence produces
+ * a surrogate pair (plus the zero termination).
+ * A zero-terminator will be placed at the end of the target buffer.
* This function is a more convenient but less efficient version of \Ref{ucnv_toUnicode}.
* @param converter the Unicode converter
* @param source the source string in codepage encoding
@@ -672,17 +783,19 @@ U_CAPI
* @see ucnv_convert
* @stable
*/
-U_CAPI
- int32_t U_EXPORT2 ucnv_toUChars (const UConverter * converter,
- UChar * target,
- int32_t targetCapacity,
- const char *source,
- int32_t sourceSize,
- UErrorCode * err);
+U_CAPI int32_t U_EXPORT2
+ucnv_toUChars (const UConverter * converter,
+ UChar * target,
+ int32_t targetCapacity,
+ const char *source,
+ int32_t sourceSize,
+ UErrorCode * err);
+
/********************************
- * Will convert a codepage buffer one character at a time.
- * This function was written to be efficient when transcoding small amounts of data at a time.
+ * Will convert a codepage buffer into unicode one character at a time.
+ * This function was written to be efficient when transcoding small
+ * amounts of data at a time.
* In that case it will be more efficient than \Ref{ucnv_toUnicode}.
* When converting large buffers use \Ref{ucnv_toUnicode}.
*
@@ -702,41 +815,45 @@ U_CAPI
* (Note that SCSU is actually a mix of these codepage types.)
*
*
- *@param converter an open UConverter
- *@param source the address of a pointer to the codepage buffer, will be updated to point after
- *the bytes consumed in the conversion call.
- *@param points to the end of the input buffer
- *@param err fills in error status (see ucnv_toUnicode)
- *U_INDEX_OUTOFBOUNDS_ERROR will be set if the input is empty or does not convert
- * to any output (e.g.: pure state-change codes SI/SO, escape sequences for ISO 2022,
- * callback did not output anything, ...).
- * This function will not set a U_BUFFER_OVERFLOW_ERROR because the "buffer" is
- * the return code. However, there might be subsequent output stored in the converter object
+ * @param converter an open UConverter
+ * @param source the address of a pointer to the codepage buffer, will be
+ * updated to point after the bytes consumed in the conversion call.
+ * @param sourceLimit points to the end of the input buffer
+ * @param err fills in error status (see ucnv_toUnicode)
+ * U_INDEX_OUTOFBOUNDS_ERROR will be set if the input
+ * is empty or does not convert to any output (e.g.: pure state-change
+ * codes SI/SO, escape sequences for ISO 2022,
+ * or if the callback did not output anything, ...).
+ * This function will not set a U_BUFFER_OVERFLOW_ERROR because
+ * the "buffer" is the return code. However, there might be subsequent output
+ * stored in the converter object
* that will be returned in following calls to this function.
- *@return a UChar32 resulting from the partial conversion of source
- *@see ucnv_toUnicode
- *@see ucnv_toUChars
- *@see ucnv_convert
- *@stable
+ * @return a UChar32 resulting from the partial conversion of source
+ * @see ucnv_toUnicode
+ * @see ucnv_toUChars
+ * @see ucnv_convert
+ * @stable
*/
-U_CAPI
- UChar32 U_EXPORT2 ucnv_getNextUChar (UConverter * converter,
- const char **source,
- const char *sourceLimit,
- UErrorCode * err);
+U_CAPI UChar32 U_EXPORT2
+ucnv_getNextUChar (UConverter * converter,
+ const char **source,
+ const char * sourceLimit,
+ UErrorCode * err);
/**************************
* Will convert a sequence of bytes from one codepage to another.
* This is NOT AN EFFICIENT way to transcode.
-* use \Ref{ucnv_toUnicode} and \Ref{ucnv_fromUnicode} for efficiency
-* @param toConverterName: The name of the converter that will be used to encode the output buffer
-* @param fromConverterName: The name of the converter that will be used to decode the input buffer
-* @param target: Pointer to the output buffer to write to
-* @param targetCapacity: on input contains the capacity of target
-* @param source: Pointer to the input buffer
-* @param sourceLength: on input contains the capacity of source
-* @param err: fills in an error status
+* use \Ref{ucnv_toUnicode} and \Ref{ucnv_fromUnicode} for efficiency.
+* @param toConverterName The name of the converter that will be used
+* in conversion from unicode into the output buffer
+* @param fromConverterName The name of the converter that will be used
+* in conversion from the source buffer into intermediate unicode.
+* @param target Pointer to the output buffer
+* @param targetCapacity capacity of the target, in bytes
+* @param source Pointer to the input buffer
+* @param sourceLength capacity of the source, in bytes
+* @param err error status.
* U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is still input left in the source.
* @return will be filled in with the number of bytes needed in target
* @see ucnv_fromUnicode
@@ -746,54 +863,57 @@ U_CAPI
* @see ucnv_getNextUChar
* @draft backslash versus Yen sign in shift-JIS
*/
-U_CAPI
- int32_t U_EXPORT2 ucnv_convert (const char *toConverterName,
- const char *fromConverterName,
- char *target,
- int32_t targetCapacity,
- const char *source,
- int32_t sourceLength,
- UErrorCode * err);
+U_CAPI int32_t U_EXPORT2
+ucnv_convert (const char *toConverterName,
+ const char *fromConverterName,
+ char *target,
+ int32_t targetCapacity,
+ const char *source,
+ int32_t sourceLength,
+ UErrorCode * err);
+
/**
* SYSTEM API
- * Iterates through every cached converter and frees all the unused ones.
+ * Frees up memory occupied by unused, cached converter shared data.
*
* @return the number of cached converters successfully deleted
+ * @see ucnv_close
* @stable
* @system
*/
-U_CAPI int32_t U_EXPORT2 ucnv_flushCache (void);
+U_CAPI int32_t U_EXPORT2
+ucnv_flushCache (void);
/**
- * provides a string containing the internal name (based on the alias file) of the converter.
- * given an index.
- * @param n the number of converters available on the system ([0..ucnv_countAvaiable()])
- * @return a pointer a string (library owned), or NULL if the index is out of bounds.
- * @see ucnv_countAvailable
- * @stable
- */
-U_CAPI
- const char * U_EXPORT2 ucnv_getAvailableName (int32_t n);
-
-/**
- * returns the number of available converters.
+ * returns the number of available converters, as per the alias file.
*
* @return the number of available converters
* @see ucnv_getAvailableName
* @stable
*/
-U_CAPI int32_t U_EXPORT2 ucnv_countAvailable (void);
-
+U_CAPI int32_t U_EXPORT2
+ucnv_countAvailable (void);
/**
- * Gives the number of aliases for given converter or alias name.
+ * Gets the name of the specified converter from a list of all converters
+ * contained in the alias file.
+ * @param n the index to a converter available on the system (in the range [0..ucnv_countAvailable()-1])
+ * @return a pointer to a string (library owned), or NULL if the index is out of bounds.
+ * @see ucnv_countAvailable
+ * @stable
+ */
+U_CAPI const char* U_EXPORT2
+ucnv_getAvailableName (int32_t n);
+
+/**
+ * Gives the number of aliases for a given converter or alias name.
* Note that additional aliases are recognized by ucnv_open().
* This method only enumerates the listed entries in the alias file.
* @param alias alias name
- * @param pErrorCode result of operation
- * @return number of names on alias list
+ * @param pErrorCode error status
+ * @return number of names on alias list for given alias
* @stable
*/
U_CAPI uint16_t
@@ -807,6 +927,7 @@ ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
* @param n index in alias list
* @param pErrorCode result of operation
* @return returns the name of the alias at given index
+ * @see ucnv_countAliases
* @stable
*/
U_CAPI const char *
@@ -819,7 +940,8 @@ ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
* @param alias alias name
* @param aliases fill-in list, aliases is a pointer to an array of
* ucnv_countAliases() string-pointers
- * (const char *) that will be filled in
+ * (const char *) that will be filled in.
+ * The strings themselves are owned by the library.
* @param pErrorCode result of operation
* @stable
*/
@@ -838,7 +960,7 @@ ucnv_countStandards(void);
* Gives the name of the standard at given index of standard list.
* @param n index in standard list
* @param pErrorCode result of operation
- * @return returns the name of the standard at given index
+ * @return returns the name of the standard at given index. Owned by the library.
* @stable
*/
U_CAPI const char * U_EXPORT2
@@ -852,7 +974,7 @@ ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
* are such standards
* @return returns the standard converter name;
* if a standard converter name cannot be determined,
- * then NULL is returned
+ * then NULL is returned. Owned by the library.
* @stable
*/
U_CAPI const char * U_EXPORT2
@@ -863,20 +985,23 @@ ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorC
*
* @return returns the current default converter name;
* if a default converter name cannot be determined,
- * then NULL is returned
+ * then NULL is returned.
+ * Storage owned by the library
* @see ucnv_setDefaultName
* @stable
*/
-U_CAPI const char * U_EXPORT2 ucnv_getDefaultName (void);
+U_CAPI const char * U_EXPORT2
+ucnv_getDefaultName (void);
/**
- * sets the current default converter name.
- * The lifetime of the return ptr is that of the library
- * @param name: the converter name you want as default (has to appear in alias file)
+ * sets the current default converter name. Caller must own the storage for 'name'
+ * and preserve it indefinitely.
+ * @param name the converter name to be the default (must exist).
* @see ucnv_getDefaultName
* @system
*/
-U_CAPI void U_EXPORT2 ucnv_setDefaultName (const char *name);
+U_CAPI void U_EXPORT2
+ucnv_setDefaultName (const char *name);
/**
* Fixes the backslash character mismapping. For example, in SJIS, the backslash
@@ -885,11 +1010,16 @@ U_CAPI void U_EXPORT2 ucnv_setDefaultName (const char *name);
* character back to yen or backslash in SJIS. This function will take the input
* buffer and replace all the yen sign characters with backslash. This is necessary
* when the user tries to open a file with the input buffer on Windows.
+ * This function will test the converter to see whether such mapping is
+ * required.
+ * @param cnv The converter representing the target codepage.
* @param source the input buffer to be fixed
* @param sourceLength the length of the input buffer
+ * @see ucnv_isAmbiguous
* @draft
*/
-U_CAPI void U_EXPORT2 ucnv_fixFileSeparator(const UConverter *cnv, UChar* source, int32_t sourceLen);
+U_CAPI void U_EXPORT2
+ucnv_fixFileSeparator(const UConverter *cnv, UChar* source, int32_t sourceLen);
/**
* Determines if the converter contains ambiguous mappings of the same
@@ -898,11 +1028,12 @@ U_CAPI void U_EXPORT2 ucnv_fixFileSeparator(const UConverter *cnv, UChar* source
* character, FALSE otherwise.
* @draft
*/
-U_CAPI UBool U_EXPORT2 ucnv_isAmbiguous(const UConverter *cnv);
+U_CAPI UBool U_EXPORT2
+ucnv_isAmbiguous(const UConverter *cnv);
/**
* Sets the converter to use fallback mapping or not.
- * @param cnv The converter to set the fallback mapping usage for.
+ * @param cnv The converter to set the fallback mapping usage on.
* @param usesFallback TRUE if the user wants the converter to take advantage of the fallback
* mapping, FALSE otherwise.
* @draft