diff --git a/icu4c/source/common/unicode/ucnv.h b/icu4c/source/common/unicode/ucnv.h index 6ba05f06dd7..3181b81a660 100644 --- a/icu4c/source/common/unicode/ucnv.h +++ b/icu4c/source/common/unicode/ucnv.h @@ -33,6 +33,12 @@ * {@link ucnv_getAlias()} and {@link ucnv_getStandardName()} are some of the * more frequently used alias functions to get this information.

* + *

When a converter encounters an illegal, irregular, invalid or unmappable character, + * its default behavior is to use a substitution character to replace the + * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack()} + * or {@link ucnv_setToUCallBack()} on the converter. The header ucnv_err.h defines + * many other callback actions that can be used instead of a character substitution.

+ * *

More information about this API can be found in our * User's * Guide.

@@ -775,12 +781,13 @@ ucnv_getFromUCallBack (const UConverter * converter, * Changes the callback function used by the converter when * an illegal or invalid sequence is found. * Context pointers are always owned by the caller. + * Predefined actions and contexts can be found in the ucnv_err.h header. * * @param converter the unicode converter * @param newAction the new callback function - * @param newContext the new toUnicode callback context pointer - * @param oldAction fillin: returns the old callback function pointer - * @param oldContext fillin: returns the old callback's private void* context + * @param newContext the new toUnicode callback context pointer. This can be NULL. + * @param oldAction fillin: returns the old callback function pointer. This can be NULL. + * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. * @param err The error code status * @see ucnv_getToUCallBack * @stable ICU 2.0 @@ -797,12 +804,13 @@ ucnv_setToUCallBack (UConverter * converter, * Changes the current callback function used by the converter when * an illegal or invalid sequence is found. * Context pointers are always owned by the caller. + * Predefined actions and contexts can be found in the ucnv_err.h header. * * @param converter the unicode converter * @param newAction the new callback function - * @param newContext the new fromUnicode callback context pointer - * @param oldAction fillin: returns the old callback function pointer - * @param oldContext fillin: returns the old callback's private void* context + * @param newContext the new fromUnicode callback context pointer. This can be NULL. + * @param oldAction fillin: returns the old callback function pointer. This can be NULL. + * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. 
* @param err The error code status * @see ucnv_getFromUCallBack * @stable ICU 2.0 diff --git a/icu4c/source/common/unicode/ucnv_err.h b/icu4c/source/common/unicode/ucnv_err.h index b4ce1b0be88..c03a2379eae 100644 --- a/icu4c/source/common/unicode/ucnv_err.h +++ b/icu4c/source/common/unicode/ucnv_err.h @@ -7,6 +7,7 @@ * * ucnv_err.h: */ + /** * \file * \brief C UConverter predefined error callbacks @@ -17,36 +18,66 @@ * can also be considered only as an example of what can be done with * callbacks. You may of course write your own. * - * These Functions, although public, should NEVER be called directly, they should be used as parameters to - * the ucnv_setFromUCallback and ucnv_setToUCallback functions, to - * set the behaviour of a converter - * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. + * If you want to write your own, you may also find the functions from + * ucnv_cb.h useful. * - * usage example: 'STOP' doesn't need any context, but newContext - * could be set to something other than 'NULL' if needed. + * These functions, although public, should NEVER be called directly. + * They should be used as parameters to the ucnv_setFromUCallBack + * and ucnv_setToUCallBack functions, to set the behaviour of a converter + * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. + * + * usage example: 'STOP' doesn't need any context, but newContext + * could be set to something other than 'NULL' if needed. The available + * contexts in this header can modify the default behavior of the callback. 
* * \code - * UErrorCode err = U_ZERO_ERROR; - * UConverter* myConverter = ucnv_open("ibm-949", &err); - * const void *newContext = NULL; + * UErrorCode err = U_ZERO_ERROR; + * UConverter *myConverter = ucnv_open("ibm-949", &err); * const void *oldContext; * UConverterFromUCallback oldAction; * * - * if (U_SUCCESS(err)) - * { - * ucnv_setFromUCallBack(myConverter, + * if (U_SUCCESS(err)) + * { + * ucnv_setFromUCallBack(myConverter, * UCNV_FROM_U_CALLBACK_STOP, - * newContext, + * NULL, * &oldAction, * &oldContext, - * &status); - * } + * &err); + * } * \endcode * - * The code above tells "myConverter" to stop when it encounters a ILLEGAL/TRUNCATED/INVALID sequences when it is used to - * convert from Unicode -> Codepage. - * The behavior from Codepage to Unicode is not changed. + * The code above tells "myConverter" to stop when it encounters an + * ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from + * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, + * and ucnv_setToUCallBack would need to be called in order to change + * that behavior too. + * + * Here is an example with a context: + * + * \code + * UErrorCode err = U_ZERO_ERROR; + * UConverter *myConverter = ucnv_open("ibm-949", &err); + * const void *oldContext; + * UConverterToUCallback oldAction; + * + * + * if (U_SUCCESS(err)) + * { + * ucnv_setToUCallBack(myConverter, + * UCNV_TO_U_CALLBACK_SUBSTITUTE, + * UCNV_SUB_STOP_ON_ILLEGAL, + * &oldAction, + * &oldContext, + * &err); + * } + * \endcode + * + * The code above tells "myConverter" to stop when it encounters an + * ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from + * Codepage -> Unicode. Any unmapped but legal characters will be + * replaced with the default substitution character. */ /* This file isn't designed to be included all by itself. 
*/ @@ -62,47 +93,47 @@ /** - * FROM_U, TO_U options for sub callback + * FROM_U, TO_U context options for sub callback * @stable ICU 2.0 */ #define UCNV_SUB_STOP_ON_ILLEGAL "i" /** - * FROM_U, TO_U options for skip callback + * FROM_U, TO_U context options for skip callback * @stable ICU 2.0 */ #define UCNV_SKIP_STOP_ON_ILLEGAL "i" /** - * FROM_U_CALLBACK_ESCAPE option to escape the code unit according to ICU (%UXXXX) + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) * @stable ICU 2.0 */ #define UCNV_ESCAPE_ICU NULL /** - * FROM_U_CALLBACK_ESCAPE option to escape the code unit according to JAVA (\uXXXX) + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\uXXXX) * @stable ICU 2.0 */ #define UCNV_ESCAPE_JAVA "J" /** - * FROM_U_CALLBACK_ESCAPE option to escape the code unit according to C (\uXXXX \UXXXXXXXX) + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\uXXXX \UXXXXXXXX) * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\xXXXX) * @stable ICU 2.0 */ #define UCNV_ESCAPE_C "C" /** - * FROM_U_CALLBACK_ESCAPE option to escape the code unit according to XML Decimal escape (&#DDDD) - * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to XML Decimal escape (&#DDDD) + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape (&#DDDD) + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape (&#DDDD) * @stable ICU 2.0 */ #define UCNV_ESCAPE_XML_DEC "D" /** - * FROM_U_CALLBACK_ESCAPE option to escape the code unit according to XML Hex escape (&#xXXXX) - * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to XML Hex escape (&#xXXXX) + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape (&#xXXXX) + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape 
(&#xXXXX) * @stable ICU 2.0 */ #define UCNV_ESCAPE_XML_HEX "X" /** - * FROM_U_CALLBACK_ESCAPE option to escape teh code unit according to Unicode (U+XXXXX) + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) * @stable ICU 2.0 */ #define UCNV_ESCAPE_UNICODE "U"