diff --git a/icu4c/source/common/ucnv_cnv.h b/icu4c/source/common/ucnv_cnv.h index 085bfdbf937..a4f468ea89c 100644 --- a/icu4c/source/common/ucnv_cnv.h +++ b/icu4c/source/common/ucnv_cnv.h @@ -98,7 +98,17 @@ typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv, int32_t *pBufferSize, UErrorCode *status); -/** ### TODO @draft ICU 2.6 */ +/** + * Fills the set of Unicode code points that can be converted by an ICU converter. + * The API function ucnv_getUnicodeSet() clears the USet before calling + * the converter's getUnicodeSet() implementation; the converter should only + * add the appropriate code points to allow recursive use. + * For example, the ISO-2022-JP converter will call each subconverter's + * getUnicodeSet() implementation to consecutively add code points to + * the same USet, which will result in a union of the sets of all subconverters. + * + * For more documentation, see ucnv_getUnicodeSet() in ucnv.h. + */ typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv, USet *set, UConverterUnicodeSet which, @@ -156,7 +166,7 @@ struct UConverterImpl { UConverterGetName getName; UConverterWriteSub writeSub; UConverterSafeClone safeClone; - UConverterGetUnicodeSet getUnicodeSet; /* ### TODO ICU 2.6 */ + UConverterGetUnicodeSet getUnicodeSet; }; extern const UConverterSharedData diff --git a/icu4c/source/common/unicode/ucnv.h b/icu4c/source/common/unicode/ucnv.h index 47597fa9c49..d3e3e60f40e 100644 --- a/icu4c/source/common/unicode/ucnv.h +++ b/icu4c/source/common/unicode/ucnv.h @@ -693,7 +693,40 @@ typedef enum UConverterUnicodeSet { UCNV_SET_COUNT } UConverterUnicodeSet; -/** ### TODO @draft ICU 2.6 */ +/** + * Returns the set of Unicode code points that can be converted by an ICU converter. + * + * The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET): + * The set of all Unicode code points that can be roundtrip-converted + * (converted without any data loss) with the converter. 
+ * This set will not include code points that have fallback mappings + * or are only the result of reverse fallback mappings. + * See UTR #22 "Character Mapping Markup Language" + * at http://www.unicode.org/reports/tr22/ + * + * This is useful for example for + * - checking that a string or document can be roundtrip-converted with a converter, + * without/before actually performing the conversion + * - testing if a converter can be used for typical text for a certain locale, + * by comparing its roundtrip set with the set of ExemplarCharacters from + * ICU's locale data or other sources + * + * In the future, there may be more UConverterUnicodeSet choices to select + * sets with different properties. + * + * @param cnv The converter for which a set is requested. + * @param set A valid USet; it will be cleared by the function before + * the converter's specific set is filled into the USet. + * @param which A UConverterUnicodeSet selector; + * currently UCNV_ROUNDTRIP_SET is the only supported value. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * + * @see UConverterUnicodeSet + * @see uset_open + * @see uset_close + * @draft ICU 2.6 + */ U_CAPI void U_EXPORT2 ucnv_getUnicodeSet(const UConverter *cnv, USet *set,