ICU-2481 make UNORM_UNICODE_3_2 and related public; move obsolete UNormalizationMode constants to the obsolete library

X-SVN-Rev: 11301
2025-04-14 17:24:01 +00:00 · 2003-03-13 23:01:03 +00:00 · 2003-03-13 23:01:03 +00:00 · f9ce852e79
commit f9ce852e79
parent 7771903a16
1 changed files with 91 additions and 48 deletions
--- a/icu4c/source/common/unicode/unorm.h
+++ b/icu4c/source/common/unicode/unorm.h
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (c) 1996-2001, International Business Machines Corporation
+* Copyright (c) 1996-2003, International Business Machines Corporation
 *               and others. All Rights Reserved.
 *******************************************************************************
 * File unorm.h
@ -139,54 +139,35 @@ typedef enum {

  /** One more than the highest normalization mode constant. @stable ICU 2.0 */
  UNORM_MODE_COUNT
-
-  /* *** The rest of this enum is obsolete. *** */
-
-#ifdef ICU_UNORM_USE_DEPRECATES
-  /**
-   * No decomposition/composition
-   * @obsolete ICU 2.4. Use UNORM_NONE instead since this API will be removed in that release.
-   */
-  ,UCOL_NO_NORMALIZATION = 1,
-  /**
-   * Canonical decomposition
-   * @obsolete ICU 2.4. Use UNORM_NFD instead since this API will be removed in that release.
-   */
-  UCOL_DECOMP_CAN = 2,
-  /**
-   * Compatibility decomposition
-   * @obsolete ICU 2.4. Use UNORM_NFKD instead since this API will be removed in that release.
-   */
-  UCOL_DECOMP_COMPAT = 3,
-  /**
-   * Default normalization
-   * @obsolete ICU 2.4. Use UNORM_NFKD or UNORM_DEFAULT instead since this API will be removed in that release.
-   */
-  UCOL_DEFAULT_NORMALIZATION = UCOL_DECOMP_COMPAT, 
-  /**
-   * Canonical decomposition followed by canonical composition
-   * @obsolete ICU 2.4. Use UNORM_NFC instead since this API will be removed in that release.
-   */
-  UCOL_DECOMP_CAN_COMP_COMPAT = 4,
-  /**
-   * Compatibility decomposition followed by canonical composition
-   * @obsolete ICU 2.4. Use UNORM_NFKC instead since this API will be removed in that release.
-   */
-  UCOL_DECOMP_COMPAT_COMP_CAN =5,
-#endif /* ICU_UNORM_USE_DEPRECATES */
 } UNormalizationMode;

+/**
+ * Constants for options flags for normalization.
+ * Use 0 for default options,
+ * including normalization according to the Unicode version
+ * that is currently supported by ICU (see u_getUnicodeVersion).
+ * @draft ICU 2.6
+ */
+enum {
+    /**
+     * Options bit set value to select Unicode 3.2 normalization
+     * (except NormalizationCorrections).
+     * At most one Unicode version can be selected at a time.
+     * @draft ICU 2.6
+     */
+    UNORM_UNICODE_3_2=0x20
+};
+
 /**
 * Normalize a string.
 * The string will be normalized according the specified normalization mode
- * and options (there are currently no options defined).
+ * and options.
 *
 * @param source The string to normalize.
 * @param sourceLength The length of source, or -1 if NUL-terminated.
 * @param mode The normalization mode; one of UNORM_NONE, 
 *             UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT.
- * @param options The normalization options, ORed together (0 for no options);
- *                currently there is no option defined.
+ * @param options The normalization options, ORed together (0 for no options).
 * @param result A pointer to a buffer to receive the result string.
 *               The result string is NUL-terminated if possible.
 * @param resultLength The maximum size of result.
@ -256,6 +237,27 @@ unorm_quickCheck(const UChar *source, int32_t sourcelength,
                 UNormalizationMode mode,
                 UErrorCode *status);

+/**
+ * Perform a quick check on a string; same as unorm_quickCheck but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src        String that is to be tested if it is in a normalization format.
+ * @param srcLength  Length of source to test, or -1 if NUL-terminated.
+ * @param mode       Which normalization form to test for.
+ * @param options    The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @draft ICU 2.6
+ */
+U_CAPI UNormalizationCheckResult U_EXPORT2
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, 
+                            UNormalizationMode mode, int32_t options,
+                            UErrorCode *pErrorCode);
+
 /**
 * Test if a string is in a given normalization form.
 * This is semantically equivalent to source.equals(normalize(source, mode)) .
@ -282,6 +284,28 @@ unorm_isNormalized(const UChar *src, int32_t srcLength,
                   UNormalizationMode mode,
                   UErrorCode *pErrorCode);

+/**
+ * Test if a string is in a given normalization form; same as unorm_isNormalized but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src        String that is to be tested if it is in a normalization format.
+ * @param srcLength  Length of source to test, or -1 if NUL-terminated.
+ * @param mode       Which normalization form to test for.
+ * @param options    The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ *         "mode/options" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @draft ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
+                              UNormalizationMode mode, int32_t options,
+                              UErrorCode *pErrorCode);
+
 /**
 * Iterative normalization forward.
 * This function (together with unorm_previous) is somewhat
@ -339,7 +363,7 @@ unorm_isNormalized(const UChar *src, int32_t srcLength,
 * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
 * @param destCapacity The number of UChars that fit into dest.
 * @param mode The normalization mode.
- * @param options A bit set of normalization options.
+ * @param options The normalization options, ORed together (0 for no options).
 * @param doNormalize Indicates if the source text up to the next boundary
 *                    is to be normalized (TRUE) or just copied (FALSE).
 * @param pNeededToNormalize Output flag indicating if the normalization resulted in
@ -353,7 +377,7 @@ unorm_isNormalized(const UChar *src, int32_t srcLength,
 * @see unorm_previous
 * @see unorm_normalize
 *
- * @draft ICU 2.1
+ * @stable ICU 2.1
 */
 U_CAPI int32_t U_EXPORT2
 unorm_next(UCharIterator *src,
@ -372,7 +396,7 @@ unorm_next(UCharIterator *src,
 * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
 * @param destCapacity The number of UChars that fit into dest.
 * @param mode The normalization mode.
- * @param options A bit set of normalization options.
+ * @param options The normalization options, ORed together (0 for no options).
 * @param doNormalize Indicates if the source text up to the next boundary
 *                    is to be normalized (TRUE) or just copied (FALSE).
 * @param pNeededToNormalize Output flag indicating if the normalization resulted in
@ -386,7 +410,7 @@ unorm_next(UCharIterator *src,
 * @see unorm_next
 * @see unorm_normalize
 *
- * @draft ICU 2.1
+ * @stable ICU 2.1
 */
 U_CAPI int32_t U_EXPORT2
 unorm_previous(UCharIterator *src,
@ -399,11 +423,11 @@ unorm_previous(UCharIterator *src,
 * Concatenate normalized strings, making sure that the result is normalized as well.
 *
 * If both the left and the right strings are in
- * the normalization form according to "mode",
+ * the normalization form according to "mode/options",
 * then the result will be
 *
 * \code
- *     dest=normalize(left+right, mode)
+ *     dest=normalize(left+right, mode, options)
 * \endcode
 *
 * With the input strings already being normalized,
@ -421,7 +445,7 @@ unorm_previous(UCharIterator *src,
 * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
 * @param destCapacity The number of UChars that fit into dest.
 * @param mode The normalization mode.
- * @param options A bit set of normalization options.
+ * @param options The normalization options, ORed together (0 for no options).
 * @param pErrorCode ICU error code in/out parameter.
 *                   Must fulfill U_SUCCESS before the function call.
 * @return Length of output (number of UChars) when successful or buffer overflow.
@ -430,7 +454,7 @@ unorm_previous(UCharIterator *src,
 * @see unorm_next
 * @see unorm_previous
 *
- * @draft ICU 2.1
+ * @stable ICU 2.1
 */
 U_CAPI int32_t U_EXPORT2
 unorm_concatenate(const UChar *left, int32_t leftLength,
@ -463,6 +487,23 @@ unorm_concatenate(const UChar *left, int32_t leftLength,
 #define U_COMPARE_CODE_POINT_ORDER  0x8000
 #endif

+/**
+ * Lowest-order bit number of unorm_compare() options bits corresponding to
+ * normalization options bits.
+ *
+ * The options parameter for unorm_compare() uses most bits for
+ * itself and for various comparison and folding flags.
+ * The most significant bits, however, are shifted down and passed on
+ * to the normalization implementation.
+ * (That is, from unorm_compare(..., options, ...),
+ * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
+ * internal normalization functions.)
+ *
+ * @see unorm_compare
+ * @draft ICU 2.6
+ */
+#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+
 /**
 * Compare two strings for canonical equivalence.
 * Further options include case-insensitive comparison and
@ -480,7 +521,7 @@ unorm_concatenate(const UChar *left, int32_t leftLength,
 * For FCD strings and short non-FCD strings there is no memory allocation.
 *
 * Semantically, this is equivalent to
- *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
+ *   strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
 * where code point order and foldCase are all optional.
 *
 * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
@ -516,6 +557,8 @@ unorm_concatenate(const UChar *left, int32_t leftLength,
 *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 *    (see u_strCaseCompare for details)
 *
+ *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
 * @param pErrorCode ICU error code in/out parameter.
 *                   Must fulfill U_SUCCESS before the function call.
 * @return <0 or 0 or >0 as usual for string comparisons