ICU-2481 improve prototype and support backlevel Unicode 3.2 normalization

X-SVN-Rev: 11167
2025-04-10 07:39:16 +00:00 · 2003-02-26 01:45:06 +00:00 · 2003-02-26 01:45:06 +00:00 · 2b7ba6ffc3
commit 2b7ba6ffc3
parent e9d1b6386c
8 changed files with 17452 additions and 265 deletions
--- a/icu4c/source/common/normlzr.cpp
+++ b/icu4c/source/common/normlzr.cpp
@ -624,7 +624,7 @@ Normalizer::nextNormalize() {
    errorCode=U_ZERO_ERROR;
    p=buffer.getBuffer(-1);
    length=unorm_next(text, p, buffer.getCapacity(),
-                      fUMode, fOptions!=0,
+                      fUMode, fOptions,
                      TRUE, 0,
                      &errorCode);
    buffer.releaseBuffer(length);
@ -633,7 +633,7 @@ Normalizer::nextNormalize() {
        text->move(text, nextIndex, UITER_ZERO);
        p=buffer.getBuffer(length);
        length=unorm_next(text, p, buffer.getCapacity(),
-                          fUMode, fOptions!=0,
+                          fUMode, fOptions,
                          TRUE, 0,
                          &errorCode);
        buffer.releaseBuffer(length);
--- a/icu4c/source/common/unicode/normlzr.h
+++ b/icu4c/source/common/unicode/normlzr.h
@ -14,6 +14,7 @@
 #include "unicode/unistr.h"
 #include "unicode/chariter.h"
 #include "unicode/unorm.h"
+#include "unormimp.h" // ### TODO remove when prototyping is done!!

 struct UCharIterator;
 typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @draft ICU 2.1 */
@ -275,6 +276,10 @@ public:
  static inline UNormalizationCheckResult
  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);

+  /** Variant of quickCheck() that takes an additional normalization options bit set. ### TODO @draft ICU 2.6 */
+  static inline UNormalizationCheckResult
+  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
+
  /**
   * Test if a string is in a given normalization form.
   * This is semantically equivalent to source.equals(normalize(source, mode)) .
@ -298,6 +303,10 @@ public:
  static inline UBool
  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);

+  /** Variant of isNormalized() that takes an additional normalization options bit set. ### TODO @draft ICU 2.6 */
+  static inline UBool
+  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
+
  /**
   * Concatenate normalized strings, making sure that the result is normalized as well.
   *
@ -1101,6 +1110,18 @@ Normalizer::quickCheck(const UnicodeString& source,
                            mode, &status);
 }

+inline UNormalizationCheckResult
+Normalizer::quickCheck(const UnicodeString& source,
+                       UNormalizationMode mode, int32_t options,
+                       UErrorCode &status) {
+    if(U_FAILURE(status)) {
+        return UNORM_MAYBE; /* incoming error: the string is not examined */
+    }
+    /* thin wrapper: forward the string buffer, its length and the options bit set to the C API */
+    return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
+                                       mode, options, &status);
+}
+
 inline UBool
 Normalizer::isNormalized(const UnicodeString& source,
                         UNormalizationMode mode, 
@ -1113,6 +1134,18 @@ Normalizer::isNormalized(const UnicodeString& source,
                              mode, &status);
 }

+inline UBool
+Normalizer::isNormalized(const UnicodeString& source,
+                         UNormalizationMode mode, int32_t options,
+                         UErrorCode &status) {
+    if(U_FAILURE(status)) {
+        return FALSE; /* incoming error: the string is not examined */
+    }
+    /* thin wrapper: forward the string buffer, its length and the options bit set to the C API */
+    return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
+                                         mode, options, &status);
+}
+
 inline int32_t
 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
                    uint32_t options,
--- a/icu4c/source/common/unorm.cpp
+++ b/icu4c/source/common/unorm.cpp
@ -50,6 +50,16 @@
 * which means that these functions will be over-pessimistic when
 * exclusions are applied.
 * This may not matter because subsequent checks and normalizations do apply the exclusions.
+ *
+ * 2003feb25: Added support for Unicode 3.2 normalization, for IDNA.
+ * This excludes all post-Unicode 3.2 code points.
+ *
+ * Normalization exclusions have the following effect on excluded code points c:
+ * - c is not decomposed
+ * - c is not a composition target
+ * - c does not combine forward or backward for composition
+ *   except that this is not implemented for Jamo
+ * - c is treated as having a combining class of 0
 */
 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))

@ -65,6 +75,20 @@ enum {
    _STACK_BUFFER_CAPACITY=100
 };

+/* ### TODO prototype
+ * Constants for the bit fields in the options bit set parameter.
+ * These need not be public.
+ * A user only needs to know the currently assigned values.
+ * The number and positions of reserved bits per field can remain private.
+ */
+enum {
+    _NORM_OPTIONS_NX_MASK=0x1f, /* bits 4..0: field for the exclusion set flags (tested via UNORM_NX_* in internalGetNX()) */
+    _NORM_OPTIONS_UNICODE_MASK=0xe0, /* bits 7..5: Unicode version field, evaluated by internalGetNXUnicode() */
+    _NORM_OPTIONS_SETS_MASK=0xff, /* both fields together; nxCache[] has _NORM_OPTIONS_SETS_MASK+1 slots */
+
+    _NORM_OPTIONS_UNICODE_SHIFT=5 /* bit position where the Unicode version field starts (0xe0==7<<5) */
+};
+
 /* Korean Hangul and Jamo constants */
 enum {
    JAMO_L_BASE=0x1100,     /* "lead" jamo */
@ -138,7 +162,7 @@ static const UChar *
 _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_t &length,
             const UChar *prevStarter, const UChar *src,
             uint32_t qcMask, uint8_t &prevCC,
-             const UnicodeSet *dx,
+             const UnicodeSet *nx,
             UErrorCode *pErrorCode);

 /* load unorm.dat ----------------------------------------------------------- */
@ -167,7 +191,7 @@ static UBool formatVersion_2_1=FALSE, formatVersion_2_2=FALSE;
 static UVersionInfo dataVersion={ 3, 1, 0, 0 };

 /* ### TODO: prototype ### cache UnicodeSets for each combination of exclusion flags */
-static UnicodeSet *dxCache[UNORM_DX_MASK+1]={ NULL };
+static UnicodeSet *nxCache[_NORM_OPTIONS_SETS_MASK+1]={ NULL };

 U_CDECL_BEGIN

@ -182,9 +206,10 @@ unorm_cleanup() {
    dataErrorCode=U_ZERO_ERROR;
    haveNormData=0;

-    for(i=0; i<LENGTHOF(dxCache); ++i) {
-        delete dxCache[i];
+    for(i=0; i<LENGTHOF(nxCache); ++i) {
+        delete nxCache[i];
    }
+    uprv_memset(nxCache, 0, sizeof(nxCache));

    return TRUE;
 }
@ -398,24 +423,23 @@ _getExtraData(uint32_t norm32) {
    return extraData+(norm32>>_NORM_EXTRA_SHIFT);
 }

-/* decomposition exclusion sets --------------------------------------------- */
+/* normalization exclusion sets --------------------------------------------- */

 /*
- * Decomposition exclusion UnicodeSets are used for tailored normalization,
+ * Normalization exclusion UnicodeSets are used for tailored normalization,
 * Unicode public review issue number 7. (http://www.unicode.org/review/)
 *
 * By specifying one or several sets of code points,
- * those do not get decomposed in normalization, even though Unicode might
- * otherwise define a decomposition for them.
+ * those code points become inert for normalization.
 *
 * ### TODO: This is a prototype. Assess if it should become a permanent part of ICU.
 */

 static const UnicodeSet *
-internalGetDXHangul(UErrorCode &errorCode) {
+internalGetNXHangul(UErrorCode &errorCode) {
    /* internal function, does not check for incoming U_FAILURE */

-    if(dxCache[UNORM_DX_HANGUL]==NULL) {
+    if(nxCache[UNORM_NX_HANGUL]==NULL) {
        UnicodeSet *set=new UnicodeSet(0xac00, 0xd7a3);
        if(set==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
@ -423,8 +447,8 @@ internalGetDXHangul(UErrorCode &errorCode) {
        }

        umtx_lock(NULL);
-        if(dxCache[UNORM_DX_HANGUL]==NULL) {
-            dxCache[UNORM_DX_HANGUL]=set;
+        if(nxCache[UNORM_NX_HANGUL]==NULL) {
+            nxCache[UNORM_NX_HANGUL]=set;
            set=NULL;
        }
        umtx_unlock(NULL);
@ -432,14 +456,14 @@ internalGetDXHangul(UErrorCode &errorCode) {
        delete set;
    }

-    return dxCache[UNORM_DX_HANGUL];
+    return nxCache[UNORM_NX_HANGUL];
 }

 static const UnicodeSet *
-internalGetDXCJKCompat(UErrorCode &errorCode) {
+internalGetNXCJKCompat(UErrorCode &errorCode) {
    /* internal function, does not check for incoming U_FAILURE */

-    if(dxCache[UNORM_DX_CJK_COMPAT]==NULL) {
+    if(nxCache[UNORM_NX_CJK_COMPAT]==NULL) {
        /* build a set from [CJK Ideographs]&[has canonical decomposition] */
        UnicodeSet *set, *hasDecomp;

@ -481,8 +505,8 @@ internalGetDXCJKCompat(UErrorCode &errorCode) {
        /* hasDecomp now contains all ideographs that decompose canonically */

        umtx_lock(NULL);
-        if(dxCache[UNORM_DX_CJK_COMPAT]==NULL) {
-            dxCache[UNORM_DX_CJK_COMPAT]=hasDecomp;
+        if(nxCache[UNORM_NX_CJK_COMPAT]==NULL) {
+            nxCache[UNORM_NX_CJK_COMPAT]=hasDecomp;
            hasDecomp=NULL;
        }
        umtx_unlock(NULL);
@ -491,14 +515,14 @@ internalGetDXCJKCompat(UErrorCode &errorCode) {
        delete set;
    }

-    return dxCache[UNORM_DX_CJK_COMPAT];
+    return nxCache[UNORM_NX_CJK_COMPAT];
 }

 static const UnicodeSet *
-internalGetDXAUmlaut(UErrorCode &errorCode) {
+internalGetNXAUmlaut(UErrorCode &errorCode) {
    /* internal function, does not check for incoming U_FAILURE */

-    if(dxCache[UNORM_DX_A_UMLAUT]==NULL) {
+    if(nxCache[UNORM_NX_A_UMLAUT]==NULL) {
        UnicodeSet *set=new UnicodeSet(0xe4, 0xe4);
        if(set==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
@ -506,8 +530,8 @@ internalGetDXAUmlaut(UErrorCode &errorCode) {
        }

        umtx_lock(NULL);
-        if(dxCache[UNORM_DX_A_UMLAUT]==NULL) {
-            dxCache[UNORM_DX_A_UMLAUT]=set;
+        if(nxCache[UNORM_NX_A_UMLAUT]==NULL) {
+            nxCache[UNORM_NX_A_UMLAUT]=set;
            set=NULL;
        }
        umtx_unlock(NULL);
@ -515,22 +539,70 @@ internalGetDXAUmlaut(UErrorCode &errorCode) {
        delete set;
    }

-    return dxCache[UNORM_DX_A_UMLAUT];
+    return nxCache[UNORM_NX_A_UMLAUT];
+}
+
+static const UnicodeSet *
+internalGetNXUnicode(uint32_t options, UErrorCode &errorCode) {
+    /* Returns the cached set for the Unicode version field of options, building it on first use; internal function, does not check for incoming U_FAILURE */
+    options&=_NORM_OPTIONS_UNICODE_MASK; /* keep only the Unicode version field */
+    if(options==0) {
+        return NULL; /* no Unicode version requested: no set, and errorCode is left alone */
+    }
+
+    if(nxCache[options]==NULL) {
+        /* build a set with all code points that were not designated by the specified Unicode version */
+        UnicodeSet *set;
+
+        switch(options) {
+        case UNORM_UNICODE_3_2:
+            set=new UnicodeSet(UNICODE_STRING("[:^Age=3.2:]", 12), errorCode);
+            break;
+        default:
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR; /* a version value without a case above */
+            return NULL;
+        }
+
+        if(set==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+        if(U_FAILURE(errorCode)) {
+            delete set; /* the pattern constructor reported a failure */
+            return NULL;
+        }
+
+        umtx_lock(NULL); /* the cache slot is tested again under the lock before storing */
+        if(nxCache[options]==NULL) {
+            nxCache[options]=set;
+            set=NULL;
+        }
+        umtx_unlock(NULL);
+
+        delete set; /* set is NULL if it was stored above; otherwise the slot was already filled and this copy is discarded */
+    }
+
+    return nxCache[options];
 }

 /* Get a decomposition exclusion set. The data must be loaded. */
 static const UnicodeSet *
-internalGetDX(int32_t options, UErrorCode &errorCode) {
-    if(dxCache[options]==NULL) {
+internalGetNX(int32_t options, UErrorCode &errorCode) {
+    options&=_NORM_OPTIONS_SETS_MASK;
+
+    if(nxCache[options]==NULL) {
        /* return basic sets */
-        if(options==UNORM_DX_HANGUL) {
-            return internalGetDXHangul(errorCode);
+        if(options==UNORM_NX_HANGUL) {
+            return internalGetNXHangul(errorCode);
        }
-        if(options==UNORM_DX_CJK_COMPAT) {
-            return internalGetDXCJKCompat(errorCode);
+        if(options==UNORM_NX_CJK_COMPAT) {
+            return internalGetNXCJKCompat(errorCode);
        }
-        if(options==UNORM_DX_A_UMLAUT) {
-            return internalGetDXAUmlaut(errorCode);
+        if(options==UNORM_NX_A_UMLAUT) {
+            return internalGetNXAUmlaut(errorCode);
+        }
+        if((options&_NORM_OPTIONS_UNICODE_MASK)!=0 && (options&_NORM_OPTIONS_NX_MASK)==0) {
+            return internalGetNXUnicode(options, errorCode);
        }

        /* build a set from multiple subsets */
@ -543,13 +615,16 @@ internalGetDX(int32_t options, UErrorCode &errorCode) {
            return NULL;
        }

-        if((options&UNORM_DX_HANGUL)!=0 && NULL!=(other=internalGetDXHangul(errorCode))) {
+        if((options&UNORM_NX_HANGUL)!=0 && NULL!=(other=internalGetNXHangul(errorCode))) {
            set->addAll(*other);
        }
-        if((options&UNORM_DX_CJK_COMPAT)!=0 && NULL!=(other=internalGetDXCJKCompat(errorCode))) {
+        if((options&UNORM_NX_CJK_COMPAT)!=0 && NULL!=(other=internalGetNXCJKCompat(errorCode))) {
            set->addAll(*other);
        }
-        if((options&UNORM_DX_A_UMLAUT)!=0 && NULL!=(other=internalGetDXAUmlaut(errorCode))) {
+        if((options&UNORM_NX_A_UMLAUT)!=0 && NULL!=(other=internalGetNXAUmlaut(errorCode))) {
+            set->addAll(*other);
+        }
+        if((options&_NORM_OPTIONS_UNICODE_MASK)!=0 && NULL!=(other=internalGetNXUnicode(options, errorCode))) {
            set->addAll(*other);
        }

@ -559,8 +634,8 @@ internalGetDX(int32_t options, UErrorCode &errorCode) {
        }

        umtx_lock(NULL);
-        if(dxCache[options]==NULL) {
-            dxCache[options]=set;
+        if(nxCache[options]==NULL) {
+            nxCache[options]=set;
            set=NULL;
        }
        umtx_unlock(NULL);
@ -568,27 +643,27 @@ internalGetDX(int32_t options, UErrorCode &errorCode) {
        delete set;
    }

-    return dxCache[options];
+    return nxCache[options];
 }

 static inline const UnicodeSet *
-getDX(int32_t options, UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode) || (options&=UNORM_DX_MASK)==0) {
+getNX(int32_t options, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode) || (options&=_NORM_OPTIONS_SETS_MASK)==0) { /* the assignment reduces options to the set-selecting bits */
        /* incoming failure, or no decomposition exclusions requested */
        return NULL;
    } else {
-        return internalGetDX(options, errorCode);
+        return internalGetNX(options, errorCode); /* returns the nxCache[] entry for options, building it if missing */
    }
 }

 static inline UBool
-dx_contains(const UnicodeSet *dx, UChar32 c) {
-    return dx!=NULL && dx->contains(c);
+nx_contains(const UnicodeSet *nx, UChar32 c) {
+    return nx!=NULL && nx->contains(c); /* a NULL set excludes nothing */
 }

 static inline UBool
-dx_contains(const UnicodeSet *dx, UChar c, UChar c2) {
-    return dx!=NULL && dx->contains(c2==0 ? c : U16_GET_SUPPLEMENTARY(c, c2));
+nx_contains(const UnicodeSet *nx, UChar c, UChar c2) {
+    return nx!=NULL && nx->contains(c2==0 ? c : U16_GET_SUPPLEMENTARY(c, c2)); /* c2==0: c alone is the code point; otherwise (c, c2) is combined as a surrogate pair */
 }

 /* other normalization primitives ------------------------------------------- */
@ -1232,7 +1307,7 @@ _mergeOrdered(UChar *start, UChar *current,
 /* quick check functions ---------------------------------------------------- */

 static UBool
-unorm_checkFCD(const UChar *src, int32_t srcLength, const UnicodeSet *dx) {
+unorm_checkFCD(const UChar *src, int32_t srcLength, const UnicodeSet *nx) {
    const UChar *limit;
    UChar c, c2;
    uint16_t fcd16;
@ -1302,20 +1377,9 @@ unorm_checkFCD(const UChar *src, int32_t srcLength, const UnicodeSet *dx) {
            c2=0;
        }

-        /*
-         * If (c, c2) is excluded, then replace the code point's FCD data
-         * with the regular UCD cc because it does not decompose.
-         */
-        if(dx!=NULL) {
-            UChar32 cp;
-
-            cp= c2==0 ? c : U16_GET_SUPPLEMENTARY(c, c2);
-            if(dx->contains(cp)) {
-                uint32_t norm32;
-                UTRIE_GET32(&normTrie, cp, norm32);
-                /* This depends on knowing that _NORM_CC_MASK==0xff00 */
-                fcd16=(uint16_t)(norm32&0xff00)|(((uint16_t)norm32)>>8);
-            }
+        if(nx_contains(nx, c, c2)) {
+            prevCC=0; /* excluded: fcd16==0 */
+            continue;
        }

        /*
@ -1331,10 +1395,10 @@ unorm_checkFCD(const UChar *src, int32_t srcLength, const UnicodeSet *dx) {
        if(cc!=0) {
            if(prevCC<0) {
                /* the previous character was <_NORM_MIN_WITH_LEAD_CC, we need to get its trail cc */
-                if(!dx_contains(dx, (UChar32)-prevCC)) {
+                if(!nx_contains(nx, (UChar32)-prevCC)) {
                    prevCC=(int16_t)(_getFCD16((UChar)-prevCC)&0xff);
                } else {
-                    prevCC=0; /* excluded; UCD cc's of code points <U+0300 are all 0 */
+                    prevCC=0; /* excluded: fcd16==0 */
                }
            }

@ -1351,7 +1415,7 @@ _quickCheck(const UChar *src,
            int32_t srcLength,
            UNormalizationMode mode,
            UBool allowMaybe,
-            const UnicodeSet *dx,
+            const UnicodeSet *nx,
            UErrorCode *pErrorCode) {
    UChar stackBuffer[_STACK_BUFFER_CAPACITY];
    UChar *buffer;
@ -1396,7 +1460,7 @@ _quickCheck(const UChar *src,
        qcMask=_NORM_QC_NFKD;
        break;
    case UNORM_FCD:
-        return unorm_checkFCD(src, srcLength, dx) ? UNORM_YES : UNORM_NO;
+        return unorm_checkFCD(src, srcLength, nx) ? UNORM_YES : UNORM_NO;
    default:
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return UNORM_MAYBE;
@ -1460,6 +1524,11 @@ _quickCheck(const UChar *src,
            c2=0;
        }

+        if(nx_contains(nx, c, c2)) {
+            /* excluded: norm32==0 */
+            norm32=0;
+        }
+
        /* check the combining order */
        cc=(uint8_t)(norm32>>_NORM_CC_SHIFT);
        if(cc!=0 && cc<prevCC) {
@ -1469,11 +1538,6 @@ _quickCheck(const UChar *src,
        prevCC=cc;

        /* check for "no" or "maybe" quick check flags */
-        if(dx_contains(dx, c, c2)) {
-            /* excluded: treat like "yes" */
-            continue;
-        }
-
        qcNorm32=norm32&qcMask;
        if(qcNorm32&_NORM_QC_ANY_NO) {
            result=UNORM_NO;
@ -1506,7 +1570,7 @@ _quickCheck(const UChar *src,
                             prevStarter,
                             src,
                             qcMask,
-                             prevCC, dx, pErrorCode);
+                             prevCC, nx, pErrorCode);
                if(U_FAILURE(*pErrorCode)) {
                    result=UNORM_MAYBE; /* error (out of memory) */
                    break;
@ -1540,10 +1604,10 @@ unorm_quickCheck(const UChar *src,
 }

 U_CAPI UNormalizationCheckResult U_EXPORT2
-unorm_quickCheckTailored(const UChar *src, int32_t srcLength, 
-                         UNormalizationMode mode, int32_t options,
-                         UErrorCode *pErrorCode) {
-    return _quickCheck(src, srcLength, mode, TRUE, getDX(options, *pErrorCode), pErrorCode);
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, 
+                            UNormalizationMode mode, int32_t options,
+                            UErrorCode *pErrorCode) { /* quick check with allowMaybe=TRUE and the exclusion set that getNX() selects from options */
+    return _quickCheck(src, srcLength, mode, TRUE, getNX(options, *pErrorCode), pErrorCode);
 }

 U_CAPI UBool U_EXPORT2
@ -1553,6 +1617,13 @@ unorm_isNormalized(const UChar *src, int32_t srcLength,
    return (UBool)(UNORM_YES==_quickCheck(src, srcLength, mode, FALSE, NULL, pErrorCode));
 }

+U_CAPI UBool U_EXPORT2
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
+                              UNormalizationMode mode, int32_t options,
+                              UErrorCode *pErrorCode) { /* runs _quickCheck() with allowMaybe=FALSE; TRUE only if the result is UNORM_YES */
+    return (UBool)(UNORM_YES==_quickCheck(src, srcLength, mode, FALSE, getNX(options, *pErrorCode), pErrorCode));
+}
+
 /* make NFD & NFKD ---------------------------------------------------------- */

 U_CAPI int32_t U_EXPORT2
@ -1644,7 +1715,7 @@ unorm_getDecomposition(UChar32 c, UBool compat,
 static int32_t
 _decompose(UChar *dest, int32_t destCapacity,
           const UChar *src, int32_t srcLength,
-           UBool compat, const UnicodeSet *dx,
+           UBool compat, const UnicodeSet *nx,
           uint8_t &outTrailCC) {
    UChar buffer[3];
    const UChar *limit, *prevSrc, *p;
@ -1728,7 +1799,7 @@ _decompose(UChar *dest, int32_t destCapacity,
         * otherwise, p[length] is merged in with _mergeOrdered()
         */
        if(isNorm32HangulOrJamo(norm32)) {
-            if(dx_contains(dx, c)) {
+            if(nx_contains(nx, c)) {
                c2=0;
                p=NULL;
                length=1;
@ -1769,7 +1840,11 @@ _decompose(UChar *dest, int32_t destCapacity,
            }

            /* get the decomposition and the lead and trail cc's */
-            if((norm32&qcMask)==0 || dx_contains(dx, c, c2)) {
+            if(nx_contains(nx, c, c2)) {
+                /* excluded: norm32==0 */
+                cc=trailCC=0;
+                p=NULL;
+            } else if((norm32&qcMask)==0) {
                /* c does not decompose */
                cc=trailCC=(uint8_t)(norm32>>_NORM_CC_SHIFT);
                p=NULL;
@ -1835,7 +1910,7 @@ unorm_decompose(UChar *dest, int32_t destCapacity,
                const UChar *src, int32_t srcLength,
                UBool compat, int32_t options,
                UErrorCode *pErrorCode) {
-    const UnicodeSet *dx;
+    const UnicodeSet *nx;
    int32_t destIndex;
    uint8_t trailCC;

@ -1843,14 +1918,14 @@ unorm_decompose(UChar *dest, int32_t destCapacity,
        return 0;
    }

-    dx=getDX(options, *pErrorCode);
+    nx=getNX(options, *pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    destIndex=_decompose(dest, destCapacity,
                         src, srcLength,
-                         compat, dx,
+                         compat, nx,
                         trailCC);

    return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode);
@ -1913,7 +1988,7 @@ _findSafeFCD(const UChar *src, const UChar *limit, uint16_t fcd16) {
 static uint8_t
 _decomposeFCD(const UChar *src, const UChar *decompLimit,
              UChar *dest, int32_t &destIndex, int32_t destCapacity,
-              const UnicodeSet *dx) {
+              const UnicodeSet *nx) {
    const UChar *p;
    uint32_t norm32;
    int32_t reorderStartIndex, length;
@ -1959,7 +2034,11 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit,
        }

        /* get the decomposition and the lead and trail cc's */
-        if((norm32&_NORM_QC_NFD)==0 || dx_contains(dx, c, c2)) {
+        if(nx_contains(nx, c, c2)) {
+            /* excluded: norm32==0 */
+            cc=trailCC=0;
+            p=NULL;
+        } else if((norm32&_NORM_QC_NFD)==0) {
            /* c does not decompose */
            cc=trailCC=(uint8_t)(norm32>>_NORM_CC_SHIFT);
            p=NULL;
@ -2021,7 +2100,7 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit,
 static int32_t
 unorm_makeFCD(UChar *dest, int32_t destCapacity,
              const UChar *src, int32_t srcLength,
-              const UnicodeSet *dx,
+              const UnicodeSet *nx,
              UErrorCode *pErrorCode) {
    const UChar *limit, *prevSrc, *decompStart;
    int32_t destIndex, length;
@ -2105,10 +2184,10 @@ unorm_makeFCD(UChar *dest, int32_t destCapacity,
            /* prevCC<0 is only possible from the above loop, i.e., only if prevSrc<src */
            if(prevCC<0) {
                /* the previous character was <_NORM_MIN_WITH_LEAD_CC, we need to get its trail cc */
-                if(!dx_contains(dx, (UChar32)-prevCC)) {
+                if(!nx_contains(nx, (UChar32)-prevCC)) {
                    prevCC=(int16_t)(_getFCD16((UChar)-prevCC)&0xff);
                } else {
-                    prevCC=0; /* excluded; UCD cc's of code points <U+0300 are all 0 */
+                    prevCC=0; /* excluded: fcd16==0 */
                }

                /*
@ -2152,21 +2231,8 @@ unorm_makeFCD(UChar *dest, int32_t destCapacity,
        }

        /* we are looking at the character (c, c2) at [prevSrc..src[ */
-
-        /*
-         * If (c, c2) is excluded, then replace the code point's FCD data
-         * with the regular UCD cc because it does not decompose.
-         */
-        if(dx!=NULL) {
-            UChar32 cp;
-
-            cp= c2==0 ? c : U16_GET_SUPPLEMENTARY(c, c2);
-            if(dx->contains(cp)) {
-                uint32_t norm32;
-                UTRIE_GET32(&normTrie, cp, norm32);
-                /* This depends on knowing that _NORM_CC_MASK==0xff00 */
-                fcd16=(uint16_t)(norm32&0xff00)|(((uint16_t)norm32)>>8);
-            }
+        if(nx_contains(nx, c, c2)) {
+            fcd16=0; /* excluded: fcd16==0 */
        }

        /* check the combining order, get the lead cc */
@ -2208,7 +2274,7 @@ unorm_makeFCD(UChar *dest, int32_t destCapacity,
             */
            prevCC=_decomposeFCD(decompStart, src,
                                 dest, destIndex, destCapacity,
-                                 dx);
+                                 nx);
            decompStart=src;
        }
    }
@ -2223,30 +2289,27 @@ static inline uint32_t
 _getNextCombining(UChar *&p, const UChar *limit,
                  UChar &c, UChar &c2,
                  uint16_t &combiningIndex, uint8_t &cc,
-                  const UnicodeSet *dx) {
+                  const UnicodeSet *nx) {
    uint32_t norm32, combineFlags;

+    /* get properties */
    c=*p++;
    norm32=_getNorm32(c);
+
+    /* preset output values for most characters */
+    c2=0;
+    combiningIndex=0;
+    cc=0;
+
    if((norm32&(_NORM_CC_MASK|_NORM_COMBINES_ANY))==0) {
-        c2=0;
-        combiningIndex=0;
-        cc=0;
        return 0;
    } else {
        if(isNorm32Regular(norm32)) {
-            c2=0;
+            /* set cc etc. below */
        } else if(isNorm32HangulOrJamo(norm32)) {
            /* a compatibility decomposition contained Jamos */
-            c2=0;
-            cc=0;
-            // ### only if excluding composition parts -- if(!dx_contains(dx, c)) {
-                combiningIndex=(uint16_t)(0xfff0|(norm32>>_NORM_EXTRA_SHIFT));
-                return norm32&_NORM_COMBINES_ANY;
-            // ### only if excluding composition parts -- } else {
-            // ### only if excluding composition parts --     combiningIndex=0;
-            // ### only if excluding composition parts --     return 0;
-            // ### only if excluding composition parts -- }
+            combiningIndex=(uint16_t)(0xfff0|(norm32>>_NORM_EXTRA_SHIFT));
+            return norm32&_NORM_COMBINES_ANY;
        } else {
            /* c is a lead surrogate, get the real norm32 */
            if(p!=limit && UTF_IS_SECOND_SURROGATE(c2=*p)) {
@ -2254,25 +2317,21 @@ _getNextCombining(UChar *&p, const UChar *limit,
                norm32=_getNorm32FromSurrogatePair(norm32, c2);
            } else {
                c2=0;
-                combiningIndex=0;
-                cc=0;
                return 0;
            }
        }

+        if(nx_contains(nx, c, c2)) {
+            return 0; /* excluded: norm32==0 */
+        }
+
        cc=(uint8_t)(norm32>>_NORM_CC_SHIFT);

-        // ### only if excluding composition parts -- if(!dx_contains(dx, c, c2)) {
-            combineFlags=norm32&_NORM_COMBINES_ANY;
-            if(combineFlags!=0) {
-                combiningIndex=*(_getExtraData(norm32)-1);
-            }
-
-            return combineFlags;
-        // ### only if excluding composition parts -- } else {
-        // ### only if excluding composition parts --     combiningIndex=0;
-        // ### only if excluding composition parts --     return 0;
-        // ### only if excluding composition parts -- }
+        combineFlags=norm32&_NORM_COMBINES_ANY;
+        if(combineFlags!=0) {
+            combiningIndex=*(_getExtraData(norm32)-1);
+        }
+        return combineFlags;
    }
 }

@ -2372,7 +2431,7 @@ _combine(const uint16_t *table, uint16_t combineBackIndex,
 * while the combining mark that is removed has at least one code unit
 */
 static uint8_t
-_recompose(UChar *p, UChar *&limit, const UnicodeSet *dx) {
+_recompose(UChar *p, UChar *&limit, const UnicodeSet *nx) {
    UChar *starter, *pRemove, *q, *r;
    uint32_t combineFlags;
    UChar c, c2;
@ -2389,7 +2448,7 @@ _recompose(UChar *p, UChar *&limit, const UnicodeSet *dx) {
    prevCC=0;

    for(;;) {
-        combineFlags=_getNextCombining(p, limit, c, c2, combineBackIndex, cc, dx);
+        combineFlags=_getNextCombining(p, limit, c, c2, combineBackIndex, cc, nx);
        if((combineFlags&_NORM_COMBINES_BACK) && starter!=NULL) {
            if(combineBackIndex&0x8000) {
                /* c is a Jamo V/T, see if we can compose it with the previous character */
@ -2401,13 +2460,11 @@ _recompose(UChar *p, UChar *&limit, const UnicodeSet *dx) {
                    if(c2<JAMO_L_COUNT) {
                        pRemove=p-1;
                        c=(UChar)(HANGUL_BASE+(c2*JAMO_V_COUNT+(c-JAMO_V_BASE))*JAMO_T_COUNT);
-                        // ### only if excluding composition parts -- forbid intermediate (LV) if excluded
-                        // ### when not excluding parts, we catch the final syllable below
-                        if(p!=limit && (c2=(UChar)(*p-JAMO_T_BASE))<JAMO_T_COUNT /* ### only if excluding composition parts -- && !dx_contains(dx, c2)*/) {
+                        if(p!=limit && (c2=(UChar)(*p-JAMO_T_BASE))<JAMO_T_COUNT) {
                            ++p;
                            c+=c2;
                        }
-                        if(!dx_contains(dx, c)) {
+                        if(!nx_contains(nx, c)) {
                            *starter=c;
                        } else {
                            /* excluded */
@ -2463,7 +2520,7 @@ _recompose(UChar *p, UChar *&limit, const UnicodeSet *dx) {
                /* the starter and the combining mark (c, c2) do combine and */
                0!=(result=_combine(combiningTable+combineFwdIndex, combineBackIndex, value, value2)) &&
                /* the composition result is not excluded */
-                !dx_contains(dx, value, value2)
+                !nx_contains(nx, value, value2)
            ) {
                /* replace the starter with the composition, remove the combining mark */
                pRemove= c2==0 ? p-1 : p-2; /* pointer to the combining mark */
@ -2633,7 +2690,7 @@ static const UChar *
 _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_t &length,
             const UChar *prevStarter, const UChar *src,
             uint32_t qcMask, uint8_t &prevCC,
-             const UnicodeSet *dx,
+             const UnicodeSet *nx,
             UErrorCode *pErrorCode) {
    UChar *recomposeLimit;
    uint8_t trailCC;
@ -2644,7 +2701,7 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_
    /* decompose [prevStarter..src[ */
    length=_decompose(buffer, bufferCapacity,
                      prevStarter, src-prevStarter,
-                      compat, dx,
+                      compat, nx,
                      trailCC);
    if(length>bufferCapacity) {
        if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*length, 0)) {
@ -2653,14 +2710,14 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_
        }
        length=_decompose(buffer, bufferCapacity,
                          prevStarter, src-prevStarter,
-                          compat, dx,
+                          compat, nx,
                          trailCC);
    }

    /* recompose the decomposition */
    recomposeLimit=buffer+length;
    if(length>=2) {
-        prevCC=_recompose(buffer, recomposeLimit, dx);
+        prevCC=_recompose(buffer, recomposeLimit, nx);
    }

    /* return with a pointer to the recomposition and its length */
@ -2670,26 +2727,19 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_

 static inline UBool
 _composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UChar *limit,
-               UBool compat, UChar *dest, const UnicodeSet *dx) {
-    /* ### only if excluding composition parts -- if(dx!=NULL && (dx->contains(prev) || dx->contains(c))) {
-        return FALSE;
-    }*/
+               UBool compat, UChar *dest, const UnicodeSet *nx) {
    if(isJamoVTNorm32JamoV(norm32)) {
        /* c is a Jamo V, compose with previous Jamo L and following Jamo T */
        prev=(UChar)(prev-JAMO_L_BASE);
        if(prev<JAMO_L_COUNT) {
            c=(UChar)(HANGUL_BASE+(prev*JAMO_V_COUNT+(c-JAMO_V_BASE))*JAMO_T_COUNT);
-            // ### only if excluding composition parts -- forbid intermediate (LV) if excluded
-            // ### when not excluding parts, we catch the final syllable below

            /* check if the next character is a Jamo T (normal or compatibility) */
            if(src!=limit) {
                UChar next, t;

                next=*src;
-                // ### only if excluding composition parts -- if(dx_contains(dx, next)) {
-                    /* excluded */
-                /* ### only if excluding composition parts -- } else*/ if((t=(UChar)(next-JAMO_T_BASE))<JAMO_T_COUNT) {
+                if((t=(UChar)(next-JAMO_T_BASE))<JAMO_T_COUNT) {
                    /* normal Jamo T */
                    ++src;
                    c+=t;
@ -2710,7 +2760,7 @@ _composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UC
                    }
                }
            }
-            if(dx_contains(dx, c)) {
+            if(nx_contains(nx, c)) {
                if(!isHangulWithoutJamoT(c)) {
                    --src; /* undo ++src from reading the Jamo T */
                }
@ -2724,7 +2774,7 @@ _composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UC
    } else if(isHangulWithoutJamoT(prev)) {
        /* c is a Jamo T, compose with previous Hangul LV that does not contain a Jamo T */
        c=(UChar)(prev+(c-JAMO_T_BASE));
-        if(dx_contains(dx, c)) {
+        if(nx_contains(nx, c)) {
            return FALSE;
        }
        if(dest!=0) {
@ -2738,7 +2788,7 @@ _composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UC
 static int32_t
 _compose(UChar *dest, int32_t destCapacity,
         const UChar *src, int32_t srcLength,
-         UBool compat, const UnicodeSet *dx,
+         UBool compat, const UnicodeSet *nx,
         UErrorCode *pErrorCode) {
    UChar stackBuffer[_STACK_BUFFER_CAPACITY];
    UChar *buffer;
@ -2888,7 +2938,7 @@ _compose(UChar *dest, int32_t destCapacity,
                _composeHangul(
                    *(prevSrc-1), c, norm32, src, limit, compat,
                    destIndex<=destCapacity ? dest+(destIndex-1) : 0,
-                    dx)
+                    nx)
            ) {
                prevStarter=src;
                continue;
@ -2917,7 +2967,10 @@ _compose(UChar *dest, int32_t destCapacity,
            }

            /* we are looking at the character (c, c2) at [prevSrc..src[ */
-            if((norm32&qcMask)==0 || dx_contains(dx, c, c2)) {
+            if(nx_contains(nx, c, c2)) {
+                /* excluded: norm32==0 */
+                cc=0;
+            } else if((norm32&qcMask)==0) {
                cc=(uint8_t)(norm32>>_NORM_CC_SHIFT);
            } else {
                const UChar *p;
@ -2959,7 +3012,7 @@ _compose(UChar *dest, int32_t destCapacity,
                               prevStarter, src,
                               qcMask,
                               prevCC,          /* output */
-                               dx,
+                               nx,
                               pErrorCode);

                if(p==NULL) {
@ -3022,21 +3075,21 @@ unorm_compose(UChar *dest, int32_t destCapacity,
              const UChar *src, int32_t srcLength,
              UBool compat, int32_t options,
              UErrorCode *pErrorCode) {
-    const UnicodeSet *dx;
+    const UnicodeSet *nx;
    int32_t destIndex;

    if(!_haveData(*pErrorCode)) {
        return 0;
    }

-    dx=getDX(options, *pErrorCode);
+    nx=getNX(options, *pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    destIndex=_compose(dest, destCapacity,
                       src, srcLength,
-                       compat, dx,
+                       compat, nx,
                       pErrorCode);

    return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode);
@ -3054,7 +3107,7 @@ unorm_internalNormalize(UChar *dest, int32_t destCapacity,
                        const UChar *src, int32_t srcLength,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode *pErrorCode) {
-    const UnicodeSet *dx;
+    const UnicodeSet *nx;

    switch(mode) {
    case UNORM_NFD:
@ -3078,13 +3131,13 @@ unorm_internalNormalize(UChar *dest, int32_t destCapacity,
                             TRUE, options,
                             pErrorCode);
    case UNORM_FCD:
-        dx=getDX(options, *pErrorCode);
+        nx=getNX(options, *pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return 0;
        }
        return unorm_makeFCD(dest, destCapacity,
                             src, srcLength,
-                             dx,
+                             nx,
                             pErrorCode);
    case UNORM_NONE:
        /* just copy the string */
@ -4245,8 +4298,8 @@ unorm_compare(const UChar *s1, int32_t length1,
              uint32_t options,
              UErrorCode *pErrorCode) {
    UChar fcd1[300], fcd2[300];
-    UChar *f1, *f2, *d1, *d2;
-    const UnicodeSet *dx;
+    UChar *d1, *d2;
+    const UnicodeSet *nx;
    int32_t result;

    /* argument checking */
@ -4265,12 +4318,12 @@ unorm_compare(const UChar *s1, int32_t length1,
        return 0;
    }

-    dx=getDX((int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT), *pErrorCode);
+    nx=getNX((int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT), *pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

-    f1=f2=d1=d2=0;
+    d1=d2=0;
    options|=_COMPARE_EQUIV;
    result=0;

@ -4295,8 +4348,8 @@ unorm_compare(const UChar *s1, int32_t length1,
        UBool isFCD1, isFCD2;

        // check if s1 and/or s2 fulfill the FCD conditions
-        isFCD1=unorm_checkFCD(s1, length1, dx);
-        isFCD2=unorm_checkFCD(s2, length2, dx);
+        isFCD1=unorm_checkFCD(s1, length1, nx);
+        isFCD2=unorm_checkFCD(s2, length2, nx);

        if(!isFCD1 && !isFCD2) {
            // if both strings need normalization then make them NFD right away and
@ -4307,7 +4360,7 @@ unorm_compare(const UChar *s1, int32_t length1,

            _len1=_decompose(fcd1, sizeof(fcd1)/U_SIZEOF_UCHAR,
                             s1, length1,
-                             FALSE, dx,
+                             FALSE, nx,
                             trailCC);
            if(_len1<=(int32_t)(sizeof(fcd1)/U_SIZEOF_UCHAR)) {
                s1=fcd1;
@ -4320,7 +4373,7 @@ unorm_compare(const UChar *s1, int32_t length1,

                _len1=_decompose(d1, _len1,
                                 s1, length1,
-                                 FALSE, dx,
+                                 FALSE, nx,
                                 trailCC);

                s1=d1;
@ -4329,7 +4382,7 @@ unorm_compare(const UChar *s1, int32_t length1,

            _len2=_decompose(fcd2, sizeof(fcd2)/U_SIZEOF_UCHAR,
                             s2, length2,
-                             FALSE, dx,
+                             FALSE, nx,
                             trailCC);
            if(_len2<=(int32_t)(sizeof(fcd2)/U_SIZEOF_UCHAR)) {
                s2=fcd2;
@ -4342,7 +4395,7 @@ unorm_compare(const UChar *s1, int32_t length1,

                _len2=_decompose(d2, _len2,
                                 s2, length2,
-                                 FALSE, dx,
+                                 FALSE, nx,
                                 trailCC);

                s2=d2;
@ -4357,7 +4410,7 @@ unorm_compare(const UChar *s1, int32_t length1,
            if(!isFCD1) {
                _len1=unorm_makeFCD(fcd1, sizeof(fcd1)/U_SIZEOF_UCHAR,
                                    s1, length1,
-                                    dx,
+                                    nx,
                                    pErrorCode);
                if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
                    s1=fcd1;
@ -4371,7 +4424,7 @@ unorm_compare(const UChar *s1, int32_t length1,
                    *pErrorCode=U_ZERO_ERROR;
                    _len1=unorm_makeFCD(d1, _len1,
                                        s1, length1,
-                                        dx,
+                                        nx,
                                        pErrorCode);
                    if(U_FAILURE(*pErrorCode)) {
                        goto cleanup;
@ -4385,7 +4438,7 @@ unorm_compare(const UChar *s1, int32_t length1,
            if(!isFCD2) {
                _len2=unorm_makeFCD(fcd2, sizeof(fcd2)/U_SIZEOF_UCHAR,
                                    s2, length2,
-                                    dx,
+                                    nx,
                                    pErrorCode);
                if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
                    s2=fcd2;
@ -4399,7 +4452,7 @@ unorm_compare(const UChar *s1, int32_t length1,
                    *pErrorCode=U_ZERO_ERROR;
                    _len2=unorm_makeFCD(d2, _len2,
                                        s2, length2,
-                                        dx,
+                                        nx,
                                        pErrorCode);
                    if(U_FAILURE(*pErrorCode)) {
                        goto cleanup;
@ -4423,12 +4476,6 @@ unorm_compare(const UChar *s1, int32_t length1,
    }

 cleanup:
-    if(f1!=0) {
-        uprv_free(f1);
-    }
-    if(f2!=0) {
-        uprv_free(f2);
-    }
    if(d1!=0) {
        uprv_free(d1);
    }
--- a/icu4c/source/common/unormimp.h
+++ b/icu4c/source/common/unormimp.h
@ -147,18 +147,21 @@ enum {
    _NORM_DECOMP_LENGTH_MASK=0x7f
 };

-/* Constants for options flags for tailored normalization. ### TODO prototype, see unorm.cpp */
+/* Constants for options flags for normalization. ### TODO prototype, see unorm.cpp */
 enum {
     /** Options bit 0, do not decompose Hangul syllables. @draft ICU 2.6 */
-    UNORM_DX_HANGUL=1,
+    UNORM_NX_HANGUL=1,
     /** Options bit 1, do not decompose CJK compatibility characters. @draft ICU 2.6 */
-    UNORM_DX_CJK_COMPAT=2,
+    UNORM_NX_CJK_COMPAT=2,
     /** Options bit 2, do not decompose a-umlaut, only for testing. @internal */
-    UNORM_DX_A_UMLAUT=4,
-    /** This many of the least significant options bits are used to specify decomposition exclusions. @draft ICU 2.6 */
-    UNORM_DX_COUNT=4,
-    /** Options bit mask for decomposition exclusions. @draft ICU 2.6 */
-    UNORM_DX_MASK=(1<<UNORM_DX_COUNT)-1
+    UNORM_NX_A_UMLAUT=4,
+
+    /**
+     * Options bit set value to select Unicode 3.2 normalization (except NormalizationCorrections).
+     * At most one Unicode version can be selected at a time.
+     * @draft ICU 2.6
+     */
+    UNORM_UNICODE_3_2=0x20
 };

 /**
@ -181,9 +184,15 @@ enum {
 * @draft ICU 2.6
 */
 U_CAPI UNormalizationCheckResult U_EXPORT2
-unorm_quickCheckTailored(const UChar *src, int32_t srcLength, 
-                         UNormalizationMode mode, int32_t options,
-                         UErrorCode *pErrorCode);
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, 
+                            UNormalizationMode mode, int32_t options,
+                            UErrorCode *pErrorCode);
+
+/** ### TODO @draft ICU 2.6 */
+U_CAPI UBool U_EXPORT2
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
+                              UNormalizationMode mode, int32_t options,
+                              UErrorCode *pErrorCode);

 /**
 * Is the normalizer data loaded?
--- a/icu4c/source/test/intltest/normconf.cpp
+++ b/icu4c/source/test/intltest/normconf.cpp
@ -9,6 +9,7 @@
 #include "unicode/uchar.h"
 #include "unicode/normlzr.h"
 #include "unicode/uniset.h"
+#include "unormimp.h"
 #include "cstring.h"
 #include "filestrm.h"
 #include "normconf.h"
@ -27,8 +28,9 @@

 void NormalizerConformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) {
    switch (index) {
-        CASE(0,TestConformance);
-        // CASE(1,TestCase6);
+        CASE(0, TestConformance);
+        CASE(1, TestConformance32);
+        // CASE(2, TestCase6);
        default: name = ""; break;
    }
 }
@ -57,46 +59,103 @@ void NormalizerConformanceTest::compare(const UnicodeString& s1, const UnicodeSt
        errln("Normalizer::compare() failed for s1: " + prettify(s1) + " s2: " +prettify(s2));
    }
 }
+
+// Try to open dir+tail+filename for binary reading. Returns NULL if dir is NULL,
+// if the joined path would not fit into the local buffer, or if the open fails.
+static FileStream *
+openInFolder(const char *dir, const char *tail, const char *filename) {
+    char path[2000];
+
+    // refuse over-long paths rather than overflowing path[] in strcpy()/strcat()
+    if(dir==NULL || strlen(dir)+strlen(tail)+strlen(filename)>=sizeof(path)) {
+        return NULL;
+    }
+    strcpy(path, dir);
+    strcat(path, tail);
+    strcat(path, filename);
+    return T_FileStream_open(path, "rb");
+}
+
+/**
+ * Open a normalization test data file for binary reading.
+ * Tries, in order: the ICU data directory, three locations relative to the
+ * test data folder, and (if U_TOPSRCDIR is defined) the source data folder.
+ * @param filename name of the file, without any folder
+ * @return the open stream, or NULL after reporting the failure with errln()
+ */
+FileStream *
+NormalizerConformanceTest::openNormalizationTestFile(const char *filename) {
+    const char *folder;
+    FileStream *input;
+    UErrorCode errorCode;
+
+    // look inside ICU_DATA first
+    input=openInFolder(u_getDataDirectory(), "unidata" U_FILE_SEP_STRING, filename);
+    if(input!=NULL) {
+        return input;
+    }
+
+    // the next three candidates are relative to the test data folder; load it only once
+    errorCode=U_ZERO_ERROR;
+    folder=loadTestData(errorCode);
+    if(U_SUCCESS(errorCode)) {
+        // find icu/source/data/unidata relative to the test data
+        input=openInFolder(folder,
+                           U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
+                           U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
+                           U_FILE_SEP_STRING "data" U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING,
+                           filename);
+        if(input!=NULL) {
+            return input;
+        }
+
+        // look in icu/source/test/testdata/out/build
+        input=openInFolder(folder, U_FILE_SEP_STRING, filename);
+        if(input!=NULL) {
+            return input;
+        }
+
+        // look in icu/source/test/testdata
+        input=openInFolder(folder, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING, filename);
+        if(input!=NULL) {
+            return input;
+        }
+    }
+
+    // find icu/source/data/unidata relative to U_TOPSRCDIR
+#if defined(U_TOPSRCDIR)
+    input=openInFolder(U_TOPSRCDIR U_FILE_SEP_STRING "data" U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING, "", filename);
+    if(input!=NULL) {
+        return input;
+    }
+#endif
+
+    errln("Failed to open %s", filename);
+    return NULL;
+}
+
 /**
 * Test the conformance of Normalizer to
- * http://www.unicode.org/unicode/reports/tr15/conformance/Draft-TestSuite.txt.
- * This file must be located at the path specified as TEST_SUITE_FILE.
+ * http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
 */
-void NormalizerConformanceTest::TestConformance(void) {
+void NormalizerConformanceTest::TestConformance() {
+    TestConformance(openNormalizationTestFile("NormalizationTest.txt"), 0);
+}
+
+void NormalizerConformanceTest::TestConformance32() {
+    TestConformance(openNormalizationTestFile("NormalizationTest-3.2.0.txt"), UNORM_UNICODE_3_2);
+}
+
+void NormalizerConformanceTest::TestConformance(FileStream *input, int32_t options) {
    enum { BUF_SIZE = 1024 };
    char lineBuf[BUF_SIZE];
    UnicodeString fields[FIELD_COUNT];
    int32_t passCount = 0;
    int32_t failCount = 0;
-    char newPath[256];
-    char backupPath[256];
-    FileStream *input = NULL;
    UChar32 c;

-    /* Look inside ICU_DATA first */
-    strcpy(newPath, u_getDataDirectory());
-    strcat(newPath, "unidata" U_FILE_SEP_STRING );
-    strcat(newPath, TEST_SUITE_FILE);
-
-    // As a fallback, try to guess where the source data was located
-    //   at the time ICU was built, and look there.
-#if defined (U_TOPSRCDIR)
-    strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
-#else
-    UErrorCode   err = U_ZERO_ERROR;
-    strcpy(backupPath, loadTestData(err));
-    strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
-#endif
-    strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING TEST_SUITE_FILE);
-
-    input = T_FileStream_open(newPath, "rb");
-
-    if (input == 0) {
-      input = T_FileStream_open(backupPath, "rb");
-      if (input == 0) {
-        errln("Failed to open either " + UnicodeString(newPath) + " or " + UnicodeString(backupPath) );
+    if(input==NULL) {
        return;
-      }
    }

    // UnicodeSet for all code points that are not mentioned in NormalizationTest.txt
@ -149,7 +208,7 @@ void NormalizerConformanceTest::TestConformance(void) {
            other.remove(c);
        }

-        if (checkConformance(fields, lineBuf)) {
+        if (checkConformance(fields, lineBuf, options)) {
            ++passCount;
        } else {
            ++failCount;
@ -181,7 +240,7 @@ void NormalizerConformanceTest::TestConformance(void) {
        fields[0]=fields[1]=fields[2]=fields[3]=fields[4].setTo(c);
        sprintf(lineBuf, "not mentioned code point U+%04lx", (long)c);

-        if (checkConformance(fields, lineBuf)) {
+        if (checkConformance(fields, lineBuf, options)) {
            ++passCount;
        } else {
            ++failCount;
@ -215,7 +274,8 @@ void NormalizerConformanceTest::TestConformance(void) {
 * @return true if the test passes
 */
 UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field,
-                                                  const char *line) {
+                                                  const char *line,
+                                                  int32_t options) {
    UBool pass = TRUE;
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString out, fcd;
@ -224,87 +284,87 @@ UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field,
    for (int32_t i=0; i<FIELD_COUNT; ++i) {
        fieldNum = i+1;
        if (i<3) {
-            Normalizer::normalize(field[i], UNORM_NFC, 0, out, status);
+            Normalizer::normalize(field[i], UNORM_NFC, options, out, status);
            pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c", fieldNum);
-            iterativeNorm(field[i], UNORM_NFC, out, +1);
+            iterativeNorm(field[i], UNORM_NFC, options, out, +1);
            pass &= assertEqual("C(+1)", field[i], out, field[1], "c2!=C(c", fieldNum);
-            iterativeNorm(field[i], UNORM_NFC, out, -1);
+            iterativeNorm(field[i], UNORM_NFC, options, out, -1);
            pass &= assertEqual("C(-1)", field[i], out, field[1], "c2!=C(c", fieldNum);

-            Normalizer::normalize(field[i], UNORM_NFD, 0, out, status);
+            Normalizer::normalize(field[i], UNORM_NFD, options, out, status);
            pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c", fieldNum);
-            iterativeNorm(field[i], UNORM_NFD, out, +1);
+            iterativeNorm(field[i], UNORM_NFD, options, out, +1);
            pass &= assertEqual("D(+1)", field[i], out, field[2], "c3!=D(c", fieldNum);
-            iterativeNorm(field[i], UNORM_NFD, out, -1);
+            iterativeNorm(field[i], UNORM_NFD, options, out, -1);
            pass &= assertEqual("D(-1)", field[i], out, field[2], "c3!=D(c", fieldNum);
        }
-        Normalizer::normalize(field[i], UNORM_NFKC, 0, out, status);
+        Normalizer::normalize(field[i], UNORM_NFKC, options, out, status);
        pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c", fieldNum);
-        iterativeNorm(field[i], UNORM_NFKC, out, +1);
+        iterativeNorm(field[i], UNORM_NFKC, options, out, +1);
        pass &= assertEqual("KC(+1)", field[i], out, field[3], "c4!=KC(c", fieldNum);
-        iterativeNorm(field[i], UNORM_NFKC, out, -1);
+        iterativeNorm(field[i], UNORM_NFKC, options, out, -1);
        pass &= assertEqual("KC(-1)", field[i], out, field[3], "c4!=KC(c", fieldNum);

-        Normalizer::normalize(field[i], UNORM_NFKD, 0, out, status);
+        Normalizer::normalize(field[i], UNORM_NFKD, options, out, status);
        pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c", fieldNum);
-        iterativeNorm(field[i], UNORM_NFKD, out, +1);
+        iterativeNorm(field[i], UNORM_NFKD, options, out, +1);
        pass &= assertEqual("KD(+1)", field[i], out, field[4], "c5!=KD(c", fieldNum);
-        iterativeNorm(field[i], UNORM_NFKD, out, -1);
+        iterativeNorm(field[i], UNORM_NFKD, options, out, -1);
        pass &= assertEqual("KD(-1)", field[i], out, field[4], "c5!=KD(c", fieldNum);
    }
    compare(field[1],field[2]);
    compare(field[0],field[1]);
    // test quick checks
-    if(UNORM_NO == Normalizer::quickCheck(field[1], UNORM_NFC, status)) {
+    if(UNORM_NO == Normalizer::quickCheck(field[1], UNORM_NFC, options, status)) {
        errln("Normalizer error: quickCheck(NFC(s), UNORM_NFC) is UNORM_NO");
        pass = FALSE;
    }
-    if(UNORM_NO == Normalizer::quickCheck(field[2], UNORM_NFD, status)) {
+    if(UNORM_NO == Normalizer::quickCheck(field[2], UNORM_NFD, options, status)) {
        errln("Normalizer error: quickCheck(NFD(s), UNORM_NFD) is UNORM_NO");
        pass = FALSE;
    }
-    if(UNORM_NO == Normalizer::quickCheck(field[3], UNORM_NFKC, status)) {
+    if(UNORM_NO == Normalizer::quickCheck(field[3], UNORM_NFKC, options, status)) {
        errln("Normalizer error: quickCheck(NFKC(s), UNORM_NFKC) is UNORM_NO");
        pass = FALSE;
    }
-    if(UNORM_NO == Normalizer::quickCheck(field[4], UNORM_NFKD, status)) {
+    if(UNORM_NO == Normalizer::quickCheck(field[4], UNORM_NFKD, options, status)) {
        errln("Normalizer error: quickCheck(NFKD(s), UNORM_NFKD) is UNORM_NO");
        pass = FALSE;
    }

-    if(!Normalizer::isNormalized(field[1], UNORM_NFC, status)) {
+    if(!Normalizer::isNormalized(field[1], UNORM_NFC, options, status)) {
        errln("Normalizer error: isNormalized(NFC(s), UNORM_NFC) is FALSE");
        pass = FALSE;
    }
-    if(field[0]!=field[1] && Normalizer::isNormalized(field[0], UNORM_NFC, status)) {
+    if(field[0]!=field[1] && Normalizer::isNormalized(field[0], UNORM_NFC, options, status)) {
        errln("Normalizer error: isNormalized(s, UNORM_NFC) is TRUE");
        pass = FALSE;
    }
-    if(!Normalizer::isNormalized(field[3], UNORM_NFKC, status)) {
+    if(!Normalizer::isNormalized(field[3], UNORM_NFKC, options, status)) {
        errln("Normalizer error: isNormalized(NFKC(s), UNORM_NFKC) is FALSE");
        pass = FALSE;
    }
-    if(field[0]!=field[3] && Normalizer::isNormalized(field[0], UNORM_NFKC, status)) {
+    if(field[0]!=field[3] && Normalizer::isNormalized(field[0], UNORM_NFKC, options, status)) {
        errln("Normalizer error: isNormalized(s, UNORM_NFKC) is TRUE");
        pass = FALSE;
    }

    // test FCD quick check and "makeFCD"
-    Normalizer::normalize(field[0], UNORM_FCD, 0, fcd, status);
-    if(UNORM_NO == Normalizer::quickCheck(fcd, UNORM_FCD, status)) {
+    Normalizer::normalize(field[0], UNORM_FCD, options, fcd, status);
+    if(UNORM_NO == Normalizer::quickCheck(fcd, UNORM_FCD, options, status)) {
        errln("Normalizer error: quickCheck(FCD(s), UNORM_FCD) is UNORM_NO");
        pass = FALSE;
    }
-    if(UNORM_NO == Normalizer::quickCheck(field[2], UNORM_FCD, status)) {
+    if(UNORM_NO == Normalizer::quickCheck(field[2], UNORM_FCD, options, status)) {
        errln("Normalizer error: quickCheck(NFD(s), UNORM_FCD) is UNORM_NO");
        pass = FALSE;
    }
-    if(UNORM_NO == Normalizer::quickCheck(field[4], UNORM_FCD, status)) {
+    if(UNORM_NO == Normalizer::quickCheck(field[4], UNORM_FCD, options, status)) {
        errln("Normalizer error: quickCheck(NFKD(s), UNORM_FCD) is UNORM_NO");
        pass = FALSE;
    }

-    Normalizer::normalize(fcd, UNORM_NFD, 0, out, status);
+    Normalizer::normalize(fcd, UNORM_NFD, options, out, status);
    if(out != field[2]) {
        errln("Normalizer error: NFD(FCD(s))!=NFD(s)");
        pass = FALSE;
@ -323,14 +383,12 @@ UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field,
        int32_t rc;

        status=U_ZERO_ERROR;
-        rc=unorm_compare(field[0].getBuffer(), field[0].length(),
-                         field[2].getBuffer(), field[2].length(),
-                         U_COMPARE_IGNORE_CASE, &status);
+        rc=Normalizer::compare(field[0], field[2], (options<<UNORM_COMPARE_NORM_OPTIONS_SHIFT)|U_COMPARE_IGNORE_CASE, status);
        if(U_FAILURE(status)) {
-            errln("unorm_compare(case-insensitive) sets %s", u_errorName(status));
+            errln("Normalizer::compare(case-insensitive) sets %s", u_errorName(status));
            pass=FALSE;
        } else if(rc!=0) {
-            errln("unorm_compare(original, NFD, case-insensitive) returned %d instead of 0 for equal", rc);
+            errln("Normalizer::compare(original, NFD, case-insensitive) returned %d instead of 0 for equal", rc);
            pass=FALSE;
        }
    }
@ -346,12 +404,14 @@ UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field,
 * @param dir either +1 or -1
 */
 void NormalizerConformanceTest::iterativeNorm(const UnicodeString& str,
-                                              UNormalizationMode mode,
+                                              UNormalizationMode mode, int32_t options,
                                              UnicodeString& result,
                                              int8_t dir) {
    UErrorCode status = U_ZERO_ERROR;
    normalizer.setText(str, status);
    normalizer.setMode(mode);
+    normalizer.setOption(-1, 0);        // reset all options
+    normalizer.setOption(options, 1);   // set desired options
    result.truncate(0);
    if (U_FAILURE(status)) {
        return;
@ -486,6 +546,6 @@ void NormalizerConformanceTest::_testOneLine(const char *line) {
    if (!hexsplit(line, ';', fields, FIELD_COUNT)) {
        errln((UnicodeString)"Unable to parse line " + line);
    } else {
-        checkConformance(fields, line);
+        checkConformance(fields, line, 0);
    }
 }
--- a/icu4c/source/test/intltest/normconf.h
+++ b/icu4c/source/test/intltest/normconf.h
@ -12,8 +12,7 @@
 #include "unicode/normlzr.h"
 #include "intltest.h"

-#define TEST_SUITE_DIR  "unidata"
-#define TEST_SUITE_FILE "NormalizationTest.txt"
+typedef struct _FileStream FileStream;

 class NormalizerConformanceTest : public IntlTest {
    Normalizer normalizer;
@ -27,15 +26,18 @@ class NormalizerConformanceTest : public IntlTest {
    /**
     * Test the conformance of Normalizer to
     * http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
-     * This file must be located at the path specified as TEST_SUITE_FILE.
     */
-    void TestConformance(void);
+    void TestConformance();
+    void TestConformance32();
+    void TestConformance(FileStream *input, int32_t options);

    // Specific tests for debugging.  These are generally failures taken from
    // the conformance file, but culled out to make debugging easier.
    void TestCase6(void);

 private:
+    FileStream *openNormalizationTestFile(const char *filename);
+
    /**
     * Verify the conformance of the given line of the Unicode
     * normalization (UTR 15) test suite file.  For each line,
@ -52,10 +54,11 @@ class NormalizerConformanceTest : public IntlTest {
     * @return true if the test passes
     */
    UBool checkConformance(const UnicodeString* field,
-                           const char *line);
+                           const char *line,
+                           int32_t options);

    void iterativeNorm(const UnicodeString& str,
-                       UNormalizationMode mode,
+                       UNormalizationMode mode, int32_t options,
                       UnicodeString& result,
                       int8_t dir);

--- a/icu4c/source/test/perf/normperf/normperf.h
+++ b/icu4c/source/test/perf/normperf/normperf.h
@ -276,7 +276,7 @@ int32_t ICUNormFCD(const UChar* src, int32_t srcLen,UChar* dest, int32_t dstLen,

 int32_t ICUQuickCheck(const UChar* src,int32_t srcLen, UNormalizationMode mode, int32_t options, UErrorCode* status){
 #if (U_ICU_VERSION_MAJOR_NUM > 2 ) || ((U_ICU_VERSION_MAJOR_NUM == 2 )&&(U_ICU_VERSION_MINOR_NUM >= 6))
-    return unorm_quickCheckTailored(src,srcLen,mode, options, status);
+    return unorm_quickCheckWithOptions(src,srcLen,mode, options, status);
 #else
    return unorm_quickCheck(src,srcLen,mode,status);
 #endif
--- a/icu4c/source/test/testdata/NormalizationTest-3.2.0.txt
+++ b/icu4c/source/test/testdata/NormalizationTest-3.2.0.txt