From d2966f1a4d1350fa2405686388bc0706a2e15760 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Sat, 15 Feb 2003 01:54:25 +0000
Subject: [PATCH] ICU-2481 prototype tailored normalization; remove old
 ignore_hangul

X-SVN-Rev: 11065
---
 icu4c/source/common/caniter.cpp     |   2 +-
 icu4c/source/common/normlzr.cpp     |  18 +-
 icu4c/source/common/unicode/unorm.h |  12 -
 icu4c/source/common/unorm.cpp       | 500 +++++++++++++++++++++++-----
 icu4c/source/common/unormimp.h      |  44 ++-
 5 files changed, 470 insertions(+), 106 deletions(-)

diff --git a/icu4c/source/common/caniter.cpp b/icu4c/source/common/caniter.cpp
index a1f508e0b5f..85e45bdbe8c 100644
--- a/icu4c/source/common/caniter.cpp
+++ b/icu4c/source/common/caniter.cpp
@@ -672,7 +672,7 @@ Hashtable *CanonicalIterator::extract(UChar32 comp, const UChar *segment, int32_
     int32_t tempLen = inputLen + bufLen;
 
     UChar trial[bufSize];
-    unorm_decompose(trial, bufSize, temp, tempLen, FALSE, FALSE, &status);
+    unorm_decompose(trial, bufSize, temp, tempLen, FALSE, 0, &status);
 
     /* Test for buffer overflows */
     if(U_FAILURE(status)) {
diff --git a/icu4c/source/common/normlzr.cpp b/icu4c/source/common/normlzr.cpp
index a09a99722dd..db8a39527d4 100644
--- a/icu4c/source/common/normlzr.cpp
+++ b/icu4c/source/common/normlzr.cpp
@@ -18,12 +18,6 @@
 
 U_NAMESPACE_BEGIN
 
-#ifndef ICU_UNORM_USE_DEPRECATES
-enum {
-    IGNORE_HANGUL=1
-};
-#endif /* ICU_UNORM_USE_DEPRECATES */
-
 const char Normalizer::fgClassID=0;
 
 //-------------------------------------------------------------------------
@@ -197,7 +191,7 @@ Normalizer::normalize(const UnicodeString& source,
         UChar *buffer=dest->getBuffer(source.length());
         int32_t length=unorm_internalNormalize(buffer, dest->getCapacity(),
                                                source.getBuffer(), source.length(),
-                                               mode, (options&IGNORE_HANGUL)!=0,
+                                               mode, options,
                                                &status);
         dest->releaseBuffer(length);
         if(status==U_BUFFER_OVERFLOW_ERROR) {
@@ -205,7 +199,7 @@ Normalizer::normalize(const UnicodeString& source,
             buffer=dest->getBuffer(length);
             length=unorm_internalNormalize(buffer, dest->getCapacity(),
                                            source.getBuffer(), source.length(),
-                                           mode, (options&IGNORE_HANGUL)!=0,
+                                           mode, options,
                                            &status);
             dest->releaseBuffer(length);
         }
@@ -243,7 +237,7 @@ Normalizer::compose(const UnicodeString& source,
         UChar *buffer=dest->getBuffer(source.length());
         int32_t length=unorm_compose(buffer, dest->getCapacity(),
                                      source.getBuffer(), source.length(),
-                                     compat, (options&IGNORE_HANGUL)!=0,
+                                     compat, options,
                                      &status);
         dest->releaseBuffer(length);
         if(status==U_BUFFER_OVERFLOW_ERROR) {
@@ -251,7 +245,7 @@ Normalizer::compose(const UnicodeString& source,
             buffer=dest->getBuffer(length);
             length=unorm_compose(buffer, dest->getCapacity(),
                                  source.getBuffer(), source.length(),
-                                 compat, (options&IGNORE_HANGUL)!=0,
+                                 compat, options,
                                  &status);
             dest->releaseBuffer(length);
         }
@@ -289,7 +283,7 @@ Normalizer::decompose(const UnicodeString& source,
         UChar *buffer=dest->getBuffer(source.length());
         int32_t length=unorm_decompose(buffer, dest->getCapacity(),
                                      source.getBuffer(), source.length(),
-                                     compat, (options&IGNORE_HANGUL)!=0,
+                                     compat, options,
                                      &status);
         dest->releaseBuffer(length);
         if(status==U_BUFFER_OVERFLOW_ERROR) {
@@ -297,7 +291,7 @@ Normalizer::decompose(const UnicodeString& source,
             buffer=dest->getBuffer(length);
             length=unorm_decompose(buffer, dest->getCapacity(),
                                    source.getBuffer(), source.length(),
-                                   compat, (options&IGNORE_HANGUL)!=0,
+                                   compat, options,
                                    &status);
             dest->releaseBuffer(length);
         }
diff --git a/icu4c/source/common/unicode/unorm.h b/icu4c/source/common/unicode/unorm.h
index 808b5b086dc..67cf9a344d0 100644
--- a/icu4c/source/common/unicode/unorm.h
+++ b/icu4c/source/common/unicode/unorm.h
@@ -173,18 +173,6 @@ typedef enum {
    * @obsolete ICU 2.4. Use UNORM_NFKC instead since this API will be removed in that release.
    */
   UCOL_DECOMP_COMPAT_COMP_CAN =5,
-
-  /**
-   * Do not normalize Hangul.
-   * @obsolete ICU 2.2. Obsolete option, to be removed (or moved to private for documentation) in that release.
-   */
-  UCOL_IGNORE_HANGUL    = 16,
-
-  /**
-   * Do not normalize Hangul.
-   * @obsolete ICU 2.2. Obsolete option, to be removed (or moved to private for documentation) in that release.
-   */
-  UNORM_IGNORE_HANGUL    = 16
 #endif /* ICU_UNORM_USE_DEPRECATES */
 } UNormalizationMode;
 
diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp
index 63aac85e305..6fd6bb40d09 100644
--- a/icu4c/source/common/unorm.cpp
+++ b/icu4c/source/common/unorm.cpp
@@ -35,6 +35,24 @@
 #include "unicode/uset.h"
 #include "unormimp.h"
 
+/* ### TODO: These depend on whether tailored normalization becomes permanent. */
+#include "unicode/uniset.h"
+#include "unicode/usetiter.h"
+
+/*
+ * ### TODO: status of prototype for tailored normalization
+ *
+ * My main thrust so far was for unorm_normalize() and unorm_quickCheck().
+ * isNormalized() should work, I think.
+ * I have not yet thought about iterative normalization at all.
+ *
+ * Generally, any function that searches for a safe boundary has not been touched,
+ * which means that these functions will be over-pessimistic when
+ * exclusions are applied.
+ * This may not matter because subsequent checks and normalizations do apply the exclusions.
+ */
+#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
+
 /*
  * This new implementation of the normalization code loads its data from
  * unorm.dat, which is generated with the gennorm tool.
@@ -47,12 +65,6 @@ enum {
     _STACK_BUFFER_CAPACITY=100
 };
 
-#ifndef ICU_UNORM_USE_DEPRECATES
-enum {
-    UNORM_IGNORE_HANGUL=16
-};
-#endif /* ICU_UNORM_USE_DEPRECATES */
-
 /* Korean Hangul and Jamo constants */
 enum {
     JAMO_L_BASE=0x1100,     /* "lead" jamo */
@@ -112,6 +124,8 @@ isJamoVTNorm32JamoV(uint32_t norm32) {
     return norm32<_NORM_JAMO_V_TOP;
 }
 
+/* some prototypes ---------------------------------------------------------- */
+
 static const UChar *
 _findPreviousStarter(const UChar *start, const UChar *src,
                      uint32_t ccOrQCMask, uint32_t decompQCMask, UChar minNoMaybe);
@@ -124,6 +138,7 @@ static const UChar *
 _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_t &length,
              const UChar *prevStarter, const UChar *src,
              uint32_t qcMask, uint8_t &prevCC,
+             const UnicodeSet *dx,
              UErrorCode *pErrorCode);
 
 /* load unorm.dat ----------------------------------------------------------- */
@@ -151,10 +166,15 @@ static UBool formatVersion_2_1=FALSE, formatVersion_2_2=FALSE;
 /* the Unicode version of the normalization data */
 static UVersionInfo dataVersion={ 3, 1, 0, 0 };
 
+/* ### TODO: prototype ### cache UnicodeSets for each combination of exclusion flags */
+static UnicodeSet *dxCache[UNORM_DX_MASK+1]={ NULL };
+
 U_CDECL_BEGIN
 
 UBool
 unorm_cleanup() {
+    int32_t i;
+
     if(normData!=NULL) {
         udata_close(normData);
         normData=NULL;
@@ -162,6 +182,10 @@ unorm_cleanup() {
     dataErrorCode=U_ZERO_ERROR;
     haveNormData=0;
 
+    for(i=0; i<(int32_t)LENGTHOF(dxCache); ++i) {
+        delete dxCache[i];
+        dxCache[i]=NULL; /* the getters test for NULL: never leave a dangling pointer in the cache */
+    }
     return TRUE;
 }
 
@@ -374,6 +398,201 @@ _getExtraData(uint32_t norm32) {
     return extraData+(norm32>>_NORM_EXTRA_SHIFT);
 }
 
+/* decomposition exclusion sets --------------------------------------------- */
+
+/*
+ * Decomposition exclusion UnicodeSets are used for tailored normalization,
+ * Unicode public review issue number 7. (http://www.unicode.org/review/)
+ *
+ * The caller specifies one or several sets of code points;
+ * those code points do not get decomposed in normalization, even though Unicode might
+ * otherwise define a decomposition for them.
+ *
+ * ### TODO: This is a prototype. Assess if it should become a permanent part of ICU.
+ */
+
+static const UnicodeSet *
+internalGetDXHangul(UErrorCode &errorCode) {
+    /* internal function, does not check for incoming U_FAILURE */
+    /* Returns the cached set U+AC00..U+D7A3, creating it on first use; NULL if the allocation fails. */
+    if(dxCache[UNORM_DX_HANGUL]==NULL) {
+        UnicodeSet *set=new UnicodeSet(0xac00, 0xd7a3);
+        if(set==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+        /* re-test the slot while holding umtx_lock(NULL); install the new set only if it is still empty */
+        umtx_lock(NULL);
+        if(dxCache[UNORM_DX_HANGUL]==NULL) {
+            dxCache[UNORM_DX_HANGUL]=set;
+            set=NULL; /* the cache owns the set now */
+        }
+        umtx_unlock(NULL);
+        /* set is still non-NULL only if the slot was already filled: discard the duplicate */
+        delete set;
+    }
+
+    return dxCache[UNORM_DX_HANGUL];
+}
+
+static const UnicodeSet *
+internalGetDXCJKCompat(UErrorCode &errorCode) {
+    /* internal function, does not check for incoming U_FAILURE */
+    /* Reads normTrie, so the normalization data must already be loaded. Returns NULL on failure. */
+    if(dxCache[UNORM_DX_CJK_COMPAT]==NULL) {
+        /* build the set [:Ideographic:] & [has canonical decomposition] */
+        UnicodeSet *set, *hasDecomp;
+
+        set=new UnicodeSet(UNICODE_STRING("[:Ideographic:]", 15), errorCode);
+        if(set==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+        if(U_FAILURE(errorCode)) {
+            delete set;
+            return NULL;
+        }
+
+        /* start with an empty set for [has canonical decomposition] */
+        hasDecomp=new UnicodeSet();
+        if(hasDecomp==NULL) {
+            delete set;
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+
+        /* iterate over all ideographs and remember which canonically decompose */
+        UnicodeSetIterator it(*set);
+        UChar32 start, end;
+        uint32_t norm32;
+
+        while(it.nextRange() && !it.isString()) {
+            start=it.getCodepoint();
+            end=it.getCodepointEnd();
+            while(start<=end) {
+                UTRIE_GET32(&normTrie, start, norm32);
+                if(norm32&_NORM_QC_NFD) {
+                    hasDecomp->add(start);
+                }
+                ++start;
+            }
+        }
+
+        /* an exclusion only matters for code points that decompose: cache hasDecomp, not the difference */
+        umtx_lock(NULL);
+        if(dxCache[UNORM_DX_CJK_COMPAT]==NULL) {
+            dxCache[UNORM_DX_CJK_COMPAT]=hasDecomp;
+            hasDecomp=NULL;
+        }
+        umtx_unlock(NULL);
+
+        /* hasDecomp is still non-NULL only if the slot was already filled; set is never cached */
+        delete hasDecomp;
+        delete set;
+    }
+
+    return dxCache[UNORM_DX_CJK_COMPAT];
+}
+
+static const UnicodeSet *
+internalGetDXAUmlaut(UErrorCode &errorCode) {
+    /* internal function, does not check for incoming U_FAILURE */
+    /* Returns the cached one-element set { U+00E4 }, creating it on first use; NULL if the allocation fails. */
+    if(dxCache[UNORM_DX_A_UMLAUT]==NULL) {
+        UnicodeSet *set=new UnicodeSet(0xe4, 0xe4);
+        if(set==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+        /* re-test the slot while holding umtx_lock(NULL); install the new set only if it is still empty */
+        umtx_lock(NULL);
+        if(dxCache[UNORM_DX_A_UMLAUT]==NULL) {
+            dxCache[UNORM_DX_A_UMLAUT]=set;
+            set=NULL; /* the cache owns the set now */
+        }
+        umtx_unlock(NULL);
+        /* set is still non-NULL only if the slot was already filled: discard the duplicate */
+        delete set;
+    }
+
+    return dxCache[UNORM_DX_A_UMLAUT];
+}
+
+/* Get the decomposition exclusion set for options (0..UNORM_DX_MASK; it indexes dxCache[]). The data must be loaded. */
+static const UnicodeSet *
+internalGetDX(int32_t options, UErrorCode &errorCode) {
+    if(dxCache[options]==NULL) {
+        /* options names exactly one basic set: return it; its getter caches it in this same slot */
+        if(options==UNORM_DX_HANGUL) {
+            return internalGetDXHangul(errorCode);
+        }
+        if(options==UNORM_DX_CJK_COMPAT) {
+            return internalGetDXCJKCompat(errorCode);
+        }
+        if(options==UNORM_DX_A_UMLAUT) {
+            return internalGetDXAUmlaut(errorCode); /* was the CJK getter: wrong set, and this slot stayed NULL */
+        }
+
+        /* build a set from multiple subsets */
+        UnicodeSet *set;
+        const UnicodeSet *other;
+
+        set=new UnicodeSet();
+        if(set==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+
+        if((options&UNORM_DX_HANGUL)!=0 && NULL!=(other=internalGetDXHangul(errorCode))) {
+            set->addAll(*other);
+        }
+        if((options&UNORM_DX_CJK_COMPAT)!=0 && NULL!=(other=internalGetDXCJKCompat(errorCode))) {
+            set->addAll(*other);
+        }
+        if((options&UNORM_DX_A_UMLAUT)!=0 && NULL!=(other=internalGetDXAUmlaut(errorCode))) {
+            set->addAll(*other);
+        }
+        /* a sub-getter that failed returned NULL and set errorCode: drop the partial union */
+        if(U_FAILURE(errorCode)) {
+            delete set;
+            return NULL;
+        }
+
+        umtx_lock(NULL);
+        if(dxCache[options]==NULL) {
+            dxCache[options]=set;
+            set=NULL;
+        }
+        umtx_unlock(NULL);
+        /* set is still non-NULL only if the slot was already filled: discard the duplicate */
+        delete set;
+    }
+
+    return dxCache[options];
+}
+
+static inline const UnicodeSet *
+getDX(int32_t options, UErrorCode &errorCode) {
+    /* reduce options to the exclusion flags before they are used to select a set */
+    const int32_t dxFlags=options&UNORM_DX_MASK;
+    if(!U_FAILURE(errorCode) && dxFlags!=0) {
+        return internalGetDX(dxFlags, errorCode);
+    }
+    return NULL; /* incoming failure, or no decomposition exclusions requested */
+}
+
+static inline UBool
+dx_contains(const UnicodeSet *dx, UChar32 c) {
+    return (UBool)(dx==NULL ? FALSE : dx->contains(c)!=FALSE); /* a NULL set excludes nothing */
+}
+
+static inline UBool
+dx_contains(const UnicodeSet *dx, UChar c, UChar c2) {
+    return (UBool)(dx==NULL ? FALSE : dx->contains(c2!=0 ? U16_GET_SUPPLEMENTARY(c, c2) : c)!=FALSE); /* c2==0: BMP code point c */
+}
+
+/* other normalization primitives ------------------------------------------- */
+
 /* get the canonical or compatibility decomposition for one character */
 static inline const UChar *
 _decompose(uint32_t norm32, uint32_t qcMask, int32_t &length,
@@ -1013,7 +1232,7 @@ _mergeOrdered(UChar *start, UChar *current,
 /* quick check functions ---------------------------------------------------- */
 
 static UBool
-unorm_checkFCD(const UChar *src, int32_t srcLength) {
+unorm_checkFCD(const UChar *src, int32_t srcLength, const UnicodeSet *dx) {
     const UChar *limit;
     UChar c, c2;
     uint16_t fcd16;
@@ -1076,8 +1295,27 @@ unorm_checkFCD(const UChar *src, int32_t srcLength) {
                 ++src;
                 fcd16=_getFCD16FromSurrogatePair(fcd16, c2);
             } else {
+                c2=0;
                 fcd16=0;
             }
+        } else {
+            c2=0;
+        }
+
+        /*
+         * If (c, c2) is excluded, then replace the code point's FCD data
+         * with the regular UCD cc because it does not decompose.
+         */
+        if(dx!=NULL) {
+            UChar32 cp;
+
+            cp= c2==0 ? c : U16_GET_SUPPLEMENTARY(c, c2);
+            if(dx->contains(cp)) {
+                uint32_t norm32;
+                UTRIE_GET32(&normTrie, cp, norm32);
+                /* This depends on knowing that _NORM_CC_MASK==0xff00 */
+                fcd16=(uint16_t)(norm32&0xff00)|(((uint16_t)norm32)>>8);
+            }
         }
 
         /*
@@ -1093,7 +1331,11 @@ unorm_checkFCD(const UChar *src, int32_t srcLength) {
         if(cc!=0) {
             if(prevCC<0) {
                 /* the previous character was <_NORM_MIN_WITH_LEAD_CC, we need to get its trail cc */
-                prevCC=(int16_t)(_getFCD16((UChar)-prevCC)&0xff);
+                if(!dx_contains(dx, (UChar32)-prevCC)) {
+                    prevCC=(int16_t)(_getFCD16((UChar)-prevCC)&0xff);
+                } else {
+                    prevCC=0; /* excluded; UCD cc's of code points <U+0300 are all 0 */
+                }
             }
 
             if(cc<prevCC) {
@@ -1109,6 +1351,7 @@ _quickCheck(const UChar *src,
             int32_t srcLength,
             UNormalizationMode mode,
             UBool allowMaybe,
+            const UnicodeSet *dx,
             UErrorCode *pErrorCode) {
     UChar stackBuffer[_STACK_BUFFER_CAPACITY];
     UChar *buffer;
@@ -1153,7 +1396,7 @@ _quickCheck(const UChar *src,
         qcMask=_NORM_QC_NFKD;
         break;
     case UNORM_FCD:
-        return unorm_checkFCD(src, srcLength) ? UNORM_YES : UNORM_NO;
+        return unorm_checkFCD(src, srcLength, dx) ? UNORM_YES : UNORM_NO;
     default:
         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
         return UNORM_MAYBE;
@@ -1210,8 +1453,11 @@ _quickCheck(const UChar *src,
                 ++src;
                 norm32=_getNorm32FromSurrogatePair(norm32, c2);
             } else {
+                c2=0;
                 norm32=0;
             }
+        } else {
+            c2=0;
         }
 
         /* check the combining order */
@@ -1223,6 +1469,11 @@ _quickCheck(const UChar *src,
         prevCC=cc;
 
         /* check for "no" or "maybe" quick check flags */
+        if(dx_contains(dx, c, c2)) {
+            /* excluded: treat like "yes" */
+            continue;
+        }
+
         qcNorm32=norm32&qcMask;
         if(qcNorm32&_NORM_QC_ANY_NO) {
             result=UNORM_NO;
@@ -1255,7 +1506,7 @@ _quickCheck(const UChar *src,
                              prevStarter,
                              src,
                              qcMask,
-                             prevCC, pErrorCode);
+                             prevCC, dx, pErrorCode);
                 if(U_FAILURE(*pErrorCode)) {
                     result=UNORM_MAYBE; /* error (out of memory) */
                     break;
@@ -1283,16 +1534,23 @@ endloop:
 U_CAPI UNormalizationCheckResult U_EXPORT2
 unorm_quickCheck(const UChar *src,
                  int32_t srcLength, 
-                 UNormalizationMode mode, 
+                 UNormalizationMode mode,
                  UErrorCode *pErrorCode) {
-    return _quickCheck(src, srcLength, mode, TRUE, pErrorCode);
+    return _quickCheck(src, srcLength, mode, TRUE, NULL, pErrorCode);
+}
+
+U_CAPI UNormalizationCheckResult U_EXPORT2
+unorm_quickCheckTailored(const UChar *src, int32_t srcLength,
+                         UNormalizationMode mode, int32_t options, UErrorCode *pErrorCode) {
+    const UnicodeSet *dx=getDX(options, *pErrorCode); /* NULL: no exclusions requested, or failure */
+    return _quickCheck(src, srcLength, mode, TRUE, dx, pErrorCode);
+}
 
 U_CAPI UBool U_EXPORT2
 unorm_isNormalized(const UChar *src, int32_t srcLength,
                    UNormalizationMode mode,
                    UErrorCode *pErrorCode) {
-    return (UBool)(UNORM_YES==_quickCheck(src, srcLength, mode, FALSE, pErrorCode));
+    return (UBool)(UNORM_YES==_quickCheck(src, srcLength, mode, FALSE, NULL, pErrorCode));
 }
 
 /* make NFD & NFKD ---------------------------------------------------------- */
@@ -1386,7 +1644,7 @@ unorm_getDecomposition(UChar32 c, UBool compat,
 static int32_t
 _decompose(UChar *dest, int32_t destCapacity,
            const UChar *src, int32_t srcLength,
-           UBool compat, UBool ignoreHangul,
+           UBool compat, const UnicodeSet *dx,
            uint8_t &outTrailCC) {
     UChar buffer[3];
     const UChar *limit, *prevSrc, *p;
@@ -1470,7 +1728,7 @@ _decompose(UChar *dest, int32_t destCapacity,
          * otherwise, p[length] is merged in with _mergeOrdered()
          */
         if(isNorm32HangulOrJamo(norm32)) {
-            if(ignoreHangul) {
+            if(dx_contains(dx, c)) {
                 c2=0;
                 p=NULL;
                 length=1;
@@ -1511,7 +1769,7 @@ _decompose(UChar *dest, int32_t destCapacity,
             }
 
             /* get the decomposition and the lead and trail cc's */
-            if((norm32&qcMask)==0) {
+            if((norm32&qcMask)==0 || dx_contains(dx, c, c2)) {
                 /* c does not decompose */
                 cc=trailCC=(uint8_t)(norm32>>_NORM_CC_SHIFT);
                 p=NULL;
@@ -1575,8 +1833,9 @@ _decompose(UChar *dest, int32_t destCapacity,
 U_CAPI int32_t U_EXPORT2
 unorm_decompose(UChar *dest, int32_t destCapacity,
                 const UChar *src, int32_t srcLength,
-                UBool compat, UBool ignoreHangul,
+                UBool compat, int32_t options,
                 UErrorCode *pErrorCode) {
+    const UnicodeSet *dx;
     int32_t destIndex;
     uint8_t trailCC;
 
@@ -1584,9 +1843,14 @@ unorm_decompose(UChar *dest, int32_t destCapacity,
         return 0;
     }
 
+    dx=getDX(options, *pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
     destIndex=_decompose(dest, destCapacity,
                          src, srcLength,
-                         compat, ignoreHangul,
+                         compat, dx,
                          trailCC);
 
     return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode);
@@ -1648,7 +1912,8 @@ _findSafeFCD(const UChar *src, const UChar *limit, uint16_t fcd16) {
 
 static uint8_t
 _decomposeFCD(const UChar *src, const UChar *decompLimit,
-              UChar *dest, int32_t &destIndex, int32_t destCapacity) {
+              UChar *dest, int32_t &destIndex, int32_t destCapacity,
+              const UnicodeSet *dx) {
     const UChar *p;
     uint32_t norm32;
     int32_t reorderStartIndex, length;
@@ -1694,7 +1959,7 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit,
         }
 
         /* get the decomposition and the lead and trail cc's */
-        if((norm32&_NORM_QC_NFD)==0) {
+        if((norm32&_NORM_QC_NFD)==0 || dx_contains(dx, c, c2)) {
             /* c does not decompose */
             cc=trailCC=(uint8_t)(norm32>>_NORM_CC_SHIFT);
             p=NULL;
@@ -1756,6 +2021,7 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit,
 static int32_t
 unorm_makeFCD(UChar *dest, int32_t destCapacity,
               const UChar *src, int32_t srcLength,
+              const UnicodeSet *dx,
               UErrorCode *pErrorCode) {
     const UChar *limit, *prevSrc, *decompStart;
     int32_t destIndex, length;
@@ -1839,7 +2105,11 @@ unorm_makeFCD(UChar *dest, int32_t destCapacity,
             /* prevCC<0 is only possible from the above loop, i.e., only if prevSrc<src */
             if(prevCC<0) {
                 /* the previous character was <_NORM_MIN_WITH_LEAD_CC, we need to get its trail cc */
-                prevCC=(int16_t)(_getFCD16((UChar)-prevCC)&0xff);
+                if(!dx_contains(dx, (UChar32)-prevCC)) {
+                    prevCC=(int16_t)(_getFCD16((UChar)-prevCC)&0xff);
+                } else {
+                    prevCC=0; /* excluded; UCD cc's of code points <U+0300 are all 0 */
+                }
 
                 /*
                  * set a pointer to this below-U+0300 character;
@@ -1883,6 +2153,22 @@ unorm_makeFCD(UChar *dest, int32_t destCapacity,
 
         /* we are looking at the character (c, c2) at [prevSrc..src[ */
 
+        /*
+         * If (c, c2) is excluded, then replace the code point's FCD data
+         * with the regular UCD cc because it does not decompose.
+         */
+        if(dx!=NULL) {
+            UChar32 cp;
+
+            cp= c2==0 ? c : U16_GET_SUPPLEMENTARY(c, c2);
+            if(dx->contains(cp)) {
+                uint32_t norm32;
+                UTRIE_GET32(&normTrie, cp, norm32);
+                /* This depends on knowing that _NORM_CC_MASK==0xff00 */
+                fcd16=(uint16_t)(norm32&0xff00)|(((uint16_t)norm32)>>8);
+            }
+        }
+
         /* check the combining order, get the lead cc */
         cc=(int16_t)(fcd16>>8);
         if(cc==0 || cc>=prevCC) {
@@ -1921,7 +2207,8 @@ unorm_makeFCD(UChar *dest, int32_t destCapacity,
              * decompose and reorder a limited piece of the text
              */
             prevCC=_decomposeFCD(decompStart, src,
-                                 dest, destIndex, destCapacity);
+                                 dest, destIndex, destCapacity,
+                                 dx);
             decompStart=src;
         }
     }
@@ -1935,7 +2222,8 @@ unorm_makeFCD(UChar *dest, int32_t destCapacity,
 static inline uint32_t
 _getNextCombining(UChar *&p, const UChar *limit,
                   UChar &c, UChar &c2,
-                  uint16_t &combiningIndex, uint8_t &cc) {
+                  uint16_t &combiningIndex, uint8_t &cc,
+                  const UnicodeSet *dx) {
     uint32_t norm32, combineFlags;
 
     c=*p++;
@@ -1951,9 +2239,14 @@ _getNextCombining(UChar *&p, const UChar *limit,
         } else if(isNorm32HangulOrJamo(norm32)) {
             /* a compatibility decomposition contained Jamos */
             c2=0;
-            combiningIndex=(uint16_t)(0xfff0|(norm32>>_NORM_EXTRA_SHIFT));
             cc=0;
-            return norm32&_NORM_COMBINES_ANY;
+            if(!dx_contains(dx, c)) {
+                combiningIndex=(uint16_t)(0xfff0|(norm32>>_NORM_EXTRA_SHIFT));
+                return norm32&_NORM_COMBINES_ANY;
+            } else {
+                combiningIndex=0;
+                return 0;
+            }
         } else {
             /* c is a lead surrogate, get the real norm32 */
             if(p!=limit && UTF_IS_SECOND_SURROGATE(c2=*p)) {
@@ -1967,13 +2260,19 @@ _getNextCombining(UChar *&p, const UChar *limit,
             }
         }
 
-        combineFlags=norm32&_NORM_COMBINES_ANY;
-        if(combineFlags!=0) {
-            combiningIndex=*(_getExtraData(norm32)-1);
-        }
-
         cc=(uint8_t)(norm32>>_NORM_CC_SHIFT);
-        return combineFlags;
+
+        if(!dx_contains(dx, c, c2)) {
+            combineFlags=norm32&_NORM_COMBINES_ANY;
+            if(combineFlags!=0) {
+                combiningIndex=*(_getExtraData(norm32)-1);
+            }
+
+            return combineFlags;
+        } else {
+            combiningIndex=0;
+            return 0;
+        }
     }
 }
 
@@ -2073,7 +2372,7 @@ _combine(const uint16_t *table, uint16_t combineBackIndex,
  * while the combining mark that is removed has at least one code unit
  */
 static uint8_t
-_recompose(UChar *p, UChar *&limit) {
+_recompose(UChar *p, UChar *&limit, const UnicodeSet *dx) {
     UChar *starter, *pRemove, *q, *r;
     uint32_t combineFlags;
     UChar c, c2;
@@ -2090,7 +2389,7 @@ _recompose(UChar *p, UChar *&limit) {
     prevCC=0;
 
     for(;;) {
-        combineFlags=_getNextCombining(p, limit, c, c2, combineBackIndex, cc);
+        combineFlags=_getNextCombining(p, limit, c, c2, combineBackIndex, cc, dx);
         if((combineFlags&_NORM_COMBINES_BACK) && starter!=NULL) {
             if(combineBackIndex&0x8000) {
                 /* c is a Jamo V/T, see if we can compose it with the previous character */
@@ -2102,18 +2401,27 @@ _recompose(UChar *p, UChar *&limit) {
                     if(c2<JAMO_L_COUNT) {
                         pRemove=p-1;
                         c=(UChar)(HANGUL_BASE+(c2*JAMO_V_COUNT+(c-JAMO_V_BASE))*JAMO_T_COUNT);
-                        if(p!=limit && (c2=(UChar)(*p-JAMO_T_BASE))<JAMO_T_COUNT) {
+                        if(p!=limit && (c2=(UChar)(*p-JAMO_T_BASE))<JAMO_T_COUNT && !dx_contains(dx, c2)) {
                             ++p;
                             c+=c2;
                         }
-                        *starter=c;
+                        if(!dx_contains(dx, c)) {
+                            *starter=c;
+                        } else {
+                            /* excluded */
+                            if(!isHangulWithoutJamoT(c)) {
+                                --p; /* undo the ++p from reading the Jamo T */
+                            }
+                            /* c is modified but not used any more -- c=*(p-1); -- re-read the Jamo V/T */
+                            pRemove=NULL;
+                        }
                     }
 #if 0
                 /*
                  * The following is disabled with #if 0 because it can not occur:
                  * Since the input is in NFD, there are no Hangul LV syllables that
                  * a Jamo T could combine with.
-                 * All Jamo Ts are combined above when handling Jamo Ls.
+                 * All Jamo Ts are combined above when handling Jamo Vs.
                  */
                 } else {
                     /* Jamo T, compose with previous Hangul that does not have a Jamo T */
@@ -2150,8 +2458,10 @@ _recompose(UChar *p, UChar *&limit) {
                 !(combineFwdIndex&0x8000) &&
                 /* the combining mark is not blocked and */
                 (prevCC<cc || prevCC==0) &&
-                /* the starter and the combining mark (c, c2) do combine */
-                0!=(result=_combine(combiningTable+combineFwdIndex, combineBackIndex, value, value2))
+                /* the starter and the combining mark (c, c2) do combine and */
+                0!=(result=_combine(combiningTable+combineFwdIndex, combineBackIndex, value, value2)) &&
+                /* the composition result is not excluded */
+                !dx_contains(dx, value, value2)
             ) {
                 /* replace the starter with the composition, remove the combining mark */
                 pRemove= c2==0 ? p-1 : p-2; /* pointer to the combining mark */
@@ -2224,7 +2534,7 @@ _recompose(UChar *p, UChar *&limit) {
 
         /* if (c, c2) did not combine, then check if it is a starter */
         if(cc==0) {
-            /* found a new starter */
+            /* found a new starter; combineFlags==0 if (c, c2) is excluded */
             if(combineFlags&_NORM_COMBINES_FWD) {
                 /* it may combine with something, prepare for it */
                 if(c2==0) {
@@ -2321,6 +2631,7 @@ static const UChar *
 _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_t &length,
              const UChar *prevStarter, const UChar *src,
              uint32_t qcMask, uint8_t &prevCC,
+             const UnicodeSet *dx,
              UErrorCode *pErrorCode) {
     UChar *recomposeLimit;
     uint8_t trailCC;
@@ -2331,7 +2642,7 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_
     /* decompose [prevStarter..src[ */
     length=_decompose(buffer, bufferCapacity,
                       prevStarter, src-prevStarter,
-                      compat, FALSE,
+                      compat, dx,
                       trailCC);
     if(length>bufferCapacity) {
         if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*length, 0)) {
@@ -2340,14 +2651,14 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_
         }
         length=_decompose(buffer, bufferCapacity,
                           prevStarter, src-prevStarter,
-                          compat, FALSE,
+                          compat, dx,
                           trailCC);
     }
 
     /* recompose the decomposition */
     recomposeLimit=buffer+length;
     if(length>=2) {
-        prevCC=_recompose(buffer, recomposeLimit);
+        prevCC=_recompose(buffer, recomposeLimit, dx);
     }
 
     /* return with a pointer to the recomposition and its length */
@@ -2357,7 +2668,10 @@ _composePart(UChar *stackBuffer, UChar *&buffer, int32_t &bufferCapacity, int32_
 
 static inline UBool
 _composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UChar *limit,
-               UBool compat, UChar *dest) {
+               UBool compat, UChar *dest, const UnicodeSet *dx) {
+    if(dx!=NULL && (dx->contains(prev) || dx->contains(c))) {
+        return FALSE;
+    }
     if(isJamoVTNorm32JamoV(norm32)) {
         /* c is a Jamo V, compose with previous Jamo L and following Jamo T */
         prev=(UChar)(prev-JAMO_L_BASE);
@@ -2369,7 +2683,9 @@ _composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UC
                 UChar next, t;
 
                 next=*src;
-                if((t=(UChar)(next-JAMO_T_BASE))<JAMO_T_COUNT) {
+                if(dx_contains(dx, next)) {
+                    /* excluded */
+                } else if((t=(UChar)(next-JAMO_T_BASE))<JAMO_T_COUNT) {
                     /* normal Jamo T */
                     ++src;
                     c+=t;
@@ -2390,6 +2706,12 @@ _composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UC
                     }
                 }
             }
+            if(dx_contains(dx, c)) {
+                if(!isHangulWithoutJamoT(c)) {
+                    --src; /* undo ++src from reading the Jamo T */
+                }
+                return FALSE;
+            }
             if(dest!=0) {
                 *dest=c;
             }
@@ -2397,8 +2719,12 @@ _composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UC
         }
     } else if(isHangulWithoutJamoT(prev)) {
         /* c is a Jamo T, compose with previous Hangul LV that does not contain a Jamo T */
+        c=(UChar)(prev+(c-JAMO_T_BASE));
+        if(dx_contains(dx, c)) {
+            return FALSE;
+        }
         if(dest!=0) {
-            *dest=(UChar)(prev+(c-JAMO_T_BASE));
+            *dest=c;
         }
         return TRUE;
     }
@@ -2408,7 +2734,7 @@ _composeHangul(UChar prev, UChar c, uint32_t norm32, const UChar *&src, const UC
 static int32_t
 _compose(UChar *dest, int32_t destCapacity,
          const UChar *src, int32_t srcLength,
-         UBool compat, UBool /* ### TODO: need to do this? -- ignoreHangul -- ### */,
+         UBool compat, const UnicodeSet *dx,
          UErrorCode *pErrorCode) {
     UChar stackBuffer[_STACK_BUFFER_CAPACITY];
     UChar *buffer;
@@ -2553,11 +2879,12 @@ _compose(UChar *dest, int32_t destCapacity,
             prevCC=cc=0;
             reorderStartIndex=destIndex;
 
-            if( /* ### TODO: do we need to do this? !ignoreHangul && ### */
+            if(
                 destIndex>0 &&
                 _composeHangul(
                     *(prevSrc-1), c, norm32, src, limit, compat,
-                    destIndex<=destCapacity ? dest+(destIndex-1) : 0)
+                    destIndex<=destCapacity ? dest+(destIndex-1) : 0,
+                    dx)
             ) {
                 prevStarter=src;
                 continue;
@@ -2586,7 +2913,7 @@ _compose(UChar *dest, int32_t destCapacity,
             }
 
             /* we are looking at the character (c, c2) at [prevSrc..src[ */
-            if((norm32&qcMask)==0) {
+            if((norm32&qcMask)==0 || dx_contains(dx, c, c2)) {
                 cc=(uint8_t)(norm32>>_NORM_CC_SHIFT);
             } else {
                 const UChar *p;
@@ -2628,6 +2955,7 @@ _compose(UChar *dest, int32_t destCapacity,
                                prevStarter, src,
                                qcMask,
                                prevCC,          /* output */
+                               dx,
                                pErrorCode);
 
                 if(p==NULL) {
@@ -2688,29 +3016,28 @@ _compose(UChar *dest, int32_t destCapacity,
 U_CAPI int32_t U_EXPORT2
 unorm_compose(UChar *dest, int32_t destCapacity,
               const UChar *src, int32_t srcLength,
-              UBool compat, UBool ignoreHangul,
+              UBool compat, int32_t options,
               UErrorCode *pErrorCode) {
+    const UnicodeSet *dx;
     int32_t destIndex;
 
     if(!_haveData(*pErrorCode)) {
         return 0;
     }
 
+    dx=getDX(options, *pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
     destIndex=_compose(dest, destCapacity,
                        src, srcLength,
-                       compat, ignoreHangul,
+                       compat, dx,
                        pErrorCode);
 
     return u_terminateUChars(dest, destCapacity, destIndex, pErrorCode);
 }
 
-/*
- ### TODO
- task items:
- - 2.0 Java sample code from unicode.org compare vs. JNI around C implementation - do monkey test
- - 2.1 port that sample code to C/C++ and run as part of regular test suite
- */
-
 /* normalize() API ---------------------------------------------------------- */
 
 /**
@@ -2721,32 +3048,39 @@ unorm_compose(UChar *dest, int32_t destCapacity,
 U_CAPI int32_t U_EXPORT2
 unorm_internalNormalize(UChar *dest, int32_t destCapacity,
                         const UChar *src, int32_t srcLength,
-                        UNormalizationMode mode, UBool ignoreHangul,
+                        UNormalizationMode mode, int32_t options,
                         UErrorCode *pErrorCode) {
+    const UnicodeSet *dx;
+
     switch(mode) {
     case UNORM_NFD:
         return unorm_decompose(dest, destCapacity,
                                src, srcLength,
-                               FALSE, ignoreHangul,
+                               FALSE, options,
                                pErrorCode);
     case UNORM_NFKD:
         return unorm_decompose(dest, destCapacity,
                                src, srcLength,
-                               TRUE, ignoreHangul,
+                               TRUE, options,
                                pErrorCode);
     case UNORM_NFC:
         return unorm_compose(dest, destCapacity,
                              src, srcLength,
-                             FALSE, ignoreHangul,
+                             FALSE, options,
                              pErrorCode);
     case UNORM_NFKC:
         return unorm_compose(dest, destCapacity,
                              src, srcLength,
-                             TRUE, ignoreHangul,
+                             TRUE, options,
                              pErrorCode);
     case UNORM_FCD:
+        dx=getDX(options, *pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            return 0;
+        }
         return unorm_makeFCD(dest, destCapacity,
                              src, srcLength,
+                             dx,
                              pErrorCode);
     case UNORM_NONE:
         /* just copy the string */
@@ -2766,7 +3100,7 @@ unorm_internalNormalize(UChar *dest, int32_t destCapacity,
 /** Public API for normalizing. */
 U_CAPI int32_t U_EXPORT2
 unorm_normalize(const UChar *src, int32_t srcLength,
-                UNormalizationMode mode, int32_t option,
+                UNormalizationMode mode, int32_t options,
                 UChar *dest, int32_t destCapacity,
                 UErrorCode *pErrorCode) {
     /* check argument values */
@@ -2792,7 +3126,7 @@ unorm_normalize(const UChar *src, int32_t srcLength,
 
     return unorm_internalNormalize(dest, destCapacity,
                                    src, srcLength,
-                                   mode, (UBool)((option&(UNORM_IGNORE_HANGUL|1))!=0),
+                                   mode, options,
                                    pErrorCode);
 }
 
@@ -3026,7 +3360,7 @@ unorm_previous(UCharIterator *src,
         if(doNormalize) {
             destLength=unorm_internalNormalize(dest, destCapacity,
                                                buffer+startIndex, bufferLength,
-                                               mode, (UBool)((options&(UNORM_IGNORE_HANGUL|1))!=0),
+                                               mode, options,
                                                pErrorCode);
             if(pNeededToNormalize!=0 && U_SUCCESS(*pErrorCode)) {
                 *pNeededToNormalize=
@@ -3272,7 +3606,7 @@ unorm_next(UCharIterator *src,
         if(doNormalize) {
             destLength=unorm_internalNormalize(dest, destCapacity,
                                                buffer, bufferLength,
-                                               mode, (UBool)((options&(UNORM_IGNORE_HANGUL|1))!=0),
+                                               mode, options,
                                                pErrorCode);
             if(pNeededToNormalize!=0 && U_SUCCESS(*pErrorCode)) {
                 *pNeededToNormalize=
@@ -3422,12 +3756,12 @@ unorm_concatenate(const UChar *left, int32_t leftLength,
     if(destCapacity>destLength) {
         destLength+=unorm_internalNormalize(dest+destLength, destCapacity-destLength,
                                             buffer, bufferLength,
-                                            mode, (UBool)((options&(UNORM_IGNORE_HANGUL|1))!=0),
+                                            mode, options,
                                             pErrorCode);
     } else {
         destLength+=unorm_internalNormalize(NULL, 0,
                                             buffer, bufferLength,
-                                            mode, (UBool)((options&(UNORM_IGNORE_HANGUL|1))!=0),
+                                            mode, options,
                                             pErrorCode);
     }
     /*
@@ -3906,6 +4240,7 @@ unorm_compare(const UChar *s1, int32_t length1,
               UErrorCode *pErrorCode) {
     UChar fold1[300], fold2[300], fcd1[300], fcd2[300];
     UChar *f1, *f2, *d1, *d2;
+    const UnicodeSet *dx;
     int32_t result;
 
     /* argument checking */
@@ -3924,17 +4259,22 @@ unorm_compare(const UChar *s1, int32_t length1,
         return 0;
     }
 
+    dx=getDX((int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT), *pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
     f1=f2=d1=d2=0;
     options|=_COMPARE_EQUIV;
     result=0;
-    
+
     if(!(options&UNORM_INPUT_IS_FCD)) {
         int32_t _len1, _len2;
         UBool isFCD1, isFCD2;
 
         // check if s1 and/or s2 fulfill the FCD conditions
-        isFCD1=unorm_checkFCD(s1, length1);
-        isFCD2=unorm_checkFCD(s2, length2);
+        isFCD1=unorm_checkFCD(s1, length1, dx);
+        isFCD2=unorm_checkFCD(s2, length2, dx);
 
         if((options&U_COMPARE_IGNORE_CASE)!=0 && !(isFCD1 && isFCD2)) {
             // case-fold first to keep the order of operations as in UAX 21 2.5
@@ -3992,8 +4332,8 @@ unorm_compare(const UChar *s1, int32_t length1,
 
             // turn off U_COMPARE_IGNORE_CASE and re-check FCD
             options&=~U_COMPARE_IGNORE_CASE;
-            isFCD1=unorm_checkFCD(s1, length1);
-            isFCD2=unorm_checkFCD(s2, length2);
+            isFCD1=unorm_checkFCD(s1, length1, dx);
+            isFCD2=unorm_checkFCD(s2, length2, dx);
         }
 
         if(!isFCD1 && !isFCD2) {
@@ -4005,7 +4345,7 @@ unorm_compare(const UChar *s1, int32_t length1,
 
             _len1=_decompose(fcd1, sizeof(fcd1)/U_SIZEOF_UCHAR,
                              s1, length1,
-                             FALSE, FALSE,
+                             FALSE, dx,
                              trailCC);
             if(_len1<=(int32_t)(sizeof(fcd1)/U_SIZEOF_UCHAR)) {
                 s1=fcd1;
@@ -4018,7 +4358,7 @@ unorm_compare(const UChar *s1, int32_t length1,
 
                 _len1=_decompose(d1, _len1,
                                  s1, length1,
-                                 FALSE, FALSE,
+                                 FALSE, dx,
                                  trailCC);
 
                 s1=d1;
@@ -4027,7 +4367,7 @@ unorm_compare(const UChar *s1, int32_t length1,
 
             _len2=_decompose(fcd2, sizeof(fcd2)/U_SIZEOF_UCHAR,
                              s2, length2,
-                             FALSE, FALSE,
+                             FALSE, dx,
                              trailCC);
             if(_len2<=(int32_t)(sizeof(fcd2)/U_SIZEOF_UCHAR)) {
                 s2=fcd2;
@@ -4040,7 +4380,7 @@ unorm_compare(const UChar *s1, int32_t length1,
 
                 _len2=_decompose(d2, _len2,
                                  s2, length2,
-                                 FALSE, FALSE,
+                                 FALSE, dx,
                                  trailCC);
 
                 s2=d2;
@@ -4055,6 +4395,7 @@ unorm_compare(const UChar *s1, int32_t length1,
             if(!isFCD1) {
                 _len1=unorm_makeFCD(fcd1, sizeof(fcd1)/U_SIZEOF_UCHAR,
                                     s1, length1,
+                                    dx,
                                     pErrorCode);
                 if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
                     s1=fcd1;
@@ -4068,6 +4409,7 @@ unorm_compare(const UChar *s1, int32_t length1,
                     *pErrorCode=U_ZERO_ERROR;
                     _len1=unorm_makeFCD(d1, _len1,
                                         s1, length1,
+                                        dx,
                                         pErrorCode);
                     if(U_FAILURE(*pErrorCode)) {
                         goto cleanup;
@@ -4081,6 +4423,7 @@ unorm_compare(const UChar *s1, int32_t length1,
             if(!isFCD2) {
                 _len2=unorm_makeFCD(fcd2, sizeof(fcd2)/U_SIZEOF_UCHAR,
                                     s2, length2,
+                                    dx,
                                     pErrorCode);
                 if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
                     s2=fcd2;
@@ -4094,6 +4437,7 @@ unorm_compare(const UChar *s1, int32_t length1,
                     *pErrorCode=U_ZERO_ERROR;
                     _len2=unorm_makeFCD(d2, _len2,
                                         s2, length2,
+                                        dx,
                                         pErrorCode);
                     if(U_FAILURE(*pErrorCode)) {
                         goto cleanup;
diff --git a/icu4c/source/common/unormimp.h b/icu4c/source/common/unormimp.h
index 966d1f42662..3fa02d33563 100644
--- a/icu4c/source/common/unormimp.h
+++ b/icu4c/source/common/unormimp.h
@@ -147,6 +147,44 @@ enum {
     _NORM_DECOMP_LENGTH_MASK=0x7f
 };
 
+/* Constants for options flags for tailored normalization. ### TODO prototype, see unorm.cpp */
+enum {
+    /** Options bit 0, do not decompose Hangul syllables. @draft ICU 2.6 */
+    UNORM_DX_HANGUL=1,
+    /** Options bit 1, do not decompose CJK compatibility characters. @draft ICU 2.6 */
+    UNORM_DX_CJK_COMPAT=2,
+    /** Options bit 2, do not decompose a-umlaut, only for testing. @internal */
+    UNORM_DX_A_UMLAUT=4,
+    /** Number of least significant options bits reserved for decomposition exclusions (only bits 0..2 are assigned above). @draft ICU 2.6 */
+    UNORM_DX_COUNT=4,
+    /** Options bit mask for decomposition exclusions. @draft ICU 2.6 */
+    UNORM_DX_MASK=(1<<UNORM_DX_COUNT)-1
+};
+
+/**
+ * Lowest-order bit number of unorm_compare() options bits corresponding to
+ * normalization options bits.
+ *
+ * The options parameter for unorm_compare() uses most bits for
+ * itself and for various comparison and folding flags.
+ * The most significant bits, however, are shifted down and passed on
+ * to the normalization implementation as
+ * (options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT).
+ *
+ * ### TODO prototype, see unorm.cpp
+ * @draft ICU 2.6
+ */
+#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+
+/**
+ * Quick check that additionally takes tailoring options (presumably the UNORM_DX_ flags). ### TODO prototype, see unorm.cpp
+ * @draft ICU 2.6
+ */
+U_CAPI UNormalizationCheckResult U_EXPORT2
+unorm_quickCheckTailored(const UChar *src, int32_t srcLength, 
+                         UNormalizationMode mode, int32_t options,
+                         UErrorCode *pErrorCode);
+
 /**
  * Is the normalizer data loaded?
  * This is used internally before other internal normalizer functions
@@ -170,7 +208,7 @@ unorm_haveData(UErrorCode *pErrorCode);
 U_CAPI int32_t U_EXPORT2
 unorm_internalNormalize(UChar *dest, int32_t destCapacity,
                         const UChar *src, int32_t srcLength,
-                        UNormalizationMode mode, UBool ignoreHangul,
+                        UNormalizationMode mode, int32_t options,
                         UErrorCode *pErrorCode);
 
 /**
@@ -180,7 +218,7 @@ unorm_internalNormalize(UChar *dest, int32_t destCapacity,
 U_CAPI int32_t U_EXPORT2
 unorm_decompose(UChar *dest, int32_t destCapacity,
                 const UChar *src, int32_t srcLength,
-                UBool compat, UBool ignoreHangul,
+                UBool compat, int32_t options,
                 UErrorCode *pErrorCode);
 
 /**
@@ -190,7 +228,7 @@ unorm_decompose(UChar *dest, int32_t destCapacity,
 U_CAPI int32_t U_EXPORT2
 unorm_compose(UChar *dest, int32_t destCapacity,
               const UChar *src, int32_t srcLength,
-              UBool compat, UBool ignoreHangul,
+              UBool compat, int32_t options,
               UErrorCode *pErrorCode);
 
 /**