diff --git a/.gitignore b/.gitignore
index a97abb29039..0f7d4a8bdb5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -212,6 +212,14 @@ icu4c/source/tools/gennames/Makefile
 icu4c/source/tools/gennames/Release
 icu4c/source/tools/gennames/gennames
 icu4c/source/tools/gennames/tmp
+icu4c/source/tools/gennorm/*.d
+icu4c/source/tools/gennorm/*.pdb
+icu4c/source/tools/gennorm/*.plg
+icu4c/source/tools/gennorm/Debug
+icu4c/source/tools/gennorm/Makefile
+icu4c/source/tools/gennorm/Release
+icu4c/source/tools/gennorm/gennorm
+icu4c/source/tools/gennorm/tmp
 icu4c/source/tools/genprops/*.d
 icu4c/source/tools/genprops/*.pdb
 icu4c/source/tools/genprops/Debug
diff --git a/icu4c/source/allinone/allinone.dsw b/icu4c/source/allinone/allinone.dsw
index f2ad6711170..e5ec3ff740c 100644
--- a/icu4c/source/allinone/allinone.dsw
+++ b/icu4c/source/allinone/allinone.dsw
@@ -74,6 +74,18 @@ Package=<4>
     Begin Project Dependency
     Project_Dep_Name decmn
     End Project Dependency
+    Begin Project Dependency
+    Project_Dep_Name genfchk
+    End Project Dependency
+    Begin Project Dependency
+    Project_Dep_Name gennorm
+    End Project Dependency
+    Begin Project Dependency
+    Project_Dep_Name genqchk
+    End Project Dependency
+    Begin Project Dependency
+    Project_Dep_Name genuca
+    End Project Dependency
 }}}
 
 ###############################################################################
@@ -96,16 +108,10 @@ Package=<4>
     Project_Dep_Name i18n
     End Project Dependency
     Begin Project Dependency
-    Project_Dep_Name makeconv
-    End Project Dependency
-    Begin Project Dependency
     Project_Dep_Name gencol
     End Project Dependency
     Begin Project Dependency
-    Project_Dep_Name genrb
-    End Project Dependency
-    Begin Project Dependency
-    Project_Dep_Name gentest
+    Project_Dep_Name toolutil
     End Project Dependency
 }}}
 
@@ -270,6 +276,21 @@ Package=<4>
 
 ###############################################################################
 
+Project: "gennorm"=..\tools\gennorm\gennorm.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+    Begin Project Dependency
+    Project_Dep_Name common
+    End Project Dependency
+}}}
+
+###############################################################################
+
 Project: "genprops"=..\tools\genprops\genprops.dsp - Package Owner=<4>
 
 Package=<5>
@@ -432,30 +453,9 @@ Package=<4>
     Project_Dep_Name i18n
     End Project Dependency
     Begin Project Dependency
-    Project_Dep_Name makeconv
-    End Project Dependency
-    Begin Project Dependency
     Project_Dep_Name gencol
     End Project Dependency
     Begin Project Dependency
-    Project_Dep_Name genrb
-    End Project Dependency
-    Begin Project Dependency
-    Project_Dep_Name genccode
-    End Project Dependency
-    Begin Project Dependency
-    Project_Dep_Name gencmn
-    End Project Dependency
-    Begin Project Dependency
-    Project_Dep_Name gencnval
-    End Project Dependency
-    Begin Project Dependency
-    Project_Dep_Name gennames
-    End Project Dependency
-    Begin Project Dependency
-    Project_Dep_Name gentz
-    End Project Dependency
-    Begin Project Dependency
     Project_Dep_Name toolutil
     End Project Dependency
 }}}
@@ -548,6 +548,15 @@ Package=<4>
     Begin Project Dependency
     Project_Dep_Name genqchk
     End Project Dependency
+    Begin Project Dependency
+    Project_Dep_Name common
+    End Project Dependency
+    Begin Project Dependency
+    Project_Dep_Name gennorm
+    End Project Dependency
+    Begin Project Dependency
+    Project_Dep_Name i18n
+    End Project Dependency
 }}}
 
 ###############################################################################
diff --git a/icu4c/source/common/common.dsp b/icu4c/source/common/common.dsp
index 1c8b4af8025..fa9d12ff5b5 100644
--- a/icu4c/source/common/common.dsp
+++ b/icu4c/source/common/common.dsp
@@ -1268,6 +1268,10 @@ InputPath=.\unicode\unorm.h
 # End Source File
 # Begin Source File
 
+SOURCE=.\unormimp.h
+# End Source File
+# Begin Source File
+
 SOURCE=.\unicode\urep.h
 
 !IF  "$(CFG)" == "common - Win32 Release"
diff --git a/icu4c/source/common/normlzr.cpp b/icu4c/source/common/normlzr.cpp
index d61176a9bf2..843b3bc090d 100644
--- a/icu4c/source/common/normlzr.cpp
+++ b/icu4c/source/common/normlzr.cpp
@@ -29,6 +29,11 @@
 #include "unicode/unicode.h"
 #include "mutex.h"
 
+/* ### TODO: new implementation */
+#include "unormimp.h"
+
+
+
 
 #define ARRAY_LENGTH(array) (sizeof (array) / sizeof (*array))
 /**
@@ -666,6 +671,25 @@ Normalizer::decompose(const UnicodeString& source,
                       UnicodeString& result, 
                       UErrorCode &status)
 {
+    /* ### TODO: begin new implementation; an incoming failure skips it and is handled by the check below */
+    if(U_SUCCESS(status) && unorm_usesNewImplementation()) {
+        if(source.isBogus()) {
+            result.setToBogus();
+        } else {
+            /* make sure that we do not operate on the same buffer in source and result */
+            result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
+            result.fLength=unorm_decompose(result.fArray, result.fCapacity,
+                                           source.fArray, source.fLength,
+                                           compat, (options&IGNORE_HANGUL)!=0,
+                                           UnicodeString::growBuffer, &result,
+                                           &status);
+            if(U_FAILURE(status)) {
+                result.setToBogus();
+            }
+        }
+        return;
+    }
+    /* ### end new implementation */
     if (U_FAILURE(status)) {
         return;
     }
diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp
index 64641e35fa4..46fb719ccd6 100644
--- a/icu4c/source/common/unorm.cpp
+++ b/icu4c/source/common/unorm.cpp
@@ -18,6 +18,7 @@
 *                         mode NFC.
 */
 
+#include "unicode/utypes.h"
 #include "unicode/unorm.h"
 #include "unicode/normlzr.h"
 #include "unicode/ustring.h"
@@ -25,11 +26,949 @@
 #include "cpputils.h"
 #include "ustr_imp.h"
 #include "umutex.h"
+#include "unormimp.h"
 
-/* added by synwee */
+/* added by synwee ### TODO: remove once the new implementation is finished */
 #include "unicode/uchar.h"
 #include "unicode/utf16.h"
 
+/* ### TODO: remove this once the new implementation is finished */
+static UBool useNewImplementation=FALSE;
+
+U_CAPI void U_EXPORT2
+unorm_setNewImplementation(UBool useNew) {
+    useNewImplementation=useNew;
+}
+
+U_CAPI UBool U_EXPORT2
+unorm_usesNewImplementation() {
+    return useNewImplementation;
+}
+
+/* new implementation ------------------------------------------------------- */
+
+/* Korean Hangul and Jamo constants */
+enum {
+    JAMO_L_BASE=0x1100,     /* "lead" jamo */
+    JAMO_V_BASE=0x1161,     /* "vowel" jamo */
+    JAMO_T_BASE=0x11a7,     /* "trail" jamo */
+
+    HANGUL_BASE=0xac00,
+
+    JAMO_L_COUNT=19,
+    JAMO_V_COUNT=21,
+    JAMO_T_COUNT=28
+};
+
+/* load unorm.dat ----------------------------------------------------------- */
+
+/* for a description of the file format, see icu/source/tools/gennorm/store.c */
+#define DATA_NAME "unorm"
+#define DATA_TYPE "dat"
+
+static UDataMemory *normData=NULL;
+static UErrorCode dataErrorCode=U_ZERO_ERROR;
+static int8_t haveNormData=0;
+
+/*
+ * pointers into the memory-mapped unorm.dat
+ */
+static const uint16_t *indexes=NULL,
+                      *normTrieIndex=NULL, *extraData=NULL,
+                      *combiningTable=NULL,
+                      *fcdTrieIndex=NULL;
+
+/*
+ * note that there is no uint32_t *normTrieData:
+ * the indexes in the trie are adjusted so that they point to the data based on
+ * (uint32_t *)normTrieIndex - this saves one variable at runtime
+ */
+#define normTrieData ((uint32_t *)normTrieIndex)
+
+/* similarly for the FCD trie index and data - but both are uint16_t * */
+
+/* the Unicode version of the normalization data */
+static UVersionInfo dataVersion={ 3, 1, 0, 0 };
+
+static UBool U_CALLCONV
+isAcceptable(void *context,
+             const char *type, const char *name,
+             const UDataInfo *pInfo) {
+    if(
+        pInfo->size>=20 &&
+        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
+        pInfo->charsetFamily==U_CHARSET_FAMILY &&
+        pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
+        pInfo->dataFormat[1]==0x6f &&
+        pInfo->dataFormat[2]==0x72 &&
+        pInfo->dataFormat[3]==0x6d &&
+        pInfo->formatVersion[0]==1 &&
+        pInfo->formatVersion[3]==_NORM_TRIE_SHIFT
+    ) {
+        uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
+        return TRUE;
+    } else {
+        return FALSE;
+    }
+}
+
+static int8_t
+loadNormData(UErrorCode &errorCode) {
+    /* load the normalization data on first use; returns haveNormData: >0 loaded, <0 loading failed, 0 not attempted */
+    if(haveNormData==0) {
+        UDataMemory *data;
+        const uint16_t *p=NULL;
+
+        if(&errorCode==NULL || U_FAILURE(errorCode)) {
+            return 0;
+        }
+
+        /* open the data outside the mutex block; the resulting error code is remembered in dataErrorCode */
+        data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errorCode);
+        dataErrorCode=errorCode;
+        if(U_FAILURE(errorCode)) {
+            return haveNormData=-1;
+        }
+
+        p=(const uint16_t *)udata_getMemory(data);
+
+        /* in the mutex block, set the data for this process unless normData was already set */
+        umtx_lock(NULL);
+        if(normData==NULL) {
+            normData=data;
+            data=NULL;
+            indexes=p;
+            p=NULL;
+        }
+        umtx_unlock(NULL);
+
+        /* set the table pointers from the counts in indexes[] (note: assigned after the mutex block) */
+        normTrieIndex=indexes+indexes[_NORM_INDEX_COUNT];
+        extraData=normTrieIndex+indexes[_NORM_INDEX_TRIE_INDEX_COUNT]+2*indexes[_NORM_INDEX_TRIE_DATA_COUNT];
+        combiningTable=extraData+indexes[_NORM_INDEX_UCHAR_COUNT];
+        fcdTrieIndex=combiningTable+indexes[_NORM_INDEX_COMBINE_DATA_COUNT];
+        haveNormData=1;
+
+        /* if a different thread set it first, then close the extra data */
+        if(data!=NULL) {
+            udata_close(data); /* data is NULL here if this call stored it in normData */
+        }
+    }
+
+    return haveNormData;
+}
+
+inline UBool
+_haveData(UErrorCode &errorCode) {
+    if(haveNormData!=0) {
+        errorCode=dataErrorCode;
+        return (UBool)(haveNormData>0);
+    } else {
+        return (UBool)(loadNormData(errorCode)>0);
+    }
+}
+
+U_CAPI UBool U_EXPORT2
+unorm_haveData(UErrorCode *pErrorCode) {
+    return (UBool)(pErrorCode!=NULL && _haveData(*pErrorCode)); /* never bind a NULL pointer to the reference */
+}
+
+/* data access primitives --------------------------------------------------- */
+
+inline uint32_t
+_getNorm32(UChar c) {
+    return
+        normTrieData[
+            normTrieIndex[
+                c>>_NORM_TRIE_SHIFT
+            ]+
+            (c&_NORM_STAGE_2_MASK)
+        ];
+}
+
+inline uint32_t
+_getNorm32FromSurrogatePair(uint32_t norm32, UChar c2) {
+    /* the surrogate index in norm32 is an offset over the BMP top of stage 1 */
+    uint32_t c=
+        ((norm32>>(_NORM_EXTRA_SHIFT-10))&0xffc00)|
+        (c2&0x3ff);
+    return
+        normTrieData[
+            normTrieIndex[
+                _NORM_STAGE_1_BMP_COUNT+
+                (c>>_NORM_TRIE_SHIFT)
+            ]+
+            (c&_NORM_STAGE_2_MASK)
+        ];
+}
+
+inline uint16_t
+_getFCD16(UChar c) {
+    return
+        fcdTrieIndex[
+            fcdTrieIndex[
+                c>>_NORM_TRIE_SHIFT
+            ]+
+            (c&_NORM_STAGE_2_MASK)
+        ];
+}
+
+inline uint16_t
+_getFCD16FromSurrogatePair(uint16_t fcd16, UChar c2) {
+    /* the surrogate index in fcd16 is an absolute offset over the start of stage 1 */
+    uint32_t c=
+        ((uint32_t)fcd16<<10)|
+        (c2&0x3ff);
+    return
+        fcdTrieIndex[
+            fcdTrieIndex[
+                c>>_NORM_TRIE_SHIFT
+            ]+
+            (c&_NORM_STAGE_2_MASK)
+        ];
+}
+
+inline const uint16_t *
+_getExtraData(uint32_t norm32) {
+    return extraData+(norm32>>_NORM_EXTRA_SHIFT);
+}
+
+/*
+ * get the combining class of (c, c2)=*p++
+ * before: p<limit  after: p<=limit
+ * if only one code unit is used, then c2==0
+ */
+inline uint8_t
+_getNextCC(const UChar *&p, const UChar *limit, UChar &c, UChar &c2) {
+    uint32_t norm32;
+
+    c=*p++;
+    norm32=_getNorm32(c);
+    if((norm32&_NORM_CC_MASK)==0) {
+        c2=0;
+        return 0;
+    } else {
+        if(norm32<_NORM_MIN_SPECIAL || _NORM_SURROGATES_TOP<=norm32) {
+            c2=0;
+        } else {
+            /* c is a lead surrogate, get the real norm32 */
+            if(p!=limit && (c2=*p, UTF_IS_SECOND_SURROGATE(c2))) {
+                ++p;
+                norm32=_getNorm32FromSurrogatePair(norm32, c2);
+            } else {
+                c2=0;
+                return 0;
+            }
+        }
+
+        return (uint8_t)(norm32>>_NORM_CC_SHIFT);
+    }
+}
+
+/*
+ * get the combining class of (c, c2)=*--p
+ * before: start<p  after: start<=p
+ */
+inline uint8_t
+_getPrevCC(const UChar *start, const UChar *&p) {
+    uint32_t norm32;
+    UChar c, c2;
+
+    c=*--p;
+
+    /* check for a surrogate before getting norm32 to see if we need to predecrement further */
+    if(!UTF_IS_SURROGATE(c)) {
+        return (uint8_t)(_getNorm32(c)>>_NORM_CC_SHIFT);
+    } else if(UTF_IS_SURROGATE_FIRST(c)) {
+        /* unpaired first surrogate */
+        return 0;
+    } else if(p!=start && (c2=*(p-1), UTF_IS_FIRST_SURROGATE(c2))) {
+        --p;
+        norm32=_getNorm32(c2);
+        if((norm32&_NORM_CC_MASK)==0) {
+            /* all surrogate pairs with this lead surrogate have cc==0 */
+            return 0;
+        } else {
+            /* norm32 must be a surrogate special */
+            return (uint8_t)(_getNorm32FromSurrogatePair(norm32, c)>>_NORM_CC_SHIFT);
+        }
+    } else {
+        /* unpaired second surrogate */
+        return 0;
+    }
+}
+
+/* reorder UTF-16 in-place -------------------------------------------------- */
+
+/*
+ * merge two parts of a UTF-16 string in-place
+ * to canonically order (order by combining classes) their concatenation
+ *
+ * before: [start..p[ is already ordered, and
+ *         [p..limit[ is ordered in itself, but
+ *                    not in relation to [start..p[
+ * after: [start..limit[ is ordered
+ *
+ * the algorithm is a simple bubble-sort that takes the characters from *p++
+ * and inserts them in correct combining class order into the preceding part
+ * of the string
+ *
+ * returns the trailing combining class
+ */
+static uint8_t
+_mergeOrdered(const UChar *start, UChar *p, const UChar *limit) {
+    const UChar *pBack, *pPreBack;
+    UChar *pSplit, *q;
+    UChar c, c2;
+    uint8_t cc, prevCC, trailCC=0;
+
+    if(start==p) {
+        /* nothing to do */
+        if(start!=limit) {
+            return _getPrevCC(start, limit);
+        } else {
+            return 0;
+        }
+    }
+
+    while(p<limit) {
+        pSplit=p;
+        cc=_getNextCC(p, limit, c, c2);
+        if(cc==0) {
+            /* does not bubble back */
+            trailCC=0;
+            break;
+        } else {
+            /* search for the insertion point where cc>=prevCC */
+            pPreBack=pBack=pSplit;
+            prevCC=_getPrevCC(start, pPreBack);
+            if(cc>=prevCC) {
+                /* does not bubble back */
+                trailCC=cc;
+                break;
+            } else {
+                /* this will be the last code point, so keep its cc */
+                trailCC=prevCC;
+                pBack=pPreBack;
+                while(start<pPreBack) {
+                    prevCC=_getPrevCC(start, pPreBack);
+                    if(cc>=prevCC) {
+                        break;
+                    }
+                    pBack=pPreBack;
+                }
+
+                /*
+                 * this is where we are right now with all these pointers:
+                 * [start..pPreBack[ 0..? code points that we can ignore
+                 * [pPreBack..pBack[ 0..1 code points with prevCC<=cc
+                 * [pBack..pSplit[   0..n code points with >cc, move up to insert (c, c2)
+                 * [pSplit..p[          1 code point (c, c2) with cc
+                 * [p..limit[        0..? code points yet to be bubbled in
+                 */
+
+                /* move the code units in between up */
+                q=p;
+                do {
+                    *--q=*--pSplit;
+                } while(pBack!=pSplit);
+
+                /* insert (c, c2) */
+                *pSplit=c;
+                if(c2!=0) {
+                    *(pSplit+1)=c2;
+                }
+
+                /* we know that the new part is ordered in itself, so we can move start up */
+                start=q; /* set it to after where (c, c2) were inserted */
+            }
+        }
+    }
+
+    if(p==limit) {
+        /* we know the cc of the last code point */
+        return trailCC;
+    } else {
+        return _getPrevCC(start, limit);
+    }
+}
+
+/*
+ * simpler, more efficient version of _mergeOrdered() -
+ * inserts only one code point into the preceding string
+ * assumes that (c, c2) has not yet been inserted at [pSplit..p[
+ */
+static uint8_t
+_insertOrdered(const UChar *start, UChar *pSplit, UChar *p,
+               UChar c, UChar c2, uint8_t cc) {
+    const UChar *pBack, *pPreBack;
+    UChar *q;
+    uint8_t prevCC, trailCC=cc;
+
+    if(start<pSplit && cc!=0) {
+        /* search for the insertion point where cc>=prevCC */
+        pPreBack=pBack=pSplit;
+        prevCC=_getPrevCC(start, pPreBack);
+        if(cc<prevCC) {
+            /* this will be the last code point, so keep its cc */
+            trailCC=prevCC;
+            pBack=pPreBack;
+            while(start<pPreBack) {
+                prevCC=_getPrevCC(start, pPreBack);
+                if(cc>=prevCC) {
+                    break;
+                }
+                pBack=pPreBack;
+            }
+
+            /*
+             * this is where we are right now with all these pointers:
+             * [start..pPreBack[ 0..? code points that we can ignore
+             * [pPreBack..pBack[ 0..1 code points with prevCC<=cc
+             * [pBack..pSplit[   0..n code points with >cc, move up to insert (c, c2)
+             * [pSplit..p[          1 code point (c, c2) with cc
+             */
+
+            /* move the code units in between up */
+            q=p;
+            do {
+                *--q=*--pSplit;
+            } while(pBack!=pSplit);
+        }
+    }
+
+    /* insert (c, c2) */
+    *pSplit=c;
+    if(c2!=0) {
+        *(pSplit+1)=c2;
+    }
+
+    /* we know the cc of the last code point */
+    return trailCC;
+}
+
+/* quick check functions ---------------------------------------------------- */
+
+static UBool
+unorm_checkFCD(const UChar *src,
+               int32_t srcLength, 
+               UErrorCode *pErrorCode) {
+    const UChar *limit;
+    UChar c, c2;
+    uint16_t fcd16;
+    int16_t prevCC, cc;
+
+    /* check arguments */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return FALSE;
+    }
+
+    if(src==NULL || srcLength<-1) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return FALSE;
+    }
+
+    if(!_haveData(*pErrorCode)) {
+        return FALSE;
+    }
+
+    /* initialize */
+    prevCC=0;
+
+    if(srcLength>=0) {
+        /* string with length */
+        limit=src+srcLength;
+    } else /* srcLength==-1 */ {
+        /* zero-terminated string */
+        limit=NULL;
+    }
+
+    U_ALIGN_CODE(16);
+
+    for(;;) {
+        /* skip a run of code units below the minimum or with irrelevant data for the FCD check */
+        if(limit==NULL) {
+            for(;;) {
+                c=*src++;
+                if(c<_NORM_MIN_WITH_LEAD_CC) {
+                    if(c==0) {
+                        return TRUE;
+                    }
+                    prevCC=-(int16_t)c;
+                } else if((fcd16=_getFCD16(c))==0) {
+                    prevCC=0;
+                } else {
+                    break;
+                }
+            }
+        } else {
+            for(;;) {
+                if(src==limit) {
+                    return TRUE;
+                } else if((c=*src++)<_NORM_MIN_WITH_LEAD_CC) {
+                    prevCC=-(int16_t)c;
+                } else if((fcd16=_getFCD16(c))==0) {
+                    prevCC=0;
+                } else {
+                    break;
+                }
+            }
+        }
+
+        /* check one above-minimum, relevant code unit */
+        if(UTF_IS_FIRST_SURROGATE(c)) {
+            /* c is a lead surrogate, get the real fcd16 */
+            if((limit==NULL || src!=limit) && (c2=*src, UTF_IS_SECOND_SURROGATE(c2))) {
+                ++src;
+                fcd16=_getFCD16FromSurrogatePair(fcd16, c2);
+            } else {
+                fcd16=0;
+            }
+        }
+
+        /*
+         * prevCC has values from the following ranges:
+         * 0..0xff - the previous trail combining class
+         * <0      - the negative value of the previous code unit;
+         *           that code unit was <_NORM_MIN_WITH_LEAD_CC and its _getFCD16()
+         *           was deferred so that average text is checked faster
+         */
+
+        /* check the combining order */
+        cc=(int16_t)(fcd16>>8);
+        if(cc!=0) {
+            if(prevCC<0) {
+                /* the previous character was <_NORM_MIN_WITH_LEAD_CC, we need to get its trail cc */
+                prevCC=(int16_t)_getFCD16((UChar)-prevCC)&0xff;
+            }
+
+            if(cc<prevCC) {
+                return FALSE;
+            }
+        }
+        prevCC=(int16_t)fcd16&0xff;
+    }
+}
+
+static UNormalizationCheckResult
+_unorm_quickCheck(const UChar *src,
+                 int32_t srcLength, 
+                 UNormalizationMode mode, 
+                 UErrorCode *pErrorCode) {
+    const UChar *limit;
+    uint32_t norm32, ccOrQCMask, qcMask;
+    UChar c, c2, minNoMaybe;
+    uint8_t cc, prevCC;
+    UNormalizationCheckResult result;
+
+    /* check arguments */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return UNORM_MAYBE;
+    }
+
+    if(src==NULL || srcLength<-1) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return UNORM_MAYBE;
+    }
+
+    if(!_haveData(*pErrorCode)) {
+        return UNORM_MAYBE;
+    }
+
+    /* check for a valid mode and set the quick check minimum and mask */
+    switch(mode) {
+    case UNORM_NFC:
+        minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE];
+        qcMask=_NORM_QC_NFC;
+        break;
+    case UNORM_NFKC:
+        minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE];
+        qcMask=_NORM_QC_NFKC;
+        break;
+    case UNORM_NFD:
+        minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE];
+        qcMask=_NORM_QC_NFD;
+        break;
+    case UNORM_NFKD:
+        minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE];
+        qcMask=_NORM_QC_NFKD;
+        break;
+    /* ### TODO: case UNORM_FCD: return unorm_checkFCD(); */
+    default:
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return UNORM_MAYBE;
+    }
+
+    /* initialize */
+    ccOrQCMask=_NORM_CC_MASK|qcMask;
+    result=UNORM_YES;
+    prevCC=0;
+
+    if(srcLength>=0) {
+        /* string with length */
+        limit=src+srcLength;
+    } else /* srcLength==-1 */ {
+        /* zero-terminated string */
+        limit=NULL;
+    }
+
+    U_ALIGN_CODE(16);
+
+    for(;;) {
+        /* skip a run of code units below the minimum or with irrelevant data for the quick check */
+        if(limit==NULL) {
+            for(;;) {
+                c=*src++;
+                if(c<minNoMaybe) {
+                    if(c==0) {
+                        return result;
+                    }
+                } else if(((norm32=_getNorm32(c))&ccOrQCMask)!=0) {
+                    break;
+                }
+                prevCC=0;
+            }
+        } else {
+            for(;;) {
+                if(src==limit) {
+                    return result;
+                } else if((c=*src++)>=minNoMaybe && ((norm32=_getNorm32(c))&ccOrQCMask)!=0) {
+                    break;
+                }
+                prevCC=0;
+            }
+        }
+
+        /* check one above-minimum, relevant code unit */
+        if(_NORM_MIN_SPECIAL<=norm32 && norm32<_NORM_SURROGATES_TOP) {
+            /* c is a lead surrogate, get the real norm32 */
+            if((limit==NULL || src!=limit) && (c2=*src, UTF_IS_SECOND_SURROGATE(c2))) {
+                ++src;
+                norm32=_getNorm32FromSurrogatePair(norm32, c2);
+            } else {
+                norm32=0;
+            }
+        }
+
+        /* check the combining order */
+        cc=(uint8_t)(norm32>>_NORM_CC_SHIFT);
+        if(cc!=0 && cc<prevCC) {
+            return UNORM_NO;
+        }
+        prevCC=cc;
+
+        /* check for "no" or "maybe" quick check flags */
+        norm32&=qcMask;
+        if(norm32&_NORM_QC_ANY_NO) {
+            return UNORM_NO;
+        } else if(norm32!=0) {
+            result=UNORM_MAYBE;
+        }
+    }
+
+    return result;
+}
+
+/* make NFD & NFKD ---------------------------------------------------------- */
+
+/*
+ * Decompose src into dest: canonical (NFD) if !compat, compatibility (NFKD) if compat.
+ * srcLength==-1 means that src is zero-terminated.
+ * ignoreHangul: copy Hangul syllables unchanged instead of splitting them into Jamo.
+ * growBuffer (may be NULL) is called with context to enlarge dest when it is too small;
+ * without it, or once it has failed, the output is only counted (preflighting).
+ * Returns the output length, or 0 if the normalization data is not available;
+ * sets U_BUFFER_OVERFLOW_ERROR if the length exceeds destCapacity.
+ */
+U_CFUNC int32_t
+unorm_decompose(UChar *dest, int32_t destCapacity,
+                const UChar *src, int32_t srcLength,
+                UBool compat, UBool ignoreHangul,
+                GrowBuffer *growBuffer, void *context,
+                UErrorCode *pErrorCode) {
+    UChar buffer[3];
+    const UChar *limit, *prevSrc, *p;
+    uint32_t norm32, ccOrQCMask, qcMask;
+    int32_t destIndex, reorderStartIndex, length;
+    UChar c, c2, minNoMaybe;
+    uint8_t cc, prevCC, trailCC;
+    UBool canGrow;
+
+    if(!_haveData(*pErrorCode)) {
+        return 0;
+    }
+
+    if(!compat) {
+        minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE];
+        qcMask=_NORM_QC_NFD;
+    } else {
+        minNoMaybe=(UChar)indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE];
+        qcMask=_NORM_QC_NFKD;
+    }
+
+    /* initialize */
+    reorderStartIndex=0; /* kept as an index, not a pointer: growBuffer() may move dest */
+    ccOrQCMask=_NORM_CC_MASK|qcMask;
+    destIndex=0;
+    prevCC=0;
+
+    /* do not attempt to grow if there is no growBuffer function or if it has failed before */
+    canGrow=(UBool)(growBuffer!=NULL);
+
+    limit= srcLength>=0 ? src+srcLength : NULL; /* NULL marks a zero-terminated string (srcLength==-1) */
+
+    U_ALIGN_CODE(16);
+
+    for(;;) {
+        /* count code units below the minimum or with irrelevant data for the quick check */
+        prevSrc=src;
+        if(limit==NULL) {
+            while((c=*src)<minNoMaybe ? c!=0 : ((norm32=_getNorm32(c))&ccOrQCMask)==0) {
+                prevCC=0;
+                ++src;
+            }
+        } else {
+            while(src!=limit && ((c=*src)<minNoMaybe || ((norm32=_getNorm32(c))&ccOrQCMask)==0)) {
+                prevCC=0;
+                ++src;
+            }
+        }
+
+        /* copy these code units all at once */
+        if(src!=prevSrc) {
+            length=(int32_t)(src-prevSrc);
+            if( (destIndex+length)<=destCapacity ||
+                /* attempt to grow the buffer */
+                (canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
+                                                limit==NULL ?
+                                                    2*(destCapacity)+length+20 :
+                                                    destCapacity+length+2*(limit-src)+20,
+                                                destIndex)))
+            ) {
+                do {
+                    dest[destIndex++]=*prevSrc++;
+                } while(src!=prevSrc);
+                reorderStartIndex=destIndex;
+            } else {
+                /* buffer overflow: keep incrementing the destIndex for preflighting */
+                destIndex+=length;
+            }
+        }
+
+        /* end of source reached? */
+        if(limit==NULL ? c==0 : src==limit) {
+            break;
+        }
+
+        /* c already contains *src and norm32 is set for it, increment src */
+        ++src;
+
+        /* check one above-minimum, relevant code unit */
+        /*
+         * generally, set p and length to the decomposition string
+         * in simple cases, p==NULL and (c, c2) will hold the length code units to append
+         * in all cases, set cc to the lead and trailCC to the trail combining class
+         */
+        if(norm32>=_NORM_MIN_HANGUL) {
+            cc=trailCC=0; /* set on both paths: the ignoreHangul path must not reuse stale values */
+            if(ignoreHangul) {
+                c2=0;
+                p=NULL;
+                length=1;
+            } else {
+                /* Hangul syllable: decompose algorithmically */
+                p=buffer;
+
+                c-=HANGUL_BASE;
+
+                c2=(UChar)(c%JAMO_T_COUNT);
+                c/=JAMO_T_COUNT;
+                if(c2>0) {
+                    buffer[2]=(UChar)(JAMO_T_BASE+c2);
+                    length=3;
+                } else {
+                    length=2;
+                }
+
+                buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
+                buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
+            }
+        } else {
+            if(norm32<_NORM_MIN_SPECIAL) {
+                c2=0;
+                length=1;
+            } else {
+                /* c is a lead surrogate, get the real norm32 */
+                if((limit==NULL || src!=limit) && (c2=*src, UTF_IS_SECOND_SURROGATE(c2))) {
+                    ++src;
+                    length=2;
+                    norm32=_getNorm32FromSurrogatePair(norm32, c2);
+                } else {
+                    c2=0;
+                    length=1;
+                    norm32=0;
+                }
+            }
+
+            /* get the decomposition and the lead and trail cc's */
+            if((norm32&qcMask)==0) {
+                /* c does not decompose */
+                cc=trailCC=(uint8_t)(norm32>>_NORM_CC_SHIFT);
+                p=NULL;
+            } else {
+                /* c decomposes, get everything from the variable-length extra data */
+                p=(const UChar *)_getExtraData(norm32);
+                length=*p++;
+
+                if((norm32&qcMask&_NORM_QC_NFKD)!=0 && length>=0x100) {
+                    /* use compatibility decomposition, skip canonical data */
+                    p+=((length>>7)&1)+(length&0x7f);
+                    length>>=8;
+                }
+
+                if(length&0x80) {
+                    /* get the lead and trail cc's */
+                    UChar bothCCs=*p++;
+                    cc=(uint8_t)(bothCCs>>8);
+                    trailCC=(uint8_t)bothCCs;
+                } else {
+                    /* lead and trail cc's are both 0 */
+                    cc=trailCC=0;
+                }
+
+                length&=0x7f;
+                if(length==1) {
+                    /* fastpath a single code unit from decomposition */
+                    c=*p;
+                    c2=0;
+                    p=NULL;
+                }
+            }
+        }
+
+        /* append the decomposition to the destination buffer, assume length>0 */
+        if( (destIndex+length)<=destCapacity ||
+            /* attempt to grow the buffer */
+            (canGrow && (canGrow=growBuffer(context, &dest, &destCapacity,
+                                            limit==NULL ?
+                                                2*(destCapacity)+length+20 :
+                                                destCapacity+length+2*(limit-src)+20,
+                                            destIndex)))
+        ) {
+            UChar *reorderSplit=dest+destIndex;
+            if(p==NULL) {
+                /* fastpath: single code point */
+                if(cc!=0 && cc<prevCC) {
+                    /* (c, c2) is out of order with respect to the preceding text */
+                    destIndex+=length;
+                    trailCC=_insertOrdered(dest+reorderStartIndex, reorderSplit, dest+destIndex, c, c2, cc);
+                } else {
+                    /* just append (c, c2) */
+                    dest[destIndex++]=c;
+                    if(c2!=0) {
+                        dest[destIndex++]=c2;
+                    }
+                }
+            } else {
+                /* general: append multiple code points (ordered by themselves) from decomposition */
+                do {
+                    dest[destIndex++]=*p++;
+                } while(--length>0);
+
+                if(cc!=0 && cc<prevCC) {
+                    /* the decomposition is out of order with respect to the preceding text */
+                    trailCC=_mergeOrdered(dest+reorderStartIndex, reorderSplit, dest+destIndex);
+                }
+            }
+        } else {
+            /* buffer overflow: keep incrementing the destIndex for preflighting */
+            destIndex+=length;
+        }
+
+        prevCC=trailCC;
+        if(prevCC==0) {
+            reorderStartIndex=destIndex;
+        }
+    }
+
+#if 1
+    /* ### TODO: this passes the tests but seems weird */
+    /* we may NUL-terminate if it fits as a convenience */
+    if(destIndex<destCapacity) {
+        dest[destIndex]=0;
+    } else if(destIndex>destCapacity) {
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+    }
+#else
+    /* ### TODO: this looks slightly more reasonable but fails some tests, esp. /tscoll/cmsccoll/TestIncrementalNormalize */
+    if(limit==NULL) {
+        /* assume that we must NUL-terminate */
+        if(destIndex<destCapacity) {
+            /* ### TODO: this one would make sense -- dest[destIndex++]=0; -- but the following is more compatible */
+            dest[destIndex]=0;
+        } else {
+            /* ### TODO: same as above -- ++destIndex; */
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        }
+    } else {
+        /* we may NUL-terminate if it fits as a convenience */
+        if(destIndex<destCapacity) {
+            dest[destIndex]=0;
+        } else if(destIndex>destCapacity) {
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        }
+    }
+#endif
+
+    return destIndex;
+}
+
+/* make NFC & NFKC ---------------------------------------------------------- */
+
+U_CFUNC int32_t
+unorm_compose(UChar *dest, int32_t destCapacity,
+              const UChar *src, int32_t srcLength,
+              UBool compat, UBool ignoreHangul,
+              GrowBuffer *growBuffer, void *context,
+              UErrorCode *pErrorCode) {
+    /* ### TODO: for now, this delegates to the old C++ Normalizer, like the old unorm_normalize() */
+    if(U_FAILURE(*pErrorCode)) {
+        return -1;
+    }
+
+    /* translate the flags into the old API's mode and option values (no hard-coded conversion) */
+    const Normalizer::EMode composeMode=compat ? Normalizer::COMPOSE_COMPAT : Normalizer::COMPOSE;
+    const int32_t oldOptions=ignoreHangul ? Normalizer::IGNORE_HANGUL : 0;
+
+    /* wrap src and dest in UnicodeString objects for the C++ call */
+    const int32_t length= srcLength==-1 ? u_strlen(src) : srcLength;
+    const UnicodeString input(srcLength==-1, src, length);
+    UnicodeString output(dest, 0, destCapacity);
+    Normalizer::normalize(input, composeMode, oldOptions, output, *pErrorCode); /* includes the quick check */
+    return uprv_fillOutputString(output, dest, destCapacity, pErrorCode);
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* old implementation ------------------------------------------------------- */
+
 /* added by synwee for trie manipulation*/
 #define STAGE_1_SHIFT_            10
 #define STAGE_2_SHIFT_            4
@@ -134,29 +1073,87 @@ static const uint16_t *FCHK_STAGE_2_;
 static const uint16_t *FCHK_STAGE_3_;
 
 U_CAPI int32_t
-unorm_normalize(const UChar*            source,
-        int32_t                 sourceLength, 
+unorm_normalize(const UChar*            src,
+        int32_t                 srcLength, 
         UNormalizationMode      mode, 
         int32_t                 option,
-        UChar*                  result,
-        int32_t                 resultLength,
-        UErrorCode*             status)
+        UChar*                  dest,
+        int32_t                 destCapacity,
+        UErrorCode*             pErrorCode)
 {
-  if(U_FAILURE(*status)) return -1;
+    if(useNewImplementation) { /* new code path: validate, then dispatch on mode */
+        UBool ignoreHangul;
+
+        /* check argument values; every rejection in this branch returns 0 (the old code path below returns -1) */
+        if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+            return 0;
+        }
+
+        if( destCapacity<0 || (dest==NULL && destCapacity>0) ||
+            src==NULL || srcLength<-1
+        ) {
+            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+
+        /* check for overlapping src and destination; with srcLength<=0 only "src starts inside dest" is detected */
+        /* ### TODO: real API may provide a temp buffer */
+        if( (src>=dest && src<(dest+destCapacity)) ||
+            (srcLength>0 && dest>=src && dest<(src+srcLength))
+        ) {
+            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+
+        ignoreHangul= (option&UNORM_IGNORE_HANGUL)!=0;
+
+        switch(mode) {
+        case UNORM_NFD:
+            return unorm_decompose(dest, destCapacity,
+                                   src, srcLength,
+                                   FALSE, ignoreHangul,
+                                   NULL, NULL,
+                                   pErrorCode);
+        case UNORM_NFKD:
+            return unorm_decompose(dest, destCapacity,
+                                   src, srcLength,
+                                   TRUE, ignoreHangul,
+                                   NULL, NULL,
+                                   pErrorCode);
+        case UNORM_NFC:
+            return unorm_compose(dest, destCapacity,
+                                 src, srcLength,
+                                 FALSE, ignoreHangul,
+                                 NULL, NULL,
+                                 pErrorCode);
+        case UNORM_NFKC:
+            return unorm_compose(dest, destCapacity,
+                                 src, srcLength,
+                                 TRUE, ignoreHangul,
+                                 NULL, NULL,
+                                 pErrorCode);
+        /* ### TODO: case UNORM_FCD: return unorm_makeFCD(); -- until then every other mode is rejected below */
+        default:
+            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+    }
+
+  if(U_FAILURE(*pErrorCode)) return -1;
 
   /* synwee : removed hard coded conversion */
-  Normalizer::EMode normMode = Normalizer::getNormalizerEMode(mode, *status);
-  if (U_FAILURE(*status))
+  Normalizer::EMode normMode = Normalizer::getNormalizerEMode(mode, *pErrorCode);
+  if (U_FAILURE(*pErrorCode))
     return -1;
 
-  int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength);
-  const UnicodeString src(sourceLength == -1, source, len);
-  UnicodeString dst(result, 0, resultLength);
+  int32_t len = (srcLength == -1 ? u_strlen(src) : srcLength);
+  const UnicodeString source(srcLength == -1, src, len);
+  UnicodeString dst(dest, 0, destCapacity);
   /* synwee : note quickcheck is added in C ++ normalize method */
   if ((option & UNORM_IGNORE_HANGUL) != 0)
     option = Normalizer::IGNORE_HANGUL;
-  Normalizer::normalize(src, normMode, option, dst, *status);
-  return uprv_fillOutputString(dst, result, resultLength, status);
+  Normalizer::normalize(source, normMode, option, dst, *pErrorCode);
+  return uprv_fillOutputString(dst, dest, destCapacity, pErrorCode);
 }
 
 static UBool U_CALLCONV
@@ -260,6 +1257,10 @@ unorm_quickCheck(const UChar             *source,
   const UChar                *psource;
   const UChar                *pend             = 0;
 
+  if(useNewImplementation) {
+    return _unorm_quickCheck(source, sourcelength, mode, status);
+  }
+
   if (!loadQuickCheckData(status) || U_FAILURE(*status)) {
       return UNORM_MAYBE;
   }
@@ -502,6 +1503,10 @@ U_CAPI const uint16_t * getFCHK_STAGE_3_(UErrorCode *error)
 U_CAPI UBool 
 checkFCD(const UChar* source, int32_t sourcelength, UErrorCode* status)
 {
+    if(useNewImplementation) {
+        return unorm_checkFCD(source, sourcelength, status);
+    }
+
         UChar32  codepoint;
   const UChar   *psource;
   const UChar   *pend = 0;
diff --git a/icu4c/source/common/unormimp.h b/icu4c/source/common/unormimp.h
new file mode 100644
index 00000000000..a50a88d682f
--- /dev/null
+++ b/icu4c/source/common/unormimp.h
@@ -0,0 +1,164 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  unormimp.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001may25
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UNORMIMP_H__
+#define __UNORMIMP_H__
+
+#include "unicode/utypes.h"
+#include "ustr_imp.h"
+
+/* trie constants */
+enum {
+    /*
+     * must be <=10:
+     * above 10, a lead surrogate's block is smaller than a stage 2 block
+     */
+    _NORM_TRIE_SHIFT=5,
+
+    _NORM_STAGE_2_BLOCK_COUNT=1<<_NORM_TRIE_SHIFT,
+    _NORM_STAGE_2_MASK=_NORM_STAGE_2_BLOCK_COUNT-1,
+
+    _NORM_STAGE_1_BMP_COUNT=(1<<(16-_NORM_TRIE_SHIFT)),
+
+    _NORM_SURROGATE_BLOCK_BITS=10-_NORM_TRIE_SHIFT,
+    _NORM_SURROGATE_BLOCK_COUNT=(1<<_NORM_SURROGATE_BLOCK_BITS)
+};
+
+/* this may be >0xffff and may not work as an enum */
+#define _NORM_STAGE_1_MAX_COUNT (0x110000>>_NORM_TRIE_SHIFT)
+
+/* value constants */
+enum {
+    /* quick check flags 0..3 set mean "no" for their forms */
+    _NORM_QC_NFC=0x11,          /* no|maybe */
+    _NORM_QC_NFKC=0x22,         /* no|maybe */
+    _NORM_QC_NFD=4,             /* no */
+    _NORM_QC_NFKD=8,            /* no */
+
+    _NORM_QC_ANY_NO=0xf,
+
+    /* quick check flags 4..5 mean "maybe" for their forms; test flags>=_NORM_QC_MAYBE */
+    _NORM_QC_MAYBE=0x10,
+    _NORM_QC_ANY_MAYBE=0x30,
+
+    _NORM_COMBINES_FWD=0x40,
+    _NORM_COMBINES_BACK=0x80,
+    _NORM_COMBINES_ANY=0xc0,
+
+#if 0
+    _NORM_CC_TYPE_MASK=0xc0,
+    _NORM_CC_TYPE_NONE=0,       /* no cc - lead and trail cc are 0 */
+    _NORM_CC_TYPE_SAME=0x40,    /* lead and trail cc are same, non-zero, and in value */
+    _NORM_CC_TYPE_TRAIL=0x80,   /* lead cc=0, trail cc in value */
+    _NORM_CC_TYPE_TWO=0xc0,     /* 0 != lead cc < trail cc, lead cc in value, trail cc in extra data */
+
+    _NORM_CC_HAS_LEAD=0x40,     /* side effect of the above flags: if and only if bit 6 is 0, then lead cc is 0 */
+    _NORM_CC_HAS_LEAD_HAS_TRAIL=0x80,   /* if(has lead) then one can check for (has trail) instead of (&cc mask==same/two) */
+#endif
+
+    _NORM_CC_SHIFT=8,           /* UnicodeData.txt combining class in bits 15..8 */
+    _NORM_CC_MASK=0xff00,
+
+    _NORM_EXTRA_SHIFT=16,               /* 16 bits for the index to UChars and other extra data */
+    _NORM_EXTRA_INDEX_TOP=0xfc00,       /* start of surrogate specials after shift */
+
+    _NORM_EXTRA_SURROGATE_MASK=0x3ff,
+    _NORM_EXTRA_SURROGATE_TOP=0x3f0,    /* hangul etc. */
+
+    _NORM_EXTRA_HANGUL=_NORM_EXTRA_SURROGATE_TOP,
+    _NORM_EXTRA_JAMO_1,                 /* ### not used */
+    _NORM_EXTRA_JAMO_2,
+    _NORM_EXTRA_JAMO_3
+};
+
+/* value constants using >16 bits */
+#define _NORM_MIN_SPECIAL       0xfc000000
+#define _NORM_SURROGATES_TOP    0xfff00000
+#define _NORM_MIN_HANGUL        0xfff00000
+#define _NORM_MIN_JAMO2         0xfff20000
+#define _NORM_JAMO2_TOP         0xfff30000
+
+
+/* index values */
+enum {
+    _NORM_INDEX_COUNT,
+    _NORM_INDEX_TRIE_SHIFT,
+    _NORM_INDEX_TRIE_INDEX_COUNT,
+    _NORM_INDEX_TRIE_DATA_COUNT,
+    _NORM_INDEX_UCHAR_COUNT,
+
+    _NORM_INDEX_COMBINE_DATA_COUNT,
+    _NORM_INDEX_COMBINE_FWD_COUNT,
+    _NORM_INDEX_COMBINE_BOTH_COUNT,
+    _NORM_INDEX_COMBINE_BACK_COUNT,
+
+    _NORM_INDEX_MIN_NFC_NO_MAYBE,
+    _NORM_INDEX_MIN_NFKC_NO_MAYBE,
+    _NORM_INDEX_MIN_NFD_NO_MAYBE,
+    _NORM_INDEX_MIN_NFKD_NO_MAYBE,
+
+    _NORM_INDEX_FCD_TRIE_INDEX_COUNT,
+    _NORM_INDEX_FCD_TRIE_DATA_COUNT,
+
+    _NORM_INDEX_TOP=16
+};
+
+enum {
+    /* FCD check: everything below this code point is known to have a 0 lead combining class */
+    _NORM_MIN_WITH_LEAD_CC=0x300
+};
+
+/**
+ * Is the normalizer data loaded?
+ * This is used internally before other internal normalizer functions
+ * are called.
+ * It saves this check in each of many normalization calls that
+ * are made for, e.g., collation.
+ *
+ * @param pErrorCode as usual
+ * @return boolean value for whether the normalization data is loaded
+ *
+ * @internal
+ */
+U_CAPI UBool U_EXPORT2
+unorm_haveData(UErrorCode *pErrorCode);
+
+/**
+ * internal API, used by normlzr.cpp
+ * @internal
+ */
+U_CFUNC int32_t
+unorm_decompose(UChar *dest, int32_t destCapacity,
+                const UChar *src, int32_t srcLength,
+                UBool compat, UBool ignoreHangul,
+                GrowBuffer *growBuffer, void *context,
+                UErrorCode *pErrorCode);
+
+/**
+ * internal API, but used by tests
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+unorm_setNewImplementation(UBool useNew);
+
+/**
+ * internal API, but used by tests; "(void)" keeps this a real prototype when included from C
+ * @internal
+ */
+U_CAPI UBool U_EXPORT2
+unorm_usesNewImplementation(void);
+
+#endif
diff --git a/icu4c/source/data/build/Makefile.in b/icu4c/source/data/build/Makefile.in
index 5827fe73131..f167d79926b 100644
--- a/icu4c/source/data/build/Makefile.in
+++ b/icu4c/source/data/build/Makefile.in
@@ -53,7 +53,7 @@ all-local: thaidict.brk build-local
 ##### Define all the data files. the build rule that depends on them is below.
 
 ## DAT files - Misc. data files.
-DAT_FILES=qchk.dat fchk.dat uprops.dat unames.dat cnvalias.dat tz.dat ucadata.dat invuca.dat
+DAT_FILES=qchk.dat fchk.dat uprops.dat unames.dat unorm.dat cnvalias.dat tz.dat ucadata.dat invuca.dat
 TEST_DAT_FILES=$(TESTOBJDATADIR)/test.dat
 
 ## BRK files
@@ -150,6 +150,11 @@ uprops.dat: $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/Mirror.txt $(TOO
 	@echo Creating uprops.dat
 	@ICU_DATA=. $(INVOKE) $(TOOLDIR)/genprops/genprops -s $(UNICODEDATADIR) -d . -u $(UNICODE_VERSION)
 
+# unorm.dat - built by gennorm from UnicodeData.txt and DerivedNormalizationProperties.txt
+unorm.dat: $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/DerivedNormalizationProperties.txt $(TOOLDIR)/gennorm/gennorm
+	@echo Creating unorm.dat
+	@ICU_DATA=. $(INVOKE) $(TOOLDIR)/gennorm/gennorm -s $(UNICODEDATADIR) -d . -u $(UNICODE_VERSION)
+
 # ucadata.dat
 ucadata.dat: $(UNICODEDATADIR)/FractionalUCA.txt $(TOOLDIR)/genuca/genuca
 	@echo Creating ucadata.dat and invuca.dat
@@ -205,7 +210,7 @@ endif
 $(TESTOBJDATADIR)/%.res: $(TESTSRCDATADIR)/%.txt $(TOOLDIR)/genrb/genrb
 	@ICU_DATA=. $(INVOKE) $(TOOLDIR)/genrb/genrb -s $(TESTSRCDATADIR) -d $(TESTOBJDATADIR) $(<F)
 
-%.res: $(SRCDATADIR)/%.txt $(TOOLDIR)/genrb/genrb ucadata.dat uprops.dat
+%.res: $(SRCDATADIR)/%.txt $(TOOLDIR)/genrb/genrb ucadata.dat uprops.dat unorm.dat
 	@ICU_DATA=. $(INVOKE) $(TOOLDIR)/genrb/genrb -s $(SRCDATADIR) -d . $(<F)
 
 
@@ -219,7 +224,7 @@ Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
 ###########
 ########### 390 support
 UCMFILES390=ebcdic-xml-us.ucm ibm-37-s390.ucm ibm-1047-s390.ucm ibm-4909.ucm
-ALLFILES390=qchk.dat fchk.dat uprops.dat cnvalias.dat $(UCMFILES390:.ucm=.cnv)
+ALLFILES390=qchk.dat fchk.dat uprops.dat unorm.dat cnvalias.dat $(UCMFILES390:.ucm=.cnv)
 
 icudata390.lst:  $(SRCLISTDEPS)
 	@echo Generating $@ list of 390 data files
diff --git a/icu4c/source/data/build/makedata.mak b/icu4c/source/data/build/makedata.mak
index dda6a3ca25a..5285d62b434 100644
--- a/icu4c/source/data/build/makedata.mak
+++ b/icu4c/source/data/build/makedata.mak
@@ -137,7 +137,7 @@ testdata: ucadata.dat $(RB_FILES) {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe
 BRK_FILES = "$(ICUDBLD)\sent.brk" "$(ICUDBLD)\char.brk" "$(ICUDBLD)\line.brk" "$(ICUDBLD)\word.brk" "$(ICUDBLD)\line_th.brk" "$(ICUDBLD)\word_th.brk"
 
 #invoke pkgdata
-"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" :  $(CNV_FILES) $(BRK_FILES) qchk.dat fchk.dat uprops.dat unames.dat cnvalias.dat tz.dat ucadata.dat invuca.dat $(ALL_RES) icudata.res
+"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" :  $(CNV_FILES) $(BRK_FILES) qchk.dat fchk.dat uprops.dat unames.dat unorm.dat cnvalias.dat tz.dat ucadata.dat invuca.dat $(ALL_RES) icudata.res
 	@echo Building icu data
 	@cd "$(ICUDBLD)"
  	"$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata" -e icudata -v -T . -m dll -c -p $(U_ICUDATA_NAME) -O "$(PKGOPT)" -d "$(DLL_OUTPUT)" -s . <<pkgdatain.txt
@@ -145,6 +145,7 @@ qchk.dat
 fchk.dat
 uprops.dat
 unames.dat
+unorm.dat
 cnvalias.dat
 tz.dat
 ucadata.dat
@@ -196,6 +197,7 @@ CLEAN :
 	-@erase "fchk*.*"
 	-@erase "uprops*.*"
 	-@erase "unames*.*"
+	-@erase "unorm*.*"
 	-@erase "cnvalias*.*"
 	-@erase "tz*.*"
 	-@erase "ibm*_cnv.c"
@@ -266,7 +268,7 @@ fchk.dat: "$(ICUDATA)\unidata\FCDCheck.txt" "$(ICUTOOLS)\genfchk\$(CFG)\genfchk.
 unames.dat: {"$(ICUDATA)"}\unidata\UnicodeData.txt "$(ICUTOOLS)\gennames\$(CFG)\gennames.exe"
 	@echo Creating data file for Unicode Names
 	@set ICU_DATA=$(ICUDBLD)
-	@"$(ICUTOOLS)\gennames\$(CFG)\gennames" $(ICUDATA)\unidata\UnicodeData.txt
+	@"$(ICUTOOLS)\gennames\$(CFG)\gennames" -1 $(ICUDATA)\unidata\UnicodeData.txt
 
 # Targets for uprops.dat
 uprops.dat: "$(ICUDATA)\unidata\UnicodeData.txt" "$(ICUTOOLS)\genprops\$(CFG)\genprops.exe"
@@ -274,6 +276,12 @@ uprops.dat: "$(ICUDATA)\unidata\UnicodeData.txt" "$(ICUTOOLS)\genprops\$(CFG)\ge
 	@set ICU_DATA=$(ICUDBLD)
 	@"$(ICUTOOLS)\genprops\$(CFG)\genprops" -s "$(ICUDATA)\unidata"
 
+# Targets for unorm.dat
+unorm.dat: "$(ICUDATA)\unidata\UnicodeData.txt" "$(ICUDATA)\unidata\DerivedNormalizationProperties.txt" "$(ICUTOOLS)\gennorm\$(CFG)\gennorm.exe"
+	@echo Creating data file for Unicode Normalization
+	@set ICU_DATA=$(ICUDBLD)
+	@"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" -s "$(ICUDATA)\unidata"
+
 # Targets for converters
 cnvalias.dat : {"$(ICUDATA)"}\convrtrs.txt "$(ICUTOOLS)\gencnval\$(CFG)\gencnval.exe"
 	@echo Creating data file for Converter Aliases
@@ -294,18 +302,18 @@ ucadata.dat: "$(ICUDATA)\unidata\FractionalUCA.txt" "$(ICUTOOLS)\genuca\$(CFG)\g
 
 invuca.dat: ucadata.dat
 
-{"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe : ucadata.dat qchk.dat fchk.dat uprops.dat
+{"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe : ucadata.dat qchk.dat fchk.dat uprops.dat unorm.dat
 
-ucadata.dat : uprops.dat qchk.dat fchk.dat uprops.dat
+ucadata.dat : uprops.dat qchk.dat fchk.dat unorm.dat
 
 # Dependencies on the tools
 convrtrs.txt : {"$(ICUTOOLS)\gencnval\$(CFG)"}gencnval.exe
 
 tz.txt : {"$(ICUTOOLS)\gentz\$(CFG)"}gentz.exe
 
-uprops.dat unames.dat cnvalias.dat tz.dat ucadata.dat invuca.dat: {"$(ICUTOOLS)\genccode\$(CFG)"}genccode.exe
+uprops.dat unames.dat unorm.dat cnvalias.dat tz.dat ucadata.dat invuca.dat: {"$(ICUTOOLS)\genccode\$(CFG)"}genccode.exe
 
 
-$(TRANSLIT_SOURCE) $(GENRB_SOURCE) : {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe ucadata.dat qchk.dat fchk.dat uprops.dat
+$(TRANSLIT_SOURCE) $(GENRB_SOURCE) : {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe ucadata.dat qchk.dat fchk.dat uprops.dat unorm.dat
 
 $(UCM_SOURCE) : {"$(ICUTOOLS)\makeconv\$(CFG)"}makeconv.exe {"$(ICUTOOLS)\genccode\$(CFG)"}genccode.exe
diff --git a/icu4c/source/data/makedata.mak b/icu4c/source/data/makedata.mak
index dda6a3ca25a..5285d62b434 100644
--- a/icu4c/source/data/makedata.mak
+++ b/icu4c/source/data/makedata.mak
@@ -137,7 +137,7 @@ testdata: ucadata.dat $(RB_FILES) {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe
 BRK_FILES = "$(ICUDBLD)\sent.brk" "$(ICUDBLD)\char.brk" "$(ICUDBLD)\line.brk" "$(ICUDBLD)\word.brk" "$(ICUDBLD)\line_th.brk" "$(ICUDBLD)\word_th.brk"
 
 #invoke pkgdata
-"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" :  $(CNV_FILES) $(BRK_FILES) qchk.dat fchk.dat uprops.dat unames.dat cnvalias.dat tz.dat ucadata.dat invuca.dat $(ALL_RES) icudata.res
+"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" :  $(CNV_FILES) $(BRK_FILES) qchk.dat fchk.dat uprops.dat unames.dat unorm.dat cnvalias.dat tz.dat ucadata.dat invuca.dat $(ALL_RES) icudata.res
 	@echo Building icu data
 	@cd "$(ICUDBLD)"
  	"$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata" -e icudata -v -T . -m dll -c -p $(U_ICUDATA_NAME) -O "$(PKGOPT)" -d "$(DLL_OUTPUT)" -s . <<pkgdatain.txt
@@ -145,6 +145,7 @@ qchk.dat
 fchk.dat
 uprops.dat
 unames.dat
+unorm.dat
 cnvalias.dat
 tz.dat
 ucadata.dat
@@ -196,6 +197,7 @@ CLEAN :
 	-@erase "fchk*.*"
 	-@erase "uprops*.*"
 	-@erase "unames*.*"
+	-@erase "unorm*.*"
 	-@erase "cnvalias*.*"
 	-@erase "tz*.*"
 	-@erase "ibm*_cnv.c"
@@ -266,7 +268,7 @@ fchk.dat: "$(ICUDATA)\unidata\FCDCheck.txt" "$(ICUTOOLS)\genfchk\$(CFG)\genfchk.
 unames.dat: {"$(ICUDATA)"}\unidata\UnicodeData.txt "$(ICUTOOLS)\gennames\$(CFG)\gennames.exe"
 	@echo Creating data file for Unicode Names
 	@set ICU_DATA=$(ICUDBLD)
-	@"$(ICUTOOLS)\gennames\$(CFG)\gennames" $(ICUDATA)\unidata\UnicodeData.txt
+	@"$(ICUTOOLS)\gennames\$(CFG)\gennames" -1 $(ICUDATA)\unidata\UnicodeData.txt
 
 # Targets for uprops.dat
 uprops.dat: "$(ICUDATA)\unidata\UnicodeData.txt" "$(ICUTOOLS)\genprops\$(CFG)\genprops.exe"
@@ -274,6 +276,12 @@ uprops.dat: "$(ICUDATA)\unidata\UnicodeData.txt" "$(ICUTOOLS)\genprops\$(CFG)\ge
 	@set ICU_DATA=$(ICUDBLD)
 	@"$(ICUTOOLS)\genprops\$(CFG)\genprops" -s "$(ICUDATA)\unidata"
 
+# Targets for unorm.dat
+unorm.dat: "$(ICUDATA)\unidata\UnicodeData.txt" "$(ICUDATA)\unidata\DerivedNormalizationProperties.txt" "$(ICUTOOLS)\gennorm\$(CFG)\gennorm.exe"
+	@echo Creating data file for Unicode Normalization
+	@set ICU_DATA=$(ICUDBLD)
+	@"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" -s "$(ICUDATA)\unidata"
+
 # Targets for converters
 cnvalias.dat : {"$(ICUDATA)"}\convrtrs.txt "$(ICUTOOLS)\gencnval\$(CFG)\gencnval.exe"
 	@echo Creating data file for Converter Aliases
@@ -294,18 +302,18 @@ ucadata.dat: "$(ICUDATA)\unidata\FractionalUCA.txt" "$(ICUTOOLS)\genuca\$(CFG)\g
 
 invuca.dat: ucadata.dat
 
-{"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe : ucadata.dat qchk.dat fchk.dat uprops.dat
+{"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe : ucadata.dat qchk.dat fchk.dat uprops.dat unorm.dat
 
-ucadata.dat : uprops.dat qchk.dat fchk.dat uprops.dat
+ucadata.dat : uprops.dat qchk.dat fchk.dat unorm.dat
 
 # Dependencies on the tools
 convrtrs.txt : {"$(ICUTOOLS)\gencnval\$(CFG)"}gencnval.exe
 
 tz.txt : {"$(ICUTOOLS)\gentz\$(CFG)"}gentz.exe
 
-uprops.dat unames.dat cnvalias.dat tz.dat ucadata.dat invuca.dat: {"$(ICUTOOLS)\genccode\$(CFG)"}genccode.exe
+uprops.dat unames.dat unorm.dat cnvalias.dat tz.dat ucadata.dat invuca.dat: {"$(ICUTOOLS)\genccode\$(CFG)"}genccode.exe
 
 
-$(TRANSLIT_SOURCE) $(GENRB_SOURCE) : {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe ucadata.dat qchk.dat fchk.dat uprops.dat
+$(TRANSLIT_SOURCE) $(GENRB_SOURCE) : {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe ucadata.dat qchk.dat fchk.dat uprops.dat unorm.dat
 
 $(UCM_SOURCE) : {"$(ICUTOOLS)\makeconv\$(CFG)"}makeconv.exe {"$(ICUTOOLS)\genccode\$(CFG)"}genccode.exe
diff --git a/icu4c/source/tools/Makefile.in b/icu4c/source/tools/Makefile.in
index 7ebc1002f89..5d64d1af77e 100644
--- a/icu4c/source/tools/Makefile.in
+++ b/icu4c/source/tools/Makefile.in
@@ -59,7 +59,7 @@ VERSION = @VERSION@
 
 
 SUBDIRS = ctestfw toolutil makeconv genrb genuca \
-genccode genqchk genfchk genprops gennames gencmn gencnval gentz gentest pkgdata
+genccode genqchk genfchk genprops gennames gennorm gencmn gencnval gentz gentest pkgdata
 
 ## List of phony targets
 .PHONY : all all-local all-recursive install install-local install-files install-dlls build-cmnfile build-dll		\
diff --git a/icu4c/source/tools/gennorm/.cvsignore b/icu4c/source/tools/gennorm/.cvsignore
new file mode 100644
index 00000000000..f52ef0d39af
--- /dev/null
+++ b/icu4c/source/tools/gennorm/.cvsignore
@@ -0,0 +1,8 @@
+tmp
+Debug
+Release
+Makefile
+*.d
+*.pdb
+*.plg
+gennorm
diff --git a/icu4c/source/tools/gennorm/Makefile.in b/icu4c/source/tools/gennorm/Makefile.in
new file mode 100644
index 00000000000..ad79c26a0eb
--- /dev/null
+++ b/icu4c/source/tools/gennorm/Makefile.in
@@ -0,0 +1,94 @@
+## Makefile.in for ICU - tools/gennorm
+## Copyright (c) 2001, International Business Machines Corporation and
+## others. All Rights Reserved.
+## Steven R. Loomis/Markus W. Scherer
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Platform-specific setup
+include @platform_make_fragment@
+
+##
+
+## Build directory information
+subdir = tools/gennorm
+
+ICUDATADIR=$(top_builddir)/data
+UNICODEDATADIR=$(top_srcdir)/../data/unidata
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS) $(RES_FILES) $(TEST_FILES)
+
+## Target information
+TARGET = gennorm
+
+DEFS = @DEFS@
+CPPFLAGS = @CPPFLAGS@ -I$(top_builddir)/common -I$(top_srcdir)/common -I$(srcdir)/../toolutil
+CFLAGS = @CFLAGS@
+CXXFLAGS = @CXXFLAGS@
+ENABLE_RPATH = @ENABLE_RPATH@
+ifeq ($(ENABLE_RPATH),YES)
+RPATHLDFLAGS = $(LD_RPATH)$(LD_RPATH_PRE)$(libdir)
+endif
+LDFLAGS = @LDFLAGS@ $(RPATHLDFLAGS)
+INVOKE = $(LDLIBRARYPATH_ENVVAR)=$(top_builddir)/common:$(top_builddir)/tools/toolutil:$$$(LDLIBRARYPATH_ENVVAR)
+LIBS = $(LIBICUTOOLUTIL) @LIBS@ @LIB_M@
+
+OBJECTS = gennorm.o store.o
+
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local		\
+distclean distclean-local dist dist-local check	\
+check-local build-data
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET) build-data
+
+install-local: all-local
+	$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
+	$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)/$(TARGET)
+
+dist-local:
+
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(TARGET) $(OBJECTS)
+
+distclean-local: clean-local
+	$(RMV) Makefile
+
+check-local: all-local
+
+Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+	$(LINK.cc) -o $@ $^ $(LIBS) 
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+
diff --git a/icu4c/source/tools/gennorm/gennorm.c b/icu4c/source/tools/gennorm/gennorm.c
new file mode 100644
index 00000000000..96e69c710b2
--- /dev/null
+++ b/icu4c/source/tools/gennorm/gennorm.c
@@ -0,0 +1,471 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  gennorm.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001may25
+*   created by: Markus W. Scherer
+*
+*   This program reads the Unicode character database text file,
+*   parses it, and extracts the data for normalization.
+*   It then preprocesses it and writes a binary file for efficient use
+*   in various Unicode text normalization processes.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/putil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "unicode/udata.h"
+#include "unewdata.h"
+#include "uoptions.h"
+#include "uparse.h"
+#include "unormimp.h"
+
+U_CDECL_BEGIN
+#include "gennorm.h"
+U_CDECL_END
+
+UBool beVerbose=FALSE, haveCopyright=TRUE;
+
+/* prototypes --------------------------------------------------------------- */
+
+static void
+parseDerivedNormalizationProperties(const char *filename, UErrorCode *pErrorCode);
+
+static void
+parseDB(const char *filename, UErrorCode *pErrorCode);
+
+/* -------------------------------------------------------------------------- */
+
+static UOption options[]={
+    UOPTION_HELP_H,
+    UOPTION_HELP_QUESTION_MARK,
+    UOPTION_VERBOSE,
+    UOPTION_COPYRIGHT,
+    UOPTION_DESTDIR,
+    UOPTION_SOURCEDIR,
+    { "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 }
+};
+
+extern int
+main(int argc, char* argv[]) {
+    char filename[300];
+    const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
+    char *basename=NULL;
+    UErrorCode errorCode=U_ZERO_ERROR;
+    /* the process exit code is a UErrorCode value: U_ZERO_ERROR unless something failed */
+    /* preset then read command line options */
+    options[4].value=u_getDataDirectory();
+    options[5].value="";
+    options[6].value="3.0.0";
+    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
+
+    /* error handling, printing usage message */
+    if(argc<0) {
+        fprintf(stderr,
+            "error in command line argument \"%s\"\n",
+            argv[-argc]);
+    }
+    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
+        fprintf(stderr,
+            "usage: %s [-options] [suffix]\n"
+            "\tread the UnicodeData.txt file and other Unicode properties files and\n"
+            "\tcreate a binary file " DATA_NAME "." DATA_TYPE " with the normalization data\n"
+            "\toptions:\n"
+            "\t\t-h or -? or --help  this usage text\n"
+            "\t\t-v or --verbose     verbose output\n"
+            "\t\t-c or --copyright   include a copyright notice\n"
+            "\t\t-d or --destdir     destination directory, followed by the path\n"
+            "\t\t-s or --sourcedir   source directory, followed by the path\n"
+            "\t\t-u or --unicode     Unicode version, followed by the version like 3.0.0\n"
+            "\t\tsuffix              suffix that is to be appended with a '-'\n"
+            "\t\t                    to the source file basenames before opening;\n"
+            "\t\t                    'gennorm new' will read UnicodeData-new.txt etc.\n",
+            argv[0]);
+        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
+    }
+
+    /* get the options values */
+    beVerbose=options[2].doesOccur;
+    haveCopyright=options[3].doesOccur;
+    srcDir=options[5].value;
+    destDir=options[4].value;
+
+    suffix= argc>=2 ? argv[1] : NULL;
+
+    setUnicodeVersion(options[6].value);
+
+    /* prepare the filename beginning with the source dir; 40 >= separator + longest basename (30) + '-' + ".txt" + NUL */
+    if(uprv_strlen(srcDir)+(suffix!=NULL ? uprv_strlen(suffix) : 0)+40>sizeof(filename)) {
+        fprintf(stderr, "gennorm: source directory and suffix are too long\n");
+        return U_BUFFER_OVERFLOW_ERROR;
+    }
+    uprv_strcpy(filename, srcDir);
+    basename=filename+uprv_strlen(filename);
+    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
+        *basename++=U_FILE_SEP_CHAR;
+    }
+
+    /* initialize */
+    init();
+
+    /* process DerivedNormalizationProperties.txt (quick check flags) */
+    if(suffix==NULL) {
+        uprv_strcpy(basename, "DerivedNormalizationProperties.txt");
+    } else {
+        uprv_strcpy(basename, "DerivedNormalizationProperties");
+        basename[30]='-';
+        uprv_strcpy(basename+31, suffix);
+        uprv_strcat(basename+31, ".txt");
+    }
+    parseDerivedNormalizationProperties(filename, &errorCode);
+
+    /* process UnicodeData.txt */
+    if(suffix==NULL) {
+        uprv_strcpy(basename, "UnicodeData.txt");
+    } else {
+        uprv_strcpy(basename, "UnicodeData");
+        basename[11]='-';
+        uprv_strcpy(basename+12, suffix);
+        uprv_strcat(basename+12, ".txt");
+    }
+    parseDB(filename, &errorCode);
+
+    /* process parsed data */
+    if(U_SUCCESS(errorCode)) {
+        processData();
+
+        /* write the properties data file */
+        generateData(destDir);
+    }
+
+    return errorCode;
+}
+
+/* parsing helpers ---------------------------------------------------------- */
+
+static const char *
+skipWhitespace(const char *s) {
+    /* step over leading spaces and tabs; stops at anything else, including the NUL */
+    for(; *s==' ' || *s=='\t'; ++s) {
+    }
+    return s;
+}
+
+/*
+ * parse a list of hexadecimal code points separated by spaces/tabs, up to ';' or the end of s
+ * store them as UTF-32 in dest[0..count-1]; more than destCapacity of them is an error
+ * (U_INDEX_OUTOFBOUNDS_ERROR), as is a malformed or >0x10ffff number (U_PARSE_ERROR)
+ * return the number of code points, or -1 after setting *pErrorCode
+ */
+static int32_t
+parseCodePoints(const char *s,
+                uint32_t *dest, int32_t destCapacity,
+                UErrorCode *pErrorCode) {
+    char *end;
+    uint32_t value;
+    int32_t count;
+
+    count=0;
+    for(;;) {
+        s=skipWhitespace(s);
+        if(*s==';' || *s==0) {
+            return count;
+        }
+
+        /* read one code point; it may be the last thing in s, directly before the NUL */
+        value=(uint32_t)uprv_strtoul(s, &end, 16);
+        if(end<=s || (*end!=' ' && *end!='\t' && *end!=';' && *end!=0) || value>=0x110000) {
+            fprintf(stderr, "gennorm: syntax error parsing code point at %s\n", s);
+            *pErrorCode=U_PARSE_ERROR;
+            return -1;
+        }
+
+        /* overflow? */
+        if(count>=destCapacity) {
+            fprintf(stderr, "gennorm: code point sequence too long at %s\n", s);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return -1;
+        }
+
+        /* append it to the destination array */
+        dest[count++]=value;
+
+        /* go to the following characters */
+        s=end;
+    }
+}
+
+/* read a range like start or start..end; return its number of code points, or -1 with *pErrorCode set */
+static int32_t
+parseCodePointRange(const char *s,
+                    uint32_t *pStart, uint32_t *pEnd,
+                    UErrorCode *pErrorCode) {
+    char *end;
+    uint32_t value;
+
+    s=skipWhitespace(s);
+    if(*s==';' || *s==0) {
+        fprintf(stderr, "gennorm: syntax error parsing range at %s - empty field\n", s);
+        *pErrorCode=U_PARSE_ERROR;
+        return -1;
+    }
+
+    /* read the start code point; the field may end right after it (';' or NUL) */
+    value=(uint32_t)uprv_strtoul(s, &end, 16);
+    if(end<=s || (*end!=' ' && *end!='\t' && *end!='.' && *end!=';' && *end!=0) || value>=0x110000) {
+        fprintf(stderr, "gennorm: syntax error parsing range start code point at %s\n", s);
+        *pErrorCode=U_PARSE_ERROR;
+        return -1;
+    }
+    *pStart=*pEnd=value;
+
+    /* is there a "..end"? */
+    s=skipWhitespace(end);
+    if(*s==';' || *s==0) {
+        return 1;
+    }
+
+    if(*s!='.' || s[1]!='.') {
+        fprintf(stderr, "gennorm: syntax error parsing range at %s\n", s);
+        *pErrorCode=U_PARSE_ERROR;
+        return -1;
+    }
+    s+=2;
+
+    /* read the end code point; the field may end right after it (';' or NUL) */
+    value=(uint32_t)uprv_strtoul(s, &end, 16);
+    if(end<=s || (*end!=' ' && *end!='\t' && *end!=';' && *end!=0) || value>=0x110000) {
+        fprintf(stderr, "gennorm: syntax error parsing range end code point at %s\n", s);
+        *pErrorCode=U_PARSE_ERROR;
+        return -1;
+    }
+    *pEnd=value;
+
+    /* is this a valid range? */
+    if(value<*pStart) {
+        fprintf(stderr, "gennorm: syntax error parsing range at %s - not a valid range\n", s);
+        *pErrorCode=U_PARSE_ERROR;
+        return -1;
+    }
+
+    /* no garbage after that? */
+    s=skipWhitespace(end);
+    if(*s==';' || *s==0) {
+        return value-*pStart+1;
+    } else {
+        fprintf(stderr, "gennorm: syntax error parsing range at %s\n", s);
+        *pErrorCode=U_PARSE_ERROR;
+        return -1;
+    }
+}
+
+/* parser for DerivedNormalizationProperties.txt ---------------------------- */
+
+/* line callback: fields[0] is a code point or range, fields[1] the derived property name */
+static void
+derivedNormalizationPropertiesLineFn(void *context,
+                                     char *fields[][2], int32_t fieldCount,
+                                     UErrorCode *pErrorCode) {
+    char *s;
+    uint32_t start, end;
+    uint8_t qcFlags;
+
+    /* get code point range */
+    parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        fprintf(stderr, "gennorm: error parsing DerivedNormalizationProperties.txt mapping at %s\n", fields[0][0]);
+        exit(*pErrorCode);
+    }
+
+    /* ignore hangul - handle explicitly */
+    if(start==0xac00) {
+        return;
+    }
+
+    /* get property - ignore unrecognized ones */
+    s=(char *)skipWhitespace(fields[1][0]);
+    if(*s=='N' && s[1]=='F') {
+        qcFlags=_NORM_QC_NFC;               /* both NFC bits: no|maybe */
+        s+=2;
+        if(*s=='K') {
+            qcFlags<<=1;                    /* -> the NFKC bits */
+            ++s;
+        }
+
+        if(*s=='C' && s[1]=='_') {
+            s+=2;
+        } else if(*s=='D' && s[1]=='_') {
+            qcFlags<<=2;                    /* -> the NFD or NFKD "no" bit after masking */
+            s+=2;
+        } else {
+            return;
+        }
+
+        if(0==uprv_memcmp(s, "NO", 2)) {
+            qcFlags&=_NORM_QC_ANY_NO;       /* keep only the "no" bit */
+        } else if(0==uprv_memcmp(s, "MAYBE", 5)) {
+            qcFlags&=_NORM_QC_ANY_MAYBE;    /* keep only the "maybe" bit (none exists for NFD/NFKD) */
+        } else {
+            return;
+        }
+
+        /* set this flag for all code points in this range */
+        while(start<=end) {
+            setQCFlags(start++, qcFlags);
+        }
+    } else if(0==uprv_memcmp(s, "Comp_Ex", 7)) {
+        while(start<=end) {
+            setCompositionExclusion(start++);
+        }
+    }
+}
+
+/* run derivedNormalizationPropertiesLineFn over filename, a ';'-delimited file, with a field count of 2 */
+static void
+parseDerivedNormalizationProperties(const char *filename, UErrorCode *pErrorCode) {
+    char *lineFields[2][2];
+
+    /* only start when the caller supplied an error code that is not already a failure */
+    if(pErrorCode!=NULL && !U_FAILURE(*pErrorCode)) {
+        u_parseDelimitedFile(filename, ';', lineFields, 2, derivedNormalizationPropertiesLineFn, NULL, pErrorCode);
+    }
+}
+
+/* parser for UnicodeData.txt ----------------------------------------------- */
+
+/* u_parseDelimitedFile() callback for one UnicodeData.txt line: reads fields 0, 1, 3 and 5 */
+static void
+unicodeDataLineFn(void *context, char *fields[][2], int32_t fieldCount,
+                  UErrorCode *pErrorCode) {
+    uint32_t decomp[40];
+    Norm norm;
+    const char *s;
+    char *end;
+    uint32_t code, value;
+    int32_t length;
+    UBool isCompat, something=FALSE;
+
+    /* ignore First and Last entries for ranges */
+    if( *fields[1][0]=='<' &&
+        (length=(fields[1][1]-fields[1][0]))>=9 &&
+        (0==uprv_memcmp(", First>", fields[1][1]-8, 8) || 0==uprv_memcmp(", Last>", fields[1][1]-7, 7))) {
+        return;
+    }
+
+    /* reset the properties */
+    uprv_memset(&norm, 0, sizeof(Norm));
+
+    /* get the character code, field 0 */
+    code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
+    if(end<=fields[0][0] || end!=fields[0][1]) {
+        fprintf(stderr, "gennorm: syntax error in field 0 at %s\n", fields[0][0]);
+        *pErrorCode=U_PARSE_ERROR;
+        exit(U_PARSE_ERROR);
+    }
+
+    /* get canonical combining class, field 3 */
+    value=(uint32_t)uprv_strtoul(fields[3][0], &end, 10);
+    if(end<=fields[3][0] || end!=fields[3][1] || value>0xff) {
+        fprintf(stderr, "gennorm: syntax error in field 3 at %s\n", fields[0][0]);
+        *pErrorCode=U_PARSE_ERROR;
+        exit(U_PARSE_ERROR);
+    }
+    if(value>0) {
+        norm.udataCC=(uint8_t)value;
+        something=TRUE;
+    }
+
+    /* get the decomposition, field 5 */
+    if(fields[5][0]<fields[5][1]) {
+        if(*(s=fields[5][0])=='<') {
+            ++s;
+            isCompat=TRUE;
+
+            /* skip and ignore the compatibility type name */
+            do {
+                if(s==fields[5][1]) {
+                    /* missing '>' */
+                    fprintf(stderr, "gennorm: syntax error in field 5 at %s\n", fields[0][0]);
+                    *pErrorCode=U_PARSE_ERROR;
+                    exit(U_PARSE_ERROR);
+                }
+            } while(*s++!='>');
+        } else {
+            isCompat=FALSE;
+        }
+
+        /* parse the decomposition string */
+        length=parseCodePoints(s, decomp, sizeof(decomp)/4, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            exit(*pErrorCode);
+        }
+
+        /* store the string: norm only points at the local decomp[] here, */
+        /* storeNorm() -> decompStoreNewNF() copies it into the UTF-32 string pool */
+        if(length>0) {
+            something=TRUE;
+            if(isCompat) {
+                norm.lenNFKD=(uint8_t)length;
+                norm.nfkd=decomp;
+            } else {
+                if(length>2) {
+                    /* cast the uint32_t/int32_t arguments to the types that %lx and %ld expect */
+                    fprintf(stderr, "gennorm: error - length of NFD(U+%04lx) = %ld >2 in UnicodeData - illegal\n",
+                            (unsigned long)code, (long)length);
+                    *pErrorCode=U_PARSE_ERROR;
+                    exit(U_PARSE_ERROR);
+                }
+                norm.lenNFD=(uint8_t)length;
+                norm.nfd=decomp;
+            }
+        }
+    }
+
+    /* check for non-character code points */
+    if((code&0xfffe)==0xfffe || (uint32_t)(code-0xfdd0)<0x20 || code>0x10ffff) {
+        fprintf(stderr, "gennorm: error - properties for non-character code point U+%04lx\n", (unsigned long)code);
+        *pErrorCode=U_PARSE_ERROR;
+        exit(U_PARSE_ERROR);
+    }
+
+    if(something) {
+        /* there are normalization values, so store them; the casts match the printf conversions */
+        if(beVerbose) {
+            printf("store values for U+%04lx: cc=%d, lenNFD=%ld, lenNFKD=%ld\n",
+                   (unsigned long)code, norm.udataCC, (long)norm.lenNFD, (long)norm.lenNFKD);
+        }
+        storeNorm(code, &norm);
+    }
+}
+
+/* run unicodeDataLineFn over filename, a ';'-delimited file, with a field count of 15 */
+static void
+parseDB(const char *filename, UErrorCode *pErrorCode) {
+    char *lineFields[15][2];
+
+    /* only start when the caller supplied an error code that is not already a failure */
+    if(pErrorCode!=NULL && !U_FAILURE(*pErrorCode)) {
+        u_parseDelimitedFile(filename, ';', lineFields, 15, unicodeDataLineFn, NULL, pErrorCode);
+    }
+}
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
diff --git a/icu4c/source/tools/gennorm/gennorm.dsp b/icu4c/source/tools/gennorm/gennorm.dsp
new file mode 100644
index 00000000000..578611f26da
--- /dev/null
+++ b/icu4c/source/tools/gennorm/gennorm.dsp
@@ -0,0 +1,128 @@
+# Microsoft Developer Studio Project File - Name="gennorm" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=gennorm - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE 
+!MESSAGE NMAKE /f "gennorm.mak".
+!MESSAGE 
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE 
+!MESSAGE NMAKE /f "gennorm.mak" CFG="gennorm - Win32 Debug"
+!MESSAGE 
+!MESSAGE Possible choices for configuration are:
+!MESSAGE 
+!MESSAGE "gennorm - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "gennorm - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "gennorm - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD CPP /nologo /MD /Za /W3 /GX /O2 /I "..\toolutil" /I "..\..\common" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\toolutil\Release" /libpath:"..\..\..\lib\Release" /libpath:"..\..\..\lib"
+# Begin Custom Build
+InputPath=.\Release\gennorm.exe
+InputName=gennorm
+SOURCE="$(InputPath)"
+
+"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy $(InputPath) ..\..\..\bin
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "gennorm - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /ZI /Od /I "..\toolutil" /I "..\..\common" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 icutud.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\toolutil\Debug" /libpath:"..\..\..\lib\Debug" /libpath:"..\..\..\lib"
+# Begin Custom Build
+InputPath=.\Debug\gennorm.exe
+InputName=gennorm
+SOURCE="$(InputPath)"
+
+"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy $(InputPath) ..\..\..\bin
+
+# End Custom Build
+
+!ENDIF 
+
+# Begin Target
+
+# Name "gennorm - Win32 Release"
+# Name "gennorm - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "c;cpp;rc"
+# Begin Source File
+
+SOURCE=.\gennorm.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\store.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h"
+# Begin Source File
+
+SOURCE=.\gennorm.h
+# End Source File
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/icu4c/source/tools/gennorm/gennorm.h b/icu4c/source/tools/gennorm/gennorm.h
new file mode 100644
index 00000000000..e15caaf2d71
--- /dev/null
+++ b/icu4c/source/tools/gennorm/gennorm.h
@@ -0,0 +1,63 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  gennorm.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999dec13
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __GENNORM_H__
+#define __GENNORM_H__
+
+#include "unicode/utypes.h"
+
+/* file definitions */
+#define DATA_NAME "unorm"
+#define DATA_TYPE "dat"
+
+/*
+ * data structure that holds the normalization properties for one or more
+ * code point(s) at build time
+ */
+typedef struct Norm {
+    uint8_t udataCC, lenNFD, lenNFKD;   /* canonical combining class; lengths of nfd[] and nfkd[] in code points */
+    uint8_t qcFlags, combiningFlags;    /* quick check bits; combiningFlags: 1=combines forward, 2=combines backward */
+    uint16_t canonBothCCs, compatBothCCs, combiningIndex, specialTag;
+    uint32_t *nfd, *nfkd;               /* canonical and compatibility decompositions as UTF-32 strings */
+} Norm;
+
+/* global flags */
+extern UBool beVerbose, haveCopyright;
+
+/* prototypes */
+extern void
+setUnicodeVersion(const char *v); /* sets the data version from a version string */
+
+extern void
+init(void); /* resets the builder tables and opens the memory pools */
+
+extern void
+storeNorm(uint32_t code, Norm *norm); /* processes and stores the UnicodeData values of one code point */
+
+extern void
+setQCFlags(uint32_t code, uint8_t qcFlags); /* ORs quick check flags into the Norm for code */
+
+extern void
+setCompositionExclusion(uint32_t code);
+
+extern void
+processData(void);
+
+extern void
+generateData(const char *dataDir);
+
+#endif
+
diff --git a/icu4c/source/tools/gennorm/store.c b/icu4c/source/tools/gennorm/store.c
new file mode 100644
index 00000000000..2b42998b21f
--- /dev/null
+++ b/icu4c/source/tools/gennorm/store.c
@@ -0,0 +1,1428 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  store.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001may25
+*   created by: Markus W. Scherer
+*
+*   Store Unicode normalization data in a memory-mappable file.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "filestrm.h"
+#include "unicode/udata.h"
+#include "unewdata.h"
+#include "unormimp.h"
+#include "gennorm.h"
+
+#define DO_DEBUG_OUT 0
+
+/* file data ---------------------------------------------------------------- */
+
+/* UDataInfo cf. udata.h - describes the data; dataVersion is overwritten by setUnicodeVersion() */
+static UDataInfo dataInfo={
+    sizeof(UDataInfo),            /* size */
+    0,                            /* reservedWord */
+
+    U_IS_BIG_ENDIAN,              /* isBigEndian */
+    U_CHARSET_FAMILY,             /* charsetFamily */
+    U_SIZEOF_UCHAR,               /* sizeofUChar */
+    0,                            /* reservedByte */
+
+    { 0x4e, 0x6f, 0x72, 0x6d },   /* dataFormat="Norm" */
+    {1, 0, 0, _NORM_TRIE_SHIFT},  /* formatVersion - [3] contains the trie shift! */
+    {3, 1, 0, 0}                  /* dataVersion (Unicode version) */
+};
+
+/* parse the version string v and store the result as the data version in dataInfo */
+extern void setUnicodeVersion(const char *v) {
+    UVersionInfo parsed;
+    u_versionFromString(parsed, v);
+    uprv_memcpy(dataInfo.dataVersion, parsed, 4);
+}
+
+static uint16_t indexes[_NORM_INDEX_TOP]={ 0 }; /* indexed by _NORM_INDEX_* constants; init() presets the MIN_*_NO_MAYBE entries */
+
+/* tool memory helper ------------------------------------------------------- */
+
+typedef struct UToolMemory {
+    char name[64];                /* pool name, used in error messages */
+    uint32_t count, size, index;  /* capacity in units, bytes per unit, units handed out so far */
+    union { uint32_t u32; double d; void *p; } array[1]; /* unit storage starts here; the union aligns it for pointers and doubles */
+} UToolMemory;
+
+/* allocate a pool of count units of size bytes each; exits on failure; name must fit into name[64] */
+static UToolMemory *utm_open(const char *name, uint32_t count, uint32_t size) {
+    UToolMemory *pool=(UToolMemory *)uprv_malloc(sizeof(UToolMemory)+count*size);
+    if(NULL==pool) {
+        fprintf(stderr, "error: %s - out of memory\n", name);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    pool->index=0;
+    pool->size=size;
+    pool->count=count;
+    uprv_strcpy(pool->name, name);
+    return pool;
+}
+
+/* release a pool obtained from utm_open(); a NULL pointer is ignored */
+static void
+utm_close(UToolMemory *mem) {
+    if(NULL==mem) { return; }
+    uprv_free(mem);
+}
+
+/* return the address of the first unit of the pool */
+static void *utm_getStart(UToolMemory *mem) {
+    return (char *)&mem->array[0];
+}
+
+static void *
+utm_alloc(UToolMemory *mem) {
+    char *p=(char *)mem->array+mem->index*mem->size;
+    if(++mem->index<=mem->count) {
+        uprv_memset(p, 0, mem->size);
+        return p;
+    } else {
+        fprintf(stderr, "error: %s - trying to use more than %ld preallocated units\n",
+                mem->name, mem->count);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+}
+
+static void *
+utm_allocN(UToolMemory *mem, int32_t n) {
+    char *p=(char *)mem->array+mem->index*mem->size;
+    if((mem->index+=(uint32_t)n)<=mem->count) {
+        uprv_memset(p, 0, n*mem->size);
+        return p;
+    } else {
+        fprintf(stderr, "error: %s - trying to use more than %ld preallocated units\n",
+                mem->name, mem->count);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+}
+
+/* builder data ------------------------------------------------------------- */
+
+typedef void EnumTrieFn(void *context, uint32_t code, Norm *norm); /* callback type for enumTrie() */
+/* preallocated pools, opened in init() */
+static UToolMemory *stage2Mem, *normMem, *utf32Mem, *extraMem, *combiningTriplesMem;
+/* stage1[code>>_NORM_TRIE_SHIFT] is the start index of a block in stage2[], 0 if there is none yet */
+static uint16_t stage1[_NORM_STAGE_1_MAX_COUNT], fcdStage1[_NORM_STAGE_1_MAX_COUNT];
+static uint16_t *stage2; /* start of stage2Mem; the values are indexes into norms[], 0 if there is no Norm yet */
+
+static Norm *norms; /* start of normMem; norms[0] is taken in init() so that index 0 can mean "none" */
+
+/*
+ * set a flag for each code point that was seen in decompositions -
+ * code points that have not been used before need not be applied to earlier decompositions
+ */
+static uint32_t haveSeenFlags[256]; /* 256*32 bits, selected by the low 13 bits of a code point */
+/* combiningCPs[] entries are code|flags<<24, kept in three sections delimited by the Top counters below */
+static uint32_t combiningCPs[2000];
+static uint16_t combiningIndexes[2000]; /* parallel to combiningCPs[], filled in processCombining() */
+static uint16_t combineFwdTop=0, combineBothTop=0, combineBackTop=0; /* limits of the flags==1, ==3, ==2 sections */
+
+typedef struct CombiningTriple {
+    uint16_t leadIndex, trailIndex;     /* positions of lead and trail in combiningCPs[], set in processCombining() for sorting */
+    uint32_t lead, trail, combined;     /* code points: lead+trail compose to combined */
+} CombiningTriple;
+
+/* 15 bits in the combining index -> <=0x8000 pairs of uint16_t in the combining table */
+static uint16_t combiningTable[2*0x8000];
+static uint16_t combiningTableTop=0; /* number of pairs accounted for, set in processCombining() */
+
+/* stage 2 table after turning Norm structs into 32-bit words */
+static uint32_t *norm32Table=NULL, *fcdTable=NULL;
+
+/* number of units used in stage 1 and norm32Table, and same for FCD */
+static uint16_t stage1Top, fcdStage1Top,
+                norm32TableTop, fcdTableTop;
+
+/* set up the builder state: clear the lookup tables and open all preallocated memory pools */
+extern void
+init(void) {
+    /* clear stage 1 of the trie and all "have seen" flags */
+    uprv_memset(stage1, 0, sizeof(stage1));
+    uprv_memset(haveSeenFlags, 0, sizeof(haveSeenFlags));
+
+    /*
+     * trie stage 2: the first block is allocated (and thereby zeroed) right away,
+     * so that a stage 1 value of 0 can mean "no block yet"
+     */
+    stage2Mem=utm_open("gennorm trie stage 2", 30000, sizeof(uint16_t));
+    stage2=(uint16_t *)utm_allocN(stage2Mem, _NORM_STAGE_2_BLOCK_COUNT);
+
+    /* Norm structs: likewise, the first one is taken so that index 0 can mean "no Norm" */
+    normMem=utm_open("gennorm normalization structs", 20000, sizeof(Norm));
+    norms=(Norm *)utm_alloc(normMem);
+
+    /* UTF-32 string memory, 4 bytes per unit */
+    utf32Mem=utm_open("gennorm UTF-32 strings", 30000, 4);
+
+    /* extra 16-bit data memory for UTF-16 decomposition strings and other values */
+    extraMem=utm_open("gennorm extra 16-bit memory", _NORM_EXTRA_INDEX_TOP, 2);
+
+    /* temporary memory for combining triples */
+    combiningTriplesMem=utm_open("gennorm combining triples", 0x4000, sizeof(CombiningTriple));
+
+    /* start the minimum code points for no/maybe quick check values at the end of the BMP */
+    indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE]=indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE]=0xffff;
+    indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE]=indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE]=0xffff;
+}
+
+/*
+ * return the stage 2 start index of the block that covers code;
+ * a new, zeroed block is allocated if stage 1 does not point to one yet
+ */
+static uint16_t
+createStage2Block(uint32_t code) {
+    uint16_t *slot=&stage1[code>>_NORM_TRIE_SHIFT];
+
+    if(*slot!=0) {
+        /* the block exists already */
+        return *slot;
+    } else {
+        uint16_t *block=(uint16_t *)utm_allocN(stage2Mem, _NORM_STAGE_2_BLOCK_COUNT);
+        *slot=(uint16_t)(block-stage2);
+        return *slot;
+    }
+}
+
+/*
+ * get or create the Norm unit for code, and get or create the intermediate
+ * trie entries for it as well; code is declared uint32_t to match what callers pass
+ */
+static Norm *
+createNorm(uint32_t code) {
+    Norm *p;
+    uint16_t stage2Block, k;
+
+    stage2Block=createStage2Block(code);
+    k=(uint16_t)(stage2Block+(code&_NORM_STAGE_2_MASK));
+    if(stage2[k]==0) {
+        /* allocate Norm; stage 2 stores its index relative to norms[0] */
+        p=(Norm *)utm_alloc(normMem);
+        stage2[k]=p-norms;
+    } else {
+        p=norms+stage2[k];
+    }
+    return p;
+}
+
+/* get an existing Norm unit, or NULL if none was created for code; never allocates */
+static Norm *
+getNorm(uint32_t code) {
+    uint32_t i;
+    uint16_t j;
+
+    /* access stage 1 and get the stage 2 block start index */
+    i=code>>_NORM_TRIE_SHIFT;
+    j=stage1[i];
+    if(j==0) {
+        return NULL;
+    }
+
+    /* access stage 2 and get the Norm unit */
+    i=(uint16_t)(j+(code&_NORM_STAGE_2_MASK));
+    j=stage2[i];
+    if(j==0) {
+        return NULL;
+    } else {
+        return norms+j;
+    }
+}
+
+/* return the combining class stored for code, or 0 if there is no Norm unit for it */
+static uint8_t
+getCCFromCP(uint32_t code) {
+    const Norm *unit=getNorm(code);
+
+    if(unit!=NULL) {
+        return unit->udataCC;
+    }
+    return 0;
+}
+
+/*
+ * enumerate all code points with their Norm structs and call a function for each;
+ * return the number of code points with data
+ */
+static uint32_t
+enumTrie(EnumTrieFn *fn, void *context) {
+    uint32_t code, count, i;
+    uint16_t j, k, l;
+
+    code=count=0; /* count accumulates the number of callbacks made below */
+    for(i=0; i<_NORM_STAGE_1_MAX_COUNT; ++i) {
+        j=stage1[i];
+        if(j!=0) {
+            for(k=0; k<_NORM_STAGE_2_BLOCK_COUNT; ++k) {
+                l=stage2[j+k];
+                if(l!=0) {
+                    fn(context, code, norms+l);
+                    ++count;
+                }
+                ++code;
+            }
+        } else {
+            /* no stage 2 block: skip all code points that it would cover */
+            code+=_NORM_STAGE_2_BLOCK_COUNT;
+        }
+    }
+    return count;
+}
+
+/* flag each code point in s[0..length[; only its low 13 bits select the flag, so code points may share one */
+static void setHaveSeenString(const uint32_t *s, int32_t length) {
+    uint32_t c;
+    while(length>0) {
+        c=*s++;
+        /* shift an unsigned 1: 1<<31 is not representable in a signed int */
+        haveSeenFlags[(c>>5)&0xff]|=((uint32_t)1<<(c&0x1f));
+        --length;
+    }
+}
+/* non-zero if the flag for c is set; may also be set by another code point with the same low 13 bits */
+#define HAVE_SEEN(c) (haveSeenFlags[((c)>>5)&0xff]&((uint32_t)1<<((c)&0x1f)))
+
+/* handle combining data ---------------------------------------------------- */
+
+/*
+ * add code with flags (1=combines forward, 2=combines backward) to combiningCPs[], or merge the
+ * flags into its existing entry; entries are kept in sections ordered flags==1, ==3, ==2
+ */
+static void addCombiningCP(uint32_t code, uint8_t flags) {
+    uint32_t newEntry;
+    uint16_t i;
+    newEntry=code|((uint32_t)flags<<24);
+    /* search for this code point */
+    for(i=0; i<combineBackTop; ++i) {
+        if(code==(combiningCPs[i]&0xffffff)) {
+            /* found it */
+            if(newEntry==combiningCPs[i]) {
+                return; /* no change */
+            }
+            /* combine the flags, remove the old entry from the old place, and insert the new one */
+            newEntry|=combiningCPs[i];
+            if(i!=--combineBackTop) {
+                uprv_memmove(combiningCPs+i, combiningCPs+i+1, (combineBackTop-i)*4);
+            }
+            if(i<combineBothTop) {
+                --combineBothTop;
+            }
+            if(i<combineFwdTop) {
+                --combineFwdTop;
+            }
+            break;
+        }
+    }
+
+    /* not found or modified, insert it */
+    if(combineBackTop>=sizeof(combiningCPs)/4) {
+        fprintf(stderr, "error: gennorm combining code points - trying to use more than %ld units\n",
+                (long)(sizeof(combiningCPs)/4)); /* size_t cast to the long that %ld expects */
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    /* set i to the insertion point */
+    flags=(uint8_t)(newEntry>>24);
+    if(flags==1) {
+        i=combineFwdTop++;
+        ++combineBothTop;
+    } else if(flags==3) {
+        i=combineBothTop++;
+    } else /* flags==2 */ {
+        i=combineBackTop;
+    }
+
+    /* move the following code points up one and insert newEntry at i */
+    if(i<combineBackTop) {
+        uprv_memmove(combiningCPs+i+1, combiningCPs+i, (combineBackTop-i)*4);
+    }
+    combiningCPs[i]=newEntry;
+
+    /* finally increment the total counter */
+    ++combineBackTop;
+}
+
+/*
+ * return the index of code in combiningCPs[], or 0xffff if it is not there;
+ * leads are searched in [0, combineBothTop[, trails in [combineFwdTop, combineBackTop[
+ */
+static uint16_t
+findCombiningCP(uint32_t code, UBool isLead) {
+    uint16_t pos, end;
+
+    /* pick the section to search */
+    pos=(uint16_t)(isLead ? 0 : combineFwdTop);
+    end=(uint16_t)(isLead ? combineBothTop : combineBackTop);
+
+    while(pos<end) {
+        /* the top byte of each entry holds flags, compare the code point only */
+        if((combiningCPs[pos]&0xffffff)==code) {
+            return pos;
+        }
+        ++pos;
+    }
+
+    /* no entry for this code point */
+    return 0xffff;
+}
+
+/* record that lead+trail compose to combined, and register both as combining code points */
+static void
+addCombiningTriple(uint32_t lead, uint32_t trail, uint32_t combined) {
+    CombiningTriple *entry;
+
+    /*
+     * mark both code points in their Norm units, creating the units if necessary; the caller does
+     * this after decomposing, so getNorm() still returned NULL there when no sub-decomposition existed
+     */
+    createNorm(lead)->combiningFlags|=1;    /* combines forward */
+    createNorm(trail)->combiningFlags|=2;    /* combines backward */
+
+    addCombiningCP(lead, 1);
+    addCombiningCP(trail, 2);
+
+    entry=(CombiningTriple *)utm_alloc(combiningTriplesMem);
+    entry->combined=combined;
+    entry->trail=trail;
+    entry->lead=lead;
+}
+
+/* qsort() comparator: order CombiningTriples by leadIndex, then by trailIndex */
+static int
+compareTriples(const void *l, const void *r) {
+    const CombiningTriple *left=(const CombiningTriple *)l;
+    const CombiningTriple *right=(const CombiningTriple *)r;
+
+    if(left->leadIndex!=right->leadIndex) {
+        return (int)left->leadIndex-(int)right->leadIndex;
+    }
+    return (int)left->trailIndex-(int)right->trailIndex;
+}
+
+/* sort the collected triples, assign combining indexes to all combining code points, fill combiningTable[] */
+static void processCombining(void) {
+    CombiningTriple *triples;
+    uint16_t *p;
+    uint32_t combined;
+    uint16_t i, j, count, tableTop, finalIndex;
+
+    triples=utm_getStart(combiningTriplesMem);
+
+    /* add lead and trail indexes to the triples for sorting */
+    count=(uint16_t)combiningTriplesMem->index;
+    for(i=0; i<count; ++i) {
+        /* findCombiningCP() must always find the code point */
+        triples[i].leadIndex=findCombiningCP(triples[i].lead, TRUE);
+        triples[i].trailIndex=findCombiningCP(triples[i].trail, FALSE);
+    }
+
+    /* sort them by leadIndex, trailIndex */
+    qsort(triples, count, sizeof(CombiningTriple), compareTriples);
+
+    /* calculate final combining indexes and store them in the Norm entries */
+    tableTop=0;
+    j=0; /* triples counter */
+
+    /* first, combining indexes of fwd/both characters are indexes into the combiningTable */
+    for(i=0; i<combineBothTop; ++i) {
+        /* start a new table */
+
+        /* assign combining index */
+        createNorm(combiningCPs[i]&0xffffff)->combiningIndex=combiningIndexes[i]=tableTop;
+
+        /* calculate the length of the combining data for this lead code point in the combiningTable */
+        while(j<count && i==triples[j].leadIndex) {
+            /* count 2 16-bit units per composition code unit */
+            tableTop+=2*UTF16_CHAR_LENGTH(combined=triples[j++].combined);
+        }
+    }
+
+    /* second, combining indexes of back-only characters are simply incremented from here to be unique */
+    finalIndex=tableTop;
+    for(; i<combineBackTop; ++i) {
+        createNorm(combiningCPs[i]&0xffffff)->combiningIndex=combiningIndexes[i]=finalIndex++;
+    }
+
+    /* bit 15 in combiningTable marks the end for a lead; NOTE(review): only tableTop is checked, not finalIndex */
+    if(tableTop>=sizeof(combiningTable)/4) {
+        fprintf(stderr, "error: gennorm combining table - trying to use %u units, more than the %ld units available\n",
+                (unsigned)tableTop, (long)(sizeof(combiningTable)/4)); /* casts match %u and %ld */
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    combiningTableTop=tableTop;
+
+    /* store the combining data in the combiningTable, with the final indexes from above */
+    p=combiningTable;
+    j=0; /* triples counter */
+
+    /*
+     * this is essentially the same loop as above, but
+     * it writes the table data instead of calculating and setting the final indexes;
+     * it is necessary to have two passes so that all the final indexes are known before
+     * they are written into the table
+     */
+    for(i=0; i<combineBothTop; ++i) {
+        /* start a new table */
+
+        /* store the combining data for this lead code point in the combiningTable */
+        while(j<count && i==triples[j].leadIndex) {
+            finalIndex=combiningIndexes[triples[j].trailIndex];
+            combined=triples[j].combined;
+            if(combined<=0xffff) {
+                *p++=finalIndex;
+                *p++=(uint16_t)combined;
+            } else {
+                *p++=finalIndex;
+                *p++=(uint16_t)(0xd7c0+(combined>>10));
+                *p++=finalIndex;
+                *p++=(uint16_t)(0xdc00|(combined&0x3ff));
+            }
+            ++j;
+        }
+
+        /* set a marker on the last final trail index in this lead's table (assumes it has at least one triple) */
+        *(p-2)|=0x8000;
+    }
+
+    /* post condition: tableTop==(p-combiningTable) */
+}
+
+/* processing incoming normalization data ----------------------------------- */
+
+/*
+ * decompose the one decomposition (NFD or NFKD) that norm carries for code further, which may generate
+ * two decompositions: apply all previous characters' decompositions to it and store the results in utf32Mem
+ */
+static void
+decompStoreNewNF(uint32_t code, Norm *norm) {
+    uint32_t nfd[40], nfkd[40]; /* NOTE(review): appended to below without a check against 40 */
+    uint32_t *s32;
+    Norm *p;
+    uint32_t c;
+    int32_t i, length;
+    uint8_t lenNFD=0, lenNFKD=0;
+    UBool changedNFD=FALSE, changedNFKD=FALSE;
+
+    if((length=norm->lenNFD)!=0) {
+        /* canonical original: always allocate the string, so NFD counts as changed from the start */
+        changedNFD=TRUE;
+        s32=norm->nfd;
+    } else if((length=norm->lenNFKD)!=0) {
+        /* compatibility original: always allocate the string; no NFD is built in this case */
+        changedNFKD=TRUE;
+        s32=norm->nfkd;
+    } else {
+        /* no decomposition here, nothing to do */
+        return;
+    }
+
+    /* decompose each code point */
+    for(i=0; i<length; ++i) {
+        c=s32[i];
+        p=getNorm(c);
+        if(p==NULL) {
+            /* no data, no decomposition */
+            nfd[lenNFD++]=c;
+            nfkd[lenNFKD++]=c;
+            continue;
+        }
+
+        /* canonically decompose c */
+        if(changedNFD) {
+            if(p->lenNFD!=0) {
+                uprv_memcpy(nfd+lenNFD, p->nfd, p->lenNFD*4);
+                lenNFD+=p->lenNFD;
+            } else {
+                nfd[lenNFD++]=c;
+            }
+        }
+
+        /* compatibility-decompose c; fall back to its NFD if it has no NFKD */
+        if(p->lenNFKD!=0) {
+            uprv_memcpy(nfkd+lenNFKD, p->nfkd, p->lenNFKD*4);
+            lenNFKD+=p->lenNFKD;
+            changedNFKD=TRUE;
+        } else if(p->lenNFD!=0) {
+            uprv_memcpy(nfkd+lenNFKD, p->nfd, p->lenNFD*4);
+            lenNFKD+=p->lenNFD;
+            changedNFKD=TRUE;
+        } else {
+            nfkd[lenNFKD++]=c;
+        }
+    }
+
+    /* assume that norm->lenNFD==1 or ==2; s32 still points to the original string here */
+    if(norm->lenNFD==2 && !(norm->combiningFlags&0x80)) { /* 0x80: presumably the composition exclusion flag - TODO confirm */
+        addCombiningTriple(s32[0], s32[1], code);
+    }
+
+    if(changedNFD) {
+        if(lenNFD!=0) {
+            s32=utm_allocN(utf32Mem, lenNFD);
+            uprv_memcpy(s32, nfd, lenNFD*4);
+        } else {
+            s32=NULL;
+        }
+        norm->lenNFD=lenNFD;
+        norm->nfd=s32;
+        setHaveSeenString(nfd, lenNFD);
+    }
+    if(changedNFKD) {
+        if(lenNFKD!=0) {
+            s32=utm_allocN(utf32Mem, lenNFKD);
+            uprv_memcpy(s32, nfkd, lenNFKD*4);
+        } else {
+            s32=NULL;
+        }
+        norm->lenNFKD=lenNFKD;
+        norm->nfkd=s32;
+        setHaveSeenString(nfkd, lenNFKD);
+    }
+}
+
+typedef struct DecompSingle {
+    uint32_t c;     /* the code point whose decompositions are to be applied */
+    Norm *norm;     /* its Norm with the nfd and/or nfkd strings; passed to enumTrie() as the context */
+} DecompSingle;
+
+/*
+ * EnumTrieFn callback: context is a DecompSingle with a code point and its decompositions (at least
+ * one!); each occurrence of that code point in norm's stored strings is replaced by its decomposition
+ */
+static void
+decompWithSingleFn(void *context, uint32_t code, Norm *norm) {
+    uint32_t nfd[40], nfkd[40]; /* NOTE(review): appended to below without a check against 40 */
+    uint32_t *s32;
+    DecompSingle *me=(DecompSingle *)context;
+    uint32_t c, myC;
+    int32_t i, length;
+    uint8_t lenNFD, lenNFKD, myLenNFD, myLenNFKD; /* lenNFD/lenNFKD are only read when the matching changed flag is set */
+    UBool changedNFD=FALSE, changedNFKD=FALSE;
+
+    /* get the new character's data */
+    myC=me->c;
+    myLenNFD=me->norm->lenNFD;
+    myLenNFKD=me->norm->lenNFKD;
+    /* assume that myC has at least one decomposition */
+
+    if((length=norm->lenNFD)!=0 && myLenNFD!=0) {
+        /* apply NFD(myC) to norm->nfd */
+        s32=norm->nfd;
+        lenNFD=0;
+        for(i=0; i<length; ++i) {
+            c=s32[i];
+            if(c==myC) {
+                uprv_memcpy(nfd+lenNFD, me->norm->nfd, myLenNFD*4);
+                lenNFD+=myLenNFD;
+                changedNFD=TRUE;
+            } else {
+                nfd[lenNFD++]=c;
+            }
+        }
+    }
+
+    if((length=norm->lenNFKD)!=0) {
+        /* apply NFD(myC) and NFKD(myC) to norm->nfkd */
+        s32=norm->nfkd;
+        lenNFKD=0;
+        for(i=0; i<length; ++i) {
+            c=s32[i];
+            if(c==myC) {
+                if(myLenNFKD!=0) {
+                    uprv_memcpy(nfkd+lenNFKD, me->norm->nfkd, myLenNFKD*4);
+                    lenNFKD+=myLenNFKD;
+                } else /* assume myLenNFD!=0 */ {
+                    uprv_memcpy(nfkd+lenNFKD, me->norm->nfd, myLenNFD*4);
+                    lenNFKD+=myLenNFD;
+                }
+                changedNFKD=TRUE;
+            } else {
+                nfkd[lenNFKD++]=c;
+            }
+        }
+    } else if((length=norm->lenNFD)!=0 && myLenNFKD!=0) {
+        /* apply NFKD(myC) to norm->nfd, forming a new norm->nfkd */
+        s32=norm->nfd;
+        lenNFKD=0;
+        for(i=0; i<length; ++i) {
+            c=s32[i];
+            if(c==myC) {
+                uprv_memcpy(nfkd+lenNFKD, me->norm->nfkd, myLenNFKD*4);
+                lenNFKD+=myLenNFKD;
+                changedNFKD=TRUE;
+            } else {
+                nfkd[lenNFKD++]=c;
+            }
+        }
+    }
+
+    /* set the new decompositions, forget the old ones; a string is reused in place unless the new one is longer */
+    if(changedNFD) {
+        if(lenNFD!=0) {
+            if(lenNFD>norm->lenNFD) {
+                s32=utm_allocN(utf32Mem, lenNFD);
+            } else {
+                s32=norm->nfd;
+            }
+            uprv_memcpy(s32, nfd, lenNFD*4);
+        } else {
+            s32=NULL;
+        }
+        norm->lenNFD=lenNFD;
+        norm->nfd=s32;
+    }
+    if(changedNFKD) {
+        if(lenNFKD!=0) {
+            if(lenNFKD>norm->lenNFKD) {
+                s32=utm_allocN(utf32Mem, lenNFKD);
+            } else {
+                s32=norm->nfkd;
+            }
+            uprv_memcpy(s32, nfkd, lenNFKD*4);
+        } else {
+            s32=NULL;
+        }
+        norm->lenNFKD=lenNFKD;
+        norm->nfkd=s32;
+    }
+}
+
+/*
+ * Process the data for one code point listed in UnicodeData.
+ * UnicodeData itself never maps a code point to both NFD and NFKD.
+ */
+extern void
+storeNorm(uint32_t code, Norm *norm) {
+    DecompSingle single;
+    Norm *stored;
+
+    /* carry over the derived normalization properties that are already stored for code */
+    stored=createNorm(code);
+    norm->qcFlags=stored->qcFlags;
+    norm->combiningFlags=stored->combiningFlags;
+
+    /* process the decomposition if there is at least one here */
+    if(norm->lenNFD!=0 || norm->lenNFKD!=0) {
+        /* decompose this one decomposition further, may generate two decompositions */
+        decompStoreNewNF(code, norm);
+
+        /* was this code point already used in earlier decompositions? */
+        if(HAVE_SEEN(code)) {
+            /* use this decomposition to decompose those other decompositions further */
+            single.c=code;
+            single.norm=norm;
+            enumTrie(decompWithSingleFn, &single);
+        }
+    }
+
+    /* store the data: overwrite the stored Norm with the caller's struct */
+    uprv_memcpy(stored, norm, sizeof(Norm));
+}
+
+extern void
+setQCFlags(uint32_t code, uint8_t qcFlags) {
+    const uint16_t c16=(uint16_t)code; /* only compared and stored when code<0xffff */
+    createNorm(code)->qcFlags|=qcFlags;
+    /* lower the recorded minimum code point for each quick check no/maybe flag that is set */
+    if(code<0xffff) {
+        if((qcFlags&_NORM_QC_NFC)!=0 && c16<indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE]) {
+            indexes[_NORM_INDEX_MIN_NFC_NO_MAYBE]=c16;
+        }
+        if((qcFlags&_NORM_QC_NFKC)!=0 && c16<indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE]) {
+            indexes[_NORM_INDEX_MIN_NFKC_NO_MAYBE]=c16;
+        }
+        if((qcFlags&_NORM_QC_NFD)!=0 && c16<indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE]) {
+            indexes[_NORM_INDEX_MIN_NFD_NO_MAYBE]=c16;
+        }
+        if((qcFlags&_NORM_QC_NFKD)!=0 && c16<indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE]) {
+            indexes[_NORM_INDEX_MIN_NFKD_NO_MAYBE]=c16;
+        }
+    }
+}
+
+extern void
+setCompositionExclusion(uint32_t code) {
+    createNorm(code)->combiningFlags|=0x80; /* flag code as a composition exclusion: sets bit 0x80 of its combiningFlags */
+}
+
+static void
+setHangulJamoSpecials() {
+    Norm *norm;
+    uint16_t *pStage2Block;
+    uint32_t c;
+    uint16_t i;
+
+    /*
+     * Hangul syllables are algorithmically decomposed into Jamos,
+     * and Jamos are algorithmically composed into Hangul syllables,
+     * so both get special tags here. The quick check flags are parsed, except for Hangul.
+     */
+
+#if 0
+    /* set Jamo 1 specials */
+    for(c=0x1100; c<=0x1112; ++c) {
+        norm=createNorm(c);
+        norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_1;
+        norm->combiningFlags=1;
+    }
+#endif
+
+    /* set Jamo 2 specials for U+1161..U+1175 */
+    for(c=0x1161; c<=0x1175; ++c) {
+        norm=createNorm(c);
+        norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_2;
+        norm->combiningFlags=3; /* both bits: 1 (reported as "combines forward") and 2 ("combines backward") */
+    }
+
+    /* set Jamo 3 specials for U+11a8..U+11c2 */
+    for(c=0x11a8; c<=0x11c2; ++c) {
+        norm=createNorm(c);
+        norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_3;
+        norm->combiningFlags=2; /* only bit 2 (reported as "combines backward") */
+    }
+
+    /* set Hangul specials, precompacted: one Norm that all Hangul syllables will refer to */
+    norm=(Norm *)utm_alloc(normMem);
+    norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_HANGUL;
+    norm->qcFlags=_NORM_QC_NFD|_NORM_QC_NFKD;
+
+    /* set one complete stage 2 block with this Hangul information */
+    pStage2Block=(uint16_t *)utm_allocN(stage2Mem, _NORM_STAGE_2_BLOCK_COUNT);
+    for(i=0; i<_NORM_STAGE_2_BLOCK_COUNT; ++i) {
+        pStage2Block[i]=norm-norms; /* stage 2 entries are indexes into norms[] */
+    }
+
+    /* set these data for U+ac00..U+d7a3 */
+    c=0xac00;
+
+    /* set a partial stage 2 block before pStage2Block can be repeated */
+    if(c&_NORM_STAGE_2_MASK) { /* only needed if U+ac00 has a non-zero offset inside its stage 2 block */
+        i=createStage2Block(c)+(uint16_t)(c&_NORM_STAGE_2_MASK);
+        do {
+            stage2[i++]=norm-norms;
+        } while(++c&_NORM_STAGE_2_MASK); /* stop when c reaches offset 0 of the next block */
+    }
+
+    /* set full stage 1 blocks to the common stage 2 block */
+    while(c<(0xd7a3&~_NORM_STAGE_2_MASK)) { /* up to, not including, the block that contains U+d7a3 */
+        stage1[c>>_NORM_TRIE_SHIFT]=pStage2Block-stage2;
+        c+=_NORM_STAGE_2_BLOCK_COUNT;
+    }
+
+    /* set a partial stage 2 block after the repetition */
+    i=createStage2Block(c); /* no offset added: c is expected to be at the start of a block here */
+    while(c<=0xd7a3) {
+        stage2[i++]=norm-norms;
+        ++c;
+    }
+}
+
+/* build runtime structures ------------------------------------------------- */
+
+/* canonically reorder a UTF-32 string in place; return { leadCC, trailCC } */
+static uint16_t
+reorderString(uint32_t *s, int32_t length) {
+    uint8_t ccs[40];   /* combining classes, kept parallel to s[] */
+    uint32_t cp;
+    int32_t src, dest;
+    uint8_t cc;
+
+    if(length<=0) {
+        return 0;
+    }
+
+    /*
+     * Handle the code points one at a time, front to back:
+     * a code point with a non-zero combining class is moved back
+     * past all directly preceding code points with a greater class,
+     * and ccs[] is updated along with s[].
+     */
+    for(src=0; src<length; ++src) {
+        cp=s[src];
+        cc=getCCFromCP(cp);
+        /* a code point with cc==0 stays where it is; a combining mark may move back */
+        dest=src;
+        if(cc!=0) {
+            while(dest>0 && ccs[dest-1]>cc) {
+                /* shift the preceding code point and its class up by one */
+                s[dest]=s[dest-1];
+                ccs[dest]=ccs[dest-1];
+                --dest;
+            }
+        }
+        s[dest]=cp;
+        ccs[dest]=cc;
+    }
+
+    /* first combining class in the upper byte, last one in the lower byte */
+    return ((uint16_t)ccs[0]<<8)|ccs[length-1];
+}
+
+static UBool combineAndQC[64]={ 0 };
+
+/*
+ * enumTrie() callback: canonically reorder the up to two decompositions, store their lead/trail
+ * combining classes, and print warnings about inconsistent data (code is cast for the %lx format)
+ */
+static void
+postParseFn(void *context, uint32_t code, Norm *norm) {
+    int32_t length;
+
+    /* canonically order the NFD */
+    length=norm->lenNFD;
+    if(length>0) {
+        norm->canonBothCCs=reorderString(norm->nfd, length);
+    }
+
+    /* canonically reorder the NFKD */
+    length=norm->lenNFKD;
+    if(length>0) {
+        norm->compatBothCCs=reorderString(norm->nfkd, length);
+    }
+
+    /* verify that code has a decomposition if and only if the quick check flags say "no" on NF(K)D */
+    if((norm->lenNFD!=0) != ((norm->qcFlags&_NORM_QC_NFD)!=0)) {
+        printf("U+%04lx has NFD[%d] but quick check 0x%02x\n", (unsigned long)code, norm->lenNFD, norm->qcFlags);
+    }
+    if(((norm->lenNFD|norm->lenNFKD)!=0) != ((norm->qcFlags&(_NORM_QC_NFD|_NORM_QC_NFKD))!=0)) {
+        printf("U+%04lx has NFD[%d] NFKD[%d] but quick check 0x%02x\n", (unsigned long)code, norm->lenNFD, norm->lenNFKD, norm->qcFlags);
+    }
+
+    /* ### see which combinations of combiningFlags (index bits 2..3) and qcFlags&0x33 are used for NFC/NFKC */
+    combineAndQC[(norm->qcFlags&0x33)|((norm->combiningFlags&3)<<2)]=1;
+
+    if(norm->combiningFlags&1) {
+        if(norm->udataCC!=0) {
+            /* illegal - data-derivable composition exclusion */
+            printf("U+%04lx combines forward but udataCC==%u\n", (unsigned long)code, norm->udataCC);
+        }
+    }
+    if(norm->combiningFlags&2) {
+        if((norm->qcFlags&0x11)==0) {
+            printf("U+%04lx combines backward but qcNF?C==0\n", (unsigned long)code);
+        }
+#if 0
+        /* occurs sometimes */
+        if(norm->udataCC==0) {
+            printf("U+%04lx combines backward but udataCC==0\n", (unsigned long)code);
+        }
+#endif
+    }
+    if((norm->combiningFlags&3)==3) {
+        printf("U+%04lx combines both ways\n", (unsigned long)code);
+    }
+}
+
+/* ### debug: counters that makeAll32() prints */
+static uint32_t countCCSame=0, countCCTrail=0, countCCTwo=0;
+
+static uint32_t
+make32BitNorm(Norm *norm) {
+    UChar extra[100]; /* local staging buffer for this Norm's 16-bit extra data */
+    uint32_t word;
+    int32_t i, length, beforeZero=0, count, start;
+
+    /* build the 32-bit data word for one Norm: start from the quick check flags */
+    word=norm->qcFlags;
+
+    /* set the UnicodeData combining class */
+    word|=(uint32_t)norm->udataCC<<_NORM_CC_SHIFT;
+
+    /* set the two combining flags (combiningFlags&3) into bits 6..7 of the word */
+    if(norm->combiningFlags&3) {
+        word|=(uint32_t)(norm->combiningFlags&3)<<6;
+    }
+
+    /* set the combining index value into the extra data, in front of the decomposition data */
+    if(norm->combiningIndex!=0) {
+        extra[0]=norm->combiningIndex;
+        beforeZero=1;
+    }
+
+    count=beforeZero;
+
+    /* write the decompositions */
+    if((norm->lenNFD|norm->lenNFKD)!=0) {
+        extra[count++]=0; /* set the pieces when available, into extra[beforeZero] */
+
+        length=norm->lenNFD;
+        if(length>0) {
+            if(norm->canonBothCCs!=0) {
+                extra[beforeZero]|=0x80; /* flag: one unit with the NFD lead/trail cc precedes the NFD text */
+                extra[count++]=norm->canonBothCCs;
+            }
+            start=count;
+            for(i=0; i<length; ++i) {
+                UTF_APPEND_CHAR_UNSAFE(extra, count, norm->nfd[i]);
+            }
+            extra[beforeZero]|=(UChar)(count-start); /* set the decomp length as the number of UTF-16 code units */
+        }
+
+        length=norm->lenNFKD;
+        if(length>0) {
+            if(norm->compatBothCCs!=0) {
+                extra[beforeZero]|=0x8000; /* flag: one unit with the NFKD lead/trail cc precedes the NFKD text */
+                extra[count++]=norm->compatBothCCs;
+            }
+            start=count;
+            for(i=0; i<length; ++i) {
+                UTF_APPEND_CHAR_UNSAFE(extra, count, norm->nfkd[i]);
+            }
+            extra[beforeZero]|=(UChar)((count-start)<<8); /* set the decomp length as the number of UTF-16 code units */
+        }
+    }
+
+    /* allocate and copy the extra data */
+    if(count!=0) {
+        UChar *p;
+
+        if(norm->specialTag!=0) {
+            fprintf(stderr, "error: gennorm - illegal to have both extra data and a special tag (0x%x)\n", norm->specialTag);
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+
+        p=(UChar *)utm_allocN(extraMem, count);
+        uprv_memcpy(p, extra, count*2);
+
+        /* set the extra index, offset by beforeZero so that it skips a combining index unit */
+        word|=(uint32_t)(beforeZero+(p-(UChar *)utm_getStart(extraMem)))<<_NORM_EXTRA_SHIFT;
+    } else if(norm->specialTag!=0) {
+        /* set a special tag instead of an extra index */
+        word|=(uint32_t)norm->specialTag<<_NORM_EXTRA_SHIFT;
+    }
+
+    return word;
+}
+
+/* turn all Norm structs into corresponding 32-bit norm values in the newly allocated norm32Table */
+static void
+makeAll32() {
+    uint16_t i, count;
+
+    /*
+     * allocate and fill the table of 32-bit normalization data
+     * leave space for data for the up to 1024 lead surrogates
+     */
+    norm32TableTop=(uint16_t)stage2Mem->index;
+    norm32Table=(uint32_t *)uprv_malloc((norm32TableTop+1024)*4);
+    if(norm32Table==NULL) {
+        fprintf(stderr, "error: gennorm - unable to allocate %ld 32-bit words for norm32Table\n",
+                (long)(norm32TableTop+1024));
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    /* reset all entries */
+    uprv_memset(norm32Table, 0, (norm32TableTop+1024)*4);
+
+    count=0;
+
+    /* skip the first, all-empty block; norm32Table uses the same indexes as stage2 */
+    for(i=_NORM_STAGE_2_BLOCK_COUNT; i<norm32TableTop; ++i) {
+        if(stage2[i]!=0) {
+            if(0!=(norm32Table[i]=make32BitNorm(norms+stage2[i]))) {
+                ++count;
+            }
+        }
+    }
+
+    printf("count of 16-bit extra data: %lu\n", (unsigned long)extraMem->index);
+    printf("count of (uncompacted) non-zero 32-bit words: %lu\n", (unsigned long)count);
+    printf("count CC frequencies: same %lu  trail %lu  two %lu\n", (unsigned long)countCCSame, (unsigned long)countCCTrail, (unsigned long)countCCTwo);
+}
+
+/*
+ * extract all Norm.canonBothCCs into the FCD table
+ * set 32-bit values to use the common fold and compact functions
+ */
+static void
+makeFCD() {
+    static uint16_t map[0x10000>>_NORM_TRIE_SHIFT];
+    Norm *norm;
+    uint32_t i, oredValues;
+    uint16_t bothCCs, delta;
+
+    /*
+     * allocate and fill the table of 32-bit FCD data
+     * leave space for data for the up to 1024 lead surrogates
+     */
+    fcdTableTop=(uint16_t)stage2Mem->index;
+    fcdTable=(uint32_t *)uprv_malloc((fcdTableTop+1024)*4);
+    if(fcdTable==NULL) {
+        fprintf(stderr, "error: gennorm - unable to allocate %ld 32-bit words for fcdTable\n",
+                (long)(fcdTableTop+1024));
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    /* reset all entries */
+    uprv_memset(fcdTable, 0, (fcdTableTop+1024)*4);
+
+    /* compact out the all-zero stage 2 blocks; delta counts the entries omitted so far */
+    map[0]=0;
+    delta=0;
+
+    /* oredValues detects all-zero stage 2 blocks that will be removed from fcdStage1 */
+    oredValues=0;
+
+    /* skip the first, all-empty block */
+    for(i=_NORM_STAGE_2_BLOCK_COUNT; i<fcdTableTop; ++i) {
+        if(stage2[i]!=0) {
+            norm=norms+stage2[i];
+            bothCCs=norm->canonBothCCs;
+            if(bothCCs==0) {
+                /* if there are no decomposition cc's then use the udataCC twice */
+                bothCCs=norm->udataCC;
+                bothCCs|=bothCCs<<8;
+            }
+            oredValues|=fcdTable[i-delta]=bothCCs;
+        }
+
+        if((i&_NORM_STAGE_2_MASK)==_NORM_STAGE_2_MASK) {
+            /* at the end of a stage 2 block, check if there are any non-zero entries */
+            if(oredValues==0) {
+                /* all zero: skip this block */
+                delta+=_NORM_STAGE_2_BLOCK_COUNT;
+                map[i>>_NORM_TRIE_SHIFT]=(uint16_t)0;
+            } else {
+                /* keep this block */
+                map[i>>_NORM_TRIE_SHIFT]=(uint16_t)(i&~_NORM_STAGE_2_MASK)-delta;
+                oredValues=0;
+            }
+        }
+    }
+
+    /* now adjust stage 1: map each old stage 2 block start to its new position */
+    for(i=0; i<_NORM_STAGE_1_MAX_COUNT; ++i) {
+        fcdStage1[i]=map[fcdStage1[i]>>_NORM_TRIE_SHIFT];
+    }
+
+    printf("FCD: omitted %u stage 2 entries in all-zero blocks\n", (unsigned)delta);
+
+    /* adjust the table top */
+    fcdTableTop-=delta;
+}
+
+/*
+ * Fold the supplementary code point data for one lead surrogate; returns the number of stage 1 entries appended at parent[parentCount] (0 if there is nothing to store).
+ */
+static uint16_t
+foldLeadSurrogate(uint16_t *parent, uint16_t parentCount,
+                  uint32_t *stage, uint16_t *pStageCount,
+                  uint32_t base,
+                  UBool isNorm32) {
+    uint32_t leadNorm32=0;
+    uint32_t i, j, s2;
+    uint32_t leadSurrogate=0xd7c0+(base>>10);
+
+    printf("supplementary data for lead surrogate U+%04lx\n", (unsigned long)leadSurrogate);
+
+    /* calculate the 32-bit data word for the lead surrogate */
+    for(i=0; i<_NORM_SURROGATE_BLOCK_COUNT; ++i) {
+        s2=parent[(base>>_NORM_TRIE_SHIFT)+i];
+        if(s2!=0) {
+            for(j=0; j<_NORM_STAGE_2_BLOCK_COUNT; ++j) {
+                /* basically, or all 32-bit data into the one for the lead surrogate */
+                leadNorm32|=stage[s2+j];
+            }
+        }
+    }
+
+    if(isNorm32) {
+        /* turn multi-bit fields into the worst-case value */
+        if(leadNorm32&_NORM_CC_MASK) {
+            leadNorm32|=_NORM_CC_MASK;
+        }
+
+        /* clean up unnecessarily ored bit fields */
+        leadNorm32&=~((uint32_t)0xffffffff<<_NORM_EXTRA_SHIFT);
+
+        if(leadNorm32==0) {
+            /* nothing to do (only composition exclusions?) */
+            return 0;
+        }
+
+        /* add the extra surrogate index, offset by the BMP top, for the new stage 1 location */
+        leadNorm32|=(
+            (uint32_t)_NORM_EXTRA_INDEX_TOP+
+            (uint32_t)((parentCount-_NORM_STAGE_1_BMP_COUNT)>>_NORM_SURROGATE_BLOCK_BITS)
+        )<<_NORM_EXTRA_SHIFT;
+    } else {
+        if(leadNorm32==0) {
+            /* FCD: nothing to do */
+            return 0;
+        }
+
+        /*
+         * For FCD, replace the entire combined value by the surrogate index
+         * and make sure that it is not 0 (by not offsetting it by the BMP top,
+         * since here we have enough bits for this);
+         * lead surrogates are tested at runtime on the character code itself
+         * instead on special values of the trie data -
+         * this is because 16 bits in the FCD trie data do not allow for anything
+         * but the two leading and trailing combining classes of the canonical decomposition.
+         */
+        leadNorm32=parentCount>>_NORM_SURROGATE_BLOCK_BITS;
+    }
+
+    /* enter the lead surrogate's data */
+    s2=parent[leadSurrogate>>_NORM_TRIE_SHIFT];
+    if(s2==0) {
+        /* allocate a new stage 2 block in stage (the memory is there from makeAll32()/makeFCD()) */
+        s2=parent[leadSurrogate>>_NORM_TRIE_SHIFT]=*pStageCount;
+        *pStageCount+=_NORM_STAGE_2_BLOCK_COUNT;
+    }
+    stage[s2+(leadSurrogate&_NORM_STAGE_2_MASK)]=leadNorm32;
+
+    /* move the actual stage 1 indexes from the supplementary position to the new one */
+    uprv_memmove(parent+parentCount, parent+(base>>_NORM_TRIE_SHIFT), _NORM_SURROGATE_BLOCK_COUNT*2);
+
+    /* increment stage 1 top */
+    return _NORM_SURROGATE_BLOCK_COUNT;
+}
+
+/*
+ * Fold the normalization data for supplementary code points into
+ * a compact area on top of the BMP-part of the trie index,
+ * with the lead surrogates indexing this compact area.
+ * Returns the new count of stage 1 (parent) entries.
+ * Use after makeAll32() (isNorm32) or after makeFCD() (!isNorm32).
+ */
+static uint16_t
+foldSupplementary(uint16_t *parent, uint16_t parentCount,
+                  uint32_t *stage, uint16_t *pStageCount,
+                  UBool isNorm32) {
+    uint32_t c;
+    uint16_t i;
+
+    /* search for any stage 1 entries for supplementary code points */
+    for(c=0x10000; c<0x110000;) {
+        i=parent[c>>_NORM_TRIE_SHIFT];
+        if(i!=0) {
+            /* there is data, treat the full block of 0x400 code points for a lead surrogate */
+            c&=~0x3ff;
+            parentCount+=foldLeadSurrogate(parent, parentCount, stage, pStageCount, c, isNorm32);
+            c+=0x400;
+        } else {
+            c+=_NORM_STAGE_2_BLOCK_COUNT;
+        }
+    }
+
+    printf("trie index count: BMP %u  all Unicode %lu  folded %u\n",
+           (unsigned)_NORM_STAGE_1_BMP_COUNT, (unsigned long)_NORM_STAGE_1_MAX_COUNT, (unsigned)parentCount);
+    return parentCount;
+}
+
+static uint16_t
+compact(uint16_t *parent, uint16_t parentCount,
+        uint32_t *stage, uint16_t stageCount) {
+    /*
+     * This function is the common implementation for compacting
+     * the stage 2 tables of 32-bit values; it returns the new stage 2 count.
+     * It is a copy of genprops/store.c's compactStage() adapted for the 32-bit stage 2 tables.
+     */
+    static uint16_t map[0x10000>>_NORM_TRIE_SHIFT];
+    uint32_t x;
+    uint16_t i, start, prevEnd, newStart;
+
+    map[0]=0;
+    newStart=_NORM_STAGE_2_BLOCK_COUNT;
+    for(start=newStart; start<stageCount;) {
+        prevEnd=(uint16_t)(newStart-1);
+        x=stage[start];
+        if(x==stage[prevEnd]) {
+            /* overlap by at least one */
+            for(i=1; i<_NORM_STAGE_2_BLOCK_COUNT && x==stage[start+i] && x==stage[prevEnd-i]; ++i) {}
+
+            /* overlap by i */
+            map[start>>_NORM_TRIE_SHIFT]=(uint16_t)(newStart-i);
+
+            /* move the non-overlapping indexes to their new positions */
+            start+=i;
+            for(i=(uint16_t)(_NORM_STAGE_2_BLOCK_COUNT-i); i>0; --i) {
+                stage[newStart++]=stage[start++];
+            }
+        } else if(newStart<start) {
+            /* move the indexes to their new positions */
+            map[start>>_NORM_TRIE_SHIFT]=newStart;
+            for(i=_NORM_STAGE_2_BLOCK_COUNT; i>0; --i) {
+                stage[newStart++]=stage[start++];
+            }
+        } else /* no overlap && newStart==start */ {
+            map[start>>_NORM_TRIE_SHIFT]=start;
+            newStart+=_NORM_STAGE_2_BLOCK_COUNT;
+            start=newStart;
+        }
+    }
+
+    /* now adjust the parent table: map each old block start to its new position */
+    for(i=0; i<parentCount; ++i) {
+        parent[i]=map[parent[i]>>_NORM_TRIE_SHIFT];
+    }
+
+    /* we saved some space */
+    printf("compacting trie: count of 32-bit words %lu->%lu\n", (unsigned long)stageCount, (unsigned long)newStart);
+    return newStart;
+}
+
+extern void
+processData() {
+#if 0
+    uint16_t i;
+#endif
+    /* post-parse pipeline: builds the folded and compacted norm32 and FCD tries from the parsed data */
+    processCombining();
+
+    /* canonically reorder decompositions and assign combining classes for decompositions */
+    enumTrie(postParseFn, NULL);
+
+#if 0
+    for(i=1; i<64; ++i) {
+        if(combineAndQC[i]) {
+            printf("combiningFlags==0x%02x  qcFlags(NF?C)==0x%02x\n", (i&0xc)>>2, i&0x33);
+        }
+    }
+#endif
+
+    /* add hangul/jamo specials, after the postParseFn pass above */
+    setHangulJamoSpecials();
+
+    /* copy stage 1 for the FCD trie, before stage1 gets folded and compacted below */
+    uprv_memcpy(fcdStage1, stage1, sizeof(stage1));
+
+    /* --- finalize data for quick checks & normalization: stage1/norm32Table --- */
+
+    /* turn the Norm structs (stage2, norms) into 32-bit data words (norm32Table) */
+    makeAll32();
+
+    /* fold supplementary code points into lead surrogates; this yields the new stage 1 top */
+    stage1Top=foldSupplementary(stage1, _NORM_STAGE_1_BMP_COUNT, norm32Table, &norm32TableTop, TRUE);
+
+    /* compact stage 2; this yields the new top of norm32Table */
+    norm32TableTop=compact(stage1, stage1Top, norm32Table, norm32TableTop);
+
+    /* --- finalize data for FCD checks: fcdStage1/fcdTable --- */
+
+    /* FCD data: take Norm.canonBothCCs and store them in the FCD table */
+    makeFCD();
+
+    /* FCD: fold supplementary code points into lead surrogates */
+    fcdStage1Top=foldSupplementary(fcdStage1, _NORM_STAGE_1_BMP_COUNT, fcdTable, &fcdTableTop, FALSE);
+
+    /* FCD: compact stage 2 */
+    fcdTableTop=compact(fcdStage1, fcdStage1Top, fcdTable, fcdTableTop);
+
+    /* ### debug output */
+#if 0
+    printf("number of stage 2 entries: %ld\n", stage2Mem->index);
+    printf("size of stage 1 (BMP) & 2 (uncompacted) + extra data: %ld bytes\n", _NORM_STAGE_1_BMP_COUNT*2+stage2Mem->index*4+extraMem->index*2);
+#endif
+    printf("combining CPs tops: fwd %u  both %u  back %u\n", combineFwdTop, combineBothTop, combineBackTop);
+    printf("combining table count: %u\n", combiningTableTop);
+}
+
+extern void
+generateData(const char *dataDir) {
+    UNewDataMemory *pData;
+    uint16_t *p16;
+    UErrorCode errorCode=U_ZERO_ERROR;
+    uint32_t size, dataLength;
+    uint16_t i;
+    /* write the output data file; size is the expected byte count of the blocks written below */
+    size=
+        _NORM_INDEX_TOP*2+
+        stage1Top*2+
+        norm32TableTop*4+
+        extraMem->index*2+
+        combiningTableTop*2+
+        fcdStage1Top*2+
+        fcdTableTop*2;
+
+    printf("size of " DATA_NAME "." DATA_TYPE " contents: %lu bytes\n", (unsigned long)size);
+
+    indexes[_NORM_INDEX_COUNT]=_NORM_INDEX_TOP;
+    indexes[_NORM_INDEX_TRIE_SHIFT]=_NORM_TRIE_SHIFT;
+    indexes[_NORM_INDEX_TRIE_INDEX_COUNT]=stage1Top;
+    indexes[_NORM_INDEX_TRIE_DATA_COUNT]=norm32TableTop;
+    indexes[_NORM_INDEX_UCHAR_COUNT]=(uint16_t)extraMem->index;
+
+    indexes[_NORM_INDEX_COMBINE_DATA_COUNT]=combiningTableTop;
+    indexes[_NORM_INDEX_COMBINE_FWD_COUNT]=combineFwdTop;
+    indexes[_NORM_INDEX_COMBINE_BOTH_COUNT]=combineBothTop-combineFwdTop;
+    indexes[_NORM_INDEX_COMBINE_BACK_COUNT]=combineBackTop-combineBothTop;
+
+    indexes[_NORM_INDEX_FCD_TRIE_INDEX_COUNT]=fcdStage1Top;
+    indexes[_NORM_INDEX_FCD_TRIE_DATA_COUNT]=fcdTableTop;
+
+    /* adjust the stage 1 indexes to offset stage 2 from the beginning of stage 1 */
+
+    /* stage1/norm32Table */
+    for(i=0; i<stage1Top; ++i) {
+        stage1[i]+=stage1Top/2; /* stage 2 is 32-bit indexed */
+    }
+
+    /* fcdStage1/fcdTable */
+    for(i=0; i<fcdStage1Top; ++i) {
+        fcdStage1[i]+=fcdStage1Top; /* FCD stage 2 is 16-bit indexed */
+    }
+
+    /* reduce the contents of fcdTable from 32-bit values to 16-bit values, in-place (destructive!) */
+    p16=(uint16_t *)fcdTable;
+    for(i=0; i<fcdTableTop; ++i) {
+        p16[i]=(uint16_t)fcdTable[i];
+    }
+
+    /* write the data */
+    pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
+                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "gennorm: unable to create the output file, error %d\n", errorCode);
+        exit(errorCode);
+    }
+
+    udata_writeBlock(pData, indexes, sizeof(indexes));
+    udata_writeBlock(pData, stage1, stage1Top*2);
+    udata_writeBlock(pData, norm32Table, norm32TableTop*4);
+    udata_writeBlock(pData, utm_getStart(extraMem), extraMem->index*2);
+    udata_writeBlock(pData, combiningTable, combiningTableTop*2);
+    udata_writeBlock(pData, fcdStage1, fcdStage1Top*2);
+    udata_writeBlock(pData, fcdTable, fcdTableTop*2);
+
+    /* finish up */
+    dataLength=udata_finish(pData, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "gennorm: error %d writing the output file\n", errorCode);
+        exit(errorCode);
+    }
+
+    if(dataLength!=size) {
+        fprintf(stderr, "gennorm: data length %lu != calculated size %lu\n",
+            (unsigned long)dataLength, (unsigned long)size);
+        exit(U_INTERNAL_PROGRAM_ERROR);
+    }
+}
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */