ICU-7273 merge in Normalizer2 API & code, and ICU-5785 UnicodeSet::span(UnicodeString) and ICU-7296 tempSubString()/retainBetween(); merge -r 26971:27150 branches/markus/norm2

X-SVN-Rev: 27155
2025-04-05 21:45:37 +00:00 · 2010-01-06 23:50:03 +00:00 · 2010-01-06 23:50:03 +00:00 · 8ddbd1394c
commit 8ddbd1394c
parent 11acc7e54f
98 changed files with 24433 additions and 8028 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -49,6 +49,10 @@ README text !eol
 *.tri2 -text

 icu4c/icu4c.css -text
+icu4c/source/data/in/nfc.nrm -text
+icu4c/source/data/in/nfkc.nrm -text
+icu4c/source/data/in/nfkc_cf.nrm -text
+icu4c/source/data/in/unorm.icu -text
 icu4c/source/data/locales/pool.res -text
 icu4c/source/samples/ucnv/data02.bin -text
 icu4c/source/test/perf/README -text
--- a/.gitignore
+++ b/.gitignore
@ -560,6 +560,20 @@ icu4c/source/tools/gennorm/gennorm.vcproj.*.*.user
 icu4c/source/tools/gennorm/release
 icu4c/source/tools/gennorm/x64
 icu4c/source/tools/gennorm/x86
+icu4c/source/tools/gennorm2/*.d
+icu4c/source/tools/gennorm2/*.o
+icu4c/source/tools/gennorm2/*.pdb
+icu4c/source/tools/gennorm2/*.plg
+icu4c/source/tools/gennorm2/Debug
+icu4c/source/tools/gennorm2/Makefile
+icu4c/source/tools/gennorm2/Release
+icu4c/source/tools/gennorm2/debug
+icu4c/source/tools/gennorm2/gennorm2
+icu4c/source/tools/gennorm2/gennorm2.[0-9]
+icu4c/source/tools/gennorm2/gennorm2.vcproj.*.*.user
+icu4c/source/tools/gennorm2/release
+icu4c/source/tools/gennorm2/x64
+icu4c/source/tools/gennorm2/x86
 icu4c/source/tools/genpname/*.d
 icu4c/source/tools/genpname/*.o
 icu4c/source/tools/genpname/*.pdb
--- a/icu4c/source/allinone/allinone.sln
+++ b/icu4c/source/allinone/allinone.sln
@ -1,5 +1,5 @@
 Microsoft Visual Studio Solution File, Format Version 10.00
-# Visual Studio 2008
+# Visual C++ Express 2008
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}"
 	ProjectSection(ProjectDependencies) = postProject
 		{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
@ -259,6 +259,12 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencfu", "..\tools\gencfu\g
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
 	EndProjectSection
 EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gennorm2", "..\tools\gennorm2\gennorm2.vcproj", "{C7891A65-80AB-4245-912E-5F1E17B0E6C4}"
+	ProjectSection(ProjectDependencies) = postProject
+		{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
+	EndProjectSection
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Win32 = Debug|Win32
@ -555,6 +561,14 @@ Global
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.Build.0 = Release|Win32
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.ActiveCfg = Release|x64
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.Build.0 = Release|x64
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|Win32.ActiveCfg = Debug|Win32
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|Win32.Build.0 = Debug|Win32
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|x64.ActiveCfg = Debug|Win32
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|x64.Build.0 = Debug|Win32
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|Win32.ActiveCfg = Release|Win32
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|Win32.Build.0 = Release|Win32
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|x64.ActiveCfg = Release|Win32
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|x64.Build.0 = Release|Win32
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@ -1,6 +1,6 @@
 #******************************************************************************
 #
-#   Copyright (C) 1999-2009, International Business Machines
+#   Copyright (C) 1999-2010, International Business Machines
 #   Corporation and others.  All Rights Reserved.
 #
 #******************************************************************************
@ -78,7 +78,8 @@ ucat.o locmap.o uloc.o locid.o locutil.o \
 bytestream.o stringpiece.o \
 ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
 utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
-normlzr.o unorm.o unormcmp.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \
+normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
+chariter.o schriter.o uchriter.o uiter.o \
 uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
 uscript.o usc_impl.o unames.o \
 utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
--- a/icu4c/source/common/caniter.cpp
+++ b/icu4c/source/common/caniter.cpp
@ -1,6 +1,6 @@
 /*
 *****************************************************************************
- * Copyright (C) 1996-2006, International Business Machines Corporation and  *
+ * Copyright (C) 1996-2010, International Business Machines Corporation and  *
 * others. All Rights Reserved.                                              *
 *****************************************************************************
 */
@ -12,6 +12,7 @@
 #include "unicode/uset.h"
 #include "unicode/ustring.h"
 #include "hash.h"
+#include "normalizer2impl.h"
 #include "unormimp.h"
 #include "unicode/caniter.h"
 #include "unicode/normlzr.h"
@ -68,7 +69,8 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode
    pieces_length(0),
    pieces_lengths(NULL),
    current(NULL),
-    current_length(0)
+    current_length(0),
+    nfd(*Normalizer2Factory::getNFDInstance(status))
 {
    if(U_SUCCESS(status)) {
      setSource(sourceStr, status);
@ -499,73 +501,39 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
        return NULL;
    }

-    const int32_t bufSize = 256;
-    int32_t bufLen = 0;
-    UChar temp[bufSize];
-
-    int32_t inputLen = 0, decompLen;
-    UChar stackBuffer[4];
-    const UChar *decomp;
-
-    U16_APPEND_UNSAFE(temp, inputLen, comp);
-    decomp = unorm_getCanonicalDecomposition(comp, stackBuffer, &decompLen);
-    if(decomp == NULL) {
-        /* copy temp */
-        stackBuffer[0] = temp[0];
-        if(inputLen > 1) {
-            stackBuffer[1] = temp[1];
-        }
-        decomp = stackBuffer;
-        decompLen = inputLen;
-    }
-
-    UChar *buff = temp+inputLen;
+    UnicodeString temp(comp);
+    int32_t inputLen=temp.length();
+    UnicodeString decompString;
+    nfd.normalize(temp, decompString, status);
+    const UChar *decomp=decompString.getBuffer();
+    int32_t decompLen=decompString.length();

    // See if it matches the start of segment (at segmentPos)
    UBool ok = FALSE;
    UChar32 cp;
    int32_t decompPos = 0;
    UChar32 decompCp;
-    UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp);
+    U16_NEXT(decomp, decompPos, decompLen, decompCp);

-    int32_t i;
-    UBool overflow = FALSE;
-
-    i = segmentPos;
+    int32_t i = segmentPos;
    while(i < segLen) {
-        UTF_NEXT_CHAR(segment, i, segLen, cp);
+        U16_NEXT(segment, i, segLen, cp);

        if (cp == decompCp) { // if equal, eat another cp from decomp

            //if (PROGRESS) printf("  matches: %s\n", UToS(Tr(UnicodeString(cp))));

            if (decompPos == decompLen) { // done, have all decomp characters!
-                //u_strcat(buff+bufLen, segment+i);
-                uprv_memcpy(buff+bufLen, segment+i, (segLen-i)*sizeof(UChar));
-                bufLen+=segLen-i;
-
+                temp.append(segment+i, segLen-i);
                ok = TRUE;
                break;
            }
-            UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp);
+            U16_NEXT(decomp, decompPos, decompLen, decompCp);
        } else {
            //if (PROGRESS) printf("  buffer: %s\n", UToS(Tr(UnicodeString(cp))));

            // brute force approach
-
-            U16_APPEND(buff, bufLen, bufSize, cp, overflow);
-
-            if(overflow) {
-                /*
-                 * ### TODO handle buffer overflow
-                 * The buffer is large, but an overflow may still happen with
-                 * unusual input (many combining marks?).
-                 * Reallocate buffer and continue.
-                 * markus 20020929
-                 */
-
-                overflow = FALSE;
-            }
+            temp.append(cp);

            /* TODO: optimize
            // since we know that the classes are monotonically increasing, after zero
@ -585,25 +553,20 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con

    //if (PROGRESS) printf("Matches\n");

-    if (bufLen == 0) {
+    if (inputLen == temp.length()) {
        fillinResult->put(UnicodeString(), new UnicodeString(), status);
        return fillinResult; // succeed, but no remainder
    }

    // brute force approach
    // check to make sure result is canonically equivalent
-    int32_t tempLen = inputLen + bufLen;
-
-    UChar trial[bufSize];
-    unorm_decompose(trial, bufSize, temp, tempLen, FALSE, 0, &status);
-
-    if(U_FAILURE(status)
-        || uprv_memcmp(segment+segmentPos, trial, (segLen - segmentPos)*sizeof(UChar)) != 0)
-    {
+    UnicodeString trial;
+    nfd.normalize(temp, trial, status);
+    if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
        return NULL;
    }

-    return getEquivalents2(fillinResult, buff, bufLen, status);
+    return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
 }

 U_NAMESPACE_END
--- a/icu4c/source/common/common.vcproj
+++ b/icu4c/source/common/common.vcproj
@ -769,7 +769,7 @@
 			Name="collation"
 			>
 			<File
-				RelativePath=".\ucol_swp.c"
+				RelativePath=".\ucol_swp.cpp"
 				>
 				<FileConfiguration
 					Name="Release|Win32"
@ -961,7 +961,7 @@
 				>
 			</File>
 			<File
-				RelativePath=".\utrie2.c"
+				RelativePath=".\utrie2.cpp"
 				>
 			</File>
 			<File
@ -1172,6 +1172,10 @@
 				RelativePath=".\locmap.h"
 				>
 			</File>
+			<File
+				RelativePath=".\mutex.cpp"
+				>
+			</File>
 			<File
 				RelativePath=".\mutex.h"
 				>
@ -3057,6 +3061,62 @@
 					/>
 				</FileConfiguration>
 			</File>
+			<File
+				RelativePath=".\filterednormalizer2.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\normalizer2.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\unicode\normalizer2.h"
+				>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode&#x0D;&#x0A;"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode&#x0D;&#x0A;"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode&#x0D;&#x0A;"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode&#x0D;&#x0A;"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\normalizer2impl.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\normalizer2impl.h"
+				>
+			</File>
 			<File
 				RelativePath=".\normlzr.cpp"
 				>
@ -3145,6 +3205,46 @@
 					/>
 				</FileConfiguration>
 			</File>
+			<File
+				RelativePath=".\unicode\unorm2.h"
+				>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode&#x0D;&#x0A;"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode&#x0D;&#x0A;"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode&#x0D;&#x0A;"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode&#x0D;&#x0A;"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+			</File>
 			<File
 				RelativePath=".\unorm_it.c"
 				>
@ -3470,7 +3570,7 @@
 				>
 			</File>
 			<File
-				RelativePath=".\uprops.c"
+				RelativePath=".\uprops.cpp"
 				>
 			</File>
 			<File
--- a/icu4c/source/common/cpputils.h
+++ b/icu4c/source/common/cpputils.h
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 1997-2006, International Business Machines
+*   Copyright (C) 1997-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -81,4 +81,15 @@ uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,
        U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
 { uprv_arrayCopy(src+srcStart, dst+dstStart, count); }

+/**
+ * Verifies that a string argument may be used before its contents are accessed.
+ * If errorCode already indicates a failure, it is left unchanged and the string
+ * is not inspected. Otherwise a bogus string sets U_ILLEGAL_ARGUMENT_ERROR.
+ * NOTE(review): only isBogus() is tested here; whether that also covers a string
+ * with an open getBuffer() depends on UnicodeString::isBogus() -- confirm.
+ */
+inline void
+uprv_checkCanGetBuffer(const UnicodeString &s, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    if(s.isBogus()) {
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
 #endif /* _CPPUTILS */
--- a/icu4c/source/common/filterednormalizer2.cpp
+++ b/icu4c/source/common/filterednormalizer2.cpp
@ -0,0 +1,261 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  filterednormalizer2.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009dec10
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/normalizer2.h"
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "cpputils.h"
+
+U_NAMESPACE_BEGIN
+
+// Replaces the contents of dest with the filtered normalization of src.
+// - If errorCode is already a failure, or src fails uprv_checkCanGetBuffer(),
+//   dest is set to bogus.
+// - If dest and src are the same object, U_ILLEGAL_ARGUMENT_ERROR is set
+//   and dest is returned unmodified.
+// Otherwise dest is emptied and the span-by-span worker below fills it,
+// starting with an in-filter (USET_SPAN_SIMPLE) span.
+UnicodeString &
+FilteredNormalizer2::normalize(const UnicodeString &src,
+                               UnicodeString &dest,
+                               UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(src, errorCode);
+    if(U_SUCCESS(errorCode)) {
+        if(&dest!=&src) {
+            dest.remove();
+            return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
+        }
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    } else {
+        dest.setToBogus();
+    }
+    return dest;
+}
+
+// Internal worker: performs no argument checking and appends to dest
+// (it does not clear dest first).
+// src is processed as a sequence of alternating spans found by set.span();
+// the spanCondition argument selects which kind of span is looked for first.
+// Spans found with USET_SPAN_NOT_CONTAINED are appended to dest unchanged;
+// all other spans are normalized with norm2 and the result is appended.
+// Callers should pass the condition that is likely to yield a non-empty span
+// at the start of src. For set=[:age=3.2:], since almost all common characters
+// were in Unicode 3.2, that is USET_SPAN_SIMPLE for the start of a string,
+// and USET_SPAN_NOT_CONTAINED when continuing after an in-filter prefix.
+// The loop stops at the first failure reported through errorCode.
+UnicodeString &
+FilteredNormalizer2::normalize(const UnicodeString &src,
+                               UnicodeString &dest,
+                               USetSpanCondition spanCondition,
+                               UErrorCode &errorCode) const {
+    // One scratch string for all iterations, so its buffer can be reused.
+    UnicodeString tempDest;
+    int32_t start=0;
+    while(start<src.length()) {
+        const int32_t limit=set.span(src, start, spanCondition);
+        const int32_t length=limit-start;
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            if(length!=0) {
+                // Deliberately not norm2.normalizeSecondAndAppend():
+                // the out-of-filter text already in dest must stay untouched.
+                dest.append(norm2.normalize(src.tempSubStringBetween(start, limit),
+                                            tempDest, errorCode));
+                if(U_FAILURE(errorCode)) {
+                    break;
+                }
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        } else {
+            if(length!=0) {
+                dest.append(src, start, length);
+            }
+            spanCondition=USET_SPAN_SIMPLE;
+        }
+        start=limit;
+    }
+    return dest;
+}
+
+// Appends second to first, delegating to the overload that takes a doNormalize
+// flag and passing TRUE, i.e. the in-filter parts of second are normalized
+// as they are appended. Returns first.
+UnicodeString &
+FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
+                                              const UnicodeString &second,
+                                              UErrorCode &errorCode) const {
+    return normalizeSecondAndAppend(first, second, TRUE, errorCode);
+}
+
+// Appends second to first, delegating to the normalizeSecondAndAppend() overload
+// that takes a doNormalize flag and passing FALSE: only the boundary between
+// the two strings goes through norm2.append(); the remainder of second is
+// appended as is. Returns first.
+UnicodeString &
+FilteredNormalizer2::append(UnicodeString &first,
+                            const UnicodeString &second,
+                            UErrorCode &errorCode) const {
+    return normalizeSecondAndAppend(first, second, FALSE, errorCode);
+}
+
+// Shared implementation of normalizeSecondAndAppend() (doNormalize=TRUE)
+// and append() (doNormalize=FALSE).
+// Returns first in all cases. Sets U_ILLEGAL_ARGUMENT_ERROR if either string
+// fails uprv_checkCanGetBuffer() or if first and second are the same object.
+UnicodeString &
+FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
+                                              const UnicodeString &second,
+                                              UBool doNormalize,
+                                              UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(first, errorCode);
+    uprv_checkCanGetBuffer(second, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return first;
+    }
+    if(&first==&second) {
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return first;
+    }
+    // Nothing to merge with: the result is just second, normalized or copied.
+    if(first.isEmpty()) {
+        if(doNormalize) {
+            return normalize(second, first, errorCode);
+        } else {
+            return first=second;
+        }
+    }
+    // merge the in-filter suffix of the first string with the in-filter prefix of the second
+    int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
+    if(prefixLimit!=0) {
+        UnicodeString prefix(second.tempSubString(0, prefixLimit));
+        int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
+        if(suffixStart==0) {
+            // All of first is in the filter: let norm2 work on first directly.
+            if(doNormalize) {
+                norm2.normalizeSecondAndAppend(first, prefix, errorCode);
+            } else {
+                norm2.append(first, prefix, errorCode);
+            }
+        } else {
+            // Only the tail of first from suffixStart is handed to norm2 (as a separate
+            // string), so that the text before suffixStart is not modified;
+            // the merged result then replaces that tail.
+            UnicodeString middle(first, suffixStart, INT32_MAX);
+            if(doNormalize) {
+                norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
+            } else {
+                norm2.append(middle, prefix, errorCode);
+            }
+            first.replace(suffixStart, INT32_MAX, middle);
+        }
+    }
+    // Whatever follows the in-filter prefix of second starts with an out-of-filter span,
+    // hence USET_SPAN_NOT_CONTAINED as the initial condition for the worker.
+    // NOTE(review): errorCode is not checked between the merge above and this step.
+    if(prefixLimit<second.length()) {
+        UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
+        if(doNormalize) {
+            normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
+        } else {
+            first.append(rest);
+        }
+    }
+    return first;
+}
+
+// Returns TRUE if every span of s that is taken with USET_SPAN_SIMPLE
+// is reported as normalized by norm2; spans taken with USET_SPAN_NOT_CONTAINED
+// are skipped without being examined.
+// Returns FALSE if errorCode is (or becomes) a failure.
+UBool
+FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(s, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+    int32_t start=0;
+    while(start<s.length()) {
+        const int32_t limit=set.span(s, start, spanCondition);
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            const UBool spanIsNormalized=
+                norm2.isNormalized(s.tempSubStringBetween(start, limit), errorCode);
+            if(!spanIsNormalized || U_FAILURE(errorCode)) {
+                return FALSE;
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        } else {
+            spanCondition=USET_SPAN_SIMPLE;
+        }
+        start=limit;
+    }
+    return TRUE;
+}
+
+// Quick check over the spans of s that are taken with USET_SPAN_SIMPLE:
+// returns the first norm2.quickCheck() result that is not UNORM_YES
+// (or whatever result was obtained when errorCode became a failure),
+// and UNORM_YES if all such spans check out.
+// Returns UNORM_MAYBE if errorCode is a failure on entry or s fails
+// uprv_checkCanGetBuffer().
+UNormalizationCheckResult
+FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(s, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return UNORM_MAYBE;
+    }
+    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+    int32_t start=0;
+    while(start<s.length()) {
+        const int32_t limit=set.span(s, start, spanCondition);
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            const UNormalizationCheckResult spanResult=
+                norm2.quickCheck(s.tempSubStringBetween(start, limit), errorCode);
+            if(U_FAILURE(errorCode) || spanResult!=UNORM_YES) {
+                return spanResult;
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        } else {
+            spanCondition=USET_SPAN_SIMPLE;
+        }
+        start=limit;
+    }
+    return UNORM_YES;
+}
+
+// Returns the end index (relative to the start of s) up to which s passes
+// norm2.spanQuickCheckYes(), where only spans taken with USET_SPAN_SIMPLE
+// are examined and spans taken with USET_SPAN_NOT_CONTAINED are skipped.
+// Returns s.length() if no examined span stops early, and 0 if errorCode is
+// a failure on entry or s fails uprv_checkCanGetBuffer().
+int32_t
+FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(s, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return 0;
+    }
+    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+    int32_t start=0;
+    while(start<s.length()) {
+        const int32_t limit=set.span(s, start, spanCondition);
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            // norm2 reports an index relative to the substring; rebase it onto s.
+            const int32_t spanYesLength=
+                norm2.spanQuickCheckYes(s.tempSubStringBetween(start, limit), errorCode);
+            const int32_t yesLimit=start+spanYesLength;
+            if(U_FAILURE(errorCode) || yesLimit<limit) {
+                return yesLimit;
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        } else {
+            spanCondition=USET_SPAN_SIMPLE;
+        }
+        start=limit;
+    }
+    return s.length();
+}
+
+// A code point outside the filter set always has a boundary before it;
+// for one inside the set, norm2 decides.
+UBool
+FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
+    if(!set.contains(c)) {
+        return TRUE;
+    }
+    return norm2.hasBoundaryBefore(c) ? TRUE : FALSE;
+}
+
+// A code point outside the filter set always has a boundary after it;
+// for one inside the set, norm2 decides.
+UBool
+FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
+    if(!set.contains(c)) {
+        return TRUE;
+    }
+    return norm2.hasBoundaryAfter(c) ? TRUE : FALSE;
+}
+
+// A code point outside the filter set is always reported as inert;
+// for one inside the set, norm2 decides.
+UBool
+FilteredNormalizer2::isInert(UChar32 c) const {
+    if(!set.contains(c)) {
+        return TRUE;
+    }
+    return norm2.isInert(c) ? TRUE : FALSE;
+}
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FilteredNormalizer2)
+
+/**
+ * C API: creates a FilteredNormalizer2 that wraps norm2 and the filter set.
+ *
+ * @param norm2      the normalizer to wrap; must not be NULL
+ * @param filterSet  the set used as the filter; must not be NULL
+ * @param pErrorCode in/out error code; nothing is done if it already indicates
+ *                   a failure. Set to U_ILLEGAL_ARGUMENT_ERROR if norm2 or
+ *                   filterSet is NULL, and to U_MEMORY_ALLOCATION_ERROR if the
+ *                   new object cannot be allocated.
+ * @return the new object cast to UNormalizer2 *, or NULL on failure.
+ *         The object is allocated with new here; presumably the caller
+ *         releases it via the matching unorm2 close function -- confirm.
+ */
+U_DRAFT UNormalizer2 * U_EXPORT2
+unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    // Both pointers are dereferenced below to bind C++ references,
+    // so a NULL for either one must be rejected up front.
+    if(norm2==NULL || filterSet==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
+                                             *UnicodeSet::fromUSet(filterSet));
+    if(fn2==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+    return (UNormalizer2 *)fn2;
+}
+
+U_NAMESPACE_END
+
+#endif  // !UCONFIG_NO_NORMALIZATION
--- a/icu4c/source/common/mutex.cpp
+++ b/icu4c/source/common/mutex.cpp
@ -1,18 +1,91 @@
-/**
+/*
 *******************************************************************************
-* Copyright (C) 2008, International Business Machines Corporation.       *
-* All Rights Reserved.                                                        *
+*
+*   Copyright (C) 2008-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
 *******************************************************************************
+*   file name:  mutex.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
 */

 #include "unicode/utypes.h"
+#include "mutex.h"
+
+U_NAMESPACE_BEGIN
+
+// Returns fInstance, creating it via instantiator(context, errorCode) if it is still NULL.
+// duplicate is always reset to NULL first; on return it holds an object that was
+// created by this call but not stored in fInstance (the caller must delete it).
+// Returns NULL without calling the instantiator if errorCode is already a failure.
+void *SimpleSingleton::getInstance(InstantiatorFn *instantiator, const void *context,
+                                   void *&duplicate,
+                                   UErrorCode &errorCode) {
+    duplicate=NULL;
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    void *instance;
+    // Read the current value of fInstance into the local
+    // (presumably under the global mutex; see UMTX_CHECK -- confirm).
+    UMTX_CHECK(NULL, fInstance, instance);
+    if(instance!=NULL) {
+        return instance;
+    } else {
+        // The instantiator runs before the Mutex object below is constructed,
+        // so more than one thread may create an object; only one is kept.
+        instance=instantiator(context, errorCode);
+        Mutex mutex;
+        if(fInstance==NULL && U_SUCCESS(errorCode)) {
+            fInstance=instance;
+        } else {
+            // Either fInstance was set in the meantime or the instantiation failed:
+            // hand whatever was created back to the caller for deletion.
+            duplicate=instance;
+        }
+        return fInstance;
+    }
+}
+
+// Returns fInstance, attempting to create it via instantiator(context, errorCode)
+// only while fHaveInstance is 0. The outcome of the first completed attempt is
+// recorded: fHaveInstance becomes 1 (fInstance set) or -1 (fErrorCode set),
+// and later calls return the instance or report the recorded error.
+// duplicate is always reset to NULL first; on return it holds an object that was
+// created by this call but not stored in fInstance (the caller must delete it).
+// Returns NULL without calling the instantiator if errorCode is already a failure.
+void *TriStateSingleton::getInstance(InstantiatorFn *instantiator, const void *context,
+                                     void *&duplicate,
+                                     UErrorCode &errorCode) {
+    duplicate=NULL;
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    int8_t haveInstance;
+    // Read the current state into the local
+    // (presumably under the global mutex; see UMTX_CHECK -- confirm).
+    UMTX_CHECK(NULL, fHaveInstance, haveInstance);
+    if(haveInstance>0) {
+        return fInstance;  // instance was created
+    } else if(haveInstance<0) {
+        errorCode=fErrorCode;  // instance creation failed
+        return NULL;
+    } else /* haveInstance==0 */ {
+        // The instantiator runs before the Mutex object below is constructed,
+        // so more than one thread may get here; only the first result is recorded.
+        void *instance=instantiator(context, errorCode);
+        Mutex mutex;
+        if(fHaveInstance==0) {
+            if(U_SUCCESS(errorCode)) {
+                fInstance=instance;
+                instance=NULL;  // now stored in fInstance: must not be reported as a duplicate
+                fHaveInstance=1;
+            } else {
+                fErrorCode=errorCode;
+                fHaveInstance=-1;
+            }
+        } else {
+            // The state was decided in the meantime:
+            // report the recorded error code instead of this call's own.
+            errorCode=fErrorCode;
+        }
+        // Non-NULL only if this call created an object that was not stored.
+        duplicate=instance;
+        return fInstance;
+    }
+}
+
+// Puts all three fields back to their "not attempted yet" values
+// (the same values that STATIC_TRI_STATE_SINGLETON initializes them with).
+// Does not delete the instance and does not take a mutex.
+void TriStateSingleton::reset() {
+    fHaveInstance=0;
+    fErrorCode=U_ZERO_ERROR;
+    fInstance=NULL;
+}

 #if UCONFIG_NO_SERVICE

 /* If UCONFIG_NO_SERVICE, then there is no invocation of Mutex elsewhere in
   common, so add one here to force an export */
-#include "mutex.h"
 static Mutex *aMutex = 0;

 /* UCONFIG_NO_SERVICE */
 #endif
+
+U_NAMESPACE_END
--- a/icu4c/source/common/mutex.h
+++ b/icu4c/source/common/mutex.h
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 1997-2009, International Business Machines
+*   Copyright (C) 1997-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -71,6 +71,128 @@ inline Mutex::~Mutex()
  umtx_unlock(fMutex);
 }

+// common code for singletons ---------------------------------------------- ***
+
+/**
+ * Function pointer for the instantiator parameter of
+ * SimpleSingleton::getInstance() and TriStateSingleton::getInstance().
+ * The function creates some object, optionally using the context parameter.
+ * The function need not check for U_FAILURE(errorCode).
+ */
+typedef void *InstantiatorFn(const void *context, UErrorCode &errorCode);
+
+/**
+ * Singleton struct with shared instantiation/mutexing code.
+ * Simple: Does not remember if a previous instantiation failed.
+ * Best used if the instantiation can really only fail with an out-of-memory error,
+ * otherwise use a TriStateSingleton.
+ * Best used via SimpleSingletonWrapper or similar.
+ * Define a static SimpleSingleton instance via the STATIC_SIMPLE_SINGLETON macro.
+ */
+struct SimpleSingleton {
+    // The singleton object, or NULL while none has been stored.
+    // Set by getInstance(), cleared by reset().
+    // This is the only field; STATIC_SIMPLE_SINGLETON initializes it with { NULL }.
+    void *fInstance;
+
+    /**
+     * Returns the singleton instance, or NULL if it could not be created.
+     * Calls the instantiator with the context if the instance has not been
+     * created yet. In a race condition, the duplicate may not be NULL.
+     * The caller must delete the duplicate.
+     * The caller need not initialize the duplicate before the call.
+     */
+    void *getInstance(InstantiatorFn *instantiator, const void *context,
+                      void *&duplicate,
+                      UErrorCode &errorCode);
+    /**
+     * Resets the fields. The caller must have deleted the singleton instance.
+     * Not mutexed.
+     * Call this from a cleanup function.
+     */
+    void reset() { fInstance=NULL; }
+};
+
+#define STATIC_SIMPLE_SINGLETON(name) static SimpleSingleton name={ NULL }
+
+/**
+ * Typed convenience wrapper around a SimpleSingleton.
+ * Meant to be created temporarily on the stack; it only holds a reference
+ * to the SimpleSingleton.
+ * Casts the stored void * to T * and deletes the duplicate object, if any,
+ * that SimpleSingleton::getInstance() hands back.
+ */
+template<typename T>
+class SimpleSingletonWrapper {
+public:
+    SimpleSingletonWrapper(SimpleSingleton &s) : singleton(s) {}
+    /** Deletes the stored instance as a T and resets the singleton. */
+    void deleteInstance() {
+        T *stored=(T *)singleton.fInstance;
+        delete stored;
+        singleton.reset();
+    }
+    /** Returns the singleton instance as a T *, deleting any duplicate. */
+    T *getInstance(InstantiatorFn *instantiator, const void *context,
+                   UErrorCode &errorCode) {
+        void *unused;
+        void *result=singleton.getInstance(instantiator, context, unused, errorCode);
+        delete (T *)unused;
+        return (T *)result;
+    }
+private:
+    SimpleSingleton &singleton;
+};
+
+/**
+ * Singleton struct with shared instantiation/mutexing code.
+ * Tri-state: Instantiation succeeded/failed/not attempted yet.
+ * Best used via TriStateSingletonWrapper or similar.
+ * Define a static TriStateSingleton instance via the STATIC_TRI_STATE_SINGLETON macro.
+ */
+struct TriStateSingleton {
+    // Field order matters: STATIC_TRI_STATE_SINGLETON initializes them
+    // positionally with { NULL, U_ZERO_ERROR, 0 }.
+
+    // The singleton object; set by getInstance() when creation succeeds.
+    void *fInstance;
+    // The error code recorded by getInstance() when creation failed;
+    // it is reported to callers once fHaveInstance is negative.
+    UErrorCode fErrorCode;
+    // State: 0 = creation not attempted yet, 1 = instance created, -1 = creation failed.
+    int8_t fHaveInstance;
+
+    /**
+     * Returns the singleton instance, or NULL if it could not be created.
+     * Calls the instantiator with the context if the instance has not been
+     * created yet. In a race condition, the duplicate may not be NULL.
+     * The caller must delete the duplicate.
+     * The caller need not initialize the duplicate before the call.
+     * The singleton creation is only attempted once. If it fails,
+     * the singleton will then always return NULL.
+     */
+    void *getInstance(InstantiatorFn *instantiator, const void *context,
+                      void *&duplicate,
+                      UErrorCode &errorCode);
+    /**
+     * Resets the fields. The caller must have deleted the singleton instance.
+     * Not mutexed.
+     * Call this from a cleanup function.
+     */
+    void reset();
+};
+
+#define STATIC_TRI_STATE_SINGLETON(name) static TriStateSingleton name={ NULL, U_ZERO_ERROR, 0 }
+
+/**
+ * Typed convenience wrapper around a TriStateSingleton.
+ * Meant to be created temporarily on the stack; it only holds a reference
+ * to the TriStateSingleton.
+ * Casts the stored void * to T * and deletes the duplicate object, if any,
+ * that TriStateSingleton::getInstance() hands back.
+ */
+template<typename T>
+class TriStateSingletonWrapper {
+public:
+    TriStateSingletonWrapper(TriStateSingleton &s) : singleton(s) {}
+    /** Deletes the stored instance as a T and resets the singleton. */
+    void deleteInstance() {
+        T *stored=(T *)singleton.fInstance;
+        delete stored;
+        singleton.reset();
+    }
+    /** Returns the singleton instance as a T *, deleting any duplicate. */
+    T *getInstance(InstantiatorFn *instantiator, const void *context,
+                   UErrorCode &errorCode) {
+        void *unused;
+        void *result=singleton.getInstance(instantiator, context, unused, errorCode);
+        delete (T *)unused;
+        return (T *)result;
+    }
+private:
+    TriStateSingleton &singleton;
+};
+
 U_NAMESPACE_END

 #endif //_MUTEX_
--- a/icu4c/source/common/normalizer2.cpp
+++ b/icu4c/source/common/normalizer2.cpp
@ -0,0 +1,744 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  normalizer2.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009nov22
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/localpointer.h"
+#include "unicode/normalizer2.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "cpputils.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "normalizer2impl.h"
+#include "ucln_cmn.h"
+
+U_NAMESPACE_BEGIN
+
+// Public API dispatch via Normalizer2 subclasses -------------------------- ***
+
+// Normalizer2 implementation for the old UNORM_NONE.
+class NoopNormalizer2 : public Normalizer2 {
+    // Copies src to dest unchanged.
+    // dest and src must be different objects; otherwise errorCode is set to
+    // U_ILLEGAL_ARGUMENT_ERROR. Does nothing if errorCode already indicates a failure.
+    virtual UnicodeString &
+    normalize(const UnicodeString &src,
+              UnicodeString &dest,
+              UErrorCode &errorCode) const {
+        if(U_SUCCESS(errorCode)) {
+            if(&dest!=&src) {
+                dest=src;
+            } else {
+                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            }
+        }
+        return dest;
+    }
+    // Since no normalization takes place, this is the same operation as append(),
+    // including the rejection of first and second being the same object.
+    // The qualified call avoids a second virtual dispatch.
+    virtual UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UErrorCode &errorCode) const {
+        return NoopNormalizer2::append(first, second, errorCode);
+    }
+    // Appends second to first unchanged.
+    // first and second must be different objects; otherwise errorCode is set to
+    // U_ILLEGAL_ARGUMENT_ERROR. Does nothing if errorCode already indicates a failure.
+    virtual UnicodeString &
+    append(UnicodeString &first,
+           const UnicodeString &second,
+           UErrorCode &errorCode) const {
+        if(U_SUCCESS(errorCode)) {
+            if(&first!=&second) {
+                first.append(second);
+            } else {
+                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            }
+        }
+        return first;
+    }
+    // Every string counts as normalized: always TRUE, errorCode is not touched.
+    virtual UBool
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
+        return TRUE;
+    }
+    // Always UNORM_YES, errorCode is not touched.
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
+        return UNORM_YES;
+    }
+    // The "yes" span is always the whole string.
+    virtual int32_t
+    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
+        return s.length();
+    }
+    // Every code point has boundaries on both sides and is inert.
+    virtual UBool hasBoundaryBefore(UChar32 c) const { return TRUE; }
+    virtual UBool hasBoundaryAfter(UChar32 c) const { return TRUE; }
+    virtual UBool isInert(UChar32 c) const { return TRUE; }
+
+    static UClassID U_EXPORT2 getStaticClassID();
+    virtual UClassID getDynamicClassID() const;
+};
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoopNormalizer2)
+
+// Intermediate class:
+// Has Normalizer2Impl and does boilerplate argument checking and setup.
+class Normalizer2WithImpl : public Normalizer2 {
+public:
+    // Keeps a reference to ni (no copy is made).
+    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
+
+    // normalize
+    // UnicodeString front end: validates the arguments, empties dest and
+    // delegates to the pointer-range normalize() implemented by subclasses.
+    // dest is set to bogus if errorCode is already a failure or the arguments are bad.
+    virtual UnicodeString &
+    normalize(const UnicodeString &src,
+              UnicodeString &dest,
+              UErrorCode &errorCode) const {
+        if(U_FAILURE(errorCode)) {
+            dest.setToBogus();
+            return dest;
+        }
+        const UChar *sArray=src.getBuffer();
+        // Reject dest aliasing src, and a src whose buffer cannot be obtained.
+        if(&dest==&src || sArray==NULL) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            dest.setToBogus();
+            return dest;
+        }
+        dest.remove();
+        ReorderingBuffer buffer(impl, dest);
+        if(buffer.init(src.length(), errorCode)) {
+            normalize(sArray, sArray+src.length(), buffer, errorCode);
+        }
+        return dest;
+    }
+    // Mode-specific normalization of [src, limit[ into buffer.
+    // The C API passes limit==NULL for NUL-terminated input.
+    virtual void
+    normalize(const UChar *src, const UChar *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
+
+    // normalize and append
+    // Both public entry points share the four-argument helper below;
+    // they differ only in the doNormalize flag.
+    virtual UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UErrorCode &errorCode) const {
+        return normalizeSecondAndAppend(first, second, TRUE, errorCode);
+    }
+    virtual UnicodeString &
+    append(UnicodeString &first,
+           const UnicodeString &second,
+           UErrorCode &errorCode) const {
+        return normalizeSecondAndAppend(first, second, FALSE, errorCode);
+    }
+    // Shared implementation: validates the arguments, wraps first in a
+    // ReorderingBuffer and delegates to the subclass normalizeAndAppend().
+    // first is returned unchanged on argument errors.
+    UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UBool doNormalize,
+                             UErrorCode &errorCode) const {
+        // NOTE(review): helper defined elsewhere; presumably sets errorCode when
+        // first's buffer cannot be obtained -- confirm.
+        uprv_checkCanGetBuffer(first, errorCode);
+        if(U_FAILURE(errorCode)) {
+            return first;
+        }
+        const UChar *secondArray=second.getBuffer();
+        // Reject first aliasing second, and a second whose buffer cannot be obtained.
+        if(&first==&second || secondArray==NULL) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return first;
+        }
+        ReorderingBuffer buffer(impl, first);
+        if(buffer.init(first.length()+second.length(), errorCode)) {
+            normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
+                               buffer, errorCode);
+        }
+        return first;
+    }
+    // Mode-specific append of [src, limit[ to the text already in buffer.
+    virtual void
+    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
+
+    // quick checks
+    // Default: s is normalized if the quick-check-yes span covers the whole string.
+    virtual UBool
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
+        if(U_FAILURE(errorCode)) {
+            return FALSE;
+        }
+        const UChar *sArray=s.getBuffer();
+        if(sArray==NULL) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return FALSE;
+        }
+        const UChar *sLimit=sArray+s.length();
+        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
+    }
+    // Default: only UNORM_YES or UNORM_NO, derived from this class's own
+    // isNormalized() (the qualified call bypasses subclass overrides).
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
+        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
+    }
+    // Returns the length of the quick-check-yes prefix of s as an index,
+    // or 0 on failure or if s has no obtainable buffer.
+    virtual int32_t
+    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
+        if(U_FAILURE(errorCode)) {
+            return 0;
+        }
+        const UChar *sArray=s.getBuffer();
+        if(sArray==NULL) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
+    }
+    // Mode-specific span function; returns a pointer into [src, limit].
+    virtual const UChar *
+    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
+
+    // Per-code point quick check value; the default is UNORM_YES.
+    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
+        return UNORM_YES;
+    }
+
+    static UClassID U_EXPORT2 getStaticClassID();
+    virtual UClassID getDynamicClassID() const;
+
+    // Public so that the C API functions in this file can build a
+    // ReorderingBuffer directly on top of it.
+    const Normalizer2Impl &impl;
+};
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer2WithImpl)
+
+// Normalizer2WithImpl subclass that forwards to the decompose functions of
+// Normalizer2Impl.
+class DecomposeNormalizer2 : public Normalizer2WithImpl {
+public:
+    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
+
+    // Decomposes [src, limit[ into buffer.
+    virtual void
+    normalize(const UChar *src, const UChar *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
+        impl.decompose(src, limit, &buffer, errorCode);
+    }
+    virtual void
+    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
+        impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode);
+    }
+    // Same decompose() call as normalize(), but with a NULL buffer;
+    // the returned pointer is used as the end of the quick-check-yes span.
+    virtual const UChar *
+    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
+        return impl.decompose(src, limit, NULL, errorCode);
+    }
+    // Decomposition quick check has only two outcomes here: UNORM_YES or UNORM_NO.
+    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
+        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
+    }
+    // hasDecompBoundary(c, TRUE) is used for "before", (c, FALSE) for "after".
+    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
+    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
+    virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
+};
+
+// Normalizer2WithImpl subclass that forwards to the compose functions of
+// Normalizer2Impl. The onlyContiguous flag is passed through to every call;
+// Norm2AllModes uses FALSE for its "comp" member and TRUE for its "fcc" member.
+class ComposeNormalizer2 : public Normalizer2WithImpl {
+public:
+    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
+        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
+
+    // Composes [src, limit[ into buffer (fourth compose() argument TRUE).
+    virtual void
+    normalize(const UChar *src, const UChar *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
+        impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
+    }
+    virtual void
+    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
+        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode);
+    }
+
+    // Overrides the span-based default: runs compose() with its fourth argument
+    // FALSE on a small temporary buffer and returns compose()'s result.
+    // NOTE(review): presumably FALSE selects a "check only" mode of compose() --
+    // confirm in normalizer2impl.
+    virtual UBool
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
+        if(U_FAILURE(errorCode)) {
+            return FALSE;
+        }
+        const UChar *sArray=s.getBuffer();
+        if(sArray==NULL) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return FALSE;
+        }
+        UnicodeString temp;
+        ReorderingBuffer buffer(impl, temp);
+        if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
+            return FALSE;
+        }
+        return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
+    }
+    // Overrides the YES/NO-only default: composeQuickCheck() may change
+    // qcResult from its initial UNORM_YES. Returns UNORM_MAYBE on errors.
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
+        if(U_FAILURE(errorCode)) {
+            return UNORM_MAYBE;
+        }
+        const UChar *sArray=s.getBuffer();
+        if(sArray==NULL) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return UNORM_MAYBE;
+        }
+        UNormalizationCheckResult qcResult=UNORM_YES;
+        impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
+        return qcResult;
+    }
+    // Same composeQuickCheck() call without a result pointer;
+    // the returned pointer is used as the end of the quick-check-yes span.
+    // errorCode is not used by this override.
+    virtual const UChar *
+    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
+        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
+    }
+    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
+        return impl.getCompQuickCheck(impl.getNorm16(c));
+    }
+    virtual UBool hasBoundaryBefore(UChar32 c) const {
+        return impl.hasCompBoundaryBefore(c);
+    }
+    // hasBoundaryAfter() and isInert() differ only in the last argument
+    // of hasCompBoundaryAfter() (FALSE vs. TRUE).
+    virtual UBool hasBoundaryAfter(UChar32 c) const {
+        return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
+    }
+    virtual UBool isInert(UChar32 c) const {
+        return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
+    }
+private:
+    UBool onlyContiguous;
+};
+
+// Normalizer2WithImpl subclass that forwards to the FCD functions of
+// Normalizer2Impl. It does not override getQuickCheck(), so the base class
+// default (UNORM_YES) applies.
+class FCDNormalizer2 : public Normalizer2WithImpl {
+public:
+    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
+
+    // Runs makeFCD() on [src, limit[ with buffer as the destination.
+    virtual void
+    normalize(const UChar *src, const UChar *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
+        impl.makeFCD(src, limit, &buffer, errorCode);
+    }
+    virtual void
+    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
+        impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode);
+    }
+    // Same makeFCD() call as normalize(), but with a NULL buffer;
+    // the returned pointer is used as the end of the quick-check-yes span.
+    virtual const UChar *
+    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
+        return impl.makeFCD(src, limit, NULL, errorCode);
+    }
+    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
+    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
+    virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
+};
+
+// instance cache ---------------------------------------------------------- ***
+
+// Bundles one Normalizer2Impl with the four Normalizer2 objects that operate on it.
+// This is the unit that the instance cache stores per data file name.
+struct Norm2AllModes : public UMemory {
+    // Allocates a Norm2AllModes and loads its impl from packageName/name.
+    // Returns NULL and sets errorCode on failure.
+    static Norm2AllModes *createInstance(const char *packageName,
+                                         const char *name,
+                                         UErrorCode &errorCode);
+    // Each normalizer keeps a reference to impl; comp and fcc differ only
+    // in the onlyContiguous flag (FALSE vs. TRUE).
+    Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
+
+    // impl is declared first, ahead of the members that reference it.
+    Normalizer2Impl impl;
+    ComposeNormalizer2 comp;
+    DecomposeNormalizer2 decomp;
+    FCDNormalizer2 fcd;
+    ComposeNormalizer2 fcc;
+};
+
+// Creates a Norm2AllModes and loads its Normalizer2Impl data.
+// Returns NULL if errorCode is already a failure, if the allocation fails
+// (U_MEMORY_ALLOCATION_ERROR) or if impl.load() reports a failure.
+Norm2AllModes *
+Norm2AllModes::createInstance(const char *packageName,
+                              const char *name,
+                              UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    LocalPointer<Norm2AllModes> modes(new Norm2AllModes);
+    if(modes.isNull()) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    modes->impl.load(packageName, name, errorCode);
+    if(U_FAILURE(errorCode)) {
+        // The LocalPointer still owns the object and deletes it on return.
+        return NULL;
+    }
+    return modes.orphan();
+}
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV uprv_normalizer2_cleanup();
+U_CDECL_END
+
+// Stack helper that ties a static TriStateSingleton to the data file name
+// from which its Norm2AllModes is created on first use.
+class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
+public:
+    Norm2AllModesSingleton(TriStateSingleton &storage, const char *dataName) :
+        TriStateSingletonWrapper<Norm2AllModes>(storage), name(dataName) {}
+    // Returns the cached Norm2AllModes, creating it from the data file name if necessary.
+    Norm2AllModes *getInstance(UErrorCode &errorCode) {
+        const void *context=name;
+        return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, context, errorCode);
+    }
+private:
+    // Instantiator callback: context is the data file name.
+    // Registers the cleanup function before creating the instance.
+    static void *createInstance(const void *context, UErrorCode &errorCode) {
+        const char *dataName=static_cast<const char *>(context);
+        ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
+        return Norm2AllModes::createInstance(NULL, dataName, errorCode);
+    }
+
+    const char *name;
+};
+
+// One cache slot per built-in data file; in this file they are always paired
+// with the names "nfc", "nfkc" and "nfkc_cf" respectively.
+STATIC_TRI_STATE_SINGLETON(nfcSingleton);
+STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
+STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
+
+// Stack helper for the cached no-op Normalizer2 instance.
+class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
+public:
+    Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
+    // Returns the cached NoopNormalizer2, creating it if necessary.
+    Normalizer2 *getInstance(UErrorCode &errorCode) {
+        return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
+    }
+private:
+    // Instantiator callback; context is unused.
+    // Sets U_MEMORY_ALLOCATION_ERROR if the allocation fails.
+    // The cleanup function is registered whether or not the allocation succeeded.
+    static void *createInstance(const void *context, UErrorCode &errorCode) {
+        Normalizer2 *noop=new NoopNormalizer2;
+        if(noop==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+        }
+        ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
+        return noop;
+    }
+};
+
+// Cache slot for the no-op instance.
+STATIC_SIMPLE_SINGLETON(noopSingleton);
+
+U_CDECL_BEGIN
+
+// Library cleanup callback (registered via ucln_common_registerCleanup()):
+// deletes all cached instances and resets their singletons.
+// The data file name is passed as NULL because deleteInstance() does not use it.
+// Always returns TRUE.
+static UBool U_CALLCONV uprv_normalizer2_cleanup() {
+    Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
+    Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
+    Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
+    Norm2Singleton(noopSingleton).deleteInstance();
+    return TRUE;
+}
+
+U_CDECL_END
+
+// Returns the composing normalizer of the cached "nfc" data, or NULL on failure.
+const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfcSingleton, "nfc");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    return &modes->comp;
+}
+
+// Returns the decomposing normalizer of the cached "nfc" data, or NULL on failure.
+const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfcSingleton, "nfc");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    return &modes->decomp;
+}
+
+// Returns the FCD normalizer of the cached "nfc" data, or NULL if the data
+// could not be loaded. getFCDTrie() is called first; its return value is ignored,
+// so a failure there is visible to the caller only through errorCode.
+const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfcSingleton, "nfc");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    modes->impl.getFCDTrie(errorCode);
+    return &modes->fcd;
+}
+
+// Returns the contiguous-composition normalizer of the cached "nfc" data,
+// or NULL on failure.
+const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfcSingleton, "nfc");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    return &modes->fcc;
+}
+
+// Returns the composing normalizer of the cached "nfkc" data, or NULL on failure.
+const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfkcSingleton, "nfkc");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    return &modes->comp;
+}
+
+// Returns the decomposing normalizer of the cached "nfkc" data, or NULL on failure.
+const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfkcSingleton, "nfkc");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    return &modes->decomp;
+}
+
+// Returns the composing normalizer of the cached "nfkc_cf" data, or NULL on failure.
+const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfkc_cfSingleton, "nfkc_cf");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    return &modes->comp;
+}
+
+// Returns the cached no-op normalizer, creating it on first use.
+const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
+    Norm2Singleton singleton(noopSingleton);
+    return singleton.getInstance(errorCode);
+}
+
+// Maps an old-style UNormalizationMode to the matching cached Normalizer2.
+// Any mode without an explicit case (such as UNORM_NONE) yields the no-op instance.
+// Returns NULL if errorCode is already a failure.
+const Normalizer2 *
+Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    switch(mode) {
+    case UNORM_NFC:
+        return getNFCInstance(errorCode);
+    case UNORM_NFD:
+        return getNFDInstance(errorCode);
+    case UNORM_NFKC:
+        return getNFKCInstance(errorCode);
+    case UNORM_NFKD:
+        return getNFKDInstance(errorCode);
+    case UNORM_FCD:
+        return getFCDInstance(errorCode);
+    default:  // UNORM_NONE
+        return getNoopInstance(errorCode);
+    }
+}
+
+// Returns the Normalizer2Impl of the cached "nfc" data, or NULL on failure.
+const Normalizer2Impl *
+Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfcSingleton, "nfc");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    return &modes->impl;
+}
+
+// Returns the Normalizer2Impl of the cached "nfkc" data, or NULL on failure.
+const Normalizer2Impl *
+Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfkcSingleton, "nfkc");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    return &modes->impl;
+}
+
+// Returns the Normalizer2Impl of the cached "nfkc_cf" data, or NULL on failure.
+const Normalizer2Impl *
+Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfkc_cfSingleton, "nfkc_cf");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    return &modes->impl;
+}
+
+// Returns the FCD trie of the cached "nfc" data,
+// or NULL if that data could not be loaded.
+const UTrie2 *
+Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
+    Norm2AllModesSingleton singleton(nfcSingleton, "nfc");
+    Norm2AllModes *modes=singleton.getInstance(errorCode);
+    if(modes==NULL) {
+        return NULL;
+    }
+    return modes->impl.getFCDTrie(errorCode);
+}
+
+// Returns a cached Normalizer2 for the given data file and mode.
+// Currently only the built-in data (packageName==NULL with name "nfc", "nfkc"
+// or "nfkc_cf") is supported; any other combination sets U_UNSUPPORTED_ERROR.
+// A NULL name sets U_ILLEGAL_ARGUMENT_ERROR.
+// Returns NULL on any failure, including an errorCode that is already a failure.
+// For UNORM2_FCD, getFCDTrie() is called before the instance is returned;
+// a failure there is reported only through errorCode.
+const Normalizer2 *
+Normalizer2::getInstance(const char *packageName,
+                         const char *name,
+                         UNormalization2Mode mode,
+                         UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    if(name==NULL) {
+        // uprv_strcmp() below must not be handed a NULL pointer.
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    if(packageName==NULL) {
+        Norm2AllModes *allModes=NULL;
+        if(0==uprv_strcmp(name, "nfc")) {
+            allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
+        } else if(0==uprv_strcmp(name, "nfkc")) {
+            allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
+        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
+            allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
+        }
+        if(allModes!=NULL) {
+            switch(mode) {
+            case UNORM2_COMPOSE:
+                return &allModes->comp;
+            case UNORM2_DECOMPOSE:
+                return &allModes->decomp;
+            case UNORM2_FCD:
+                allModes->impl.getFCDTrie(errorCode);
+                return &allModes->fcd;
+            case UNORM2_COMPOSE_CONTIGUOUS:
+                return &allModes->fcc;
+            default:
+                break;  // do nothing
+            }
+        }
+    }
+    // Not a built-in name/mode (or loading failed, in which case the
+    // existing failure code is kept).
+    if(U_SUCCESS(errorCode)) {
+        // TODO: Real loading and caching...
+        errorCode=U_UNSUPPORTED_ERROR;
+    }
+    return NULL;
+}
+
+UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Normalizer2)
+
+// C API ------------------------------------------------------------------- ***
+
+// C wrapper for Normalizer2::getInstance(); the result is the same object
+// viewed as an opaque UNormalizer2. pErrorCode is dereferenced unconditionally.
+U_DRAFT const UNormalizer2 * U_EXPORT2
+unorm2_getInstance(const char *packageName,
+                   const char *name,
+                   UNormalization2Mode mode,
+                   UErrorCode *pErrorCode) {
+    const Normalizer2 *norm2=Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
+    return (const UNormalizer2 *)norm2;
+}
+
+// Deletes norm2 as a Normalizer2.
+// NOTE(review): every non-NULL instance that unorm2_getInstance() can return in
+// this file is a member of a cached Norm2AllModes (e.g. &allModes->comp), not an
+// individually heap-allocated object, so callers must not pass such instances
+// here -- confirm the intended ownership contract in unorm2.h.
+U_DRAFT void U_EXPORT2
+unorm2_close(UNormalizer2 *norm2) {
+    delete (Normalizer2 *)norm2;
+}
+
+// C API: normalizes src[0..length[ (NUL-terminated if length==-1) into
+// dest[0..capacity[ and returns the result of UnicodeString::extract().
+// Returns 0 if *pErrorCode is already a failure or the arguments are invalid
+// (U_ILLEGAL_ARGUMENT_ERROR): NULL src, length<-1, negative capacity,
+// NULL dest with capacity>0, or src==dest.
+U_DRAFT int32_t U_EXPORT2
+unorm2_normalize(const UNormalizer2 *norm2,
+                 const UChar *src, int32_t length,
+                 UChar *dest, int32_t capacity,
+                 UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(src==NULL || length<-1 || capacity<0 || (dest==NULL && capacity>0) || src==dest) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // Empty string built on the caller's dest buffer.
+    UnicodeString destString(dest, 0, capacity);
+    const Normalizer2 *n2=(const Normalizer2 *)norm2;
+    // The Normalizer2WithImpl subclasses in this file declare no class ID of
+    // their own, so this test is intended to match all of them.
+    if(n2->getDynamicClassID()==Normalizer2WithImpl::getStaticClassID()) {
+        // Avoid duplicate argument checking and support NUL-terminated src.
+        const Normalizer2WithImpl *n2wi=(const Normalizer2WithImpl *)n2;
+        ReorderingBuffer buffer(n2wi->impl, destString);
+        if(buffer.init(length, *pErrorCode)) {
+            // limit==NULL tells the implementation that src is NUL-terminated.
+            n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
+        }
+    } else {
+        // Generic path: read-only alias of src, normalized via the public C++ API.
+        UnicodeString srcString(length<0, src, length);
+        n2->normalize(srcString, destString, *pErrorCode);
+    }
+    return destString.extract(dest, capacity, *pErrorCode);
+}
+
+// Shared implementation of unorm2_normalizeSecondAndAppend() (doNormalize=TRUE)
+// and unorm2_append() (doNormalize=FALSE).
+// Appends second[0..secondLength[ to the text in first (buffer capacity
+// firstCapacity) and returns the result of UnicodeString::extract().
+// Either length may be -1 for NUL-terminated text.
+// Returns 0 if *pErrorCode is already a failure or the arguments are invalid
+// (U_ILLEGAL_ARGUMENT_ERROR).
+static int32_t
+normalizeSecondAndAppend(const UNormalizer2 *norm2,
+                         UChar *first, int32_t firstLength, int32_t firstCapacity,
+                         const UChar *second, int32_t secondLength,
+                         UBool doNormalize,
+                         UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if( second==NULL || secondLength<-1 ||
+        firstCapacity<0 || (first==NULL && firstCapacity>0) || firstLength<-1 ||
+        first==second
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // String built on the caller's first buffer, with its current contents.
+    UnicodeString firstString(first, firstLength, firstCapacity);
+    const Normalizer2 *n2=(const Normalizer2 *)norm2;
+    // The Normalizer2WithImpl subclasses in this file declare no class ID of
+    // their own, so this test is intended to match all of them.
+    if(n2->getDynamicClassID()==Normalizer2WithImpl::getStaticClassID()) {
+        // Avoid duplicate argument checking and support NUL-terminated src.
+        const Normalizer2WithImpl *n2wi=(const Normalizer2WithImpl *)n2;
+        ReorderingBuffer buffer(n2wi->impl, firstString);
+        // Each length is >=-1, so the sum plus one is >=-1 as well.
+        if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
+            n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
+                                     doNormalize, buffer, *pErrorCode);
+        }
+    } else {
+        // Generic path: read-only alias of second, appended via the public C++ API.
+        UnicodeString secondString(secondLength<0, second, secondLength);
+        if(doNormalize) {
+            n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
+        } else {
+            n2->append(firstString, secondString, *pErrorCode);
+        }
+    }
+    return firstString.extract(first, firstCapacity, *pErrorCode);
+}
+
+// C API: appends the normalized form of second to first.
+// Forwards to the file-static normalizeSecondAndAppend() with doNormalize=TRUE.
+U_DRAFT int32_t U_EXPORT2
+unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
+                                UChar *first, int32_t firstLength, int32_t firstCapacity,
+                                const UChar *second, int32_t secondLength,
+                                UErrorCode *pErrorCode) {
+    return normalizeSecondAndAppend(norm2,
+                                    first, firstLength, firstCapacity,
+                                    second, secondLength,
+                                    TRUE, pErrorCode);
+}
+
+// C API: appends second to first.
+// Forwards to the file-static normalizeSecondAndAppend() with doNormalize=FALSE.
+U_DRAFT int32_t U_EXPORT2
+unorm2_append(const UNormalizer2 *norm2,
+              UChar *first, int32_t firstLength, int32_t firstCapacity,
+              const UChar *second, int32_t secondLength,
+              UErrorCode *pErrorCode) {
+    return normalizeSecondAndAppend(norm2,
+                                    first, firstLength, firstCapacity,
+                                    second, secondLength,
+                                    FALSE, pErrorCode);
+}
+
+// C API: tests whether s[0..length[ (NUL-terminated if length==-1) is normalized.
+// Returns FALSE if *pErrorCode is already a failure or if s is NULL or
+// length<-1 (U_ILLEGAL_ARGUMENT_ERROR).
+U_DRAFT UBool U_EXPORT2
+unorm2_isNormalized(const UNormalizer2 *norm2,
+                    const UChar *s, int32_t length,
+                    UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return FALSE;
+    }
+    if(!(s!=NULL && length>=-1)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return FALSE;
+    }
+    const Normalizer2 *n2=(const Normalizer2 *)norm2;
+    // Read-only alias of the caller's text.
+    UnicodeString text(length<0, s, length);
+    return n2->isNormalized(text, *pErrorCode);
+}
+
+// C API: quick check of s[0..length[ (NUL-terminated if length==-1).
+// Returns UNORM_NO if *pErrorCode is already a failure or if s is NULL or
+// length<-1 (U_ILLEGAL_ARGUMENT_ERROR).
+U_DRAFT UNormalizationCheckResult U_EXPORT2
+unorm2_quickCheck(const UNormalizer2 *norm2,
+                  const UChar *s, int32_t length,
+                  UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return UNORM_NO;
+    }
+    if(!(s!=NULL && length>=-1)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return UNORM_NO;
+    }
+    const Normalizer2 *n2=(const Normalizer2 *)norm2;
+    // Read-only alias of the caller's text.
+    UnicodeString text(length<0, s, length);
+    return n2->quickCheck(text, *pErrorCode);
+}
+
+// C API: returns the quick-check-yes span end for s[0..length[
+// (NUL-terminated if length==-1).
+// Returns 0 if *pErrorCode is already a failure or if s is NULL or
+// length<-1 (U_ILLEGAL_ARGUMENT_ERROR).
+U_DRAFT int32_t U_EXPORT2
+unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
+                         const UChar *s, int32_t length,
+                         UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(!(s!=NULL && length>=-1)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    const Normalizer2 *n2=(const Normalizer2 *)norm2;
+    // Read-only alias of the caller's text.
+    UnicodeString text(length<0, s, length);
+    return n2->spanQuickCheckYes(text, *pErrorCode);
+}
+
+// C wrapper for Normalizer2::hasBoundaryBefore(); norm2 is not checked for NULL.
+U_DRAFT UBool U_EXPORT2
+unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
+    const Normalizer2 *n2=(const Normalizer2 *)norm2;
+    return n2->hasBoundaryBefore(c);
+}
+
+// C wrapper for Normalizer2::hasBoundaryAfter(); norm2 is not checked for NULL.
+U_DRAFT UBool U_EXPORT2
+unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
+    const Normalizer2 *n2=(const Normalizer2 *)norm2;
+    return n2->hasBoundaryAfter(c);
+}
+
+// C wrapper for Normalizer2::isInert(); norm2 is not checked for NULL.
+U_DRAFT UBool U_EXPORT2
+unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
+    const Normalizer2 *n2=(const Normalizer2 *)norm2;
+    return n2->isInert(c);
+}
+
+// Some properties APIs ---------------------------------------------------- ***
+
+// Returns the quick check value of c for an old-style normalization mode.
+// Modes that are not strictly between UNORM_NONE and UNORM_FCD yield UNORM_YES.
+// Returns UNORM_MAYBE if the normalizer instance cannot be obtained.
+U_CFUNC UNormalizationCheckResult U_EXPORT2
+unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
+    if(!(UNORM_NONE<mode && mode<UNORM_FCD)) {
+        return UNORM_YES;
+    }
+    UErrorCode errorCode=U_ZERO_ERROR;
+    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return UNORM_MAYBE;
+    }
+    // For the remaining modes the factory hands out Normalizer2WithImpl subclasses.
+    const Normalizer2WithImpl *withImpl=(const Normalizer2WithImpl *)norm2;
+    return withImpl->getQuickCheck(c);
+}
+
+// Returns the index array of the FCD trie and stores the trie's highStart
+// in fcdHighStart. On failure returns NULL and leaves fcdHighStart untouched.
+U_CAPI const uint16_t * U_EXPORT2
+unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
+    const UTrie2 *fcdTrie=Normalizer2Factory::getFCDTrie(*pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    fcdHighStart=fcdTrie->highStart;
+    return fcdTrie->index;
+}
+
+U_NAMESPACE_END
+
+#endif  // !UCONFIG_NO_NORMALIZATION
--- a/icu4c/source/common/normalizer2impl.cpp
+++ b/icu4c/source/common/normalizer2impl.cpp
--- a/icu4c/source/common/normalizer2impl.h
+++ b/icu4c/source/common/normalizer2impl.h
@ -0,0 +1,603 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  normalizer2impl.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009nov22
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __NORMALIZER2IMPL_H__
+#define __NORMALIZER2IMPL_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/normalizer2.h"
+#include "unicode/udata.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "mutex.h"
+#include "uset_imp.h"
+#include "utrie2.h"
+
+U_NAMESPACE_BEGIN
+
+class Hangul {  // static-only helpers for Hangul syllables and Jamo; the private constructor prevents instantiation
+public:
+    /* Korean Hangul and Jamo constants */
+    enum {
+        JAMO_L_BASE=0x1100,     /* "lead" jamo */
+        JAMO_V_BASE=0x1161,     /* "vowel" jamo */
+        JAMO_T_BASE=0x11a7,     /* "trail" jamo; trail index 0 means "no trail", see decompose() */
+
+        HANGUL_BASE=0xac00,
+
+        JAMO_L_COUNT=19,
+        JAMO_V_COUNT=21,
+        JAMO_T_COUNT=28,
+
+        HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT,
+        HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT
+    };
+
+    static inline UBool isHangul(UChar32 c) {  // TRUE for HANGUL_BASE<=c<HANGUL_LIMIT
+        return HANGUL_BASE<=c && c<HANGUL_LIMIT;
+    }
+    static inline UBool
+    isHangulWithoutJamoT(UChar c) {  // TRUE if c is a syllable whose trail index is 0
+        c-=HANGUL_BASE;  // NOTE(review): presumably relies on UChar being unsigned so that smaller values wrap around -- confirm
+        return c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
+    }
+    static inline UBool isJamoL(UChar32 c) {
+        return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT;  // the unsigned cast folds the lower-bound test into one comparison
+    }
+    static inline UBool isJamoV(UChar32 c) {
+        return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT;  // same single-comparison range test as isJamoL()
+    }
+
+    /**
+     * Decomposes c, which must be a Hangul syllable, into buffer
+     * and returns the length of the decomposition (2 or 3).
+     */
+    static inline int32_t decompose(UChar32 c, UChar buffer[3]) {
+        c-=HANGUL_BASE;  // syllable index; c is not range-checked here
+        UChar32 c2=c%JAMO_T_COUNT;  // trail index, 0 if the syllable has no trailing jamo
+        c/=JAMO_T_COUNT;  // remaining lead/vowel index
+        buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
+        buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
+        if(c2==0) {
+            return 2;  // buffer[2] is left untouched
+        } else {
+            buffer[2]=(UChar)(JAMO_T_BASE+c2);
+            return 3;
+        }
+    }
+private:
+    Hangul();  // no instantiation
+};
+
+class Normalizer2Impl;
+
+class ReorderingBuffer : public UMemory {  // append buffer bound to a destination UnicodeString; tracks the last combining class (lastCC)
+public:
+    ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
+        impl(ni), str(dest),
+        start(NULL), reorderStart(NULL), limit(NULL),
+        remainingCapacity(0), lastCC(0) {}  // codePointStart/codePointLimit are not initialized here
+    ~ReorderingBuffer() {
+        if(start!=NULL) {
+            str.releaseBuffer((int32_t)(limit-start));  // hand the buffer back to str with the current length
+        }
+    }
+    UBool init(int32_t destCapacity, UErrorCode &errorCode);
+
+    UBool isEmpty() const { return start==limit; }
+    int32_t length() const { return (int32_t)(limit-start); }
+    UChar *getStart() { return start; }
+    UChar *getLimit() { return limit; }
+    uint8_t getLastCC() const { return lastCC; }
+
+    UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) {  // dispatches on whether c fits into one UChar
+        return (c<=0xffff) ?
+            appendBMP((UChar)c, cc, errorCode) :
+            appendSupplementary(c, cc, errorCode);
+    }
+    // s must be in NFD, otherwise change the implementation.
+    UBool append(const UChar *s, int32_t length,
+                 uint8_t leadCC, uint8_t trailCC,
+                 UErrorCode &errorCode);
+    UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) {
+        if(remainingCapacity==0 && !resize(1, errorCode)) {  // grow only when full; give up if resize() returns FALSE
+            return FALSE;
+        }
+        if(lastCC<=cc || cc==0) {  // c can simply be written at the end
+            *limit++=c;
+            lastCC=cc;
+            if(cc<=1) {
+                reorderStart=limit;  // see the reorderStart note in the private section
+            }
+        } else {
+            insert(c, cc);  // cc is non-zero and lower than lastCC; insert() is defined outside this header
+        }
+        --remainingCapacity;
+        return TRUE;
+    }
+    UBool appendZeroCC(UChar32 c, UErrorCode &errorCode);
+    UBool appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode);
+    void removeZeroCCSuffix(int32_t length);
+    void setReorderingLimitAndLastCC(UChar *newLimit, uint8_t newLastCC) {
+        remainingCapacity+=(int32_t)(limit-newLimit);  // adjust capacity by the distance that limit moves
+        reorderStart=limit=newLimit;
+        lastCC=newLastCC;
+    }
+private:
+    /*
+     * TODO: Revisit whether it makes sense to track reorderStart.
+     * It is set to after the last known character with cc<=1,
+     * which stops previousCC() before it reads that character and looks up its cc.
+     * previousCC() is normally only called from insert().
+     * In other words, reorderStart speeds up the insertion of a combining mark
+     * into a multi-combining mark sequence where it does not belong at the end.
+     * This might not be worth the trouble.
+     * On the other hand, it's not a huge amount of trouble.
+     *
+     * We probably need it for UNORM_SIMPLE_APPEND.
+     */
+
+    UBool appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode);
+    void insert(UChar32 c, uint8_t cc);
+    static void writeCodePoint(UChar *p, UChar32 c) {  // writes one or two UChars at p; the caller must have made room
+        if(c<=0xffff) {
+            *p=(UChar)c;
+        } else {
+            p[0]=U16_LEAD(c);
+            p[1]=U16_TRAIL(c);
+        }
+    }
+    UBool resize(int32_t appendLength, UErrorCode &errorCode);
+
+    const Normalizer2Impl &impl;
+    UnicodeString &str;
+    UChar *start, *reorderStart, *limit;
+    int32_t remainingCapacity;
+    uint8_t lastCC;
+
+    // private backward iterator
+    void setIterator() { codePointStart=limit; }  // begin iterating backward from the current end
+    void skipPrevious();  // Requires start<codePointStart.
+    uint8_t previousCC();  // Returns 0 if there is no previous character.
+
+    UChar *codePointStart, *codePointLimit;
+};
+
+class U_COMMON_API Normalizer2Impl : public UMemory {  // low-level normalization data (norm16 trie + extra data) and the algorithms using it
+public:
+    Normalizer2Impl() : memory(NULL), normTrie(NULL) {  // NOTE(review): thresholds and data pointers stay unset here; presumably load() fills them -- confirm
+        fcdTrieSingleton.fInstance=NULL;
+    }
+    ~Normalizer2Impl();
+
+    void load(const char *packageName, const char *name, UErrorCode &errorCode);
+
+    void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
+
+    // low-level properties ------------------------------------------------ ***
+
+    const UTrie2 *getNormTrie() const { return normTrie; }
+    const UTrie2 *getFCDTrie(UErrorCode &errorCode) const ;
+
+    uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); }
+    uint16_t getNorm16FromBMP(UChar c) const { return UTRIE2_GET16(normTrie, c); }
+    uint16_t getNorm16FromSingleLead(UChar c) const {
+        return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c);
+    }
+    uint16_t getNorm16FromSupplementary(UChar32 c) const {
+        return UTRIE2_GET16_FROM_SUPP(normTrie, c);
+    }
+    uint16_t getNorm16FromSurrogatePair(UChar c, UChar c2) const {
+        return getNorm16FromSupplementary(U16_GET_SUPPLEMENTARY(c, c2));
+    }
+
+    UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {  // maps a norm16 value to the composition quick-check result
+        if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
+            return UNORM_YES;
+        } else if(minMaybeYes<=norm16) {
+            return UNORM_MAYBE;
+        } else {
+            return UNORM_NO;
+        }
+    }
+    UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; }
+    UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; }
+
+    uint8_t getCC(uint16_t norm16) const {
+        if(norm16>=MIN_NORMAL_MAYBE_YES) {
+            return (uint8_t)norm16;  // for these values the cc is the low byte
+        }
+        if(norm16<minNoNo || limitNoNo<=norm16) {
+            return 0;
+        }
+        return getCCFromNoNo(norm16);  // minNoNo<=norm16<limitNoNo: read the cc from the mapping data
+    }
+    static uint8_t getCCFromYesOrMaybe(uint16_t norm16) {
+        return norm16>=MIN_NORMAL_MAYBE_YES ? (uint8_t)norm16 : 0;
+    }
+
+    uint16_t getFCD16(UChar32 c) const { return UTRIE2_GET16(fcdTrie(), c); }  // NOTE(review): fcdTrie() starts out NULL; presumably getFCDTrie() must have succeeded first -- confirm
+    uint16_t getFCD16FromBMP(UChar c) const { return UTRIE2_GET16(fcdTrie(), c); }
+    uint16_t getFCD16FromSingleLead(UChar c) const {
+        return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(fcdTrie(), c);
+    }
+    uint16_t getFCD16FromSupplementary(UChar32 c) const {
+        return UTRIE2_GET16_FROM_SUPP(fcdTrie(), c);
+    }
+    uint16_t getFCD16FromSurrogatePair(UChar c, UChar c2) const {
+        return getFCD16FromSupplementary(U16_GET_SUPPLEMENTARY(c, c2));
+    }
+
+    void setFCD16FromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
+                            UTrie2 *newFCDTrie, UErrorCode &errorCode) const;
+
+    /**
+     * Get the decomposition for one code point.
+     * @param c code point
+     * @param buffer out-only buffer for algorithmic decompositions
+     * @param length out-only, takes the length of the decomposition, if any
+     * @return pointer to the decomposition, or NULL if none
+     */
+    const UChar *getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const;
+
+    enum {
+        MIN_CCC_LCCC_CP=0x300
+    };
+
+    enum {
+        MIN_YES_YES_WITH_CC=0xff01,
+        JAMO_VT=0xff00,
+        MIN_NORMAL_MAYBE_YES=0xfe00,
+        JAMO_L=1,
+        MAX_DELTA=0x40
+    };
+
+    enum {
+        // Byte offsets from the start of the data, after the generic header.
+        IX_NORM_TRIE_OFFSET,
+        IX_EXTRA_DATA_OFFSET,
+        IX_RESERVED2_OFFSET,
+        IX_RESERVED3_OFFSET,
+        IX_RESERVED4_OFFSET,
+        IX_RESERVED5_OFFSET,
+        IX_RESERVED6_OFFSET,
+        IX_TOTAL_SIZE,
+
+        // Code point thresholds for quick check codes.
+        IX_MIN_DECOMP_NO_CP,
+        IX_MIN_COMP_NO_MAYBE_CP,
+
+        // Norm16 value thresholds for quick check combinations and types of extra data.
+        IX_MIN_YES_NO,
+        IX_MIN_NO_NO,
+        IX_LIMIT_NO_NO,
+        IX_MIN_MAYBE_YES,
+
+        IX_RESERVED14,
+        IX_RESERVED15,
+        IX_COUNT
+    };
+
+    enum {
+        MAPPING_HAS_CCC_LCCC_WORD=0x80,
+        MAPPING_PLUS_COMPOSITION_LIST=0x40,
+        MAPPING_NO_COMP_BOUNDARY_AFTER=0x20,
+        MAPPING_LENGTH_MASK=0x1f
+    };
+
+    enum {
+        COMP_1_LAST_TUPLE=0x8000,
+        COMP_1_TRIPLE=1,
+        COMP_1_TRAIL_LIMIT=0x3400,
+        COMP_1_TRAIL_MASK=0x7ffe,
+        COMP_1_TRAIL_SHIFT=9,  // 10-1 for the "triple" bit
+        COMP_2_TRAIL_SHIFT=6,
+        COMP_2_TRAIL_MASK=0xffc0
+    };
+
+    // higher-level functionality ------------------------------------------ ***
+
+    const UChar *decompose(const UChar *src, const UChar *limit,
+                           ReorderingBuffer *buffer, UErrorCode &errorCode) const;
+    void decomposeAndAppend(const UChar *src, const UChar *limit,
+                            UBool doDecompose,
+                            ReorderingBuffer &buffer,
+                            UErrorCode &errorCode) const;
+    UBool compose(const UChar *src, const UChar *limit,
+                  UBool onlyContiguous,
+                  UBool doCompose,
+                  ReorderingBuffer &buffer,
+                  UErrorCode &errorCode) const;
+    const UChar *composeQuickCheck(const UChar *src, const UChar *limit,
+                                   UBool onlyContiguous,
+                                   UNormalizationCheckResult *pQCResult) const;
+    void composeAndAppend(const UChar *src, const UChar *limit,
+                          UBool doCompose,
+                          UBool onlyContiguous,
+                          ReorderingBuffer &buffer,
+                          UErrorCode &errorCode) const;
+    const UChar *makeFCD(const UChar *src, const UChar *limit,
+                         ReorderingBuffer *buffer, UErrorCode &errorCode) const;
+    void makeFCDAndAppend(const UChar *src, const UChar *limit,
+                          UBool doMakeFCD,
+                          ReorderingBuffer &buffer,
+                          UErrorCode &errorCode) const;
+
+    UBool hasDecompBoundary(UChar32 c, UBool before) const;
+    UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }
+
+    UBool hasCompBoundaryBefore(UChar32 c) const {
+        return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c));  // code points below the threshold skip the trie lookup
+    }
+    UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const;
+
+    UBool hasFCDBoundaryBefore(UChar32 c) const { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; }
+    UBool hasFCDBoundaryAfter(UChar32 c) const {
+        uint16_t fcd16=getFCD16(c);
+        return fcd16<=1 || (fcd16&0xff)==0;
+    }
+    UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
+private:
+    static UBool U_CALLCONV
+    isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
+
+    UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
+    UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
+    static UBool isInert(uint16_t norm16) { return norm16==0; }
+    // static UBool isJamoL(uint16_t norm16) const { return norm16==1; }
+    static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }
+    UBool isHangul(uint16_t norm16) const { return norm16==minYesNo; }
+    UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; }
+    // UBool isCompYes(uint16_t norm16) const {
+    //     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
+    // }
+    // UBool isCompYesOrMaybe(uint16_t norm16) const {
+    //     return norm16<minNoNo || minMaybeYes<=norm16;
+    // }
+    static UBool hasZeroCCFromDecompYes(uint16_t norm16) {  // static like isInert()/isJamoVT(): reads no instance state, so const methods can call it
+        return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
+    }
+    UBool isDecompYesAndZeroCC(uint16_t norm16) const {
+        return norm16<minYesNo ||
+               norm16==JAMO_VT ||
+               (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES);
+    }
+    /**
+     * A little faster and simpler than isDecompYesAndZeroCC() but does not include
+     * the MaybeYes which combine-forward and have ccc=0.
+     * (Standard Unicode 5.2 normalization does not have such characters.)
+     */
+    UBool isMostDecompYesAndZeroCC(uint16_t norm16) const {
+        return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
+    }
+    UBool isDecompNoAlgorithmic(uint16_t norm16) const { return norm16>=limitNoNo; }
+
+    // For use with isCompYes().
+    // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
+    // static uint8_t getCCFromYes(uint16_t norm16) {
+    //     return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0;
+    // }
+    uint8_t getCCFromNoNo(uint16_t norm16) const {
+        const uint16_t *mapping=getMapping(norm16);
+        if(*mapping&MAPPING_HAS_CCC_LCCC_WORD) {
+            return (uint8_t)mapping[1];  // low byte of the word that follows the first mapping unit
+        } else {
+            return 0;
+        }
+    }
+    // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
+    uint8_t getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const;
+
+    // Requires algorithmic-NoNo.
+    UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const {
+        return c+norm16-(minMaybeYes-MAX_DELTA-1);  // norm16 encodes a signed delta relative to minMaybeYes-MAX_DELTA-1
+    }
+
+    // Requires minYesNo<norm16<limitNoNo.
+    const uint16_t *getMapping(uint16_t norm16) const { return extraData+norm16; }
+    const uint16_t *getCompositionsListForDecompYesAndZeroCC(uint16_t norm16) const {
+        if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) {
+            return NULL;
+        } else if(norm16<minMaybeYes) {
+            return extraData+norm16;  // for yesYes; if Jamo L: harmless empty list
+        } else {
+            return maybeYesCompositions+norm16-minMaybeYes;
+        }
+    }
+    const uint16_t *getCompositionsListForComposite(uint16_t norm16) const {
+        const uint16_t *list=extraData+norm16;  // composite has both mapping & compositions list
+        return list+  // mapping pointer
+            1+  // +1 to skip the first unit with the mapping length
+            (*list&MAPPING_LENGTH_MASK)+  // + mapping length
+            ((*list>>7)&1);  // +1 if MAPPING_HAS_CCC_LCCC_WORD
+    }
+
+    const UChar *copyLowPrefixFromNulTerminated(const UChar *src,
+                                                UChar32 minNeedDataCP,
+                                                ReorderingBuffer *buffer,
+                                                UErrorCode &errorCode) const;
+    UBool decomposeShort(const UChar *src, const UChar *limit,
+                         ReorderingBuffer &buffer, UErrorCode &errorCode) const;
+    UBool decompose(UChar32 c, uint16_t norm16,
+                    ReorderingBuffer &buffer, UErrorCode &errorCode) const;
+
+    static int32_t combine(const uint16_t *list, UChar32 trail);
+    void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
+                   UBool onlyContiguous) const;
+
+    UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const;
+    const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const;
+    const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const;
+
+    const UTrie2 *fcdTrie() const { return (const UTrie2 *)fcdTrieSingleton.fInstance; }  // raw singleton pointer, no creation or error check
+
+    const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
+    const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;
+
+    UDataMemory *memory;
+    UVersionInfo dataVersion;
+
+    // Code point thresholds for quick check codes.
+    UChar32 minDecompNoCP;
+    UChar32 minCompNoMaybeCP;
+
+    // Norm16 value thresholds for quick check combinations and types of extra data.
+    uint16_t minYesNo;
+    uint16_t minNoNo;
+    uint16_t limitNoNo;
+    uint16_t minMaybeYes;
+
+    UTrie2 *normTrie;
+    const uint16_t *maybeYesCompositions;
+    const uint16_t *extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
+
+    SimpleSingleton fcdTrieSingleton;
+};
+
+/**
+ * ICU-internal shortcut for quick access to standard Unicode normalization.
+ */
+class U_COMMON_API Normalizer2Factory {  // static accessors only; the private constructor prevents instantiation
+public:
+    static const Normalizer2 *getNFCInstance(UErrorCode &errorCode);
+    static const Normalizer2 *getNFDInstance(UErrorCode &errorCode);
+    static const Normalizer2 *getFCDInstance(UErrorCode &errorCode);
+    static const Normalizer2 *getFCCInstance(UErrorCode &errorCode);
+    static const Normalizer2 *getNFKCInstance(UErrorCode &errorCode);
+    static const Normalizer2 *getNFKDInstance(UErrorCode &errorCode);
+    static const Normalizer2 *getNFKC_CFInstance(UErrorCode &errorCode);
+    static const Normalizer2 *getNoopInstance(UErrorCode &errorCode);
+
+    static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode);  // instance selected by a UNormalizationMode value
+
+    static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode);  // the getXYZImpl() accessors return Normalizer2Impl rather than Normalizer2
+    static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode);
+    static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode);
+
+    static const UTrie2 *getFCDTrie(UErrorCode &errorCode);  // the trie whose index array unorm_getFCDTrieIndex() returns
+private:
+    Normalizer2Factory();  // No instantiation.
+};
+
+U_CAPI int32_t U_EXPORT2
+unorm2_swap(const UDataSwapper *ds,
+            const void *inData, int32_t length, void *outData,
+            UErrorCode *pErrorCode);
+
+/**
+ * Get the NF*_QC property for a code point, for u_getIntPropertyValue().
+ * @internal
+ */
+U_CFUNC UNormalizationCheckResult U_EXPORT2
+unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
+
+/**
+ * Internal API, used by collation code.
+ * Get access to the internal FCD trie table to be able to perform
+ * incremental, per-code unit, FCD checks in collation.
+ * One pointer is sufficient because the trie index values are offset
+ * by the index size, so that the same pointer is used to access the trie data.
+ * Code points at fcdHighStart and above have a zero FCD value.
+ * @internal
+ */
+U_CAPI const uint16_t * U_EXPORT2
+unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode);
+
+/**
+ * Internal API, used by collation code.
+ * Get the FCD value for a code unit, with
+ * bits 15..8   lead combining class
+ * bits  7..0   trail combining class
+ *
+ * If c is a lead surrogate and the value is not 0,
+ * then some of c's associated supplementary code points have a non-zero FCD value.
+ *
+ * @internal
+ */
+static inline uint16_t
+unorm_getFCD16(const uint16_t *fcdTrieIndex, UChar c) {
+    return fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];  // the position and the value are both read through the same pointer
+}
+
+/**
+ * Internal API, used by collation code.
+ * Get the FCD value of the next code point (post-increment), with
+ * bits 15..8   lead combining class
+ * bits  7..0   trail combining class
+ *
+ * @internal
+ */
+static inline uint16_t
+unorm_nextFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart,
+                const UChar *&s, const UChar *limit) {
+    UChar32 c=*s++;  // read unconditionally: the caller must ensure s!=limit on entry
+    uint16_t fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
+    if(fcd!=0 && U16_IS_LEAD(c)) {  // only a lead surrogate with a non-zero value needs the full code point
+        UChar c2;
+        if(s!=limit && U16_IS_TRAIL(c2=*s)) {
+            ++s;  // consume the trail surrogate as well
+            c=U16_GET_SUPPLEMENTARY(c, c2);
+            if(c<fcdHighStart) {
+                fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_SUPP(fcdTrieIndex, c)];
+            } else {
+                fcd=0;  // no lookup at or above fcdHighStart
+            }
+        } else /* unpaired lead surrogate */ {
+            fcd=0;
+        }
+    }
+    return fcd;
+}
+
+/**
+ * Internal API, used by collation code.
+ * Get the FCD value of the previous code point (pre-decrement), with
+ * bits 15..8   lead combining class
+ * bits  7..0   trail combining class
+ *
+ * @internal
+ */
+static inline uint16_t
+unorm_prevFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart,
+                const UChar *start, const UChar *&s) {
+    UChar32 c=*--s;  // read unconditionally: the caller must ensure start!=s on entry
+    uint16_t fcd;
+    if(!U16_IS_SURROGATE(c)) {
+        fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
+    } else {
+        UChar c2;
+        if(U16_IS_SURROGATE_TRAIL(c) && s!=start && U16_IS_LEAD(c2=*(s-1))) {
+            --s;  // step back over the lead surrogate as well
+            c=U16_GET_SUPPLEMENTARY(c2, c);
+            if(c<fcdHighStart) {
+                fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_SUPP(fcdTrieIndex, c)];
+            } else {
+                fcd=0;  // no lookup at or above fcdHighStart
+            }
+        } else /* unpaired surrogate */ {
+            fcd=0;
+        }
+    }
+    return fcd;
+}
+
+U_NAMESPACE_END
+
+#endif  /* !UCONFIG_NO_NORMALIZATION */
+#endif  /* __NORMALIZER2IMPL_H__ */
--- a/icu4c/source/common/normlzr.cpp
+++ b/icu4c/source/common/normlzr.cpp
@ -1,7 +1,7 @@
 /*
 *************************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1996-2005, International Business Machines Corporation and
+ * Copyright (c) 1996-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 *************************************************************************
 */
@ -10,14 +10,15 @@

 #if !UCONFIG_NO_NORMALIZATION

+#include "unicode/uniset.h"
 #include "unicode/unistr.h"
 #include "unicode/chariter.h"
 #include "unicode/schriter.h"
 #include "unicode/uchriter.h"
-#include "unicode/uiter.h"
 #include "unicode/normlzr.h"
 #include "cmemory.h"
-#include "unormimp.h"
+#include "normalizer2impl.h"
+#include "uprops.h"  // for uniset_getUnicode32Instance()

 U_NAMESPACE_BEGIN

@ -28,72 +29,68 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
 //-------------------------------------------------------------------------

 Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
-    UObject(), fUMode(mode), fOptions(0),
+    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
+    text(new StringCharacterIterator(str)),
    currentIndex(0), nextIndex(0),
    buffer(), bufferPos(0)
 {
-    init(new StringCharacterIterator(str));
+    init();
 }

 Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
-    UObject(), fUMode(mode), fOptions(0),
+    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
+    text(new UCharCharacterIterator(str, length)),
    currentIndex(0), nextIndex(0),
    buffer(), bufferPos(0)
 {
-    init(new UCharCharacterIterator(str, length));
+    init();
 }

 Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
-    UObject(), fUMode(mode), fOptions(0),
+    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
+    text(iter.clone()),
    currentIndex(0), nextIndex(0),
    buffer(), bufferPos(0)
 {
-    init(iter.clone());
+    init();
 }

 Normalizer::Normalizer(const Normalizer &copy) :
-    UObject(copy), fUMode(copy.fUMode), fOptions(copy.fOptions),
+    UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
+    text(copy.text->clone()),
    currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
    buffer(copy.buffer), bufferPos(copy.bufferPos)
 {
-    init(((CharacterIterator *)(copy.text->context))->clone());
+    init();
 }

 static const UChar _NUL=0;

 void
-Normalizer::init(CharacterIterator *iter) {
+Normalizer::init() {
    UErrorCode errorCode=U_ZERO_ERROR;
-
-    text=(UCharIterator *)uprv_malloc(sizeof(UCharIterator));
-    if(text!=NULL) {
-        if(unorm_haveData(&errorCode)) {
-            uiter_setCharacterIterator(text, iter);
-        } else {
-            delete iter;
-            uiter_setCharacterIterator(text, new UCharCharacterIterator(&_NUL, 0));
-        }
-    } else {
-        delete iter;
+    fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
+    if(fOptions&UNORM_UNICODE_3_2) {
+        delete fFilteredNorm2;
+        fNorm2=fFilteredNorm2=
+            new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
+    }
+    if(U_FAILURE(errorCode)) {
+        errorCode=U_ZERO_ERROR;
+        fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
    }
 }

 Normalizer::~Normalizer()
 {
-    if(text!=NULL) {
-        delete (CharacterIterator *)text->context;
-        uprv_free(text);
-    }
+    delete fFilteredNorm2;
+    delete text;
 }

 Normalizer* 
 Normalizer::clone() const
 {
-    if(this!=0) {
-        return new Normalizer(*this);
-    } else {
-        return 0;
-    }
+    return new Normalizer(*this);
 }

 /**
@ -101,7 +98,7 @@ Normalizer::clone() const
 */
 int32_t Normalizer::hashCode() const
 {
-    return ((CharacterIterator *)(text->context))->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
+    return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
 }
    
 UBool Normalizer::operator==(const Normalizer& that) const
@ -110,7 +107,7 @@ UBool Normalizer::operator==(const Normalizer& that) const
        this==&that ||
        fUMode==that.fUMode &&
        fOptions==that.fOptions &&
-        *((CharacterIterator *)(text->context))==*((CharacterIterator *)(that.text->context)) &&
+        *text==*that.text &&
        buffer==that.buffer &&
        bufferPos==that.bufferPos &&
        nextIndex==that.nextIndex;
@ -140,29 +137,18 @@ Normalizer::normalize(const UnicodeString& source,
            // the source and result strings are the same object, use a temporary one
            dest=&localDest;
        }
-
-        UChar *buffer=dest->getBuffer(source.length());
-        int32_t length=unorm_internalNormalize(buffer, dest->getCapacity(),
-                                               source.getBuffer(), source.length(),
-                                               mode, options,
-                                               &status);
-        dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
-        if(status==U_BUFFER_OVERFLOW_ERROR) {
-            status=U_ZERO_ERROR;
-            buffer=dest->getBuffer(length);
-            length=unorm_internalNormalize(buffer, dest->getCapacity(),
-                                           source.getBuffer(), source.length(),
-                                           mode, options,
-                                           &status);
-            dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
+        const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
+        if(U_SUCCESS(status)) {
+            if(options&UNORM_UNICODE_3_2) {
+                FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
+                    normalize(source, *dest, status);
+            } else {
+                n2->normalize(source, *dest, status);
+            }
        }
-
-        if(dest==&localDest) {
+        if(dest==&localDest && U_SUCCESS(status)) {
            result=*dest;
        }
-        if(U_FAILURE(status)) {
-            result.setToBogus();
-        }
    }
 }

@ -171,45 +157,7 @@ Normalizer::compose(const UnicodeString& source,
                    UBool compat, int32_t options,
                    UnicodeString& result, 
                    UErrorCode &status) {
-    if(source.isBogus() || U_FAILURE(status)) {
-        result.setToBogus();
-        if(U_SUCCESS(status)) {
-            status=U_ILLEGAL_ARGUMENT_ERROR;
-        }
-    } else {
-        UnicodeString localDest;
-        UnicodeString *dest;
-
-        if(&source!=&result) {
-            dest=&result;
-        } else {
-            // the source and result strings are the same object, use a temporary one
-            dest=&localDest;
-        }
-
-        UChar *buffer=dest->getBuffer(source.length());
-        int32_t length=unorm_compose(buffer, dest->getCapacity(),
-                                     source.getBuffer(), source.length(),
-                                     compat, options,
-                                     &status);
-        dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
-        if(status==U_BUFFER_OVERFLOW_ERROR) {
-            status=U_ZERO_ERROR;
-            buffer=dest->getBuffer(length);
-            length=unorm_compose(buffer, dest->getCapacity(),
-                                 source.getBuffer(), source.length(),
-                                 compat, options,
-                                 &status);
-            dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
-        }
-
-        if(dest==&localDest) {
-            result=*dest;
-        }
-        if(U_FAILURE(status)) {
-            result.setToBogus();
-        }
-    }
+    normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status);
 }

 void U_EXPORT2
@ -217,44 +165,40 @@ Normalizer::decompose(const UnicodeString& source,
                      UBool compat, int32_t options,
                      UnicodeString& result, 
                      UErrorCode &status) {
-    if(source.isBogus() || U_FAILURE(status)) {
-        result.setToBogus();
-        if(U_SUCCESS(status)) {
-            status=U_ILLEGAL_ARGUMENT_ERROR;
+    normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status);
+}
+
+UNormalizationCheckResult
+Normalizer::quickCheck(const UnicodeString& source,
+                       UNormalizationMode mode, int32_t options,
+                       UErrorCode &status) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
+    if(U_SUCCESS(status)) {
+        if(options&UNORM_UNICODE_3_2) {
+            return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
+                quickCheck(source, status);
+        } else {
+            return n2->quickCheck(source, status);
        }
    } else {
-        UnicodeString localDest;
-        UnicodeString *dest;
+        return UNORM_MAYBE;
+    }
+}

-        if(&source!=&result) {
-            dest=&result;
+UBool
+Normalizer::isNormalized(const UnicodeString& source,
+                         UNormalizationMode mode, int32_t options,
+                         UErrorCode &status) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
+    if(U_SUCCESS(status)) {
+        if(options&UNORM_UNICODE_3_2) {
+            return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
+                isNormalized(source, status);
        } else {
-            // the source and result strings are the same object, use a temporary one
-            dest=&localDest;
-        }
-
-        UChar *buffer=dest->getBuffer(source.length());
-        int32_t length=unorm_decompose(buffer, dest->getCapacity(),
-                                     source.getBuffer(), source.length(),
-                                     compat, options,
-                                     &status);
-        dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
-        if(status==U_BUFFER_OVERFLOW_ERROR) {
-            status=U_ZERO_ERROR;
-            buffer=dest->getBuffer(length);
-            length=unorm_decompose(buffer, dest->getCapacity(),
-                                   source.getBuffer(), source.length(),
-                                   compat, options,
-                                   &status);
-            dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
-        }
-
-        if(dest==&localDest) {
-            result=*dest;
-        }
-        if(U_FAILURE(status)) {
-            result.setToBogus();
+            return n2->isNormalized(source, status);
        }
+    } else {
+        return FALSE;
    }
 }

@ -272,37 +216,25 @@ Normalizer::concatenate(UnicodeString &left, UnicodeString &right,
        UnicodeString localDest;
        UnicodeString *dest;

-        if(&left!=&result && &right!=&result) {
+        if(&right!=&result) {
            dest=&result;
        } else {
-            // the source and result strings are the same object, use a temporary one
+            // the right and result strings are the same object, use a temporary one
            dest=&localDest;
        }
-
-        UChar *buffer=dest->getBuffer(left.length()+right.length());
-        int32_t length=unorm_concatenate(left.getBuffer(), left.length(),
-                                         right.getBuffer(), right.length(),
-                                         buffer, dest->getCapacity(),
-                                         mode, options,
-                                         &errorCode);
-        dest->releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
-        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
-            errorCode=U_ZERO_ERROR;
-            buffer=dest->getBuffer(length);
-            int32_t length=unorm_concatenate(left.getBuffer(), left.length(),
-                                             right.getBuffer(), right.length(),
-                                             buffer, dest->getCapacity(),
-                                             mode, options,
-                                             &errorCode);
-            dest->releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+        *dest=left;
+        const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
+        if(U_SUCCESS(errorCode)) {
+            if(options&UNORM_UNICODE_3_2) {
+                FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)).
+                    append(*dest, right, errorCode);
+            } else {
+                n2->append(*dest, right, errorCode);
+            }
        }
-
-        if(dest==&localDest) {
+        if(dest==&localDest && U_SUCCESS(errorCode)) {
            result=*dest;
        }
-        if(U_FAILURE(errorCode)) {
-            result.setToBogus();
-        }
    }
    return result;
 }
@ -353,19 +285,20 @@ UChar32 Normalizer::previous() {
 }

 void Normalizer::reset() {
-    currentIndex=nextIndex=text->move(text, 0, UITER_START);
+    currentIndex=nextIndex=text->setToStart();
    clearBuffer();
 }

 void
 Normalizer::setIndexOnly(int32_t index) {
-    currentIndex=nextIndex=text->move(text, index, UITER_ZERO); // validates index
+    text->setIndex(index);  // pins index
+    currentIndex=nextIndex=text->getIndex();
    clearBuffer();
 }

 /**
- * Return the first character in the normalized text->  This resets
- * the <tt>Normalizer's</tt> position to the beginning of the text->
+ * Return the first character in the normalized text.  This resets
+ * the <tt>Normalizer's</tt> position to the beginning of the text.
 */
 UChar32 Normalizer::first() {
    reset();
@ -373,12 +306,12 @@ UChar32 Normalizer::first() {
 }

 /**
- * Return the last character in the normalized text->  This resets
+ * Return the last character in the normalized text.  This resets
 * the <tt>Normalizer's</tt> position to be just before the
 * the input text corresponding to that normalized character.
 */
 UChar32 Normalizer::last() {
-    currentIndex=nextIndex=text->move(text, 0, UITER_LIMIT);
+    currentIndex=nextIndex=text->setToEnd();
    clearBuffer();
    return previous();
 }
@ -406,21 +339,21 @@ int32_t Normalizer::getIndex() const {
 }

 /**
- * Retrieve the index of the start of the input text->  This is the begin index
+ * Retrieve the index of the start of the input text.  This is the begin index
 * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
 * over which this <tt>Normalizer</tt> is iterating
 */
 int32_t Normalizer::startIndex() const {
-    return text->getIndex(text, UITER_START);
+    return text->startIndex();
 }

 /**
- * Retrieve the index of the end of the input text->  This is the end index
+ * Retrieve the index of the end of the input text.  This is the end index
 * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
 * over which this <tt>Normalizer</tt> is iterating
 */
 int32_t Normalizer::endIndex() const {
-    return text->getIndex(text, UITER_LIMIT);
+    return text->endIndex();
 }

 //-------------------------------------------------------------------------
@ -431,6 +364,7 @@ void
 Normalizer::setMode(UNormalizationMode newMode) 
 {
    fUMode = newMode;
+    init();
 }

 UNormalizationMode
@ -448,6 +382,7 @@ Normalizer::setOption(int32_t option,
    } else {
        fOptions &= (~option);
    }
+    init();
 }

 UBool
@ -458,7 +393,7 @@ Normalizer::getOption(int32_t option) const

 /**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
- * The iteration position is set to the beginning of the input text->
+ * The iteration position is set to the beginning of the input text.
 */
 void
 Normalizer::setText(const UnicodeString& newText, 
@ -472,8 +407,8 @@ Normalizer::setText(const UnicodeString& newText,
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
-    delete (CharacterIterator *)(text->context);
-    text->context = newIter;
+    delete text;
+    text = newIter;
    reset();
 }

@ -493,8 +428,8 @@ Normalizer::setText(const CharacterIterator& newText,
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
-    delete (CharacterIterator *)(text->context);
-    text->context = newIter;
+    delete text;
+    text = newIter;
    reset();
 }

@ -511,8 +446,8 @@ Normalizer::setText(const UChar* newText,
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
-    delete (CharacterIterator *)(text->context);
-    text->context = newIter;
+    delete text;
+    text = newIter;
    reset();
 }

@ -523,7 +458,7 @@ Normalizer::setText(const UChar* newText,
 void
 Normalizer::getText(UnicodeString&  result) 
 {
-    ((CharacterIterator *)(text->context))->getText(result);
+    text->getText(result);
 }

 //-------------------------------------------------------------------------
@ -537,72 +472,48 @@ void Normalizer::clearBuffer() {

 UBool
 Normalizer::nextNormalize() {
-    UChar *p;
-    int32_t length;
-    UErrorCode errorCode;
-
    clearBuffer();
    currentIndex=nextIndex;
-    text->move(text, nextIndex, UITER_ZERO);
-    if(!text->hasNext(text)) {
+    text->setIndex(nextIndex);
+    if(!text->hasNext()) {
        return FALSE;
    }
-
-    errorCode=U_ZERO_ERROR;
-    p=buffer.getBuffer(-1);
-    length=unorm_next(text, p, buffer.getCapacity(),
-                      fUMode, fOptions,
-                      TRUE, 0,
-                      &errorCode);
-    buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
-    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
-        errorCode=U_ZERO_ERROR;
-        text->move(text, nextIndex, UITER_ZERO);
-        p=buffer.getBuffer(length);
-        length=unorm_next(text, p, buffer.getCapacity(),
-                          fUMode, fOptions,
-                          TRUE, 0,
-                          &errorCode);
-        buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+    // Skip at least one character so we make progress.
+    UnicodeString segment(text->next32PostInc());
+    while(text->hasNext()) {
+        UChar32 c;
+        if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) {
+            text->move32(-1, CharacterIterator::kCurrent);
+            break;
+        }
+        segment.append(c);
    }
-
-    nextIndex=text->getIndex(text, UITER_CURRENT);
+    nextIndex=text->getIndex();
+    UErrorCode errorCode=U_ZERO_ERROR;
+    fNorm2->normalize(segment, buffer, errorCode);
    return U_SUCCESS(errorCode) && !buffer.isEmpty();
 }

 UBool
 Normalizer::previousNormalize() {
-    UChar *p;
-    int32_t length;
-    UErrorCode errorCode;
-
    clearBuffer();
    nextIndex=currentIndex;
-    text->move(text, currentIndex, UITER_ZERO);
-    if(!text->hasPrevious(text)) {
+    text->setIndex(currentIndex);
+    if(!text->hasPrevious()) {
        return FALSE;
    }
-
-    errorCode=U_ZERO_ERROR;
-    p=buffer.getBuffer(-1);
-    length=unorm_previous(text, p, buffer.getCapacity(),
-                          fUMode, fOptions,
-                          TRUE, 0,
-                          &errorCode);
-    buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
-    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
-        errorCode=U_ZERO_ERROR;
-        text->move(text, currentIndex, UITER_ZERO);
-        p=buffer.getBuffer(length);
-        length=unorm_previous(text, p, buffer.getCapacity(),
-                              fUMode, fOptions,
-                              TRUE, 0,
-                              &errorCode);
-        buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+    UnicodeString segment;
+    while(text->hasPrevious()) {
+        UChar32 c=text->previous32();
+        segment.insert(0, c);
+        if(fNorm2->hasBoundaryBefore(c)) {
+            break;
+        }
    }
-
+    currentIndex=text->getIndex();
+    UErrorCode errorCode=U_ZERO_ERROR;
+    fNorm2->normalize(segment, buffer, errorCode);
    bufferPos=buffer.length();
-    currentIndex=text->getIndex(text, UITER_CURRENT);
    return U_SUCCESS(errorCode) && !buffer.isEmpty();
 }

--- a/icu4c/source/common/uchar.c
+++ b/icu4c/source/common/uchar.c
@ -1,6 +1,6 @@
 /*
 ********************************************************************************
-*   Copyright (C) 1996-2009, International Business Machines
+*   Copyright (C) 1996-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ********************************************************************************
 *
@ -28,7 +28,6 @@
 #include "ucln_cmn.h"
 #include "utrie2.h"
 #include "udataswp.h"
-#include "unormimp.h" /* JAMO_L_BASE etc. */
 #include "uprops.h"

 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
@ -650,10 +649,6 @@ u_getNumericValue(UChar32 c) {
    }
 }

-/* ICU 3.4: bidi/shaping properties moved to ubidi_props.c */
-
-/* ICU 2.1: u_getCombiningClass() moved to unorm.cpp */
-
 U_CAPI int32_t U_EXPORT2
 u_digit(UChar32 ch, int8_t radix) {
    int8_t value;
--- a/icu4c/source/common/ucln_cmn.h
+++ b/icu4c/source/common/ucln_cmn.h
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *                                                                            *
-* Copyright (C) 2001-2006, International Business Machines                   *
+* Copyright (C) 2001-2010, International Business Machines                   *
 *                Corporation and others. All Rights Reserved.                *
 *                                                                            *
 ******************************************************************************
@ -41,6 +41,7 @@ typedef enum ECleanupCommonType {
    UCLN_COMMON_LOCALE,
    UCLN_COMMON_ULOC,
    UCLN_COMMON_UNORM,
+    UCLN_COMMON_NORMALIZER2,
    UCLN_COMMON_USET,
    UCLN_COMMON_UNAMES,
    UCLN_COMMON_PNAME,
--- a/icu4c/source/common/ucol_swp.cpp
+++ b/icu4c/source/common/ucol_swp.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2003-2009, International Business Machines
+*   Copyright (C) 2003-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
--- a/icu4c/source/common/unicode/caniter.h
+++ b/icu4c/source/common/unicode/caniter.h
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
- * Copyright (C) 1996-2006, International Business Machines Corporation and    *
+ * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
@ -31,6 +31,7 @@
 U_NAMESPACE_BEGIN

 class Hashtable;
+class Normalizer2;

 /**
 * This class allows one to iterate through all the strings that are canonically equivalent to a given
@ -174,6 +175,8 @@ private:
    // transient fields
    UnicodeString buffer;

+    const Normalizer2 &nfd;
+
    // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
    UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)

--- a/icu4c/source/common/unicode/normalizer2.h
+++ b/icu4c/source/common/unicode/normalizer2.h
@ -0,0 +1,460 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  normalizer2.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009nov22
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __NORMALIZER2_H__
+#define __NORMALIZER2_H__
+
+/**
+ * \file
+ * \brief C++ API: New API for Unicode Normalization.
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm2.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Unicode normalization functionality for standard Unicode normalization or
+ * for using custom mapping tables.
+ * All instances of this class are unmodifiable/immutable.
+ * Instances returned by getInstance() are singletons that must not be deleted by the caller.
+ *
+ * Some of the functions in this class identify normalization boundaries.
+ * At a normalization boundary, the portions of the string
+ * before it and starting from it do not interact and can be handled independently.
+ *
+ * The spanQuickCheckYes() stops at a normalization boundary.
+ * When the goal is a normalized string, then the text before the boundary
+ * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
+ *
+ * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
+ * a character is at a normalization boundary. They are used for moving from one
+ * normalization boundary to the next or preceding one, and for performing iterative normalization.
+ *
+ * Iterative normalization is useful when only a small portion of a
+ * longer string needs to be processed.
+ * In ICU, iterative normalization is used by the NormalizationTransliterator
+ * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
+ * (to process only the substring for which sort key bytes are computed).
+ *
+ * The set of normalization boundaries returned by these functions may not be
+ * complete: There may be more boundaries that could be returned.
+ * Different functions may return different boundaries.
+ * @draft ICU 4.4
+ */
+class U_COMMON_API Normalizer2 : public UObject {
+public:
+    /**
+     * Returns a Normalizer2 instance which uses the specified data file
+     * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
+     * and which composes or decomposes text according to the specified mode.
+     * Returns an unmodifiable singleton instance. Do not delete it.
+     *
+     * Use packageName=NULL for data files that are part of ICU's own data.
+     * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
+     * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
+     * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
+     *
+     * @param packageName NULL for ICU built-in data, otherwise application data package name
+     * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
+     * @param mode normalization mode (compose or decompose etc.)
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return the requested Normalizer2, if successful
+     * @draft ICU 4.4
+     */
+    static const Normalizer2 *
+    getInstance(const char *packageName,
+                const char *name,
+                UNormalization2Mode mode,
+                UErrorCode &errorCode);
+
+    /**
+     * Returns the normalized form of the source string.
+     * @param src source string
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return normalized src
+     * @draft ICU 4.4
+     */
+    UnicodeString
+    normalize(const UnicodeString &src, UErrorCode &errorCode) const {
+        UnicodeString result;
+        normalize(src, result, errorCode);
+        return result;
+    }
+    /**
+     * Writes the normalized form of the source string to the destination string
+     * (replacing its contents) and returns the destination string.
+     * The source and destination strings must be different objects.
+     * @param src source string
+     * @param dest destination string; its contents are replaced with normalized src
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return dest
+     * @draft ICU 4.4
+     */
+    virtual UnicodeString &
+    normalize(const UnicodeString &src,
+              UnicodeString &dest,
+              UErrorCode &errorCode) const = 0;
+    /**
+     * Appends the normalized form of the second string to the first string
+     * (merging them at the boundary) and returns the first string.
+     * The result is normalized if the first string was normalized.
+     * The first and second strings must be different objects.
+     * @param first string, should be normalized
+     * @param second string, will be normalized
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return first
+     * @draft ICU 4.4
+     */
+    virtual UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UErrorCode &errorCode) const = 0;
+    /**
+     * Appends the second string to the first string
+     * (merging them at the boundary) and returns the first string.
+     * The result is normalized if both the strings were normalized.
+     * The first and second strings must be different objects.
+     * @param first string, should be normalized
+     * @param second string, should be normalized
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return first
+     * @draft ICU 4.4
+     */
+    virtual UnicodeString &
+    append(UnicodeString &first,
+           const UnicodeString &second,
+           UErrorCode &errorCode) const = 0;
+
+    /**
+     * Tests if the string is normalized.
+     * Internally, in cases where the quickCheck() method would return "maybe"
+     * (which is only possible for the two COMPOSE modes) this method
+     * resolves to "yes" or "no" to provide a definitive result,
+     * at the cost of doing more work in those cases.
+     * @param s input string
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return TRUE if s is normalized
+     * @draft ICU 4.4
+     */
+    virtual UBool
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
+
+    /**
+     * Tests if the string is normalized.
+     * For the two COMPOSE modes, the result could be "maybe" in cases that
+     * would take a little more work to resolve definitively.
+     * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
+     * combination of quick check + normalization, to avoid
+     * re-checking the "yes" prefix.
+     * @param s input string
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return UNormalizationCheckResult
+     * @draft ICU 4.4
+     */
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
+
+    /**
+     * Returns the end of the normalized substring of the input string.
+     * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
+     * the substring <code>UnicodeString(s, 0, end)</code>
+     * will pass the quick check with a "yes" result.
+     *
+     * The returned end index is usually one or more characters before the
+     * "no" or "maybe" character: The end index is at a normalization boundary.
+     * (See the class documentation for more about normalization boundaries.)
+     *
+     * When the goal is a normalized string and most input strings are expected
+     * to be normalized already, then call this method,
+     * and if it returns a prefix shorter than the input string,
+     * copy that prefix and use normalizeSecondAndAppend() for the remainder.
+     * @param s input string
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return end index of the "yes" span (the normalized prefix of s)
+     * @draft ICU 4.4
+     */
+    virtual int32_t
+    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
+
+    /**
+     * Tests if the character has a normalization boundary before it.
+     * If true, then the character does not normalization-interact with
+     * preceding characters.
+     * In other words, a string containing this character can be normalized
+     * by processing portions before this character and starting from this
+     * character independently.
+     * This is used for iterative normalization. See the class documentation for details.
+     * @param c character to test
+     * @return TRUE if c has a normalization boundary before it
+     * @draft ICU 4.4
+     */
+    virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
+
+    /**
+     * Tests if the character has a normalization boundary after it.
+     * If true, then the character does not normalization-interact with
+     * following characters.
+     * In other words, a string containing this character can be normalized
+     * by processing portions up to this character and after this
+     * character independently.
+     * This is used for iterative normalization. See the class documentation for details.
+     * @param c character to test
+     * @return TRUE if c has a normalization boundary after it
+     * @draft ICU 4.4
+     */
+    virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
+
+    /**
+     * Tests if the character is normalization-inert.
+     * If true, then the character does not change, nor normalization-interact with
+     * preceding or following characters.
+     * In other words, a string containing this character can be normalized
+     * by processing portions before this character and after this
+     * character independently.
+     * This is used for iterative normalization. See the class documentation for details.
+     * @param c character to test
+     * @return TRUE if c is normalization-inert
+     * @draft ICU 4.4
+     */
+    virtual UBool isInert(UChar32 c) const = 0;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     * @returns a UClassID for this class.
+     * @draft ICU 4.4
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     * @return a UClassID for the actual class.
+     * @draft ICU 4.4
+     */
+    virtual UClassID getDynamicClassID() const = 0;
+};
+
+/**
+ * Normalization filtered by a UnicodeSet.
+ * Normalizes portions of the text contained in the filter set and leaves
+ * portions not contained in the filter set unchanged.
+ * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
+ * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
+ * This class implements all of (and only) the Normalizer2 API.
+ * An instance of this class is unmodifiable/immutable but is constructed and
+ * must be destructed by the owner.
+ * @draft ICU 4.4
+ */
+class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
+public:
+    /**
+     * Constructs a filtered normalizer wrapping any Normalizer2 instance
+     * and a filter set.
+     * Both are aliased and must not be modified or deleted while this object
+     * is used.
+     * The filter set should be frozen; otherwise the performance will suffer greatly.
+     * @param n2 wrapped Normalizer2 instance
+     * @param filterSet UnicodeSet which determines the characters to be normalized
+     * @draft ICU 4.4
+     */
+    FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
+            norm2(n2), set(filterSet) {}
+
+    /**
+     * Writes the normalized form of the source string to the destination string
+     * (replacing its contents) and returns the destination string.
+     * The source and destination strings must be different objects.
+     * @param src source string
+     * @param dest destination string; its contents are replaced with normalized src
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return dest
+     * @draft ICU 4.4
+     */
+    virtual UnicodeString &
+    normalize(const UnicodeString &src,
+              UnicodeString &dest,
+              UErrorCode &errorCode) const;
+    /**
+     * Appends the normalized form of the second string to the first string
+     * (merging them at the boundary) and returns the first string.
+     * The result is normalized if the first string was normalized.
+     * The first and second strings must be different objects.
+     * @param first string, should be normalized
+     * @param second string, will be normalized
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return first
+     * @draft ICU 4.4
+     */
+    virtual UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UErrorCode &errorCode) const;
+    /**
+     * Appends the second string to the first string
+     * (merging them at the boundary) and returns the first string.
+     * The result is normalized if both the strings were normalized.
+     * The first and second strings must be different objects.
+     * @param first string, should be normalized
+     * @param second string, should be normalized
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return first
+     * @draft ICU 4.4
+     */
+    virtual UnicodeString &
+    append(UnicodeString &first,
+           const UnicodeString &second,
+           UErrorCode &errorCode) const;
+
+    /**
+     * Tests if the string is normalized.
+     * For details see the Normalizer2 base class documentation.
+     * @param s input string
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return TRUE if s is normalized
+     * @draft ICU 4.4
+     */
+    virtual UBool
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
+    /**
+     * Tests if the string is normalized.
+     * For details see the Normalizer2 base class documentation.
+     * @param s input string
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return UNormalizationCheckResult
+     * @draft ICU 4.4
+     */
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
+    /**
+     * Returns the end of the normalized substring of the input string.
+     * For details see the Normalizer2 base class documentation.
+     * @param s input string
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     * @return end index of the "yes" span (the normalized prefix of s)
+     * @draft ICU 4.4
+     */
+    virtual int32_t
+    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
+
+    /**
+     * Tests if the character has a normalization boundary before it.
+     * For details see the Normalizer2 base class documentation.
+     * @param c character to test
+     * @return TRUE if c has a normalization boundary before it
+     * @draft ICU 4.4
+     */
+    virtual UBool hasBoundaryBefore(UChar32 c) const;
+
+    /**
+     * Tests if the character has a normalization boundary after it.
+     * For details see the Normalizer2 base class documentation.
+     * @param c character to test
+     * @return TRUE if c has a normalization boundary after it
+     * @draft ICU 4.4
+     */
+    virtual UBool hasBoundaryAfter(UChar32 c) const;
+
+    /**
+     * Tests if the character is normalization-inert.
+     * For details see the Normalizer2 base class documentation.
+     * @param c character to test
+     * @return TRUE if c is normalization-inert
+     * @draft ICU 4.4
+     */
+    virtual UBool isInert(UChar32 c) const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     * @returns a UClassID for this class.
+     * @draft ICU 4.4
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     * @return a UClassID for the actual class.
+     * @draft ICU 4.4
+     */
+    virtual UClassID getDynamicClassID() const;
+private:
+    UnicodeString &
+    normalize(const UnicodeString &src,
+              UnicodeString &dest,
+              USetSpanCondition spanCondition,
+              UErrorCode &errorCode) const;
+
+    UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UBool doNormalize,
+                             UErrorCode &errorCode) const;
+
+    const Normalizer2 &norm2;
+    const UnicodeSet &set;
+};
+
+U_NAMESPACE_END
+
+#endif  // !UCONFIG_NO_NORMALIZATION
+#endif  // __NORMALIZER2_H__
--- a/icu4c/source/common/unicode/normlzr.h
+++ b/icu4c/source/common/unicode/normlzr.h
@ -1,7 +1,7 @@
 /*
 ********************************************************************
 * COPYRIGHT:
- * Copyright (c) 1996-2006, International Business Machines Corporation and
+ * Copyright (c) 1996-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************
 */
@ -18,14 +18,11 @@
 
 #if !UCONFIG_NO_NORMALIZATION

-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
 #include "unicode/chariter.h"
+#include "unicode/normalizer2.h"
+#include "unicode/unistr.h"
 #include "unicode/unorm.h"
-
-
-struct UCharIterator;
-typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+#include "unicode/uobject.h"

 U_NAMESPACE_BEGIN
 /**
@ -33,6 +30,10 @@ U_NAMESPACE_BEGIN
 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
 * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
 *
+ * Note: This API has been replaced by the Normalizer2 class and is only available
+ * for backward compatibility. This class simply delegates to the Normalizer2 class.
+ * There is one exception: The new API does not provide a replacement for Normalizer::compare().
+ *
 * The Normalizer class consists of two parts:
 * - static functions that normalize strings or test if strings are normalized
 * - a Normalizer object is an iterator that takes any kind of text and
@ -40,13 +41,11 @@ U_NAMESPACE_BEGIN
 *
 * The Normalizer class is not suitable for subclassing.
 *
- * The static functions are basically wrappers around the C implementation,
- * using UnicodeString instead of UChar*.
 * For basic information about normalization forms and details about the C API
 * please see the documentation in unorm.h.
 *
 * The iterator API with the Normalizer constructors and the non-static functions
- * uses a CharacterIterator as input. It is possible to pass a string which
+ * use a CharacterIterator as input. It is possible to pass a string which
 * is then internally wrapped in a CharacterIterator.
 * The input text is not normalized all at once, but incrementally where needed
 * (providing efficient random access).
@ -287,7 +286,7 @@ public:
   * @see isNormalized
   * @stable ICU 2.6
   */
-  static inline UNormalizationCheckResult
+  static UNormalizationCheckResult
  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);

  /**
@ -328,7 +327,7 @@ public:
   * @see quickCheck
   * @stable ICU 2.6
   */
-  static inline UBool
+  static UBool
  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);

  /**
@ -726,18 +725,20 @@ private:
  UBool nextNormalize();
  UBool previousNormalize();

-  void    init(CharacterIterator *iter);
+  void    init();
  void    clearBuffer(void);

  //-------------------------------------------------------------------------
  // Private data
  //-------------------------------------------------------------------------

+  FilteredNormalizer2*fFilteredNorm2;  // owned if not NULL
+  const Normalizer2  *fNorm2;  // not owned; may be equal to fFilteredNorm2
  UNormalizationMode  fUMode;
  int32_t             fOptions;

  // The input text and our position in it
-  UCharIterator       *text;
+  CharacterIterator  *text;

  // The normalization buffer is the result of normalization
  // of the source in [currentIndex..nextIndex[ .
@ -746,7 +747,6 @@ private:
  // A buffer for holding intermediate results
  UnicodeString       buffer;
  int32_t         bufferPos;
-
 };

 //-------------------------------------------------------------------------
@ -761,48 +761,14 @@ inline UNormalizationCheckResult
 Normalizer::quickCheck(const UnicodeString& source,
                       UNormalizationMode mode,
                       UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return UNORM_MAYBE;
-    }
-
-    return unorm_quickCheck(source.getBuffer(), source.length(),
-                            mode, &status);
-}
-
-inline UNormalizationCheckResult
-Normalizer::quickCheck(const UnicodeString& source,
-                       UNormalizationMode mode, int32_t options,
-                       UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return UNORM_MAYBE;
-    }
-
-    return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
-                                       mode, options, &status);
+    return quickCheck(source, mode, 0, status);
 }

 inline UBool
 Normalizer::isNormalized(const UnicodeString& source,
                         UNormalizationMode mode,
                         UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return FALSE;
-    }
-
-    return unorm_isNormalized(source.getBuffer(), source.length(),
-                              mode, &status);
-}
-
-inline UBool
-Normalizer::isNormalized(const UnicodeString& source,
-                         UNormalizationMode mode, int32_t options,
-                         UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return FALSE;
-    }
-
-    return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
-                                         mode, options, &status);
+    return isNormalized(source, mode, 0, status);
 }

 inline int32_t
--- a/icu4c/source/common/unicode/uchar.h
+++ b/icu4c/source/common/unicode/uchar.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1997-2009, International Business Machines
+*   Copyright (C) 1997-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
@ -321,51 +321,29 @@ typedef enum UProperty {
    /** Binary property NFD_Inert.
        ICU-specific property for characters that are inert under NFD,
        i.e., they do not interact with adjacent characters.
-        Used for example in normalizing transforms in incremental mode
-        to find the boundary of safely normalizable text despite possible
-        text additions.
-
-        There is one such property per normalization form.
-        These properties are computed as follows - an inert character is:
-        a) unassigned, or ALL of the following:
-        b) of combining class 0.
-        c) not decomposed by this normalization form.
-        AND if NFC or NFKC,
-        d) can never compose with a previous character.
-        e) can never compose with a following character.
-        f) can never change if another character is added.
-           Example: a-breve might satisfy all but f, but if you
-           add an ogonek it changes to a-ogonek + breve
-
-        See also com.ibm.text.UCD.NFSkippable in the ICU4J repository,
-        and icu/source/common/unormimp.h .
+        See the documentation for the Normalizer2 class and the
+        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFD_INERT=37,
    /** Binary property NFKD_Inert.
        ICU-specific property for characters that are inert under NFKD,
        i.e., they do not interact with adjacent characters.
-        Used for example in normalizing transforms in incremental mode
-        to find the boundary of safely normalizable text despite possible
-        text additions.
-        @see UCHAR_NFD_INERT
+        See the documentation for the Normalizer2 class and the
+        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFKD_INERT=38,
    /** Binary property NFC_Inert.
        ICU-specific property for characters that are inert under NFC,
        i.e., they do not interact with adjacent characters.
-        Used for example in normalizing transforms in incremental mode
-        to find the boundary of safely normalizable text despite possible
-        text additions.
-        @see UCHAR_NFD_INERT
+        See the documentation for the Normalizer2 class and the
+        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFC_INERT=39,
    /** Binary property NFKC_Inert.
        ICU-specific property for characters that are inert under NFKC,
        i.e., they do not interact with adjacent characters.
-        Used for example in normalizing transforms in incremental mode
-        to find the boundary of safely normalizable text despite possible
-        text additions.
-        @see UCHAR_NFD_INERT
+        See the documentation for the Normalizer2 class and the
+        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFKC_INERT=40,
    /** Binary Property Segment_Starter.
@ -428,8 +406,10 @@ typedef enum UProperty {
    UCHAR_CHANGES_WHEN_CASEFOLDED=54,
    /** Binary property Changes_When_Casemapped. @draft ICU 4.4 */
    UCHAR_CHANGES_WHEN_CASEMAPPED=55,
+    /** Binary property Changes_When_NFKC_Casefolded. @draft ICU 4.4 */
+    UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
    /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
-    UCHAR_BINARY_LIMIT=56,
+    UCHAR_BINARY_LIMIT=57,

    /** Enumerated property Bidi_Class.
        Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@ -1,6 +1,6 @@
 /*
 ***************************************************************************
-* Copyright (C) 1999-2009, International Business Machines Corporation
+* Copyright (C) 1999-2010, International Business Machines Corporation
 * and others. All Rights Reserved.
 ***************************************************************************
 *   Date        Name        Description
@ -861,6 +861,20 @@ public:
     */
    int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;

+    /**
+     * Returns the end of the substring of the input string according to the USetSpanCondition.
+     * Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code>
+     * after pinning start to 0<=start<=s.length().
+     * @param s the string
+     * @param start the start index in the string for the span operation
+     * @param spanCondition specifies the containment condition
+     * @return the exclusive end of the substring according to the spanCondition;
+     *         the substring s.tempSubStringBetween(start, end) fulfills the spanCondition
+     * @draft ICU 4.4
+     * @see USetSpanCondition
+     */
+    inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
+
    /**
     * Returns the start of the trailing substring of the input string which
     * consists only of characters and strings that are contained in this set
@ -880,6 +894,21 @@ public:
     */
    int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;

+    /**
+     * Returns the start of the substring of the input string according to the USetSpanCondition.
+     * Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code>
+     * after pinning limit to 0<=limit<=s.length().
+     * @param s the string
+     * @param limit the exclusive-end index in the string for the span operation
+     *              (use s.length() or INT32_MAX for spanning back from the end of the string)
+     * @param spanCondition specifies the containment condition
+     * @return the start of the substring according to the spanCondition;
+     *         the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition
+     * @draft ICU 4.4
+     * @see USetSpanCondition
+     */
+    inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
+
    /**
     * Returns the length of the initial substring of the input string which
     * consists only of characters and strings that are contained in this set
@ -1619,6 +1648,26 @@ inline const USet *UnicodeSet::toUSet() const {
    return reinterpret_cast<const USet *>(this);
 }

+inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
+    int32_t sLength=s.length();
+    if(start<0) {  // pin start to 0<=start<=s.length()
+        start=0;
+    } else if(start>sLength) {
+        start=sLength;
+    }
+    return start+span(s.getBuffer()+start, sLength-start, spanCondition);  // span the tail via the UChar * overload, then add start back so the result is an index into s
+}
+
+inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
+    int32_t sLength=s.length();
+    if(limit<0) {  // pin limit to 0<=limit<=s.length()
+        limit=0;
+    } else if(limit>sLength) {
+        limit=sLength;
+    }
+    return spanBack(s.getBuffer(), limit, spanCondition);  // delegate to the UChar * overload, passing the pinned limit as the length
+}
+
 U_NAMESPACE_END

 #endif
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1998-2009, International Business Machines
+*   Copyright (C) 1998-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
@ -1566,6 +1566,33 @@ public:

 #endif

+  /**
+   * Create a temporary substring for the specified range.
+   * Unlike the substring constructor and setTo() functions,
+   * the object returned here will be a read-only alias (using getBuffer())
+   * rather than copying the text.
+   * As a result, this substring operation is much faster but requires
+   * that the original string not be modified or deleted during the lifetime
+   * of the returned substring object.
+   * @param start offset of the first character visible in the substring
+   * @param length length of the substring
+   * @return a read-only alias UnicodeString object for the substring
+   * @draft ICU 4.4
+   */
+  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
+
+  /**
+   * Create a temporary substring for the specified range.
+   * Same as tempSubString(start, length) except that the substring range
+   * is specified as a (start, limit) pair (with an exclusive limit index)
+   * rather than a (start, length) pair.
+   * @param start offset of the first character visible in the substring
+   * @param limit offset immediately following the last character visible in the substring
+   * @return a read-only alias UnicodeString object for the substring
+   * @draft ICU 4.4
+   */
+  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
+
  /**
   * Convert the UnicodeString to UTF-8 and write the result
   * to a ByteSink. This is called by toUTF8String().
@ -2396,6 +2423,16 @@ public:
  inline UnicodeString& removeBetween(int32_t start,
                                      int32_t limit = (int32_t)INT32_MAX);

+  /**
+   * Retain only the characters in the range
+   * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
+   * Removes characters before <code>start</code> and at and after <code>limit</code>.
+   * @param start the offset of the first character to retain
+   * @param limit the offset immediately following the range to retain
+   * @return a reference to this
+   * @draft ICU 4.4
+   */
+  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);

  /* Length operations */

@ -4068,6 +4105,11 @@ UnicodeString::extractBetween(int32_t start,
  doExtract(start, limit - start, dst, dstStart);
 }

+inline UnicodeString
+UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
+    return tempSubString(start, limit - start);  // (start, limit) -> (start, length); NOTE(review): limit - start can overflow int32_t if start<0 with the default limit=INT32_MAX - confirm callers never pass a negative start
+}
+
 inline UChar
 UnicodeString::doCharAt(int32_t offset) const
 {
@ -4161,7 +4203,13 @@ UnicodeString::getTerminatedBuffer() {
  } else {
    UChar *array = getArrayStart();
    int32_t len = length();
-    if(len < getCapacity()) {
+    if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
+      /*
+       * kRefCounted: Do not write the NUL if the buffer is shared.
+       * That is mostly safe, except when the length of one copy was modified
+       * without copy-on-write, e.g., via truncate(newLength) or remove(void).
+       * Then the NUL would be written into the middle of another copy's string.
+       */
      if(!(fFlags&kBufferIsReadonly)) {
        /*
         * We must not write to a readonly buffer, but it is known to be
@ -4332,10 +4380,12 @@ inline UnicodeString&
 UnicodeString::remove()
 {
  // remove() of a bogus string makes the string empty and non-bogus
-  if(isBogus()) {
-    unBogus();
+  // we also un-alias a read-only alias to deal with NUL-termination
+  // issues with getTerminatedBuffer()
+  if(fFlags & (kIsBogus|kBufferIsReadonly)) {
+    setToEmpty();
  } else {
-    setLength(0);
+    fShortLength = 0;
  }
  return *this;
 }
@ -4356,6 +4406,12 @@ UnicodeString::removeBetween(int32_t start,
                int32_t limit)
 { return doReplace(start, limit - start, NULL, 0, 0); }

+inline UnicodeString &
+UnicodeString::retainBetween(int32_t start, int32_t limit) {
+  truncate(limit);  // cut the tail first: this does not shift the indexes of the leading [0, start) range
+  return doReplace(0, start, NULL, 0, 0);  // then replace the leading [0, start) with nothing, i.e. remove it
+}
+
 inline UBool
 UnicodeString::truncate(int32_t targetLength)
 {
@ -4365,6 +4421,9 @@ UnicodeString::truncate(int32_t targetLength)
    return FALSE;
  } else if((uint32_t)targetLength < (uint32_t)length()) {
    setLength(targetLength);
+    if(fFlags&kBufferIsReadonly) {
+      fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
+    }
    return TRUE;
  } else {
    return FALSE;
--- a/icu4c/source/common/unicode/unorm.h
+++ b/icu4c/source/common/unicode/unorm.h
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (c) 1996-2007, International Business Machines Corporation
+* Copyright (c) 1996-2010, International Business Machines Corporation
 *               and others. All Rights Reserved.
 *******************************************************************************
 * File unorm.h
@ -20,6 +20,7 @@
 #if !UCONFIG_NO_NORMALIZATION

 #include "unicode/uiter.h"
+#include "unicode/unorm2.h"

 /**
 * \file
@ -27,6 +28,11 @@
 *
 * <h2>Unicode normalization API</h2>
 *
+ * Note: This API has been replaced by the unorm2.h API and is only available
+ * for backward compatibility. The functions here simply delegate to the
+ * unorm2.h functions, for example unorm2_getInstance() and unorm2_normalize().
+ * There is one exception: The new API does not provide a replacement for unorm_compare().
+ *
 * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or
 * decomposed form, allowing for easier sorting and searching of text.
 * <code>unorm_normalize</code> supports the standard normalization forms described in
@ -202,28 +208,7 @@ unorm_normalize(const UChar *source, int32_t sourceLength,
                UNormalizationMode mode, int32_t options,
                UChar *result, int32_t resultLength,
                UErrorCode *status);
-#endif
-/**
- * Result values for unorm_quickCheck().
- * For details see Unicode Technical Report 15.
- * @stable ICU 2.0
- */
-typedef enum UNormalizationCheckResult {
-  /** 
-   * Indicates that string is not in the normalized format
-   */
-  UNORM_NO,
-  /** 
-   * Indicates that string is in the normalized format
-   */
-  UNORM_YES,
-  /** 
-   * Indicates that string cannot be determined if it is in the normalized 
-   * format without further thorough checks.
-   */
-  UNORM_MAYBE
-} UNormalizationCheckResult;
-#if !UCONFIG_NO_NORMALIZATION
+
 /**
 * Performing quick check on a string, to quickly determine if the string is 
 * in a particular normalization format.
--- a/icu4c/source/common/unicode/unorm2.h
+++ b/icu4c/source/common/unicode/unorm2.h
@ -0,0 +1,348 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  unorm2.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009dec15
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UNORM2_H__
+#define __UNORM2_H__
+
+/**
+ * \file
+ * \brief C API: New API for Unicode Normalization.
+ *
+ * Unicode normalization functionality for standard Unicode normalization or
+ * for using custom mapping tables.
+ * All instances of UNormalizer2 are unmodifiable/immutable.
+ * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
+ * For more details see the Normalizer2 C++ class.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+
+/**
+ * Constants for normalization modes.
+ * For details about standard Unicode normalization forms
+ * and about the algorithms which are also used with custom mapping tables
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @draft ICU 4.4
+ */
+typedef enum {
+    /**
+     * Decomposition followed by composition.
+     * Same as standard NFC when using an "nfc" instance.
+     * Same as standard NFKC when using an "nfkc" instance.
+     * For details about standard Unicode normalization forms
+     * see http://www.unicode.org/unicode/reports/tr15/
+     * @draft ICU 4.4
+     */
+    UNORM2_COMPOSE,
+    /**
+     * Map, and reorder canonically.
+     * Same as standard NFD when using an "nfc" instance.
+     * Same as standard NFKD when using an "nfkc" instance.
+     * For details about standard Unicode normalization forms
+     * see http://www.unicode.org/unicode/reports/tr15/
+     * @draft ICU 4.4
+     */
+    UNORM2_DECOMPOSE,
+    /**
+     * "Fast C or D" form.
+     * Further decomposition <i>without reordering</i>
+     * would yield the same form as DECOMPOSE.
+     * Text in "Fast C or D" form can be processed efficiently with data tables
+     * that are "canonically closed", that is, that provide equivalent data for
+     * equivalent text, without having to be fully normalized.
+     * Not a standard Unicode normalization form.
+     * Not a unique form: Different FCD strings can be canonically equivalent.
+     * For details see http://www.unicode.org/notes/tn5/#FCD
+     * @draft ICU 4.4
+     */
+    UNORM2_FCD,
+    /**
+     * Compose only contiguously.
+     * Also known as "FCC" or "Fast C Contiguous".
+     * The result will often but not always be in NFC.
+     * The result will conform to FCD which is useful for processing.
+     * Not a standard Unicode normalization form.
+     * For details see http://www.unicode.org/notes/tn5/#FCC
+     * @draft ICU 4.4
+     */
+    UNORM2_COMPOSE_CONTIGUOUS
+} UNormalization2Mode;
+
+/**
+ * Result values for normalization quick check functions.
+ * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
+ * @stable ICU 2.0
+ */
+typedef enum UNormalizationCheckResult {
+  /** 
+   * The input string is not in the normalization form.
+   * @stable ICU 2.0
+   */
+  UNORM_NO,
+  /** 
+   * The input string is in the normalization form.
+   * @stable ICU 2.0
+   */
+  UNORM_YES,
+  /** 
+   * The input string may or may not be in the normalization form.
+   * This value is only returned for composition forms like NFC and FCC,
+   * when a backward-combining character is found for which the surrounding text
+   * would have to be analyzed further.
+   * @stable ICU 2.0
+   */
+  UNORM_MAYBE
+} UNormalizationCheckResult;
+
+/**
+ * Opaque C service object type for the new normalization API.
+ * @draft ICU 4.4
+ */
+struct UNormalizer2;
+typedef struct UNormalizer2 UNormalizer2;  /**< C typedef for struct UNormalizer2. @draft ICU 4.4 */
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns a UNormalizer2 instance which uses the specified data file
+ * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
+ * and which composes or decomposes text according to the specified mode.
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ *
+ * Use packageName=NULL for data files that are part of ICU's own data.
+ * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
+ * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
+ * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
+ *
+ * @param packageName NULL for ICU built-in data, otherwise application data package name
+ * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
+ * @param mode normalization mode (compose or decompose etc.)
+ * @param pErrorCode Standard ICU error code. Its input value must
+ *                  pass the U_SUCCESS() test, or else the function returns
+ *                  immediately. Check for U_FAILURE() on output or use with
+ *                  function chaining. (See User Guide for details.)
+ * @return the requested UNormalizer2, if successful
+ * @draft ICU 4.4
+ */
+U_DRAFT const UNormalizer2 * U_EXPORT2
+unorm2_getInstance(const char *packageName,
+                   const char *name,
+                   UNormalization2Mode mode,
+                   UErrorCode *pErrorCode);
+
+/**
+ * Constructs a filtered normalizer wrapping any UNormalizer2 instance
+ * and a filter set.
+ * Both are aliased and must not be modified or deleted while this object
+ * is used.
+ * The filter set should be frozen; otherwise the performance will suffer greatly.
+ * @param norm2 wrapped Normalizer2 instance
+ * @param filterSet USet which determines the characters to be normalized
+ * @param pErrorCode Standard ICU error code. Its input value must
+ *                   pass the U_SUCCESS() test, or else the function returns
+ *                   immediately. Check for U_FAILURE() on output or use with
+ *                   function chaining. (See User Guide for details.)
+ * @return the requested UNormalizer2, if successful
+ * @draft ICU 4.4
+ */
+U_DRAFT UNormalizer2 * U_EXPORT2
+unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
+
+/**
+ * Closes a UNormalizer2 instance from unorm2_openFiltered().
+ * Do not close instances from unorm2_getInstance()!
+ * @param norm2 UNormalizer2 instance to be closed
+ * @draft ICU 4.4
+ */
+U_DRAFT void U_EXPORT2
+unorm2_close(UNormalizer2 *norm2);
+
+/**
+ * Writes the normalized form of the source string to the destination string
+ * (replacing its contents) and returns the length of the destination string.
+ * The source and destination strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param src source string
+ * @param length length of the source string, or -1 if NUL-terminated
+ * @param dest destination string; its contents is replaced with normalized src
+ * @param capacity number of UChars that can be written to dest
+ * @param pErrorCode Standard ICU error code. Its input value must
+ *                   pass the U_SUCCESS() test, or else the function returns
+ *                   immediately. Check for U_FAILURE() on output or use with
+ *                   function chaining. (See User Guide for details.)
+ * @return the length of the destination string
+ * @draft ICU 4.4
+ */
+U_DRAFT int32_t U_EXPORT2
+unorm2_normalize(const UNormalizer2 *norm2,
+                 const UChar *src, int32_t length,
+                 UChar *dest, int32_t capacity,
+                 UErrorCode *pErrorCode);
+/**
+ * Appends the normalized form of the second string to the first string
+ * (merging them at the boundary) and returns the length of the first string.
+ * The result is normalized if the first string was normalized.
+ * The first and second strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param first string, should be normalized
+ * @param firstLength length of the first string, or -1 if NUL-terminated
+ * @param firstCapacity number of UChars that can be written to first
+ * @param second string, will be normalized
+ * @param secondLength length of the second string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ *                   pass the U_SUCCESS() test, or else the function returns
+ *                   immediately. Check for U_FAILURE() on output or use with
+ *                   function chaining. (See User Guide for details.)
+ * @return the length of the first string after the normalized second string has been appended
+ * @draft ICU 4.4
+ */
+U_DRAFT int32_t U_EXPORT2
+unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
+                                UChar *first, int32_t firstLength, int32_t firstCapacity,
+                                const UChar *second, int32_t secondLength,
+                                UErrorCode *pErrorCode);
+/**
+ * Appends the second string to the first string
+ * (merging them at the boundary) and returns the length of the first string.
+ * The result is normalized if both the strings were normalized.
+ * The first and second strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param first string, should be normalized
+ * @param firstLength length of the first string, or -1 if NUL-terminated
+ * @param firstCapacity number of UChars that can be written to first
+ * @param second string, should be normalized
+ * @param secondLength length of the second string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ *                   pass the U_SUCCESS() test, or else the function returns
+ *                   immediately. Check for U_FAILURE() on output or use with
+ *                   function chaining. (See User Guide for details.)
+ * @return first
+ * @draft ICU 4.4
+ */
+U_DRAFT int32_t U_EXPORT2
+unorm2_append(const UNormalizer2 *norm2,
+              UChar *first, int32_t firstLength, int32_t firstCapacity,
+              const UChar *second, int32_t secondLength,
+              UErrorCode *pErrorCode);
+
+/**
+ * Tests if the string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ *                   pass the U_SUCCESS() test, or else the function returns
+ *                   immediately. Check for U_FAILURE() on output or use with
+ *                   function chaining. (See User Guide for details.)
+ * @return TRUE if s is normalized
+ * @draft ICU 4.4
+ */
+U_DRAFT UBool U_EXPORT2
+unorm2_isNormalized(const UNormalizer2 *norm2,
+                    const UChar *s, int32_t length,
+                    UErrorCode *pErrorCode);
+
+/**
+ * Tests if the string is normalized.
+ * For the two COMPOSE modes, the result could be "maybe" in cases that
+ * would take a little more work to resolve definitively.
+ * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
+ * combination of quick check + normalization, to avoid
+ * re-checking the "yes" prefix.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ *                   pass the U_SUCCESS() test, or else the function returns
+ *                   immediately. Check for U_FAILURE() on output or use with
+ *                   function chaining. (See User Guide for details.)
+ * @return UNormalizationCheckResult
+ * @draft ICU 4.4
+ */
+U_DRAFT UNormalizationCheckResult U_EXPORT2
+unorm2_quickCheck(const UNormalizer2 *norm2,
+                  const UChar *s, int32_t length,
+                  UErrorCode *pErrorCode);
+
+/**
+ * Returns the end of the normalized substring of the input string.
+ * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
+ * the substring <code>UnicodeString(s, 0, end)</code>
+ * will pass the quick check with a "yes" result.
+ *
+ * The returned end index is usually one or more characters before the
+ * "no" or "maybe" character: The end index is at a normalization boundary.
+ * (See the class documentation for more about normalization boundaries.)
+ *
+ * When the goal is a normalized string and most input strings are expected
+ * to be normalized already, then call this method,
+ * and if it returns a prefix shorter than the input string,
+ * copy that prefix and use normalizeSecondAndAppend() for the remainder.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ *                   pass the U_SUCCESS() test, or else the function returns
+ *                   immediately. Check for U_FAILURE() on output or use with
+ *                   function chaining. (See User Guide for details.)
+ * @return the end index of the initial substring of s that passes the quick check with "yes"
+ * @draft ICU 4.4
+ */
+U_DRAFT int32_t U_EXPORT2
+unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
+                         const UChar *s, int32_t length,
+                         UErrorCode *pErrorCode);
+
+/**
+ * Tests if the character has a normalization boundary before it.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return TRUE if c has a normalization boundary before it
+ * @draft ICU 4.4
+ */
+U_DRAFT UBool U_EXPORT2
+unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the character has a normalization boundary after it.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return TRUE if c has a normalization boundary after it
+ * @draft ICU 4.4
+ */
+U_DRAFT UBool U_EXPORT2
+unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the character is normalization-inert.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return TRUE if c is normalization-inert
+ * @draft ICU 4.4
+ */
+U_DRAFT UBool U_EXPORT2
+unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
+
+#endif  /* !UCONFIG_NO_NORMALIZATION */
+#endif  /* __UNORM2_H__ */
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2009, International Business Machines
+*   Copyright (C) 1999-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -33,12 +33,15 @@
 #include "uvector.h"
 #include "uprops.h"
 #include "propname.h"
+#include "normalizer2impl.h"
 #include "unormimp.h"
 #include "ucase.h"
 #include "ubidi_props.h"
 #include "uinvchar.h"
+#include "uprops.h"
 #include "charstr.h"
 #include "cstring.h"
+#include "mutex.h"
 #include "umutex.h"
 #include "uassert.h"
 #include "hash.h"
@ -91,10 +94,43 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
 */
 //static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */

+// Cached sets ------------------------------------------------------------- ***
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV uset_cleanup();
+U_CDECL_END
+
+// Not a TriStateSingletonWrapper because we think the UnicodeSet constructor
+// can only fail with an out-of-memory error
+// if we have a correct pattern and the properties data is hardcoded and always available.
+class UnicodeSetSingleton : public SimpleSingletonWrapper<UnicodeSet> {
+public:
+    /**
+     * @param s       singleton storage that caches the set
+     * @param pattern invariant-character UnicodeSet pattern; only the pointer is kept.
+     *                May be NULL when the wrapper is used just for deleteInstance().
+     */
+    UnicodeSetSingleton(SimpleSingleton &s, const char *pattern) :
+            SimpleSingletonWrapper<UnicodeSet>(s), fPattern(pattern) {}
+    /**
+     * Returns the cached set, creating it via createInstance() with fPattern as context.
+     * @param errorCode standard ICU error code, passed through to the creation callback
+     */
+    UnicodeSet *getInstance(UErrorCode &errorCode) {
+        return SimpleSingletonWrapper<UnicodeSet>::getInstance(createInstance, fPattern, errorCode);
+    }
+private:
+    /**
+     * Creation callback: builds a frozen UnicodeSet from the pattern passed as context
+     * and registers uset_cleanup() for the common library.
+     * @param context   the pattern as a const char * of invariant characters
+     * @param errorCode set to U_MEMORY_ALLOCATION_ERROR if the set cannot be allocated
+     * @return the new frozen set, or NULL if allocation failed
+     */
+    static void *createInstance(const void *context, UErrorCode &errorCode) {
+        UnicodeString pattern((const char *)context, -1, US_INV);
+        UnicodeSet *set=new UnicodeSet(pattern, errorCode);
+        if(set==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return NULL;  // must not fall through and dereference the NULL pointer
+        }
+        set->freeze();
+        ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
+        return set;
+    }
+
+    const char *fPattern;
+};
+
 U_CDECL_BEGIN

 static UnicodeSet *INCLUSIONS[UPROPS_SRC_COUNT] = { NULL }; // cached getInclusions()

+STATIC_SIMPLE_SINGLETON(uni32Singleton);
+
 //----------------------------------------------------------------
 // Inclusions list
 //----------------------------------------------------------------
@ -128,7 +164,7 @@ static UBool U_CALLCONV uset_cleanup(void) {
            INCLUSIONS[i] = NULL;
        }
    }
-
+    UnicodeSetSingleton(uni32Singleton, NULL).deleteInstance();
    return TRUE;
 }

@ -177,6 +213,27 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
                ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
                unorm_addPropertyStarts(&sa, &status);
                break;
+            case UPROPS_SRC_NFC: {
+                const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status);
+                if(U_SUCCESS(status)) {
+                    impl->addPropertyStarts(&sa, status);
+                }
+                break;
+            }
+            case UPROPS_SRC_NFKC: {
+                const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(status);
+                if(U_SUCCESS(status)) {
+                    impl->addPropertyStarts(&sa, status);
+                }
+                break;
+            }
+            case UPROPS_SRC_NFKC_CF: {
+                const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(status);
+                if(U_SUCCESS(status)) {
+                    impl->addPropertyStarts(&sa, status);
+                }
+                break;
+            }
 #endif
            case UPROPS_SRC_CASE:
                ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
@ -207,6 +264,13 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
    return INCLUSIONS[src];
 }

+// Cache some sets for other services -------------------------------------- ***
+
+/**
+ * Returns the cached set for the pattern "[:age=3.2:]",
+ * obtained through UnicodeSetSingleton on the uni32Singleton storage.
+ * @param errorCode standard ICU error code, passed to the singleton's getInstance()
+ */
+U_CFUNC UnicodeSet *
+uniset_getUnicode32Instance(UErrorCode &errorCode) {
+    UnicodeSetSingleton uni32(uni32Singleton, "[:age=3.2:]");
+    return uni32.getInstance(errorCode);
+}
+
 // helper functions for matching of pattern syntax pieces ------------------ ***
 // these functions are parallel to the PERL_OPEN etc. strings above

--- a/icu4c/source/common/unistr.cpp
+++ b/icu4c/source/common/unistr.cpp
@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-* Copyright (C) 1999-2009, International Business Machines Corporation and   *
+* Copyright (C) 1999-2010, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 *
@ -780,6 +780,17 @@ UnicodeString::extract(int32_t start,
  return u_terminateChars(target, targetCapacity, length, &status);
 }

+// Returns a non-owning (aliasing) UnicodeString over [start, start+len) of this string,
+// after pinning the indices; yields a bogus string if getBuffer() gives no array.
+UnicodeString
+UnicodeString::tempSubString(int32_t start, int32_t len) const {
+  pinIndices(start, len);
+  // getBuffer() rather than getArrayStart(), so that kIsBogus & kOpenGetBuffer are checked
+  const UChar *chars = getBuffer();
+  if(chars!=NULL) {
+    return UnicodeString(FALSE, chars + start, len);
+  }
+  // Any non-NULL pointer will do here (NULL would make an empty string);
+  // the length of -2 makes the result bogus.
+  return UnicodeString(FALSE, fUnion.fStackBuffer + start, -2);
+}
+
 int32_t
 UnicodeString::toUTF8(int32_t start, int32_t len,
                      char *target, int32_t capacity) const {
@ -1218,6 +1229,28 @@ UnicodeString::doReplace(int32_t start,
    return *this;
  }

+  int32_t oldLength = this->length();
+
+  // optimize (read-only alias).remove(0, start) and .remove(start, end)
+  if((fFlags&kBufferIsReadonly) && srcLength == 0) {
+    if(start == 0) {
+      // remove prefix by adjusting the array pointer
+      pinIndex(length);
+      fUnion.fFields.fArray += length;
+      fUnion.fFields.fCapacity -= length;
+      setLength(oldLength - length);
+      return *this;
+    } else {
+      pinIndex(start);
+      if(length >= (oldLength - start)) {
+        // remove suffix by reducing the length (like truncate())
+        setLength(start);
+        fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
+        return *this;
+      }
+    }
+  }
+
  if(srcChars == 0) {
    srcStart = srcLength = 0;
  } else if(srcLength < 0) {
@ -1225,8 +1258,6 @@ UnicodeString::doReplace(int32_t start,
    srcLength = u_strlen(srcChars + srcStart);
  }

-  int32_t oldLength = this->length();
-
  // calculate the size of the string after the replace
  int32_t newSize;

@ -1594,4 +1625,3 @@ static void uprv_UnicodeStringDummy(void) {
    delete [] (new UnicodeString[2]);
 }
 #endif
-
--- a/icu4c/source/common/unorm.cpp
+++ b/icu4c/source/common/unorm.cpp
--- a/icu4c/source/common/unorm_props_data.c
+++ b/icu4c/source/common/unorm_props_data.c
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 1999-2009, International Business Machines
+ * Copyright (C) 1999-2010, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *
 * file name: unorm_props_data.c
@ -14,6 +14,7 @@ static const int32_t indexes[_NORM_INDEX_TOP]={
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 };

+#if 0
 static const uint16_t normTrie_index[2532]={
 0,8,0x10,0x18,0x28,0x30,0x38,0x40,0x48,0x50,0x58,0x60,0x68,0x70,0x77,0x7f,
 0x87,0x8f,0x1f,0x27,0x94,0x9c,0xa3,0xab,0xb3,0xbb,0xc3,0xcb,0xd3,0xdb,0xe3,0xeb,
@ -835,6 +836,7 @@ static const UTrie2 normTrie={
    0x2810,
    NULL, 0, FALSE, FALSE, 0, NULL
 };
+#endif

 static const uint16_t extraData[16431]={
 0x1c2,0xff02,0x20,0x3b9,0xff01,0x3c5,0xff01,0x3cd,0xff01,0x3cb,0xff01,0x3c3,0xff01,0x61,0xff01,0xe6,
@ -1866,6 +1868,7 @@ static const uint16_t extraData[16431]={
 0x773,0x776,0x77c,0x782,0x788,0x78e,0x794,0x797,0x79a,0x79d,0x7a0,0x7a3,0x7a6,0x7a9,0x7ac
 };

+#if 0
 static const uint16_t combiningTable[1967]={
 0x7af,0xc0,0x7b0,0xc1,0x7b1,0x20c2,0x7b2,0xc3,0x7b3,0x20c4,0x7b4,0x20c5,0x7b6,0x100,0x7b7,0x2102,
 0x7b8,0x104,0x7b9,0x2226,0x7ba,0x1cd,0x7bd,0x200,0x7be,0x202,0x7d6,0x1e00,0x7d7,0x3ea0,0x87dd,0x1ea2,
@ -2416,6 +2419,7 @@ static const UTrie2 fcdTrie={
    0x1968,
    NULL, 0, FALSE, FALSE, 0, NULL
 };
+#endif

 static const uint16_t auxTrie_index[6664]={
 0x278,0x280,0x288,0x290,0x278,0x280,0x2a8,0x2b0,0x2b8,0x2c0,0x2c8,0x2d0,0x278,0x280,0x2d8,0x2e0,
--- a/icu4c/source/common/unormcmp.cpp
+++ b/icu4c/source/common/unormcmp.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2001-2009, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -22,12 +22,13 @@

 #if !UCONFIG_NO_NORMALIZATION

-#include "unicode/ustring.h"
 #include "unicode/unorm.h"
-#include "unicode/uniset.h"
-#include "unormimp.h"
-#include "ucase.h"
+#include "unicode/ustring.h"
 #include "cmemory.h"
+#include "normalizer2impl.h"
+#include "ucase.h"
+#include "uprops.h"
+#include "ustr_imp.h"

 U_NAMESPACE_USE

@ -134,12 +135,19 @@ struct CmpEquivLevel {
 };
 typedef struct CmpEquivLevel CmpEquivLevel;

+/**
+ * Internal option for unorm_cmpEquivFold() for decomposing.
+ * If not set, just do strcasecmp().
+ */
+#define _COMPARE_EQUIV 0x80000
+
 /* internal function */
 static int32_t
 unorm_cmpEquivFold(const UChar *s1, int32_t length1,
                   const UChar *s2, int32_t length2,
                   uint32_t options,
                   UErrorCode *pErrorCode) {
+    const Normalizer2Impl *nfcImpl;
    const UCaseProps *csp;

    /* current-level start/limit - s1/s2 as current */
@ -152,7 +160,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
    /* stacks of previous-level start/current/limit */
    CmpEquivLevel stack1[2], stack2[2];

-    /* decomposition buffers for Hangul */
+    /* buffers for algorithmic decompositions */
    UChar decomp1[4], decomp2[4];

    /* case folding buffers, only use current-level start/limit */
@ -173,19 +181,19 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
     */

    /* normalization/properties data loaded? */
-    if( ((options&_COMPARE_EQUIV)!=0 && !unorm_haveData(pErrorCode)) ||
-        U_FAILURE(*pErrorCode)
-    ) {
-        return 0;
+    if((options&_COMPARE_EQUIV)!=0) {
+        nfcImpl=Normalizer2Factory::getNFCImpl(*pErrorCode);
+    } else {
+        nfcImpl=NULL;
    }
    if((options&U_COMPARE_IGNORE_CASE)!=0) {
        csp=ucase_getSingleton(pErrorCode);
-        if(U_FAILURE(*pErrorCode)) {
-            return 0;
-        }
    } else {
        csp=NULL;
    }
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }

    /* initialize */
    start1=s1;
@ -404,7 +412,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
        }

        if( level1<2 && (options&_COMPARE_EQUIV) &&
-            0!=(p=unorm_getCanonicalDecomposition((UChar32)cp1, decomp1, &length))
+            0!=(p=nfcImpl->getDecomposition((UChar32)cp1, decomp1, length))
        ) {
            /* cp1 decomposes into p[length] */
            if(U_IS_SURROGATE(c1)) {
@ -445,7 +453,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
        }

        if( level2<2 && (options&_COMPARE_EQUIV) &&
-            0!=(p=unorm_getCanonicalDecomposition((UChar32)cp2, decomp2, &length))
+            0!=(p=nfcImpl->getDecomposition((UChar32)cp2, decomp2, length))
        ) {
            /* cp2 decomposes into p[length] */
            if(U_IS_SURROGATE(c2)) {
@ -534,14 +542,8 @@ unorm_compare(const UChar *s1, int32_t length1,
              const UChar *s2, int32_t length2,
              uint32_t options,
              UErrorCode *pErrorCode) {
-    MaybeStackArray<UChar, 300> fcd1, fcd2;
-    const UnicodeSet *nx;
-    UNormalizationMode mode;
-    int32_t normOptions;
-    int32_t result;
-
    /* argument checking */
-    if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
+    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s1==0 || length1<-1 || s2==0 || length2<-1) {
@ -549,21 +551,9 @@ unorm_compare(const UChar *s1, int32_t length1,
        return 0;
    }

-    if(!unorm_haveData(pErrorCode)) {
-        return 0;
-    }
-    if(!uprv_haveProperties(pErrorCode)) {
-        return 0;
-    }
-
-    normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT);
-    nx=unorm_getNX(normOptions, pErrorCode);
-    if(U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-
+    UnicodeString fcd1, fcd2;
+    int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT);
    options|=_COMPARE_EQUIV;
-    result=0;

    /*
     * UAX #21 Case Mappings, as fixed for Unicode version 4
@ -586,20 +576,30 @@ unorm_compare(const UChar *s1, int32_t length1,
     * are first decomposed or not, so an FCD check - a check only for
     * canonical order - is not sufficient.
     */
-    if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
-        mode=UNORM_NFD;
-        options&=~UNORM_INPUT_IS_FCD;
-    } else {
-        mode=UNORM_FCD;
-    }
-
-    if(!(options&UNORM_INPUT_IS_FCD)) {
-        int32_t _len1, _len2;
-        UBool isFCD1, isFCD2;
+    if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
+        const Normalizer2 *n2;
+        if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
+            n2=Normalizer2Factory::getNFDInstance(*pErrorCode);
+        } else {
+            n2=Normalizer2Factory::getFCDInstance(*pErrorCode);
+        }
+        if(U_FAILURE(*pErrorCode)) {
+            // The factory failed, so n2 is not usable and must not be dereferenced below.
+            return 0;
+        }
+
         // check if s1 and/or s2 fulfill the FCD conditions
+        // fn2 is constructed unconditionally because it has to stay alive while
+        // n2 may point to it. Give it a real (empty) set when the Unicode 3.2
+        // filter is not requested, instead of binding a reference to *NULL.
+        UnicodeSet noFilter;
+        const UnicodeSet *uni32=&noFilter;
+        if(normOptions&UNORM_UNICODE_3_2) {
+            uni32=uniset_getUnicode32Instance(*pErrorCode);
+            if(U_FAILURE(*pErrorCode)) {
+                return 0;  // no filter set available
+            }
+        }
+        FilteredNormalizer2 fn2(*n2, *uni32);
+        if(normOptions&UNORM_UNICODE_3_2) {
+            n2=&fn2;
+        }
+
+        UnicodeString str1(length1<0, s1, length1);
+        UnicodeString str2(length2<0, s2, length2);
+        int32_t spanQCYes1=n2->spanQuickCheckYes(str1, *pErrorCode);
+        int32_t spanQCYes2=n2->spanQuickCheckYes(str2, *pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return 0;
        }
@ -613,59 +613,27 @@ unorm_compare(const UChar *s1, int32_t length1,
         * Therefore, ICU 2.6 removes that optimization.
         */

-        if(!isFCD1) {
-            _len1=unorm_internalNormalizeWithNX(fcd1.getAlias(), fcd1.getCapacity(),
-                                                s1, length1,
-                                                mode, normOptions, nx,
-                                                pErrorCode);
-            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-                if(fcd1.resize(_len1)==NULL) {
-                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
-                    return result;
-                }
-
-                *pErrorCode=U_ZERO_ERROR;
-                _len1=unorm_internalNormalizeWithNX(fcd1.getAlias(), fcd1.getCapacity(),
-                                                    s1, length1,
-                                                    mode, normOptions, nx,
-                                                    pErrorCode);
-                if(U_FAILURE(*pErrorCode)) {
-                    return result;
-                }
-            }
-            s1=fcd1.getAlias();
-            length1=_len1;
+        if(spanQCYes1<str1.length()) {
+            UnicodeString unnormalized=str1.tempSubString(spanQCYes1);
+            fcd1.setTo(FALSE, str1.getBuffer(), spanQCYes1);
+            n2->normalizeSecondAndAppend(fcd1, unnormalized, *pErrorCode);
+            s1=fcd1.getBuffer();
+            length1=fcd1.length();
        }
-
-        if(!isFCD2) {
-            _len2=unorm_internalNormalizeWithNX(fcd2.getAlias(), fcd2.getCapacity(),
-                                                s2, length2,
-                                                mode, normOptions, nx,
-                                                pErrorCode);
-            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-                if(fcd2.resize(_len2)==NULL) {
-                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
-                    return result;
-                }
-
-                *pErrorCode=U_ZERO_ERROR;
-                _len2=unorm_internalNormalizeWithNX(fcd2.getAlias(), fcd2.getCapacity(),
-                                                    s2, length2,
-                                                    mode, normOptions, nx,
-                                                    pErrorCode);
-                if(U_FAILURE(*pErrorCode)) {
-                    return result;
-                }
-            }
-            s2=fcd2.getAlias();
-            length2=_len2;
+        if(spanQCYes2<str2.length()) {
+            UnicodeString unnormalized=str2.tempSubString(spanQCYes2);
+            fcd2.setTo(FALSE, str2.getBuffer(), spanQCYes2);
+            n2->normalizeSecondAndAppend(fcd2, unnormalized, *pErrorCode);
+            s2=fcd2.getBuffer();
+            length2=fcd2.length();
        }
    }

    if(U_SUCCESS(*pErrorCode)) {
-        result=unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode);
+        return unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode);
+    } else {
+        return 0;
    }
-    return result;
 }

 #endif /* #if !UCONFIG_NO_NORMALIZATION */
--- a/icu4c/source/common/unormimp.h
+++ b/icu4c/source/common/unormimp.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2001-2009, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -163,25 +163,6 @@ enum {
    _NORM_DECOMP_LENGTH_MASK=0x7f
 };

-#endif /* #if !UCONFIG_NO_NORMALIZATION */
-
-/* Korean Hangul and Jamo constants */
-enum {
-    JAMO_L_BASE=0x1100,     /* "lead" jamo */
-    JAMO_V_BASE=0x1161,     /* "vowel" jamo */
-    JAMO_T_BASE=0x11a7,     /* "trail" jamo */
-
-    HANGUL_BASE=0xac00,
-
-    JAMO_L_COUNT=19,
-    JAMO_V_COUNT=21,
-    JAMO_T_COUNT=28,
-
-    HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT
-};
-
-#if !UCONFIG_NO_NORMALIZATION
-
 /* Constants for options flags for normalization. @draft ICU 2.6 */
 enum {
    /** Options bit 0, do not decompose Hangul syllables. @draft ICU 2.6 */
@ -205,199 +186,6 @@ enum {
 U_CAPI UBool U_EXPORT2
 unorm_haveData(UErrorCode *pErrorCode);

-/**
- * Internal API for normalizing.
- * Does not check for bad input.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-unorm_internalNormalize(UChar *dest, int32_t destCapacity,
-                        const UChar *src, int32_t srcLength,
-                        UNormalizationMode mode, int32_t options,
-                        UErrorCode *pErrorCode);
-
-#ifdef XP_CPLUSPLUS
-
-/**
- * Internal API for normalizing.
- * Does not check for bad input.
- * Requires _haveData() to be true.
- * @internal
- */
-U_CFUNC int32_t
-unorm_internalNormalizeWithNX(UChar *dest, int32_t destCapacity,
-                              const UChar *src, int32_t srcLength,
-                              UNormalizationMode mode, int32_t options, const U_NAMESPACE_QUALIFIER UnicodeSet *nx,
-                              UErrorCode *pErrorCode);
-
-#endif
-
-/**
- * internal API, used by normlzr.cpp
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-unorm_decompose(UChar *dest, int32_t destCapacity,
-                const UChar *src, int32_t srcLength,
-                UBool compat, int32_t options,
-                UErrorCode *pErrorCode);
-
-/**
- * internal API, used by normlzr.cpp
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-unorm_compose(UChar *dest, int32_t destCapacity,
-              const UChar *src, int32_t srcLength,
-              UBool compat, int32_t options,
-              UErrorCode *pErrorCode);
-
-#ifdef XP_CPLUSPLUS
-
-/**
- * internal API, used by unormcmp.cpp
- * @internal
- */
-U_CFUNC UNormalizationCheckResult
-unorm_internalQuickCheck(const UChar *src,
-                         int32_t srcLength,
-                         UNormalizationMode mode,
-                         UBool allowMaybe,
-                         const U_NAMESPACE_QUALIFIER UnicodeSet *nx,
-                         UErrorCode *pErrorCode);
-
-#endif
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
-
-/**
- * Internal option for unorm_cmpEquivFold() for decomposing.
- * If not set, just do strcasecmp().
- * @internal
- */
-#define _COMPARE_EQUIV 0x80000
-
-#ifndef U_COMPARE_IGNORE_CASE
-/* see also unorm.h */
-/**
- * Option bit for unorm_compare:
- * Perform case-insensitive comparison.
- * @draft ICU 2.2
- */
-#define U_COMPARE_IGNORE_CASE       0x10000
-#endif
-
-/**
- * Internal option for unorm_cmpEquivFold() for strncmp style.
- * If set, checks for both string length and terminating NUL.
- * @internal
- */
-#define _STRNCMP_STYLE 0x1000
-
-#if !UCONFIG_NO_NORMALIZATION
-
-/**
- * Internal API to get the 16-bit FCD value (lccc + tccc) for c,
- * for u_getIntPropertyValue().
- * @internal
- */
-U_CFUNC uint16_t U_EXPORT2
-unorm_getFCD16FromCodePoint(UChar32 c);
-
-#ifdef XP_CPLUSPLUS
-
-/**
- * Internal API, used by collation code.
- * Get access to the internal FCD trie table to be able to perform
- * incremental, per-code unit, FCD checks in collation.
- * One pointer is sufficient because the trie index values are offset
- * by the index size, so that the same pointer is used to access the trie data.
- * Code points at fcdHighStart and above have a zero FCD value.
- * @internal
- */
-U_CAPI const uint16_t * U_EXPORT2
-unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode);
-
-/**
- * Internal API, used by collation code.
- * Get the FCD value for a code unit, with
- * bits 15..8   lead combining class
- * bits  7..0   trail combining class
- *
- * If c is a lead surrogate and the value is not 0,
- * then some of c's associated supplementary code points have a non-zero FCD value.
- *
- * @internal
- */
-static inline uint16_t
-unorm_getFCD16(const uint16_t *fcdTrieIndex, UChar c) {
-    return fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
-}
-
-/**
- * Internal API, used by collation code.
- * Get the FCD value of the next code point (post-increment), with
- * bits 15..8   lead combining class
- * bits  7..0   trail combining class
- *
- * @internal
- */
-static inline uint16_t
-unorm_nextFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart,
-                const UChar *&s, const UChar *limit) {
-    UChar32 c=*s++;
-    uint16_t fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
-    if(fcd!=0 && U16_IS_LEAD(c)) {
-        UChar c2;
-        if(s!=limit && U16_IS_TRAIL(c2=*s)) {
-            ++s;
-            c=U16_GET_SUPPLEMENTARY(c, c2);
-            if(c<fcdHighStart) {
-                fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_SUPP(fcdTrieIndex, c)];
-            } else {
-                fcd=0;
-            }
-        } else /* unpaired lead surrogate */ {
-            fcd=0;
-        }
-    }
-    return fcd;
-}
-
-/**
- * Internal API, used by collation code.
- * Get the FCD value of the previous code point (pre-decrement), with
- * bits 15..8   lead combining class
- * bits  7..0   trail combining class
- *
- * @internal
- */
-static inline uint16_t
-unorm_prevFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart,
-                const UChar *start, const UChar *&s) {
-    UChar32 c=*--s;
-    uint16_t fcd;
-    if(!U16_IS_SURROGATE(c)) {
-        fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
-    } else {
-        UChar c2;
-        if(U16_IS_SURROGATE_TRAIL(c) && s!=start && U16_IS_LEAD(c2=*(s-1))) {
-            --s;
-            c=U16_GET_SUPPLEMENTARY(c2, c);
-            if(c<fcdHighStart) {
-                fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_SUPP(fcdTrieIndex, c)];
-            } else {
-                fcd=0;
-            }
-        } else /* unpaired surrogate */ {
-            fcd=0;
-        }
-    }
-    return fcd;
-}
-
-#endif
-
 /**
 * internal API, used by StringPrep
 * @internal
@ -405,35 +193,6 @@ unorm_prevFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart,
 U_CAPI void U_EXPORT2
 unorm_getUnicodeVersion(UVersionInfo *versionInfo, UErrorCode *pErrorCode);

-/**
- * Get the canonical decomposition for one code point.
- * Requires unorm_haveData() and buffer!=NULL and pLength!=NULL.
- * @param c code point
- * @param buffer out-only buffer for algorithmic decompositions of Hangul
- * @param length out-only, takes the length of the decomposition, if any
- * @return pointer to decomposition, or 0 if none
- * @internal
- */
-U_CFUNC const UChar *
-unorm_getCanonicalDecomposition(UChar32 c, UChar buffer[4], int32_t *pLength);
-
-/**
- * internal API, used by the canonical iterator
- * TODO Consider using signature similar to unorm_getCanonicalDecomposition()
- * for more efficiency
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-unorm_getDecomposition(UChar32 c, UBool compat,
-                       UChar *dest, int32_t destCapacity);
-
-/**
- * internal API, used by uprops.cpp
- * @internal
- */
-U_CFUNC UBool U_EXPORT2
-unorm_internalIsFullCompositionExclusion(UChar32 c);
-
 /**
 * Internal API, used by enumeration of canonically equivalent strings
 * @internal
@ -448,13 +207,6 @@ unorm_isCanonSafeStart(UChar32 c);
 U_CAPI UBool U_EXPORT2
 unorm_getCanonStartSet(UChar32 c, USerializedSet *fillSet);

-/**
- * Is c an NF<mode>-skippable code point? See unormimp.h.
- * @internal
- */
-U_CAPI UBool U_EXPORT2
-unorm_isNFSkippable(UChar32 c, UNormalizationMode mode);
-
 #ifdef XP_CPLUSPLUS

 /**
@ -484,13 +236,6 @@ unorm_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode);

-/**
- * Get the NF*_QC property for a code point, for u_getIntPropertyValue().
- * @internal
- */
-U_CFUNC UNormalizationCheckResult U_EXPORT2
-unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
-
 /**
 * Description of the format of unorm.icu version 2.3.
 *
--- a/icu4c/source/common/uprops.cpp
+++ b/icu4c/source/common/uprops.cpp
@ -1,11 +1,11 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2002-2009, International Business Machines
+*   Copyright (C) 2002-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
-*   file name:  uprops.h
+*   file name:  uprops.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
@ -26,6 +26,7 @@
 #include "unicode/uscript.h"
 #include "unicode/ustring.h"
 #include "cstring.h"
+#include "normalizer2impl.h"
 #include "ucln_cmn.h"
 #include "umutex.h"
 #include "unormimp.h"
@ -106,7 +107,7 @@ static const struct {
    {  1,               U_MASK(UPROPS_DEPRECATED) },
    {  1,               U_MASK(UPROPS_DIACRITIC) },
    {  1,               U_MASK(UPROPS_EXTENDER) },
-    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_FULL_COMPOSITION_EXCLUSION */
+    { UPROPS_SRC_NFC,  0 },                                     /* UCHAR_FULL_COMPOSITION_EXCLUSION */
    {  1,               U_MASK(UPROPS_GRAPHEME_BASE) },
    {  1,               U_MASK(UPROPS_GRAPHEME_EXTEND) },
    {  1,               U_MASK(UPROPS_GRAPHEME_LINK) },
@ -134,10 +135,10 @@ static const struct {
    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_CASE_SENSITIVE */
    {  1,               U_MASK(UPROPS_S_TERM) },
    {  1,               U_MASK(UPROPS_VARIATION_SELECTOR) },
-    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFD_INERT */
-    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFKD_INERT */
-    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFC_INERT */
-    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFKC_INERT */
+    { UPROPS_SRC_NFC,   0 },                                    /* UCHAR_NFD_INERT */
+    { UPROPS_SRC_NFKC,  0 },                                    /* UCHAR_NFKD_INERT */
+    { UPROPS_SRC_NFC,   0 },                                    /* UCHAR_NFC_INERT */
+    { UPROPS_SRC_NFKC,  0 },                                    /* UCHAR_NFKC_INERT */
    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_SEGMENT_STARTER */
    {  1,               U_MASK(UPROPS_PATTERN_SYNTAX) },
    {  1,               U_MASK(UPROPS_PATTERN_WHITE_SPACE) },
@ -152,7 +153,8 @@ static const struct {
    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_CHANGES_WHEN_UPPERCASED */
    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_CHANGES_WHEN_TITLECASED */
    { UPROPS_SRC_CASE_AND_NORM,  0 },                           /* UCHAR_CHANGES_WHEN_CASEFOLDED */
-    { UPROPS_SRC_CASE,  0 }                                     /* UCHAR_CHANGES_WHEN_CASEMAPPED */
+    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_CHANGES_WHEN_CASEMAPPED */
+    { UPROPS_SRC_NFKC_CF, 0 }                                   /* UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED */
 };

 U_CAPI UBool U_EXPORT2
@ -173,18 +175,56 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
 #if !UCONFIG_NO_NORMALIZATION
                /* normalization properties from unorm.icu */
                switch(which) {
-                case UCHAR_FULL_COMPOSITION_EXCLUSION:
-                    return unorm_internalIsFullCompositionExclusion(c);
-                case UCHAR_NFD_INERT:
-                case UCHAR_NFKD_INERT:
-                case UCHAR_NFC_INERT:
-                case UCHAR_NFKC_INERT:
-                    return unorm_isNFSkippable(c, (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD));
                case UCHAR_SEGMENT_STARTER:
                    return unorm_isCanonSafeStart(c);
                default:
                    break;
                }
+#endif
+            } else if(column==UPROPS_SRC_NFC || column==UPROPS_SRC_NFKC) {
+#if !UCONFIG_NO_NORMALIZATION
+                UErrorCode errorCode=U_ZERO_ERROR;
+                switch(which) {
+                case UCHAR_FULL_COMPOSITION_EXCLUSION: {
+                    // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
+                    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+                    if(U_SUCCESS(errorCode)) {
+                        return impl->isCompNo(impl->getNorm16(c));
+                    }
+                    break;
+                }
+                default: {
+                    // UCHAR_NF..._INERT properties
+                    const Normalizer2 *norm2=Normalizer2Factory::getInstance(
+                        (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
+                    if(U_SUCCESS(errorCode)) {
+                        return norm2->isInert(c);
+                    }
+                    break;
+                }
+                }
+#endif
+            } else if(column==UPROPS_SRC_NFKC_CF) {
+                // currently only for UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
+#if !UCONFIG_NO_NORMALIZATION
+                UErrorCode errorCode=U_ZERO_ERROR;
+                const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode);
+                if(U_SUCCESS(errorCode)) {
+                    UnicodeString src(c);
+                    UnicodeString dest;
+                    {
+                        // The ReorderingBuffer must be in a block because its destructor
+                        // needs to release dest's buffer before we look at its contents.
+                        ReorderingBuffer buffer(*kcf, dest);
+                        // Small destCapacity for NFKC_CF(c).
+                        if(U_SUCCESS(errorCode) && buffer.init(5, errorCode)) {
+                            const UChar *srcArray=src.getBuffer();
+                            kcf->compose(srcArray, srcArray+src.length(), FALSE,
+                                         TRUE, buffer, errorCode);
+                        }
+                    }
+                    return U_SUCCESS(errorCode) && dest!=src;
+                }
 #endif
            } else if(column==UPROPS_SRC_BIDI) {
                /* bidi/shaping properties */
@ -225,14 +265,16 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
            } else if(column==UPROPS_SRC_CASE_AND_NORM) {
 #if !UCONFIG_NO_NORMALIZATION
                UChar nfdBuffer[4];
-                const UChar *nfd=NULL;
+                const UChar *nfd;
                int32_t nfdLength;
-                UErrorCode errorCode = U_ZERO_ERROR;
+                UErrorCode errorCode=U_ZERO_ERROR;
+                const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode);
+                if(U_FAILURE(errorCode)) {
+                    return FALSE;
+                }
                switch(which) {
                case UCHAR_CHANGES_WHEN_CASEFOLDED:
-                    if(unorm_haveData(&errorCode)) {
-                        nfd=unorm_getCanonicalDecomposition(c, nfdBuffer, &nfdLength);
-                    }
+                    nfd=nfcImpl->getDecomposition(c, nfdBuffer, nfdLength);
                    if(nfd!=NULL) {
                        /* c has a decomposition */
                        if(nfdLength==1) {
@ -274,6 +316,32 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
    return FALSE;
 }

+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns the canonical combining class of code point c.
+ * The value is read from the NFC Normalizer2Impl: the norm16 entry for c
+ * is fetched with getNorm16() and converted with getCC().
+ * Returns 0 if the NFC implementation could not be obtained; the error code
+ * is local to this function and is not reported to the caller.
+ */
+U_CAPI uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c) {
+    UErrorCode errorCode=U_ZERO_ERROR;
+    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+    if(U_SUCCESS(errorCode)) {
+        return impl->getCC(impl->getNorm16(c));
+    } else {
+        return 0;
+    }
+}
+
+/**
+ * Returns the 16-bit value stored for c in the trie from
+ * Normalizer2Factory::getFCDTrie(), or 0 if the trie is unavailable.
+ * u_getIntPropertyValue() uses the upper 8 bits (>>8) for
+ * UCHAR_LEAD_CANONICAL_COMBINING_CLASS and the lower 8 bits (&0xff) for
+ * UCHAR_TRAIL_CANONICAL_COMBINING_CLASS.
+ */
+static uint16_t
+getFCD16(UChar32 c) {
+    UErrorCode errorCode=U_ZERO_ERROR;
+    const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode);
+    if(U_SUCCESS(errorCode)) {
+        return UTRIE2_GET16(trie, c);
+    } else {
+        return 0;
+    }
+}
+
+#endif
+
 /*
 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
 * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
@ -311,11 +379,9 @@ u_getIntPropertyValue(UChar32 c, UProperty which) {
            return (int32_t)u_charDirection(c);
        case UCHAR_BLOCK:
            return (int32_t)ublock_getCode(c);
-        case UCHAR_CANONICAL_COMBINING_CLASS:
 #if !UCONFIG_NO_NORMALIZATION
+        case UCHAR_CANONICAL_COMBINING_CLASS:
            return u_getCombiningClass(c);
-#else
-            return 0;
 #endif
        case UCHAR_DECOMPOSITION_TYPE:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK);
@ -352,9 +418,9 @@ u_getIntPropertyValue(UChar32 c, UProperty which) {
        case UCHAR_NFKC_QUICK_CHECK:
            return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
        case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
-            return unorm_getFCD16FromCodePoint(c)>>8;
+            return getFCD16(c)>>8;
        case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
-            return unorm_getFCD16FromCodePoint(c)&0xff;
+            return getFCD16(c)&0xff;
 #endif
        case UCHAR_GRAPHEME_CLUSTER_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
@ -462,12 +528,13 @@ uprops_getSource(UProperty which) {

        case UCHAR_CANONICAL_COMBINING_CLASS:
        case UCHAR_NFD_QUICK_CHECK:
-        case UCHAR_NFKD_QUICK_CHECK:
        case UCHAR_NFC_QUICK_CHECK:
-        case UCHAR_NFKC_QUICK_CHECK:
        case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
        case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
-            return UPROPS_SRC_NORM;
+            return UPROPS_SRC_NFC;
+        case UCHAR_NFKD_QUICK_CHECK:
+        case UCHAR_NFKC_QUICK_CHECK:
+            return UPROPS_SRC_NFKC;

        case UCHAR_BIDI_CLASS:
        case UCHAR_JOINING_GROUP:
--- a/icu4c/source/common/uprops.h
+++ b/icu4c/source/common/uprops.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2002-2009, International Business Machines
+*   Copyright (C) 2002-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -332,6 +332,12 @@ enum UPropertySource {
    UPROPS_SRC_CHAR_AND_PROPSVEC,
    /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */
    UPROPS_SRC_CASE_AND_NORM,
+    /** From normalizer2impl.cpp/nfc.nrm */
+    UPROPS_SRC_NFC,
+    /** From normalizer2impl.cpp/nfkc.nrm */
+    UPROPS_SRC_NFKC,
+    /** From normalizer2impl.cpp/nfkc_cf.nrm */
+    UPROPS_SRC_NFKC_CF,
    /** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
    UPROPS_SRC_COUNT
 };
@ -390,4 +396,18 @@ uchar_swapNames(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode);

+#ifdef XP_CPLUSPLUS
+
+U_NAMESPACE_BEGIN
+
+class UnicodeSet;
+
+// implemented in uniset_props.cpp
+U_CFUNC UnicodeSet *
+uniset_getUnicode32Instance(UErrorCode &errorCode);
+
+U_NAMESPACE_END
+
+#endif
+
 #endif
--- a/icu4c/source/common/ustr_imp.h
+++ b/icu4c/source/common/ustr_imp.h
@ -1,6 +1,6 @@
 /*  
 **********************************************************************
-*   Copyright (C) 1999-2009, International Business Machines
+*   Copyright (C) 1999-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ustr_imp.h
@ -25,6 +25,23 @@
    typedef struct UBreakIterator UBreakIterator;
 #endif

+#ifndef U_COMPARE_IGNORE_CASE
+/* see also unorm.h */
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @draft ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE       0x10000
+#endif
+
+/**
+ * Internal option for unorm_cmpEquivFold() for strncmp style.
+ * If set, checks for both string length and terminating NUL.
+ * @internal
+ */
+#define _STRNCMP_STYLE 0x1000
+
 /**
 * Compare two strings in code point order or code unit order.
 * Works in strcmp style (both lengths -1),
--- a/icu4c/source/common/ustrcase.c
+++ b/icu4c/source/common/ustrcase.c
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2001-2009, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -25,7 +25,6 @@
 #include "unicode/ubrk.h"
 #include "cmemory.h"
 #include "ucase.h"
-#include "unormimp.h"
 #include "ustr_imp.h"

 /* string casing ------------------------------------------------------------ */
--- a/icu4c/source/common/utrie2.cpp
+++ b/icu4c/source/common/utrie2.cpp
@ -1,11 +1,11 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2001-2009, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
-*   file name:  utrie2.c
+*   file name:  utrie2.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
@ -423,7 +423,7 @@ utrie2_swap(const UDataSwapper *ds,
    trie.indexLength=ds->readUInt16(inTrie->indexLength);
    trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);

-    valueBits=trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK;
+    valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
    dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;

    if( trie.signature!=UTRIE2_SIG ||
@ -696,3 +696,39 @@ utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
    lead=(lead-0xd7c0)<<10;   /* start code point */
    enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context);
 }
+
+/* C++ convenience wrappers ------------------------------------------------- */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Backward iteration step: returns a 16-bit trie value for the text
+ * immediately before the current position.
+ * The old codePointStart becomes the new codePointLimit.
+ * If codePointStart has already reached start, codePoint is set to
+ * U_SENTINEL and 0 is returned.
+ */
+uint16_t BackwardUTrie2StringIterator::previous16() {
+    codePointLimit=codePointStart;
+    if(start>=codePointStart) {
+        codePoint=U_SENTINEL;
+        return 0;
+    }
+    uint16_t result;
+    // NOTE(review): UTRIE2_U16_PREV16 is defined outside this hunk; it presumably
+    // moves codePointStart back over one code point and sets codePoint and result.
+    UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result);
+    return result;
+}
+
+/**
+ * Forward iteration step: returns a 16-bit trie value for the text
+ * immediately at the current position.
+ * The old codePointLimit becomes the new codePointStart.
+ * If codePointLimit equals limit, codePoint is set to U_SENTINEL and
+ * 0 is returned.
+ */
+uint16_t ForwardUTrie2StringIterator::next16() {
+    codePointStart=codePointLimit;
+    if(codePointLimit==limit) {
+        codePoint=U_SENTINEL;
+        return 0;
+    }
+    uint16_t result;
+    // NOTE(review): UTRIE2_U16_NEXT16 is defined outside this hunk; it presumably
+    // advances codePointLimit over one code point and sets codePoint and result.
+    UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result);
+    return result;
+}
+
+/**
+ * Returns the UTrie2 obtained from the wrapped SimpleSingleton, passing
+ * instantiator, context and errorCode through to singleton.getInstance().
+ * Whatever singleton.getInstance() hands back in "duplicate" is closed with
+ * utrie2_close() before returning.
+ */
+UTrie2 *UTrie2Singleton::getInstance(InstantiatorFn *instantiator, const void *context,
+                                     UErrorCode &errorCode) {
+    // NOTE(review): duplicate is not initialized here; this relies on
+    // singleton.getInstance() always assigning it, even on failure -- confirm.
+    void *duplicate;
+    UTrie2 *instance=(UTrie2 *)singleton.getInstance(instantiator, context, duplicate, errorCode);
+    utrie2_close((UTrie2 *)duplicate);
+    return instance;
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/common/utrie2.h
+++ b/icu4c/source/common/utrie2.h
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2001-2009, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -605,8 +605,70 @@ utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
 */
 #define UTRIE2_GET32_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), data32, c)

+U_CDECL_END
+
+/* C++ convenience wrappers ------------------------------------------------- */
+
+#ifdef XP_CPLUSPLUS
+
+#include "mutex.h"
+
+U_NAMESPACE_BEGIN
+
+// Use the Forward/Backward subclasses below.
+// Shared state for walking a UChar string while reading values from a UTrie2.
+// The constructor stores the trie, sets both codePointStart and codePointLimit
+// to p, and sets codePoint to U_SENTINEL. All members are public.
+class UTrie2StringIterator : public UMemory {
+public:
+    UTrie2StringIterator(const UTrie2 *t, const UChar *p) :
+        trie(t), codePointStart(p), codePointLimit(p), codePoint(U_SENTINEL) {}
+
+    const UTrie2 *trie;
+    const UChar *codePointStart, *codePointLimit;
+    UChar32 codePoint;
+};
+
+// Backward iterator: starts at position p and is bounded below by s,
+// which is kept in the public member start.
+class BackwardUTrie2StringIterator : public UTrie2StringIterator {
+public:
+    BackwardUTrie2StringIterator(const UTrie2 *t, const UChar *s, const UChar *p) :
+        UTrie2StringIterator(t, p), start(s) {}
+
+    // Returns 0 and sets codePoint to U_SENTINEL once codePointStart has reached start
+    // (see the definition in utrie2.cpp).
+    uint16_t previous16();
+
+    const UChar *start;
+};
+
+// Forward iterator: starts at position p and is bounded above by l,
+// which is kept in the public member limit.
+class ForwardUTrie2StringIterator : public UTrie2StringIterator {
+public:
+    // Iteration limit l can be NULL.
+    // In that case, the caller must detect c==0 and stop.
+    ForwardUTrie2StringIterator(const UTrie2 *t, const UChar *p, const UChar *l) :
+        UTrie2StringIterator(t, p), limit(l) {}
+
+    // Returns 0 and sets codePoint to U_SENTINEL once codePointLimit equals limit
+    // (see the definition in utrie2.cpp).
+    uint16_t next16();
+
+    const UChar *limit;
+};
+
+// Wrapper around a SimpleSingleton (held by reference) whose instance
+// pointer is treated as a UTrie2.
+class UTrie2Singleton {
+public:
+    UTrie2Singleton(SimpleSingleton &s) : singleton(s) {}
+    // Closes the UTrie2 stored in singleton.fInstance, then resets the singleton.
+    void deleteInstance() {
+        utrie2_close((UTrie2 *)singleton.fInstance);
+        singleton.reset();
+    }
+    // Defined in utrie2.cpp; forwards to singleton.getInstance() and
+    // closes the duplicate it reports.
+    UTrie2 *getInstance(InstantiatorFn *instantiator, const void *context,
+                        UErrorCode &errorCode);
+private:
+    SimpleSingleton &singleton;
+};
+
+U_NAMESPACE_END
+
+#endif
+
 /* Internal definitions ----------------------------------------------------- */

+U_CDECL_BEGIN
+
 /** Build-time trie structure. */
 struct UNewTrie2;
 typedef struct UNewTrie2 UNewTrie2;
--- a/icu4c/source/configure
+++ b/icu4c/source/configure
@ -2,7 +2,7 @@
 # Guess values for system-dependent variables and create Makefiles.
 # Generated by GNU Autoconf 2.63.
 #
-#  Copyright (c) 1999-2009, International Business Machines Corporation and others. All Rights Reserved.
+#  Copyright (c) 1999-2010, International Business Machines Corporation and others. All Rights Reserved.
 #
 # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
 # 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
@ -10583,7 +10583,7 @@ then
 fi

 # output the Makefiles
-ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layout/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genuca/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/genctd/Makefile tools/gennames/Makefile tools/gentest/Makefile tools/gennorm/Makefile tools/genprops/Makefile tools/gencase/Makefile tools/genbidi/Makefile tools/genpname/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/normperf/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile common/unicode/platform.h"
+ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layout/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genuca/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/genctd/Makefile tools/gennames/Makefile tools/gentest/Makefile tools/gennorm/Makefile tools/gennorm2/Makefile tools/genprops/Makefile tools/gencase/Makefile tools/genbidi/Makefile tools/genpname/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/normperf/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile common/unicode/platform.h"

 cat >confcache <<\_ACEOF
 # This file is a shell script that caches the results of configure
@ -11210,6 +11210,7 @@ do
    "tools/gennames/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gennames/Makefile" ;;
    "tools/gentest/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gentest/Makefile" ;;
    "tools/gennorm/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gennorm/Makefile" ;;
+    "tools/gennorm2/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gennorm2/Makefile" ;;
    "tools/genprops/Makefile") CONFIG_FILES="$CONFIG_FILES tools/genprops/Makefile" ;;
    "tools/gencase/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gencase/Makefile" ;;
    "tools/genbidi/Makefile") CONFIG_FILES="$CONFIG_FILES tools/genbidi/Makefile" ;;
--- a/icu4c/source/configure.in
+++ b/icu4c/source/configure.in
@ -1,5 +1,5 @@
 	# -*-autoconf-*-
-AC_COPYRIGHT([ Copyright (c) 1999-2009, International Business Machines Corporation and others. All Rights Reserved. ])
+AC_COPYRIGHT([ Copyright (c) 1999-2010, International Business Machines Corporation and others. All Rights Reserved. ])
 # configure.in for ICU
 # Stephen F. Booth, heavily modified by Yves and others

@ -1223,6 +1223,7 @@ AC_CONFIG_FILES([icudefs.mk \
 		tools/gennames/Makefile \
 		tools/gentest/Makefile \
 		tools/gennorm/Makefile \
+		tools/gennorm2/Makefile \
 		tools/genprops/Makefile \
 		tools/gencase/Makefile \
 		tools/genbidi/Makefile \
--- a/icu4c/source/data/Makefile.in
+++ b/icu4c/source/data/Makefile.in
@ -1,5 +1,5 @@
 ## Makefile.in for ICU data
-## Copyright (c) 1999-2009, International Business Machines Corporation and
+## Copyright (c) 1999-2010, International Business Machines Corporation and
 ## others. All Rights Reserved.

 ## Source directory information
@ -223,7 +223,7 @@ package390: $(OUTTMPDIR)/icudata390.lst $(PKGDATA_LIST) ./icupkg.inc packagedata
 #  2005-may-05 Removed Unicode properties files (unorm.icu, uprops.icu, ucase.icu, ubidi.icu)
 #  from data build. See Jitterbug 4497. (makedata.mak revision 1.117)
 #
-DAT_FILES_SHORT=pnames.icu unames.icu cnvalias.icu coll/ucadata.icu coll/invuca.icu 
+DAT_FILES_SHORT=pnames.icu unames.icu cnvalias.icu coll/ucadata.icu coll/invuca.icu nfc.nrm nfkc.nrm nfkc_cf.nrm
 DAT_FILES=$(DAT_FILES_SHORT:%=$(BUILDDIR)/%)

 ## BRK files
@ -488,14 +488,17 @@ $(BUILDDIR)/pnames.icu: $(UNICODEDATADIR)/PropertyAliases.txt $(UNICODEDATADIR)/
 	$(INVOKE) $(TOOLBINDIR)/genpname -d $(BUILDDIR)

 # unorm.icu
-$(BUILDDIR)/unorm.icu: $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/DerivedNormalizationProps.txt $(UNICODEDATADIR)/BidiMirroring.txt $(TOOLBINDIR)/gennorm$(TOOLEXEEXT) $(BUILDDIR)/$(ICUDT)pnames.icu $(BUILDDIR)/$(ICUDT)uprops.icu $(BUILDDIR)/$(ICUDT)ucase.icu
-	$(INVOKE) $(TOOLBINDIR)/gennorm -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(BUILDDIR) -u $(UNICODE_VERSION)
+# ICU 4.4: $(BUILDDIR)/unorm.icu is now prebuilt, see below.
+$(OUTTMPDIR)/unorm_props_data.c: $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/DerivedNormalizationProps.txt $(UNICODEDATADIR)/BidiMirroring.txt $(TOOLBINDIR)/gennorm$(TOOLEXEEXT) $(BUILDDIR)/$(ICUDT)pnames.icu $(BUILDDIR)/$(ICUDT)uprops.icu $(BUILDDIR)/$(ICUDT)ucase.icu
 	$(INVOKE) $(TOOLBINDIR)/gennorm --csource -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(OUTTMPDIR) -u $(UNICODE_VERSION)

+# unorm.icu used to be built like this:
+#	$(INVOKE) $(TOOLBINDIR)/gennorm -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(BUILDDIR) -u $(UNICODE_VERSION)
+
 # ucadata.icu
 # used to depend on $(BUILDDIR)/$(ICUDT)unorm.icu $(BUILDDIR)/$(ICUDT)ucase.icu
 # see Jitterbug 4497
-$(COLBLDDIR)/ucadata.icu $(COLBLDDIR)/invuca.icu: $(UNICODEDATADIR)/FractionalUCA.txt $(TOOLBINDIR)/genuca$(TOOLEXEEXT)
+$(COLBLDDIR)/ucadata.icu $(COLBLDDIR)/invuca.icu: $(UNICODEDATADIR)/FractionalUCA.txt $(TOOLBINDIR)/genuca$(TOOLEXEEXT) $(BUILDDIR)/$(ICUDT)nfc.nrm
 	$(INVOKE) $(TOOLBINDIR)/genuca -s $(UNICODEDATADIR) -d $(COLBLDDIR) -i $(BUILDDIR) 

 # unames.icu
@ -506,6 +509,13 @@ $(BUILDDIR)/unames.icu: $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/Name
 $(BUILDDIR)/cnvalias.icu: $(UCMSRCDIR)/convrtrs.txt $(TOOLBINDIR)/gencnval$(TOOLEXEEXT)
 	$(INVOKE) $(TOOLBINDIR)/gencnval -d $(BUILDDIR) $(UCMSRCDIR)/convrtrs.txt

+# Targets for prebuilt Unicode data
+# The checked-in files under $(SRCDATADIR)/in are passed through icupkg with
+# -t$(ICUDATA_CHAR) to produce the copies in $(BUILDDIR).
+# NOTE(review): -t presumably selects the output endianness/charset type -- confirm against icupkg usage.
+$(BUILDDIR)/unorm.icu: $(SRCDATADIR)/in/unorm.icu
+	$(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $< $@
+
+# Pattern rule covering nfc.nrm, nfkc.nrm and nfkc_cf.nrm listed in DAT_FILES_SHORT.
+$(BUILDDIR)/%.nrm: $(SRCDATADIR)/in/%.nrm
+	$(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $< $@
+
 ####################################################    SPP
 # SPP FILES

@ -751,7 +761,7 @@ $(INDEX_RES_FILE): $(INDEX_FILE) $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
 # They are not built by default but need to be built for ICU4J data and for getting the .c source files
 # when updating the Unicode data.
 # Changed in Makefile.in revision 1.147. See Jitterbug 4497.
-uni-core-data: build-dir $(UNI_CORE_TARGET_DATA)
+uni-core-data: build-dir $(UNI_CORE_TARGET_DATA) $(OUTTMPDIR)/unorm_props_data.c
 	@echo Unicode .icu files built to $(BUILDDIR)
 	@echo Unicode .c source files built to $(OUTTMPDIR)

--- a/icu4c/source/data/in/nfc.nrm
+++ b/icu4c/source/data/in/nfc.nrm
--- a/icu4c/source/data/in/nfkc.nrm
+++ b/icu4c/source/data/in/nfkc.nrm
--- a/icu4c/source/data/in/nfkc_cf.nrm
+++ b/icu4c/source/data/in/nfkc_cf.nrm
--- a/icu4c/source/data/in/unorm.icu
+++ b/icu4c/source/data/in/unorm.icu
--- a/icu4c/source/data/makedata.mak
+++ b/icu4c/source/data/makedata.mak
@ -1,5 +1,5 @@
 #**********************************************************************
-#* Copyright (C) 1999-2009, International Business Machines Corporation
+#* Copyright (C) 1999-2010, International Business Machines Corporation
 #* and others.  All Rights Reserved.
 #**********************************************************************
 # nmake file for creating data files on win32
@ -28,7 +28,7 @@ ICU_LIB_TARGET=$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll
 !MESSAGE ICU data make path is $(ICUMAKE)

 # Suffixes for data files
-.SUFFIXES : .ucm .cnv .dll .dat .res .txt .c
+.SUFFIXES : .nrm .icu .ucm .cnv .dll .dat .res .txt .c

 ICUOUT=$(ICUMAKE)\out

@ -474,8 +474,8 @@ ALL : GODATA "$(ICU_LIB_TARGET)" "$(TESTDATAOUT)\testdata.dat"
 # when updating the Unicode data.
 # Changed in makedata.mak revision 1.117. See Jitterbug 4497.
 # Command line:
-#   C:\svn\icuproj\icu\trunk\source\data>nmake -f makedata.mak ICUMAKE=C:\svn\icuproj\icu\trunk\source\data\ CFG=Debug uni-core-data
-uni-core-data: GODATA "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\unorm.icu"
+#   C:\svn\icuproj\icu\trunk\source\data>nmake -f makedata.mak ICUMAKE=C:\svn\icuproj\icu\trunk\source\data\ CFG=x86\Debug uni-core-data
+uni-core-data: GODATA "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\unorm.icu" "$(ICUTMP)\unorm_props_data.c"
 	@echo Unicode .icu files built to "$(ICUBLD_PKG)"
 	@echo Unicode .c source files built to "$(ICUTMP)"

@ -553,7 +553,7 @@ testdata.jar: GODATA "$(ICUOUT)\icu4j\testdata.jar"
 	copy "$(ICUTMP)\$(ICUPKG).dat" "$(ICUOUT)\$(U_ICUDATA_NAME)$(U_ICUDATA_ENDIAN_SUFFIX).dat"
 	-@erase "$(ICUTMP)\$(ICUPKG).dat"
 !ELSE
-"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" "$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" $(CURR_RES_FILES) $(LANG_RES_FILES) $(REGION_RES_FILES) $(ZONE_RES_FILES) $(BRK_FILES) $(BRK_CTD_FILES) $(BRK_RES_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu"
+"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\nfc.nrm" "$(ICUBLD_PKG)\nfkc.nrm" "$(ICUBLD_PKG)\nfkc_cf.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" "$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" $(CURR_RES_FILES) $(LANG_RES_FILES) $(REGION_RES_FILES) $(ZONE_RES_FILES) $(BRK_FILES) $(BRK_CTD_FILES) $(BRK_RES_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu"
 	@echo Building icu data
 	cd "$(ICUBLD_PKG)"
 	"$(ICUPBIN)\pkgdata" $(COMMON_ICUDATA_ARGUMENTS) <<"$(ICUTMP)\icudata.lst"
@ -563,6 +563,9 @@ confusables.cfu
 $(ICUCOL)\ucadata.icu
 $(ICUCOL)\invuca.icu
 cnvalias.icu
+nfc.nrm
+nfkc.nrm
+nfkc_cf.nrm
 $(CNV_FILES:.cnv =.cnv
 )
 $(ALL_RES:.res =.res
@ -627,6 +630,7 @@ CLEAN : GODATA
 	-@erase "*.exp"
 	-@erase "*.icu"
 	-@erase "*.lib"
+	-@erase "*.nrm"
 	-@erase "*.res"
 	-@erase "*.spp"
 	-@erase "*.txt"
@ -878,9 +882,10 @@ res_index:table(nofallback) {
 	@"$(ICUTOOLS)\gencase\$(CFG)\gencase" --csource -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUTMP)"

 # Targets for unorm.icu
-"$(ICUBLD_PKG)\unorm.icu": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\gennorm\$(CFG)\gennorm.exe" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu"
+# ICU 4.4: "$(ICUBLD_PKG)\unorm.icu" is now prebuilt, see below.
+"$(ICUTMP)\unorm_props_data.c": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\gennorm\$(CFG)\gennorm.exe" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu"
 	@echo Creating data file for Unicode Normalization
-	@"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUBLD_PKG)"
+	@rem @"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUBLD_PKG)"
 	@"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" --csource -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUTMP)"

 # Targets for converters
@ -891,10 +896,23 @@ res_index:table(nofallback) {
 # Targets for ucadata.icu & invuca.icu
 # used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\unorm.icu"
 # see Jitterbug 4497
-"$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu": "$(ICUUNIDATA)\FractionalUCA.txt" "$(ICUTOOLS)\genuca\$(CFG)\genuca.exe"
+"$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu": "$(ICUUNIDATA)\FractionalUCA.txt" "$(ICUTOOLS)\genuca\$(CFG)\genuca.exe" "$(ICUBLD_PKG)\nfc.nrm"
 	@echo Creating UCA data files
 	@"$(ICUTOOLS)\genuca\$(CFG)\genuca" -d "$(ICUBLD_PKG)\$(ICUCOL)" -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)"

+# Targets for prebuilt Unicode data
+# Each checked-in file under $(ICUSRCDATA_RELATIVE_PATH)\in is passed through
+# icupkg with -tl to produce the copy in $(ICUBLD_PKG).
+# NOTE(review): -tl presumably means little-endian output for the Windows build -- confirm against icupkg usage.
+"$(ICUBLD_PKG)\unorm.icu": $(ICUSRCDATA_RELATIVE_PATH)\in\unorm.icu
+	"$(ICUPBIN)\icupkg" -tl $? $@
+
+"$(ICUBLD_PKG)\nfc.nrm": $(ICUSRCDATA_RELATIVE_PATH)\in\nfc.nrm
+	"$(ICUPBIN)\icupkg" -tl $? $@
+
+"$(ICUBLD_PKG)\nfkc.nrm": $(ICUSRCDATA_RELATIVE_PATH)\in\nfkc.nrm
+	"$(ICUPBIN)\icupkg" -tl $? $@
+
+"$(ICUBLD_PKG)\nfkc_cf.nrm": $(ICUSRCDATA_RELATIVE_PATH)\in\nfkc_cf.nrm
+	"$(ICUPBIN)\icupkg" -tl $? $@
+
 # Stringprep .spp file generation.
 {$(ICUSRCDATA_RELATIVE_PATH)\$(ICUSPREP)}.txt.spp:
 	@echo Creating $@
@ -924,6 +942,6 @@ $(MISC_SOURCE) $(RB_FILES) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FI
 # This used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\unorm.icu"
 # This data is now hard coded as a part of the library.
 # See Jitterbug 4497 for details.
-$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu"
+$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\nfc.nrm"
 !ENDIF

--- a/icu4c/source/data/unidata/norm2/nfc.txt
+++ b/icu4c/source/data/unidata/norm2/nfc.txt
--- a/icu4c/source/data/unidata/norm2/nfkc.txt
+++ b/icu4c/source/data/unidata/norm2/nfkc.txt
--- a/icu4c/source/data/unidata/norm2/nfkc_cf.txt
+++ b/icu4c/source/data/unidata/norm2/nfkc_cf.txt
--- a/icu4c/source/i18n/bmsearch.cpp
+++ b/icu4c/source/i18n/bmsearch.cpp
@ -1,6 +1,6 @@
 /*
 ******************************************************************************
- *   Copyright (C) 1996-2009, International Business Machines                 *
+ *   Copyright (C) 1996-2010, International Business Machines                 *
 *   Corporation and others.  All Rights Reserved.                            *
 ******************************************************************************
 */
@ -27,7 +27,7 @@
 #include "hash.h"
 #include "uhash.h"
 #include "ucol_imp.h"
-#include "unormimp.h"
+#include "normalizer2impl.h"

 #include "unicode/colldata.h"
 #include "unicode/bmsearch.h"
@ -81,6 +81,7 @@ private:
    uint32_t variableTop;
    UBool toShift;
    UCollator *coll;
+    const Normalizer2 &nfd;

    const UnicodeString *targetString;
    const UChar *targetBuffer;
@ -93,6 +94,7 @@ private:
 Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
    : bufferSize(0), bufferMin(0), bufferMax(0),
      strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
+      nfd(*Normalizer2Factory::getNFDInstance(status)),
      targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
 {
    strength = ucol_getStrength(coll);
@ -348,63 +350,14 @@ UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)
        return TRUE;
    }

-    UChar t2[32], p2[32];
-    const UChar *pBuffer = pattern.getBuffer();
-    int32_t pLength = pattern.length();
-    int32_t length = end - start;
-
-    UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
-
-    int32_t decomplength = unorm_decompose(t2, ARRAY_SIZE(t2),
-                                       targetBuffer + start, length,
-                                       FALSE, 0, &status);
-
-    // use separate status2 in case of buffer overflow
-    if (decomplength != unorm_decompose(p2, ARRAY_SIZE(p2),
-                                        pBuffer, pLength,
-                                        FALSE, 0, &status2)) {
-        return FALSE; // lengths are different
-    }
-
-    // compare contents
-    UChar *text, *pat;
-
-    if(U_SUCCESS(status)) {
-        text = t2;
-        pat = p2;
-    } else if(status == U_BUFFER_OVERFLOW_ERROR) {
-        status = U_ZERO_ERROR;
-
-        // allocate one buffer for both decompositions
-        text = NEW_ARRAY(UChar, decomplength * 2);
-
-        // Check for allocation failure.
-        if (text == NULL) {
-        	return FALSE;
-        }
-
-        pat = text + decomplength;
-
-        unorm_decompose(text, decomplength, targetBuffer + start,
-                        length, FALSE, 0, &status);
-
-        unorm_decompose(pat, decomplength, pBuffer,
-                        pLength, FALSE, 0, &status);
-    } else {
-        // NFD failed, make sure that u_memcmp() does not overrun t2 & p2
-        // and that we don't uprv_free() an undefined text pointer
-        text = pat = t2;
-        decomplength = 0;
-    }
-
-    UBool result = (UBool)(u_memcmp(pat, text, decomplength) == 0);
-
-    if(text != t2) {
-        DELETE_ARRAY(text);
-    }
-
+    // Note: We could use Normalizer::compare() or similar, but for short strings
+    // which may not be in FCD it might be faster to just NFD them.
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString t2, p2;
+    nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), t2, status);
+    nfd.normalize(pattern, p2, status);
    // return FALSE if NFD failed
-    return U_SUCCESS(status) && result;
+    return U_SUCCESS(status) && t2 == p2;
 }

 #define HASH_TABLE_SIZE 257
--- a/icu4c/source/i18n/coleitr.cpp
+++ b/icu4c/source/i18n/coleitr.cpp
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 1996-2009, International Business Machines Corporation and    *
+* Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
@ -122,9 +122,9 @@ UBool CollationElementIterator::operator==(
        }
        // both are in the normalization buffer
        if (m_data_->iteratordata_.pos 
-            - m_data_->iteratordata_.writableBuffer 
+            - m_data_->iteratordata_.writableBuffer.getBuffer()
            != that.m_data_->iteratordata_.pos 
-            - that.m_data_->iteratordata_.writableBuffer) {
+            - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
            // not in the same position in the normalization buffer
            return FALSE;
        }
@ -176,7 +176,7 @@ void CollationElementIterator::setText(const UnicodeString& source,
    int32_t length = source.length();
    UChar *string = NULL;
    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
-        uprv_free(m_data_->iteratordata_.string);
+        uprv_free((UChar *)m_data_->iteratordata_.string);
    }
    m_data_->isWritable = TRUE;
    if (length > 0) {
@ -200,7 +200,7 @@ void CollationElementIterator::setText(const UnicodeString& source,
    /* Free offsetBuffer before initializing it. */
    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, 
-        &m_data_->iteratordata_);
+        &m_data_->iteratordata_, &status);

    m_data_->reset_   = TRUE;
 }
@ -241,13 +241,13 @@ void CollationElementIterator::setText(CharacterIterator& source,
    }

    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
-        uprv_free(m_data_->iteratordata_.string);
+        uprv_free((UChar *)m_data_->iteratordata_.string);
    }
    m_data_->isWritable = TRUE;
    /* Free offsetBuffer before initializing it. */
    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, 
-        &m_data_->iteratordata_);
+        &m_data_->iteratordata_, &status);
    m_data_->reset_   = TRUE;
 }

@ -407,7 +407,7 @@ const CollationElementIterator& CollationElementIterator::operator=(
        if (length > 0) {
            coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
            if(coliter->string != NULL) {
-                uprv_memcpy(coliter->string, othercoliter->string,
+                uprv_memcpy((UChar *)coliter->string, othercoliter->string,
                    length * U_SIZEOF_UCHAR);
            } else { // Error: couldn't allocate memory. No copying should be done
                length = 0;
@ -423,27 +423,8 @@ const CollationElementIterator& CollationElementIterator::operator=(
        /* handle writable buffer here */

        if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
-            uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
-            if (wlength < coliter->writableBufSize) {
-                uprv_memcpy(coliter->stackWritableBuffer, 
-                    othercoliter->stackWritableBuffer, 
-                    wlength * U_SIZEOF_UCHAR);
-            }
-            else {
-                if (coliter->writableBuffer != coliter->stackWritableBuffer) {
-                    uprv_free(coliter->writableBuffer);
-                }
-                coliter->writableBuffer = (UChar *)uprv_malloc(
-                    wlength * U_SIZEOF_UCHAR);
-                if(coliter->writableBuffer != NULL) {
-                    uprv_memcpy(coliter->writableBuffer, 
-                        othercoliter->writableBuffer,
-                        wlength * U_SIZEOF_UCHAR);
-                    coliter->writableBufSize = wlength;
-                } else { // Error: couldn't allocate memory for writableBuffer
-                    coliter->writableBufSize = 0;
-                }
-            }
+            coliter->writableBuffer = othercoliter->writableBuffer;
+            coliter->writableBuffer.getTerminatedBuffer();
        }

        /* current position */
@ -453,13 +434,9 @@ const CollationElementIterator& CollationElementIterator::operator=(
            coliter->pos = coliter->string + 
                (othercoliter->pos - othercoliter->string);
        }
-        else if (coliter->writableBuffer != NULL) {
-            coliter->pos = coliter->writableBuffer + 
-                (othercoliter->pos - othercoliter->writableBuffer);
-        }
        else {
-            // Error: couldn't allocate memory for writableBuffer
-            coliter->pos = NULL;
+            coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + 
+                (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
        }

        /* CE buffer */
--- a/icu4c/source/i18n/i18n.vcproj
+++ b/icu4c/source/i18n/i18n.vcproj
@ -895,7 +895,7 @@
 				>
 			</File>
 			<File
-				RelativePath=".\ucol_wgt.c"
+				RelativePath=".\ucol_wgt.cpp"
 				>
 			</File>
 			<File
--- a/icu4c/source/i18n/nortrans.cpp
+++ b/icu4c/source/i18n/nortrans.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2001-2007, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -12,37 +12,43 @@

 #if !UCONFIG_NO_TRANSLITERATION

-#include "unicode/uniset.h"
-#include "unicode/uiter.h"
+#include "unicode/normalizer2.h"
+#include "cstring.h"
 #include "nortrans.h"
-#include "unormimp.h"
-#include "ucln_in.h"

 U_NAMESPACE_BEGIN

 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NormalizationTransliterator)

+static inline Transliterator::Token cstrToken(const char *s) {
+    return Transliterator::pointerToken((void *)s);
+}
+
 /**
 * System registration hook.
 */
 void NormalizationTransliterator::registerIDs() {
-    UErrorCode errorCode = U_ZERO_ERROR;
-    if(!unorm_haveData(&errorCode)) {
-        return;
-    }
-
+    // In the Token, the byte after the NUL is the UNormalization2Mode.
    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFC"),
-                                     _create, integerToken(UNORM_NFC));
+                                     _create, cstrToken("nfc\0\0"));
    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKC"),
-                                     _create, integerToken(UNORM_NFKC));
+                                     _create, cstrToken("nfkc\0\0"));
    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFD"),
-                                     _create, integerToken(UNORM_NFD));
+                                     _create, cstrToken("nfc\0\1"));
    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKD"),
-                                     _create, integerToken(UNORM_NFKD));
+                                     _create, cstrToken("nfkc\0\1"));
+    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCD"),
+                                     _create, cstrToken("nfc\0\2"));
+    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCC"),
+                                     _create, cstrToken("nfc\0\3"));
    Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFC"),
                                            UNICODE_STRING_SIMPLE("NFD"), TRUE);
    Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFKC"),
                                            UNICODE_STRING_SIMPLE("NFKD"), TRUE);
+    Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCC"),
+                                            UNICODE_STRING_SIMPLE("NFD"), FALSE);
+    Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCD"),
+                                            UNICODE_STRING_SIMPLE("FCD"), FALSE);
 }

 /**
@ -50,19 +56,23 @@ void NormalizationTransliterator::registerIDs() {
 */
 Transliterator* NormalizationTransliterator::_create(const UnicodeString& ID,
                                                     Token context) {
-    return new NormalizationTransliterator(ID, (UNormalizationMode) context.integer, 0);
+    const char *name = (const char *)context.pointer;
+    UNormalization2Mode mode = (UNormalization2Mode)uprv_strchr(name, 0)[1];
+    UErrorCode errorCode = U_ZERO_ERROR;
+    const Normalizer2 *norm2 = Normalizer2::getInstance(NULL, name, mode, errorCode);
+    if(U_SUCCESS(errorCode)) {
+        return new NormalizationTransliterator(ID, *norm2);
+    } else {
+        return NULL;
+    }
 }

 /**
 * Constructs a transliterator.
 */
-NormalizationTransliterator::NormalizationTransliterator(
-                                 const UnicodeString& id,
-                                 UNormalizationMode mode, int32_t opt) :
-    Transliterator(id, 0) {
-    fMode = mode;
-    options = opt;
-}
+NormalizationTransliterator::NormalizationTransliterator(const UnicodeString& id,
+                                                         const Normalizer2 &norm2) :
+    Transliterator(id, 0), fNorm2(norm2) {}

 /**
 * Destructor.
@ -74,20 +84,7 @@ NormalizationTransliterator::~NormalizationTransliterator() {
 * Copy constructor.
 */
 NormalizationTransliterator::NormalizationTransliterator(const NormalizationTransliterator& o) :
-Transliterator(o) {
-    fMode = o.fMode;
-    options = o.options;
-}
-
-/**
- * Assignment operator.
- */
-/*NormalizationTransliterator& NormalizationTransliterator::operator=(const NormalizationTransliterator& o) {
-    Transliterator::operator=(o);
-    fMode = o.fMode;
-    options = o.options;
-    return *this;
-}*/
+    Transliterator(o), fNorm2(o.fNorm2) {}

 /**
 * Transliterator API.
@ -104,23 +101,10 @@ void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransP
    // start and limit of the input range
    int32_t start = offsets.start;
    int32_t limit = offsets.limit;
-    int32_t length, delta;
-
    if(start >= limit) {
        return;
    }

-    // a C code unit iterator, implemented around the Replaceable
-    UCharIterator iter;
-    uiter_setReplaceable(&iter, &text);
-
-    // the output string and buffer pointer
-    UnicodeString output;
-    UChar *buffer;
-    UBool neededToNormalize;
-
-    UErrorCode errorCode;
-
    /*
     * Normalize as short chunks at a time as possible even in
     * bulk mode, so that styled text is minimally disrupted.
@ -129,101 +113,62 @@ void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransP
     *
     * If it was known that the input text is not styled, then
     * a bulk mode normalization could look like this:
-     *
-
-    UChar staticChars[256];
-    UnicodeString input;
-
-    length = limit - start;
-    input.setTo(staticChars, 0, sizeof(staticChars)/U_SIZEOF_UCHAR); // writable alias

+    UnicodeString input, normalized;
+    int32_t length = limit - start;
    _Replaceable_extractBetween(text, start, limit, input.getBuffer(length));
    input.releaseBuffer(length);

    UErrorCode status = U_ZERO_ERROR;
-    Normalizer::normalize(input, fMode, options, output, status);
+    fNorm2.normalize(input, normalized, status);

-    text.handleReplaceBetween(start, limit, output);
+    text.handleReplaceBetween(start, limit, normalized);

-    int32_t delta = output.length() - length;
+    int32_t delta = normalized.length() - length;
    offsets.contextLimit += delta;
    offsets.limit += delta;
    offsets.start = limit + delta;

-     *
     */
-    while(start < limit) {
-        // set the iterator limits for the remaining input range
-        // this is a moving target because of the replacements in the text object
-        iter.start = iter.index = start;
-        iter.limit = limit;
-
-        // incrementally normalize a small chunk of the input
-        buffer = output.getBuffer(-1);
-        errorCode = U_ZERO_ERROR;
-        length = unorm_next(&iter, buffer, output.getCapacity(),
-                            fMode, 0,
-                            TRUE, &neededToNormalize,
-                            &errorCode);
-        output.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
-
-        if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
-            // use a larger output string buffer and do it again from the start
-            iter.index = start;
-            buffer = output.getBuffer(length);
-            errorCode = U_ZERO_ERROR;
-            length = unorm_next(&iter, buffer, output.getCapacity(),
-                                fMode, 0,
-                                TRUE, &neededToNormalize,
-                                &errorCode);
-            output.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UnicodeString segment;
+    UnicodeString normalized;
+    UChar32 c = text.char32At(start);
+    do {
+        int32_t prev = start;
+        // Skip at least one character so we make progress.
+        // c holds the character at start.
+        segment.setTo(c);
+        start += U16_LENGTH(c);
+        while(start < limit && !fNorm2.hasBoundaryBefore(c = text.char32At(start))) {
+            segment.append(c);
+            start += U16_LENGTH(c);
        }
-
-        if(U_FAILURE(errorCode)) {
-            break;
-        }
-
-        limit = iter.index;
-        if(isIncremental && limit == iter.limit) {
+        if(start == limit && isIncremental && !fNorm2.hasBoundaryAfter(c)) {
            // stop in incremental mode when we reach the input limit
            // in case there are additional characters that could change the
            // normalization result
-
-            // UNLESS all characters in the result of the normalization of
-            // the last run are in the skippable set
-            const UChar *s=output.getBuffer();
-            int32_t i=0, outLength=output.length();
-            UChar32 c;
-
-            while(i<outLength) {
-                U16_NEXT(s, i, outLength, c);
-                if(!unorm_isNFSkippable(c, fMode)) {
-                    outLength=-1; // I wish C++ had labeled loops and break outer; ...
-                    break;
-                }
-            }
-            if (outLength<0) {
-                break;
-            }
+            start=prev;
+            break;
        }
-
-        if(neededToNormalize) {
+        fNorm2.normalize(segment, normalized, errorCode);
+        if(U_FAILURE(errorCode)) {
+            break;
+        }
+        if(segment != normalized) {
            // replace the input chunk with its normalized form
-            text.handleReplaceBetween(start, limit, output);
+            text.handleReplaceBetween(prev, start, normalized);

            // update all necessary indexes accordingly
-            delta = length - (limit - start);   // length change in the text object
-            start = limit += delta;             // the next chunk starts where this one ends, with adjustment
-            limit = offsets.limit += delta;     // set the iteration limit to the adjusted end of the input range
-            offsets.contextLimit += delta;
-        } else {
-            // delta == 0
-            start = limit;
-            limit = offsets.limit;
+            int32_t delta = normalized.length() - (start - prev);
+            start += delta;
+            limit += delta;
        }
-    }
+    } while(start < limit);

    offsets.start = start;
+    offsets.contextLimit += limit - offsets.limit;
+    offsets.limit = limit;
 }

 U_NAMESPACE_END
--- a/icu4c/source/i18n/nortrans.h
+++ b/icu4c/source/i18n/nortrans.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2001-2007, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -15,7 +15,7 @@
 #if !UCONFIG_NO_TRANSLITERATION

 #include "unicode/translit.h"
-#include "unicode/normlzr.h"
+#include "unicode/normalizer2.h"

 U_NAMESPACE_BEGIN

@ -24,16 +24,7 @@ U_NAMESPACE_BEGIN
 * @author Alan Liu
 */
 class NormalizationTransliterator : public Transliterator {
-
-    /**
-     * The normalization mode of this transliterator.
-     */
-    UNormalizationMode fMode;
-
-    /**
-     * Normalization options for this transliterator.
-     */
-    int32_t options;
+    const Normalizer2 &fNorm2;

 public:

@ -93,8 +84,7 @@ class NormalizationTransliterator : public Transliterator {
     * Constructs a transliterator.  This method is private.
     * Public users must use the factory method createInstance().
     */
-    NormalizationTransliterator(const UnicodeString& id,
-                                UNormalizationMode mode, int32_t opt);
+    NormalizationTransliterator(const UnicodeString& id, const Normalizer2 &norm2);

 private:
    /**
--- a/icu4c/source/i18n/ucol.cpp
+++ b/icu4c/source/i18n/ucol.cpp
--- a/icu4c/source/i18n/ucol_bld.cpp
+++ b/icu4c/source/i18n/ucol_bld.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2001-2008, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -25,12 +25,12 @@
 #include "unicode/udata.h"
 #include "unicode/uchar.h"
 #include "unicode/uniset.h"
+#include "normalizer2impl.h"
 #include "ucol_bld.h"
 #include "ucol_elm.h"
 #include "ucol_cnt.h"
 #include "ucln_in.h"
 #include "umutex.h"
-#include "unormimp.h"
 #include "cmemory.h"

 static const InverseUCATableHeader* _staticInvUCA = NULL;
@ -626,7 +626,7 @@ uint8_t ucol_uprv_getCaseBits(const UCollator *UCA, const UChar *src, uint32_t l
    nLen = unorm_normalize(src, len, UNORM_NFKD, 0, n, 128, status);
    if(U_SUCCESS(*status)) {
        for(i = 0; i < nLen; i++) {
-            uprv_init_collIterate(UCA, &n[i], 1, &s);
+            uprv_init_collIterate(UCA, &n[i], 1, &s, status);
            order = ucol_getNextCE(UCA, &s, status);
            if(isContinuation(order)) {
                *status = U_INTERNAL_PROGRAM_ERROR;
@ -878,7 +878,7 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL
                    /* then pick CEs out until there is no more and stuff them into expansion */
                    collIterate s;
                    uint32_t order = 0;
-                    uprv_init_collIterate(src->UCA, expOffset + src->source, 1, &s);
+                    uprv_init_collIterate(src->UCA, expOffset + src->source, 1, &s, status);

                    for(;;) {
                        order = ucol_getNextCE(src->UCA, &s, status);
@ -1045,7 +1045,7 @@ ucol_uprv_bld_copyRangeFromUCA(UColTokenParser *src, tempUCATable *t,
                // it doesn't make any difference whether we have to go to the UCA
                // or not.
                {
-                    uprv_init_collIterate(src->UCA, el.uchars, el.cSize, &colIt);
+                    uprv_init_collIterate(src->UCA, el.uchars, el.cSize, &colIt, status);
                    while(CE != UCOL_NO_MORE_CES) {
                        CE = ucol_getNextCE(src->UCA, &colIt, status);
                        if(CE != UCOL_NO_MORE_CES) {
--- a/icu4c/source/i18n/ucol_elm.cpp
+++ b/icu4c/source/i18n/ucol_elm.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2001-2009, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -31,6 +31,7 @@
 #include "unicode/unistr.h"
 #include "unicode/ucoleitr.h"
 #include "unicode/normlzr.h"
+#include "normalizer2impl.h"
 #include "ucol_elm.h"
 #include "ucol_tok.h"
 #include "ucol_cnt.h"
@ -1602,6 +1603,7 @@ struct enumStruct {
    tempUCATable *t;
    UCollator *tempColl;
    UCollationElements* colEl;
+    const Normalizer2Impl *nfcImpl;
    int32_t noOfClosures;
    UErrorCode *status;
 };
@ -1615,7 +1617,8 @@ _enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 li
        UCollator *tempColl = ((enumStruct *)context)->tempColl;
        UCollationElements* colEl = ((enumStruct *)context)->colEl;
        UCAElements el;
-        UChar decomp[256] = { 0 };
+        UChar decompBuffer[4];
+        const UChar *decomp;
        int32_t noOfDec = 0;

        UChar32 u32 = 0;
@ -1623,13 +1626,14 @@ _enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 li
        uint32_t len = 0;

        for(u32 = start; u32 < limit; u32++) {
-            noOfDec = unorm_getDecomposition(u32, FALSE, decomp, 256);
+            decomp = ((enumStruct *)context)->nfcImpl->
+                getDecomposition(u32, decompBuffer, noOfDec);
            //if((noOfDec = unorm_normalize(comp, len, UNORM_NFD, 0, decomp, 256, status)) > 1
            //|| (noOfDec == 1 && *decomp != (UChar)u32))
-            if(noOfDec > 0) // if we're positive, that means there is no decomposition
+            if(decomp != NULL)
            {
                len = 0;
-                UTF_APPEND_CHAR_UNSAFE(comp, len, u32);
+                U16_APPEND_UNSAFE(comp, len, u32);
                if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) != UCOL_EQUAL) {
 #ifdef UCOL_DEBUG
                    fprintf(stderr, "Closure: %08X -> ", u32);
@ -1640,7 +1644,7 @@ _enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 li
                    fprintf(stderr, "\n");
 #endif
                    ((enumStruct *)context)->noOfClosures++;
-                    el.cPoints = decomp;
+                    el.cPoints = (UChar *)decomp;
                    el.cSize = noOfDec;
                    el.noOfCEs = 0;
                    el.prefix = el.prefixChars;
@ -1938,7 +1942,7 @@ uprv_uca_canonicalClosure(tempUCATable *t,
    UChar  baseChar, firstCM;
    UChar32 fcdHighStart;
    const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
-
+    context.nfcImpl=Normalizer2Factory::getNFCImpl(*status);
    if(U_FAILURE(*status)) {
        return 0;
    }
--- a/icu4c/source/i18n/ucol_imp.h
+++ b/icu4c/source/i18n/ucol_imp.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1998-2009, International Business Machines
+*   Copyright (C) 1998-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -41,6 +41,10 @@

 #if !UCONFIG_NO_COLLATION

+#ifdef XP_CPLUSPLUS
+#include "unicode/normalizer2.h"
+#include "unicode/unistr.h"
+#endif
 #include "unicode/ucol.h"
 #include "utrie.h"
 #include "cmemory.h"
@ -264,12 +268,14 @@ minimum number for special Jamo

 #define NFC_ZERO_CC_BLOCK_LIMIT_  0x300

-typedef struct collIterate {
-  UChar *string; /* Original string */
+#ifdef XP_CPLUSPLUS
+
+typedef struct collIterate : public UMemory {
+  const UChar *string; /* Original string */
  /* UChar *start;  Pointer to the start of the source string. Either points to string
                    or to writableBuffer */
-  UChar *endp;   /* string end ptr.  Is undefined for null terminated strings */
-  UChar *pos; /* This is position in the string.  Can be to original or writable buf */
+  const UChar *endp; /* string end ptr.  Is undefined for null terminated strings */
+  const UChar *pos; /* This is position in the string.  Can be to original or writable buf */

  uint32_t *toReturn; /* This is the CE from CEs buffer that should be returned */
  uint32_t *CEpos; /* This is the position to which we have stored processed CEs */
@ -279,16 +285,15 @@ typedef struct collIterate {
  int32_t offsetRepeatCount;  /* Repeat stored offset if non-zero */
  int32_t offsetRepeatValue;  /* offset value to repeat */

-  UChar *writableBuffer;
-  uint32_t writableBufSize;
-  UChar *fcdPosition; /* Position in the original string to continue FCD check from. */
+  UnicodeString writableBuffer;
+  const UChar *fcdPosition; /* Position in the original string to continue FCD check from. */
  const UCollator *coll;
+  const Normalizer2 *nfd;
  uint8_t   flags;
  uint8_t   origFlags;
  uint32_t *extendCEs; /* This is use if CEs is not big enough */
  int32_t extendCEsSize; /* Holds the size of the dynamic CEs buffer */
  uint32_t CEs[UCOL_EXPAND_CE_BUFFER_SIZE]; /* This is where we store CEs */
-  UChar stackWritableBuffer[UCOL_WRITABLE_BUFFER_SIZE]; /* A writable buffer. */

  int32_t *offsetBuffer;    /* A dynamic buffer to hold offsets */
  int32_t offsetBufferSize; /* The size of the offset buffer */
@ -297,6 +302,12 @@ typedef struct collIterate {
  /*int32_t iteratorIndex;*/
 } collIterate;

+#else
+
+typedef struct collIterate collIterate;
+
+#endif
+
 #define paddedsize(something) ((something)+((((something)%4)!=0)?(4-(something)%4):0))
 #define headersize (paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)))

@ -305,19 +316,34 @@ struct used internally in getSpecial*CE.
 data similar to collIterate.
 */
 struct collIterateState {
-    UChar    *pos; /* This is position in the string.  Can be to original or writable buf */
-    UChar    *returnPos;
-    UChar    *fcdPosition; /* Position in the original string to continue FCD check from. */
-    UChar    *bufferaddress; /* address of the normalization buffer */
-    uint32_t  buffersize;
+    const UChar *pos; /* This is position in the string.  Can be to original or writable buf */
+    const UChar *returnPos;
+    const UChar *fcdPosition; /* Position in the original string to continue FCD check from. */
+    const UChar *bufferaddress; /* address of the normalization buffer */
+    int32_t  buffersize;
    uint8_t   flags;
    uint8_t   origFlags;
    uint32_t   iteratorIndex;
    int32_t    iteratorMove;
 };

-U_CAPI void U_EXPORT2 
-uprv_init_collIterate(const UCollator *collator, const UChar *sourceString, int32_t sourceLen, collIterate *s);
+U_CAPI void U_EXPORT2
+uprv_init_collIterate(const UCollator *collator,
+                      const UChar *sourceString, int32_t sourceLen,
+                      collIterate *s, UErrorCode *status);
+
+/* Internal functions for C test code. */
+U_CAPI collIterate * U_EXPORT2
+uprv_new_collIterate(UErrorCode *status);
+
+U_CAPI void U_EXPORT2
+uprv_delete_collIterate(collIterate *s);
+
+/* @return s->pos == s->endp */
+U_CAPI UBool U_EXPORT2
+uprv_collIterateAtEnd(collIterate *s);
+
+#ifdef XP_CPLUSPLUS

 U_NAMESPACE_BEGIN

@ -326,7 +352,7 @@ typedef struct UCollationPCE UCollationPCE;

 U_NAMESPACE_END

-struct UCollationElements
+struct UCollationElements : public UMemory
 {
  /**
  * Struct wrapper for source data
@ -351,6 +377,8 @@ struct UCollationElements
 U_CAPI void U_EXPORT2
 uprv_init_pce(const struct UCollationElements *elems);

+#endif
+
 #define UCOL_LEVELTERMINATOR 1

 /* mask off anything but primary order */
@ -1066,7 +1094,6 @@ static inline UBool ucol_unsafeCP(UChar c, const UCollator *coll) {
 /* The offsetBuffer in collIterate might need to be freed to avoid memory leaks. */
 void ucol_freeOffsetBuffer(collIterate *s); 

-
 #endif /* #if !UCONFIG_NO_COLLATION */

 #endif
--- a/icu4c/source/i18n/ucol_tok.cpp
+++ b/icu4c/source/i18n/ucol_tok.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2001-2009, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -1108,7 +1108,7 @@ reset may be null.
 handled.
 */

-static UColToken *ucol_tok_initAReset(UColTokenParser *src, UChar *expand, uint32_t *expandNext,
+static UColToken *ucol_tok_initAReset(UColTokenParser *src, const UChar *expand, uint32_t *expandNext,
                                      UParseError *parseError, UErrorCode *status)
 {
    if(src->resultLen == src->listCapacity) {
@ -1200,9 +1200,12 @@ inline UColToken *getVirginBefore(UColTokenParser *src, UColToken *sourceToken,
    uint32_t CE, SecondCE;
    uint32_t invPos;
    if(sourceToken != NULL) {
-        uprv_init_collIterate(src->UCA, src->source+((sourceToken->source)&0xFFFFFF), 1, &s);
+        uprv_init_collIterate(src->UCA, src->source+((sourceToken->source)&0xFFFFFF), 1, &s, status);
    } else {
-        uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset /**charsOffset*/, 1, &s);
+        uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset /**charsOffset*/, 1, &s, status);
+    }
+    if(U_FAILURE(*status)) {
+        return NULL;
    }

    baseCE = ucol_getNextCE(src->UCA, &s, status) & 0xFFFFFF3F;
@ -1684,10 +1687,10 @@ uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, UParseError *parseErro
                        collIterate s;
                        uint32_t CE = UCOL_NOT_FOUND, SecondCE = UCOL_NOT_FOUND;

-                        uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset, src->parsedToken.charsLen, &s);
+                        uprv_init_collIterate(src->UCA, src->source+src->parsedToken.charsOffset, src->parsedToken.charsLen, &s, status);

                        CE = ucol_getNextCE(src->UCA, &s, status);
-                        UChar *expand = s.pos;
+                        const UChar *expand = s.pos;
                        SecondCE = ucol_getNextCE(src->UCA, &s, status);

                        ListList[src->resultLen].baseCE = CE & 0xFFFFFF3F;
--- a/icu4c/source/i18n/ucol_wgt.cpp
+++ b/icu4c/source/i18n/ucol_wgt.cpp
@ -1,7 +1,7 @@
 /*  
 *******************************************************************************
 *
-*   Copyright (C) 1999-2009, International Business Machines
+*   Copyright (C) 1999-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
--- a/icu4c/source/i18n/ucoleitr.cpp
+++ b/icu4c/source/i18n/ucoleitr.cpp
@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-*   Copyright (C) 2001-2009, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ******************************************************************************
 *
@ -313,19 +313,16 @@ ucol_openElements(const UCollator  *coll,
                        int32_t    textLength,
                        UErrorCode *status)
 {
-    UCollationElements *result;
-
    if (U_FAILURE(*status)) {
        return NULL;
    }

-    result = (UCollationElements *)uprv_malloc(sizeof(UCollationElements));
-    /* test for NULL */
+    UCollationElements *result = new UCollationElements;
    if (result == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
-    
+
    result->reset_ = TRUE;
    result->isWritable = FALSE;
    result->pce = NULL;
@ -333,7 +330,7 @@ ucol_openElements(const UCollator  *coll,
    if (text == NULL) {
        textLength = 0;
    }
-    uprv_init_collIterate(coll, text, textLength, &result->iteratordata_);
+    uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status);

    return result;
 }
@ -345,30 +342,24 @@ ucol_closeElements(UCollationElements *elems)
 	if (elems != NULL) {
 	  collIterate *ci = &elems->iteratordata_;

-	  if (ci != NULL) {
-		  if (ci->writableBuffer != ci->stackWritableBuffer) {
-			uprv_free(ci->writableBuffer);
-		  }
+	  if (ci->extendCEs) {
+		  uprv_free(ci->extendCEs);
+	  }

-		  if (ci->extendCEs) {
-			  uprv_free(ci->extendCEs);
-		  }
-
-		  if (ci->offsetBuffer) {
-			  uprv_free(ci->offsetBuffer);
-		  }
+	  if (ci->offsetBuffer) {
+		  uprv_free(ci->offsetBuffer);
 	  }

 	  if (elems->isWritable && elems->iteratordata_.string != NULL)
 	  {
-		uprv_free(elems->iteratordata_.string);
+		uprv_free((UChar *)elems->iteratordata_.string);
 	  }

 	  if (elems->pce != NULL) {
 		  delete elems->pce;
 	  }

-	  uprv_free(elems);
+	  delete elems;
 	}
 }

@ -387,11 +378,7 @@ ucol_reset(UCollationElements *elems)
        ci->flags |= UCOL_ITER_NORM;
    }

-    if (ci->stackWritableBuffer != ci->writableBuffer) {
-        uprv_free(ci->writableBuffer);
-        ci->writableBuffer = ci->stackWritableBuffer;
-        ci->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE;
-    }
+    ci->writableBuffer.remove();
    ci->fcdPosition = NULL;

  //ci->offsetReturn = ci->offsetStore = NULL;
@ -686,7 +673,7 @@ ucol_setText(      UCollationElements *elems,

    if (elems->isWritable && elems->iteratordata_.string != NULL)
    {
-        uprv_free(elems->iteratordata_.string);
+        uprv_free((UChar *)elems->iteratordata_.string);
    }

    if (text == NULL) {
@ -698,7 +685,7 @@ ucol_setText(      UCollationElements *elems,
    /* free offset buffer to avoid memory leak before initializing. */
    ucol_freeOffsetBuffer(&(elems->iteratordata_));
    uprv_init_collIterate(elems->iteratordata_.coll, text, textLength, 
-                          &elems->iteratordata_);
+                          &elems->iteratordata_, status);

    elems->reset_   = TRUE;
 }
--- a/icu4c/source/i18n/usearch.cpp
+++ b/icu4c/source/i18n/usearch.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2001-2009 IBM and others. All rights reserved.
+*   Copyright (C) 2001-2010 IBM and others. All rights reserved.
 **********************************************************************
 *   Date        Name        Description
 *  07/02/2001   synwee      Creation.
@ -14,12 +14,14 @@
 #include "unicode/usearch.h"
 #include "unicode/ustring.h"
 #include "unicode/uchar.h"
+#include "normalizer2impl.h"
 #include "unormimp.h"
 #include "ucol_imp.h"
 #include "usrchimp.h"
 #include "cmemory.h"
 #include "ucln_in.h"
 #include "uassert.h"
+#include "ustr_imp.h"

 U_NAMESPACE_USE

@ -311,7 +313,11 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch,
    else {
        uprv_init_collIterate(strsrch->collator, pattern->text,
                         pattern->textLength,
-                         &coleiter->iteratordata_);
+                         &coleiter->iteratordata_,
+                         status);
+    }
+    if(U_FAILURE(*status)) {
+        return 0;
    }

    if (pattern->CE != cetable && pattern->CE) {
@ -381,7 +387,11 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
    } else {
        uprv_init_collIterate(strsrch->collator, pattern->text,
                              pattern->textLength,
-                              &coleiter->iteratordata_);
+                              &coleiter->iteratordata_,
+                              status);
+    }
+    if(U_FAILURE(*status)) {
+        return 0;
    }

    if (pattern->PCE != pcetable && pattern->PCE != NULL) {
@ -1074,54 +1084,20 @@ static
 inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start,
                                  int32_t    end)
 {
-    UChar t2[32], p2[32];
-    int32_t length = end - start;
    if (strsrch->strength != UCOL_IDENTICAL) {
        return TRUE;
    }

-    UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
-    int32_t decomplength = unorm_decompose(t2, LENGTHOF(t2),
-                                       strsrch->search->text + start, length,
-                                       FALSE, 0, &status);
-    // use separate status2 in case of buffer overflow
-    if (decomplength != unorm_decompose(p2, LENGTHOF(p2),
-                                        strsrch->pattern.text,
-                                        strsrch->pattern.textLength,
-                                        FALSE, 0, &status2)) {
-        return FALSE; // lengths are different
-    }
-
-    // compare contents
-    UChar *text, *pattern;
-    if(U_SUCCESS(status)) {
-        text = t2;
-        pattern = p2;
-    } else if(status==U_BUFFER_OVERFLOW_ERROR) {
-        status = U_ZERO_ERROR;
-        // allocate one buffer for both decompositions
-        text = (UChar *)uprv_malloc(decomplength * 2 * U_SIZEOF_UCHAR);
-        // Check for allocation failure.
-        if (text == NULL) {
-        	return FALSE;
-        }
-        pattern = text + decomplength;
-        unorm_decompose(text, decomplength, strsrch->search->text + start,
-                        length, FALSE, 0, &status);
-        unorm_decompose(pattern, decomplength, strsrch->pattern.text,
-                        strsrch->pattern.textLength, FALSE, 0, &status);
-    } else {
-        // NFD failed, make sure that u_memcmp() does not overrun t2 & p2
-        // and that we don't uprv_free() an undefined text pointer
-        text = pattern = t2;
-        decomplength = 0;
-    }
-    UBool result = (UBool)(u_memcmp(pattern, text, decomplength) == 0);
-    if(text != t2) {
-        uprv_free(text);
-    }
+    // Note: We could use Normalizer::compare() or similar, but for short strings
+    // which may not be in FCD it might be faster to just NFD them.
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString t2, p2;
+    strsrch->nfd->normalize(
+        UnicodeString(FALSE, strsrch->search->text + start, end - start), t2, status);
+    strsrch->nfd->normalize(
+        UnicodeString(FALSE, strsrch->pattern.text, strsrch->pattern.textLength), p2, status);
    // return FALSE if NFD failed
-    return U_SUCCESS(status) && result;
+    return U_SUCCESS(status) && t2 == p2;
 }

 #if BOYER_MOORE
@ -2724,6 +2700,8 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
                                                            UCOL_SHIFTED;
        result->variableTop = ucol_getVariableTop(collator, status);

+        result->nfd         = Normalizer2Factory::getNFDInstance(*status);
+
        if (U_FAILURE(*status)) {
            uprv_free(result);
            return NULL;
@ -3040,7 +3018,8 @@ U_CAPI void U_EXPORT2 usearch_setCollator(      UStringSearch *strsrch,
                    ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
                    uprv_init_collIterate(collator, strsrch->search->text,
                                          strsrch->search->textLength,
-                                          &(strsrch->textIter->iteratordata_));
+                                          &(strsrch->textIter->iteratordata_),
+                                          status);
                    strsrch->utilIter->iteratordata_.coll = collator;
                }
            }
@ -3432,7 +3411,8 @@ U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch)
        ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
        uprv_init_collIterate(strsrch->collator, strsrch->search->text,
                              strsrch->search->textLength,
-                              &(strsrch->textIter->iteratordata_));
+                              &(strsrch->textIter->iteratordata_),
+                              &status);
        strsrch->search->matchedLength      = 0;
        strsrch->search->matchedIndex       = USEARCH_DONE;
        strsrch->search->isOverlap          = FALSE;
--- a/icu4c/source/i18n/usrchimp.h
+++ b/icu4c/source/i18n/usrchimp.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
+*   Copyright (C) 2001-2010 IBM and others. All rights reserved.
 **********************************************************************
 *   Date        Name        Description
 *  08/13/2001   synwee      Creation.
@ -13,6 +13,7 @@

 #if !UCONFIG_NO_COLLATION

+#include "unicode/normalizer2.h"
 #include "unicode/ucol.h"
 #include "unicode/ucoleitr.h"
 #include "unicode/ubrk.h"
@ -59,6 +60,7 @@ struct UStringSearch {
    struct USearch            *search;
    struct UPattern            pattern;
    const  UCollator          *collator;
+    const  Normalizer2        *nfd;
    // positions within the collation element iterator is used to determine
    // if we are at the start of the text.
           UCollationElements *textIter;
--- a/icu4c/source/test/cintltst/callcoll.c
+++ b/icu4c/source/test/cintltst/callcoll.c
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2009, International Business Machines Corporation and
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
 /*******************************************************************************
@ -52,7 +52,6 @@
 #include "calldata.h"
 #include "cstring.h"
 #include "cmemory.h"
-#include "ucol_imp.h"

 /* set to 1 to test offsets in backAndForth() */
 #define TEST_OFFSETS 0
@ -148,13 +147,14 @@ static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sor
    int32_t strength = UCOL_PRIMARY;
    uint32_t res_size = 0;
    UBool doneCase = FALSE;
+    UErrorCode errorCode = U_ZERO_ERROR;

    char *current = buffer;
    const uint8_t *currentSk = sortkey;

    uprv_strcpy(current, "[");

-    while(strength <= UCOL_QUATERNARY && strength <= coll->strength) {
+    while(strength <= UCOL_QUATERNARY && strength <= ucol_getStrength(coll)) {
        if(strength > UCOL_PRIMARY) {
            uprv_strcat(current, " . ");
        }
@ -162,20 +162,20 @@ static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sor
            uprv_appendByteToHexString(current, *currentSk++);
            uprv_strcat(current, " ");
        }
-        if(coll->caseLevel == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) {
+        if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) {
            doneCase = TRUE;
-        } else if(coll->caseLevel == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) {
+        } else if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) {
            strength ++;
        }
        if (*currentSk) {
            uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */
        }
-        if(strength == UCOL_QUATERNARY && coll->alternateHandling == UCOL_NON_IGNORABLE) {
+        if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) {
            break;
        }
    }

-    if(coll->strength == UCOL_IDENTICAL) {
+    if(ucol_getStrength(coll) == UCOL_IDENTICAL) {
        uprv_strcat(current, " . ");
        while(*currentSk != 0) {
            uprv_appendByteToHexString(current, *currentSk++);
@ -214,7 +214,7 @@ UBool hasCollationElements(const char *locName) {

  UErrorCode status = U_ZERO_ERROR;

-  UResourceBundle *loc = ures_open(U_ICUDATA_COLL, locName, &status);;
+  UResourceBundle *loc = ures_open(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll", locName, &status);;

  if(U_SUCCESS(status)) {
    status = U_ZERO_ERROR;
--- a/icu4c/source/test/cintltst/citertst.c
+++ b/icu4c/source/test/cintltst/citertst.c
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT:
- * Copyright (c) 1997-2009, International Business Machines Corporation and
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
 /********************************************************************************
@ -994,11 +994,6 @@ static void TestSmallBuffer()
      free(orders);

      ucol_reset(testiter);
-      /* ensures that the writable buffer was cleared */
-      if (testiter->iteratordata_.writableBuffer !=
-          testiter->iteratordata_.stackWritableBuffer) {
-          log_err("Error Writable buffer in collation element iterator not reset\n");
-      }

      /* ensures closing of elements done properly to clear writable buffer */
      ucol_next(testiter, &status);
--- a/icu4c/source/test/cintltst/cmsccoll.c
+++ b/icu4c/source/test/cintltst/cmsccoll.c
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT:
- * Copyright (c) 2001-2009, International Business Machines Corporation and
+ * Copyright (c) 2001-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
 /*******************************************************************************
@ -1093,7 +1093,7 @@ static void testCEs(UCollator *coll, UErrorCode *status) {
    UColOptionSet opts;
    UParseError parseError;
    UChar *rulesCopy = NULL;
-    collIterate c;
+    collIterate *c = uprv_new_collIterate(status);
    UCAConstants *consts = NULL;
    uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
        UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
@ -1102,12 +1102,15 @@ static void testCEs(UCollator *coll, UErrorCode *status) {

    if (U_FAILURE(*status)) {
        log_err("Could not open root collator %s\n", u_errorName(*status));
+        uprv_delete_collIterate(c);
        return;
    }

    colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
    if (U_FAILURE(*status)) {
        log_err("Could not get collator name: %s\n", u_errorName(*status));
+        ucol_close(UCA);
+        uprv_delete_collIterate(c);
        return;
    }

@ -1183,15 +1186,15 @@ static void testCEs(UCollator *coll, UErrorCode *status) {
            varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

-            uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);
+            uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, c, status);

-            currCE = ucol_getNextCE(coll, &c, status);
+            currCE = ucol_getNextCE(coll, c, status);
            if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
                log_verbose("Thai prevowel detected. Will pick next CE\n");
-                currCE = ucol_getNextCE(coll, &c, status);
+                currCE = ucol_getNextCE(coll, c, status);
            }

-            currContCE = ucol_getNextCE(coll, &c, status);
+            currContCE = ucol_getNextCE(coll, c, status);
            if(!isContinuation(currContCE)) {
                currContCE = 0;
            }
@ -1272,6 +1275,7 @@ static void testCEs(UCollator *coll, UErrorCode *status) {
        free(rulesCopy);
    }
    ucol_close(UCA);
+    uprv_delete_collIterate(c);
 }

 #if 0
@ -2992,10 +2996,11 @@ static void TestVariableTopSetting(void) {
            uint32_t CE = UCOL_NO_MORE_CES;

            /* before we start screaming, let's see if there is a problem with the rules */
-            collIterate s;
-            uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s);
+            UErrorCode collIterateStatus = U_ZERO_ERROR;
+            collIterate *s = uprv_new_collIterate(&collIterateStatus);
+            uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, s, &collIterateStatus);

-            CE = ucol_getNextCE(coll, &s, &status);
+            CE = ucol_getNextCE(coll, s, &status);

            for(i = 0; i < oldChLen; i++) {
              j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i));
@ -3004,7 +3009,7 @@ static void TestVariableTopSetting(void) {
            if(status == U_PRIMARY_TOO_LONG_ERROR) {
              log_verbose("= Expected failure for %s =", buffer);
            } else {
-              if(s.pos == s.endp) {
+              if(uprv_collIterateAtEnd(s)) {
                log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
                  oldChOffset, u_errorName(status), buffer);
              } else {
@ -3012,6 +3017,7 @@ static void TestVariableTopSetting(void) {
                  buffer);
              }
            }
+            uprv_delete_collIterate(s);
          }
          varTop2 = ucol_getVariableTop(coll, &status);
          if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
--- a/icu4c/source/test/cintltst/cnormtst.c
+++ b/icu4c/source/test/cintltst/cnormtst.c
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2009, International Business Machines Corporation and
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
 /********************************************************************************
@ -1334,17 +1334,6 @@ TestNextPrevious() {
        log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
        return;
    }
-
-    /* missing pErrorCode */
-    buffer[0]=5;
-    iter.index=1;
-    length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
-                      UNORM_NFD, 0, TRUE, NULL,
-                      NULL);
-    if(iter.index!=1 || buffer[0]!=5) {
-        log_err("error unorm_next(pErrorCode==NULL) %s\n", u_errorName(errorCode));
-        return;
-    }
 }

 static void
--- a/icu4c/source/test/cintltst/cucdtst.c
+++ b/icu4c/source/test/cintltst/cucdtst.c
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT:
- * Copyright (c) 1997-2009, International Business Machines Corporation and
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
 /*******************************************************************************
@ -22,6 +22,7 @@
 #include "unicode/putil.h"
 #include "unicode/ustring.h"
 #include "unicode/uloc.h"
+#include "unicode/unorm2.h"

 #include "cintltst.h"
 #include "putilimp.h"
@ -2942,6 +2943,7 @@ TestConsistency() {
    UErrorCode errorCode;

 #if !UCONFIG_NO_NORMALIZATION
+    const UNormalizer2 *norm2;
    USerializedSet sset;
 #endif
    UChar32 start, end;
@ -3070,15 +3072,26 @@ TestConsistency() {
     * In general, the set for the middle such character should be a subset
     * of the set for the first.
     */
+    errorCode=U_ZERO_ERROR;
+    norm2=unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        log_data_err("unorm2_getInstance(NFD) failed - %s\n", u_errorName(errorCode));
+        return;
+    }
+
    set1=uset_open(1, 0);
    set2=uset_open(1, 0);

    if (unorm_getCanonStartSet(0x49, &sset)) {
+        UChar source[1];
+
        _setAddSerialized(set1, &sset);

        /* enumerate all characters that are plausible to be latin letters */
        for(start=0xa0; start<0x2000; ++start) {
-            if(unorm_getDecomposition(start, FALSE, buffer16, LENGTHOF(buffer16))>1 && buffer16[0]==0x49) {
+            source[0]=(UChar)start;
+            length=unorm2_normalize(norm2, source, 1, buffer16, LENGTHOF(buffer16), &errorCode);
+            if(length>1 && buffer16[0]==0x49) {
                uset_add(set2, start);
            }
        }
--- a/icu4c/source/test/hdrtst/cxxfiles.txt
+++ b/icu4c/source/test/hdrtst/cxxfiles.txt
@ -1,4 +1,4 @@
-# Copyright (c) 2001-2009 International Business Machines 
+# Copyright (c) 2001-2010 International Business Machines 
 # Corporation and others. All Rights Reserved.
 # common & i18n
 bidi.h
@ -38,6 +38,7 @@ measfmt.h
 measunit.h
 measure.h
 msgfmt.h
+normalizer2.h
 normlzr.h
 numfmt.h
 numsys.h
--- a/icu4c/source/test/intltest/tstnorm.cpp
+++ b/icu4c/source/test/intltest/tstnorm.cpp
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2009, International Business Machines Corporation and
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

@ -776,38 +776,10 @@ void BasicNormalizerTest::TestConcatenate() {
        },
        /* ### TODO: add more interesting cases */
        {
-            "D", 
-            "\\u0340\\u0341\\u0343\\u0344\\u0374\\u037E\\u0387\\u0958" 
-            "\\u0959\\u095A\\u095B\\u095C\\u095D\\u095E\\u095F\\u09DC" 
-            "\\u09DD\\u09DF\\u0A33\\u0A36\\u0A59\\u0A5A\\u0A5B\\u0A5E" 
-            "\\u0B5C\\u0B5D\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69" 
-            "\\u0F73\\u0F75\\u0F76\\u0F78\\u0F81\\u0F93\\u0F9D\\u0FA2" 
-            "\\u0FA7\\u0FAC\\u0FB9\\u1F71\\u1F73\\u1F75\\u1F77\\u1F79" 
-            "\\u1F7B\\u1F7D\\u1FBB\\u1FBE\\u1FC9\\u1FCB\\u1FD3\\u1FDB",
-            
-            "\\u1FE3\\u1FEB\\u1FEE\\u1FEF\\u1FF9\\u1FFB\\u1FFD\\u2000" 
-            "\\u2001\\u2126\\u212A\\u212B\\u2329\\u232A\\uF900\\uFA10" 
-            "\\uFA12\\uFA15\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A\\uFB1F" 
-            "\\uFB2A\\uFB2B\\uFB2C\\uFB2D\\uFB2E\\uFB2F\\uFB30\\uFB31" 
-            "\\uFB32\\uFB33\\uFB34\\uFB35\\uFB36\\uFB38\\uFB39\\uFB3A" 
-            "\\uFB3B\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46" 
-            "\\uFB47\\uFB48\\uFB49\\uFB4A\\uFB4B\\uFB4C\\uFB4D\\uFB4E",
-           
-            "\\u0340\\u0341\\u0343\\u0344\\u0374\\u037E\\u0387\\u0958"
-            "\\u0959\\u095A\\u095B\\u095C\\u095D\\u095E\\u095F\\u09DC"
-            "\\u09DD\\u09DF\\u0A33\\u0A36\\u0A59\\u0A5A\\u0A5B\\u0A5E"
-            "\\u0B5C\\u0B5D\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69"
-            "\\u0F73\\u0F75\\u0F76\\u0F78\\u0F81\\u0F93\\u0F9D\\u0FA2"
-            "\\u0FA7\\u0FAC\\u0FB9\\u1F71\\u1F73\\u1F75\\u1F77\\u1F79"
-            "\\u1F7B\\u1F7D\\u1FBB\\u1FBE\\u1FC9\\u1FCB\\u1FD3\\u0399"
-            "\\u0301\\u03C5\\u0308\\u0301\\u1FEB\\u1FEE\\u1FEF\\u1FF9"
-            "\\u1FFB\\u1FFD\\u2000\\u2001\\u2126\\u212A\\u212B\\u2329"
-            "\\u232A\\uF900\\uFA10\\uFA12\\uFA15\\uFA20\\uFA22\\uFA25"
-            "\\uFA26\\uFA2A\\uFB1F\\uFB2A\\uFB2B\\uFB2C\\uFB2D\\uFB2E"
-            "\\uFB2F\\uFB30\\uFB31\\uFB32\\uFB33\\uFB34\\uFB35\\uFB36"
-            "\\uFB38\\uFB39\\uFB3A\\uFB3B\\uFB3C\\uFB3E\\uFB40\\uFB41"
-            "\\uFB43\\uFB44\\uFB46\\uFB47\\uFB48\\uFB49\\uFB4A\\uFB4B"
-            "\\uFB4C\\uFB4D\\uFB4E"
+            "D",
+            "\\u03B1\\u0345",
+            "\\u0C4D\\U000110BA\\U0001D169",
+            "\\u03B1\\U0001D169\\U000110BA\\u0C4D\\u0345"
        }
    };

@ -1743,72 +1715,23 @@ U_CDECL_END

 void
 BasicNormalizerTest::TestSkippable() {
-    UnicodeSet starts, diff, skipSets[UNORM_MODE_COUNT], expectSets[UNORM_MODE_COUNT];
-    UnicodeSet *startsPtr = &starts;
+    UnicodeSet diff, skipSets[UNORM_MODE_COUNT], expectSets[UNORM_MODE_COUNT];
    UnicodeString s, pattern;
-    UChar32 start, limit, rangeStart, rangeEnd;
-    int32_t i, range, count;
-
-    UErrorCode status;

    /* build NF*Skippable sets from runtime data */
-    status=U_ZERO_ERROR;
-    USetAdder sa = {
-        (USet *)startsPtr,
-        _set_add,
-        _set_addRange,
-        _set_addString,
-        NULL, // don't need remove()
-        NULL
-    };
-    unorm_addPropertyStarts(&sa, &status);
-    if(U_FAILURE(status)) {
-        errln("unable to load normalization data for unorm_addPropertyStarts(() - %s\n", u_errorName(status));
+    IcuTestErrorCode errorCode(*this, "TestSkippable");
+    skipSets[UNORM_NFD].applyPattern(UNICODE_STRING_SIMPLE("[:NFD_Inert:]"), errorCode);
+    skipSets[UNORM_NFKD].applyPattern(UNICODE_STRING_SIMPLE("[:NFKD_Inert:]"), errorCode);
+    skipSets[UNORM_NFC].applyPattern(UNICODE_STRING_SIMPLE("[:NFC_Inert:]"), errorCode);
+    skipSets[UNORM_NFKC].applyPattern(UNICODE_STRING_SIMPLE("[:NFKC_Inert:]"), errorCode);
+    if(errorCode.logIfFailureAndReset("UnicodeSet(NF..._Inert) failed")) {
        return;
    }
-    count=starts.getRangeCount();
-
-    start=limit=0;
-    rangeStart=rangeEnd=0;
-    range=0;
-    for(;;) {
-        if(start<limit) {
-            /* get properties for start and apply them to [start..limit[ */
-            if(unorm_isNFSkippable(start, UNORM_NFD)) {
-                skipSets[UNORM_NFD].add(start, limit-1);
-            }
-            if(unorm_isNFSkippable(start, UNORM_NFKD)) {
-                skipSets[UNORM_NFKD].add(start, limit-1);
-            }
-            if(unorm_isNFSkippable(start, UNORM_NFC)) {
-                skipSets[UNORM_NFC].add(start, limit-1);
-            }
-            if(unorm_isNFSkippable(start, UNORM_NFKC)) {
-                skipSets[UNORM_NFKC].add(start, limit-1);
-            }
-        }
-
-        /* go to next range of same properties */
-        start=limit;
-        if(++limit>rangeEnd) {
-            if(range<count) {
-                limit=rangeStart=starts.getRangeStart(range);
-                rangeEnd=starts.getRangeEnd(range);
-                ++range;
-            } else if(range==count) {
-                /* additional range to complete the Unicode code space */
-                limit=rangeStart=rangeEnd=0x110000;
-                ++range;
-            } else {
-                break;
-            }
-        }
-    }

    /* get expected sets from hardcoded patterns */
    initExpectedSkippables(expectSets);

-    for(i=UNORM_NONE; i<UNORM_MODE_COUNT; ++i) {
+    for(int32_t i=UNORM_NONE; i<UNORM_MODE_COUNT; ++i) {
        if(skipSets[i]!=expectSets[i]) {
            errln("error: TestSkippable skipSets[%d]!=expectedSets[%d]\n"
                  "may need to update hardcoded UnicodeSet patterns in\n"
--- a/icu4c/source/test/intltest/tstnrapi.cpp
+++ b/icu4c/source/test/intltest/tstnrapi.cpp
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2005, International Business Machines Corporation and
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

@ -163,25 +163,6 @@ BasicNormalizerTest::TestNormalizerAPI() {
    if(s.charAt(1)!=0xe4) {
        errln("error in Normalizer::decompose(self)");
    }
-
-    // test internal normalization exclusion options
-    // s contains a compatibility CJK character and a Hangul syllable
-    s=UnicodeString("a\\uFACE\\uD7A3b", -1, US_INV).unescape();
-    status=U_ZERO_ERROR;
-    Normalizer::decompose(s, FALSE, UNORM_NX_HANGUL, out, status);
-    if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\u9F9C\\uD7A3b").unescape()) {
-        errln("Normalizer::decompose(UNORM_NX_HANGUL) failed - %s", u_errorName(status));
-    }
-    status=U_ZERO_ERROR;
-    Normalizer::decompose(s, FALSE, UNORM_NX_CJK_COMPAT, out, status);
-    if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\uFACE\\u1112\\u1175\\u11c2b").unescape()) {
-        errln("Normalizer::decompose(UNORM_NX_CJK_COMPAT) failed - %s", u_errorName(status));
-    }
-    status=U_ZERO_ERROR;
-    Normalizer::decompose(s, FALSE, UNORM_NX_CJK_COMPAT|UNORM_NX_HANGUL, out, status);
-    if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\uFACE\\uD7A3b").unescape()) {
-        errln("Normalizer::decompose(UNORM_NX_CJK_COMPAT|UNORM_NX_HANGUL) failed - %s", u_errorName(status));
-    }
 }

 #endif
--- a/icu4c/source/test/intltest/ucdtest.cpp
+++ b/icu4c/source/test/intltest/ucdtest.cpp
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2009, International Business Machines Corporation and
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

@ -15,6 +15,19 @@

 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0]))

+static const char *ignorePropNames[]={
+    "FC_NFKC",
+    "NFD_QC",
+    "NFC_QC",
+    "NFKD_QC",
+    "NFKC_QC",
+    "Expands_On_NFD",
+    "Expands_On_NFC",
+    "Expands_On_NFKD",
+    "Expands_On_NFKC",
+    "NFKC_CF"
+};
+
 UnicodeTest::UnicodeTest()
 {
    UErrorCode errorCode=U_ZERO_ERROR;
@ -23,6 +36,10 @@ UnicodeTest::UnicodeTest()
        delete unknownPropertyNames;
        unknownPropertyNames=NULL;
    }
+    // Ignore some property names altogether.
+    for(int32_t i=0; i<LENGTHOF(ignorePropNames); ++i) {
+        unknownPropertyNames->puti(UnicodeString(ignorePropNames[i], -1, US_INV), 1, errorCode);
+    }
 }

 UnicodeTest::~UnicodeTest()
@ -76,7 +93,7 @@ getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) {
 }

 static const char *const
-derivedCorePropsNames[]={
+derivedPropsNames[]={
    "Math",
    "Alphabetic",
    "Lowercase",
@ -86,6 +103,7 @@ derivedCorePropsNames[]={
    "XID_Start",
    "XID_Continue",
    "Default_Ignorable_Code_Point",
+    "Full_Composition_Exclusion",
    "Grapheme_Extend",
    "Grapheme_Link", /* Unicode 5 moves this property here from PropList.txt */
    "Grapheme_Base",
@ -95,11 +113,12 @@ derivedCorePropsNames[]={
    "Changes_When_Uppercased",
    "Changes_When_Titlecased",
    "Changes_When_Casefolded",
-    "Changes_When_Casemapped"
+    "Changes_When_Casemapped",
+    "Changes_When_NFKC_Casefolded"
 };

 static const UProperty
-derivedCorePropsIndex[]={
+derivedPropsIndex[]={
    UCHAR_MATH,
    UCHAR_ALPHABETIC,
    UCHAR_LOWERCASE,
@ -109,6 +128,7 @@ derivedCorePropsIndex[]={
    UCHAR_XID_START,
    UCHAR_XID_CONTINUE,
    UCHAR_DEFAULT_IGNORABLE_CODE_POINT,
+    UCHAR_FULL_COMPOSITION_EXCLUSION,
    UCHAR_GRAPHEME_EXTEND,
    UCHAR_GRAPHEME_LINK,
    UCHAR_GRAPHEME_BASE,
@ -118,17 +138,18 @@ derivedCorePropsIndex[]={
    UCHAR_CHANGES_WHEN_UPPERCASED,
    UCHAR_CHANGES_WHEN_TITLECASED,
    UCHAR_CHANGES_WHEN_CASEFOLDED,
-    UCHAR_CHANGES_WHEN_CASEMAPPED
+    UCHAR_CHANGES_WHEN_CASEMAPPED,
+    UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
 };

-static int32_t numErrors[LENGTHOF(derivedCorePropsIndex)]={ 0 };
+static int32_t numErrors[LENGTHOF(derivedPropsIndex)]={ 0 };

 enum { MAX_ERRORS=50 };

 U_CFUNC void U_CALLCONV
-derivedCorePropsLineFn(void *context,
-                        char *fields[][2], int32_t /* fieldCount */,
-                        UErrorCode *pErrorCode)
+derivedPropsLineFn(void *context,
+                   char *fields[][2], int32_t /* fieldCount */,
+                   UErrorCode *pErrorCode)
 {
    UnicodeTest *me=(UnicodeTest *)context;
    uint32_t start, end;
@ -136,35 +157,35 @@ derivedCorePropsLineFn(void *context,

    u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
-        me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt field 0 at %s\n", fields[0][0]);
+        me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt or DerivedNormalizationProps.txt field 0 at %s\n", fields[0][0]);
        return;
    }

    /* parse derived binary property name, ignore unknown names */
-    i=getTokenIndex(derivedCorePropsNames, LENGTHOF(derivedCorePropsNames), fields[1][0]);
+    i=getTokenIndex(derivedPropsNames, LENGTHOF(derivedPropsNames), fields[1][0]);
    if(i<0) {
        UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0]));
        propName.trim();
        if(me->unknownPropertyNames->find(propName)==NULL) {
            UErrorCode errorCode=U_ZERO_ERROR;
            me->unknownPropertyNames->puti(propName, 1, errorCode);
-            me->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt\n", fields[1][0]);
+            me->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]);
        }
        return;
    }

-    me->derivedCoreProps[i].add(start, end);
+    me->derivedProps[i].add(start, end);
 }

 void UnicodeTest::TestAdditionalProperties() {
-    // test DerivedCoreProperties.txt
-    if(LENGTHOF(derivedCoreProps)<LENGTHOF(derivedCorePropsNames)) {
-        errln("error: UnicodeTest::derivedCoreProps[] too short, need at least %d UnicodeSets\n",
-              LENGTHOF(derivedCorePropsNames));
+    // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt
+    if(LENGTHOF(derivedProps)<LENGTHOF(derivedPropsNames)) {
+        errln("error: UnicodeTest::derivedProps[] too short, need at least %d UnicodeSets\n",
+              LENGTHOF(derivedPropsNames));
        return;
    }
-    if(LENGTHOF(derivedCorePropsIndex)!=LENGTHOF(derivedCorePropsNames)) {
-        errln("error in ucdtest.cpp: LENGTHOF(derivedCorePropsIndex)!=LENGTHOF(derivedCorePropsNames)\n");
+    if(LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)) {
+        errln("error in ucdtest.cpp: LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)\n");
        return;
    }

@ -188,16 +209,25 @@ void UnicodeTest::TestAdditionalProperties() {
    strcat(backupPath, U_FILE_SEP_STRING);
    strcat(backupPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");

-    u_parseDelimitedFile(newPath, ';', fields, 2, derivedCorePropsLineFn, this, &errorCode);
+    char *path=newPath;
+    u_parseDelimitedFile(newPath, ';', fields, 2, derivedPropsLineFn, this, &errorCode);

    if(errorCode==U_FILE_ACCESS_ERROR) {
        errorCode=U_ZERO_ERROR;
-        u_parseDelimitedFile(backupPath, ';', fields, 2, derivedCorePropsLineFn, this, &errorCode);
+        path=backupPath;
+        u_parseDelimitedFile(backupPath, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
    }
    if(U_FAILURE(errorCode)) {
        errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode));
        return;
    }
+    char *basename=path+strlen(path)-strlen("DerivedCoreProperties.txt");
+    strcpy(basename, "DerivedNormalizationProps.txt");
+    u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(errorCode));
+        return;
+    }

    // now we have all derived core properties in the UnicodeSets
    // run them all through the API
@ -206,14 +236,14 @@ void UnicodeTest::TestAdditionalProperties() {
    UChar32 start, end;

    // test all TRUE properties
-    for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
-        rangeCount=derivedCoreProps[i].getRangeCount();
+    for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {
+        rangeCount=derivedProps[i].getRangeCount();
        for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
-            start=derivedCoreProps[i].getRangeStart(range);
-            end=derivedCoreProps[i].getRangeEnd(range);
+            start=derivedProps[i].getRangeStart(range);
+            end=derivedProps[i].getRangeEnd(range);
            for(; start<=end; ++start) {
-                if(!u_hasBinaryProperty(start, derivedCorePropsIndex[i])) {
-                    errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong\n", start, derivedCorePropsNames[i]);
+                if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) {
+                    errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong\n", start, derivedPropsNames[i]);
                    if(++numErrors[i]>=MAX_ERRORS) {
                      errln("Too many errors, moving to the next test");
                      break;
@ -224,19 +254,19 @@ void UnicodeTest::TestAdditionalProperties() {
    }

    // invert all properties
-    for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
-        derivedCoreProps[i].complement();
+    for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {
+        derivedProps[i].complement();
    }

    // test all FALSE properties
-    for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
-        rangeCount=derivedCoreProps[i].getRangeCount();
+    for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {
+        rangeCount=derivedProps[i].getRangeCount();
        for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
-            start=derivedCoreProps[i].getRangeStart(range);
-            end=derivedCoreProps[i].getRangeEnd(range);
+            start=derivedProps[i].getRangeStart(range);
+            end=derivedProps[i].getRangeEnd(range);
            for(; start<=end; ++start) {
-                if(u_hasBinaryProperty(start, derivedCorePropsIndex[i])) {
-                    errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedCorePropsNames[i]);
+                if(u_hasBinaryProperty(start, derivedPropsIndex[i])) {
+                    errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedPropsNames[i]);
                    if(++numErrors[i]>=MAX_ERRORS) {
                      errln("Too many errors, moving to the next test");
                      break;
--- a/icu4c/source/test/intltest/ucdtest.h
+++ b/icu4c/source/test/intltest/ucdtest.h
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2009, International Business Machines Corporation and
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

@ -13,9 +13,9 @@ U_CFUNC void U_CALLCONV unicodeDataLineFn(void *context,
                              UErrorCode *pErrorCode);

 U_CFUNC void U_CALLCONV
-derivedCorePropsLineFn(void *context,
-                       char *fields[][2], int32_t fieldCount,
-                       UErrorCode *pErrorCode);
+derivedPropsLineFn(void *context,
+                   char *fields[][2], int32_t fieldCount,
+                   UErrorCode *pErrorCode);

 U_NAMESPACE_BEGIN

@ -43,11 +43,11 @@ private:
                              UErrorCode *pErrorCode);

    friend void U_CALLCONV
-    derivedCorePropsLineFn(void *context,
+    derivedPropsLineFn(void *context,
                           char *fields[][2], int32_t fieldCount,
                           UErrorCode *pErrorCode);

-    UnicodeSet derivedCoreProps[30];
+    UnicodeSet derivedProps[30];
    U_NAMESPACE_QUALIFIER Hashtable *unknownPropertyNames;
 };

--- a/icu4c/source/test/intltest/usettest.cpp
+++ b/icu4c/source/test/intltest/usettest.cpp
@ -1,6 +1,6 @@
 /*
 ********************************************************************************
-*   Copyright (C) 1999-2009 International Business Machines Corporation and
+*   Copyright (C) 1999-2010 International Business Machines Corporation and
 *   others. All Rights Reserved.
 ********************************************************************************
 *   Date        Name        Description
@ -709,6 +709,37 @@ void UnicodeSetTest::TestAPI() {
    TEST_ASSERT((void *)constUSet == (void *)constSet);
    const UnicodeSet *constSetx = UnicodeSet::fromUSet(constUSet);
    TEST_ASSERT((void *)constSetx == (void *)constUSet);
+
+    // span(UnicodeString) and spanBack(UnicodeString) convenience methods
+    UnicodeString longString=UNICODE_STRING_SIMPLE("aaaaaaaaaabbbbbbbbbbcccccccccc");
+    UnicodeSet ac(0x61, 0x63);
+    ac.remove(0x62).freeze();
+    if( ac.span(longString, -5, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 0, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 5, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 10, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 15, USET_SPAN_CONTAINED)!=15 ||
+        ac.span(longString, 20, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, 25, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, 30, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, 35, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, INT32_MAX, USET_SPAN_CONTAINED)!=30
+    ) {
+        errln("UnicodeSet.span(UnicodeString, ...) returns incorrect end indexes");
+    }
+    if( ac.spanBack(longString, -5, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 0, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 5, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 10, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 15, USET_SPAN_CONTAINED)!=15 ||
+        ac.spanBack(longString, 20, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, 25, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, 30, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, 35, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, INT32_MAX, USET_SPAN_CONTAINED)!=20
+    ) {
+        errln("UnicodeSet.spanBack(UnicodeString, ...) returns incorrect start indexes");
+    }
 }

 void UnicodeSetTest::TestIteration() {
--- a/icu4c/source/test/intltest/ustrtest.cpp
+++ b/icu4c/source/test/intltest/ustrtest.cpp
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2009, International Business Machines Corporation and
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

@ -62,6 +62,7 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &
        case 17: name = "TestNameSpace"; if (exec) TestNameSpace(); break;
        case 18: name = "TestUTF32"; if (exec) TestUTF32(); break;
        case 19: name = "TestUTF8"; if (exec) TestUTF8(); break;
+        case 20: name = "TestReadOnlyAlias"; if (exec) TestReadOnlyAlias(); break;

        default: name = ""; break; //needed to end loop
    }
@ -1120,6 +1121,30 @@ UnicodeStringTest::TestMiscellaneous()
    if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
        errln("UnicodeString::hasMetaData() returns TRUE");
    }
+
+    // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
+    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
+    test1.truncate(36);  // ensure length()<getCapacity()
+    test2=test1;  // share the buffer
+    test1.truncate(5);
+    if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
+        errln("UnicodeString(shared buffer).truncate() failed");
+    }
+    if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
+        errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
+              "modified another copy of the string!");
+    }
+    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
+    test1.truncate(36);  // ensure length()<getCapacity()
+    test2=test1;  // share the buffer
+    test1.remove();
+    if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
+        errln("UnicodeString(shared buffer).remove() failed");
+    }
+    if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
+        errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
+              "modified another copy of the string!");
+    }
 }

 void
@ -1873,3 +1898,108 @@ UnicodeStringTest::TestUTF8() {
    }
 #endif
 }
+
+// Test if this compiler supports Return Value Optimization of unnamed temporary objects.
+// Returns, by value, an unnamed temporary UnicodeString constructed from uchars
+// with the (TRUE, uchars, -1) constructor form. TestReadOnlyAlias() compares the
+// returned object's getBuffer() with the uchars pointer that it passed in and
+// stores the result in its hasRVO flag.
+// NOTE(review): keep the return expression an unnamed temporary; returning a
+// named local would presumably probe a different optimization - confirm before changing.
+static UnicodeString wrapUChars(const UChar *uchars) {
+    return UnicodeString(TRUE, uchars, -1);
+}
+
+// Tests UnicodeString objects that alias a caller-owned UChar buffer:
+// construction and setTo() with an aliased buffer, truncate()/remove()/retainBetween()
+// on such strings, getTerminatedBuffer() on shortened aliases,
+// and the tempSubString()/tempSubStringBetween() functions.
+// The statements depend on the aliasing state left by the previous ones; do not reorder.
+void
+UnicodeStringTest::TestReadOnlyAlias() {
+    UChar uchars[]={ 0x61, 0x62, 0 };
+    // A freshly aliased string is expected to point directly at the caller's buffer.
+    UnicodeString alias(TRUE, uchars, 2);
+    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
+        errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
+        return;
+    }
+    // Shortening the alias must keep pointing at uchars ...
+    alias.truncate(1);
+    if(alias.length()!=1 || alias.getBuffer()!=uchars) {
+        errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
+    }
+    // ... but getTerminatedBuffer() is expected to return a different buffer
+    // (terminated at length 1) and to leave uchars[1] untouched.
+    if(alias.getTerminatedBuffer()==uchars) {
+        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
+              "did not allocate and copy as expected.");
+    }
+    if(uchars[1]!=0x62) {
+        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
+              "modified the original buffer.");
+    }
+    if(1!=u_strlen(alias.getTerminatedBuffer())) {
+        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
+              "does not return a buffer terminated at the proper length.");
+    }
+
+    // Same checks via setTo(), then emptying the string with remove().
+    alias.setTo(TRUE, uchars, 2);
+    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
+        errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
+        return;
+    }
+    alias.remove();
+    if(alias.length()!=0) {
+        errln("UnicodeString(read-only-alias).remove() did not work.");
+    }
+    // After remove(), the terminated buffer must not be uchars and uchars[0] must be intact.
+    if(alias.getTerminatedBuffer()==uchars) {
+        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
+              "did not un-alias as expected.");
+    }
+    if(uchars[0]!=0x61) {
+        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
+              "modified the original buffer.");
+    }
+    if(0!=u_strlen(alias.getTerminatedBuffer())) {
+        errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
+              "does not return a buffer terminated at length 0.");
+    }
+
+    // Removing a prefix or suffix, or retaining a middle range, of an alias is expected
+    // to keep pointing into longString's buffer (at the matching offset).
+    // NOTE(review): the first setTo() argument is FALSE here but TRUE above; presumably it
+    // is the isTerminated flag of the aliasing setTo() overload - confirm against unistr.h.
+    UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
+    alias.setTo(FALSE, longString.getBuffer(), longString.length());
+    alias.remove(0, 10);
+    if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
+        errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
+    }
+    alias.setTo(FALSE, longString.getBuffer(), longString.length());
+    alias.remove(27, 99);
+    if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
+        errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
+    }
+    alias.setTo(FALSE, longString.getBuffer(), longString.length());
+    alias.retainBetween(6, 30);
+    if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
+        errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
+    }
+
+    // hasRVO is TRUE if the string returned by wrapUChars() still points at abc.
+    // The buffer-pointer comparisons below are only enforced when hasRVO is TRUE;
+    // the content comparisons are always enforced.
+    UChar abc[]={ 0x61, 0x62, 0x63, 0 };
+    UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
+
+    UnicodeString temp;
+    // no arguments: the whole string
+    temp.fastCopyFrom(longString.tempSubString());
+    if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
+        errln("UnicodeString.tempSubString() failed");
+    }
+    // negative start: expected to yield the first 5 code units
+    temp.fastCopyFrom(longString.tempSubString(-3, 5));
+    if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
+        errln("UnicodeString.tempSubString(-3, 5) failed");
+    }
+    // start only: from index 17 to the end
+    temp.fastCopyFrom(longString.tempSubString(17));
+    if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
+        errln("UnicodeString.tempSubString(17) failed");
+    }
+    // start beyond the end of the 36-unit string: expected to be empty
+    temp.fastCopyFrom(longString.tempSubString(99));
+    if(!temp.isEmpty()) {
+        errln("UnicodeString.tempSubString(99) failed");
+    }
+    temp.fastCopyFrom(longString.tempSubStringBetween(6));
+    if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
+        errln("UnicodeString.tempSubStringBetween(6) failed");
+    }
+    // [8, 18): 10 code units starting at index 8
+    temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
+    if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
+        errln("UnicodeString.tempSubStringBetween(8, 18) failed");
+    }
+    // A substring of a bogus string is expected to be bogus as well.
+    UnicodeString bogusString;
+    bogusString.setToBogus();
+    temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
+    if(!temp.isBogus()) {
+        errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
+    }
+}
--- a/icu4c/source/test/intltest/ustrtest.h
+++ b/icu4c/source/test/intltest/ustrtest.h
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2009, International Business Machines Corporation and
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

@ -77,6 +77,7 @@ public:
    void TestNameSpace(void);
    void TestUTF32(void);
    void TestUTF8(void);
+    void TestReadOnlyAlias(void);
 };

 class StringCaseTest: public IntlTest {
--- a/icu4c/source/tools/Makefile.in
+++ b/icu4c/source/tools/Makefile.in
@ -1,5 +1,5 @@
 ## Makefile.in for ICU tools
-## Copyright (c) 1999-2009, International Business Machines Corporation and
+## Copyright (c) 1999-2010, International Business Machines Corporation and
 ## others. All Rights Reserved.

 ## Source directory information
@ -15,7 +15,7 @@ subdir = tools

 SUBDIRS = toolutil ctestfw makeconv genrb genuca genbrk genctd \
 gennames genpname gencnval gensprep genccode gencmn icupkg pkgdata \
-gentest genprops gencase genbidi gennorm gencfu
+gentest genprops gencase genbidi gennorm gennorm2 gencfu

 ## List of phony targets
 .PHONY : all all-local all-recursive install install-local	\
--- a/icu4c/source/tools/gennames/gennames.vcproj
+++ b/icu4c/source/tools/gennames/gennames.vcproj
@ -389,25 +389,10 @@
 	<References>
 	</References>
 	<Files>
-		<Filter
-			Name="Source Files"
-			Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+		<File
+			RelativePath=".\gennames.c"
 			>
-			<File
-				RelativePath=".\gennames.c"
-				>
-			</File>
-		</Filter>
-		<Filter
-			Name="Header Files"
-			Filter="h;hpp;hxx;hm;inl"
-			>
-		</Filter>
-		<Filter
-			Name="Resource Files"
-			Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
-			>
-		</Filter>
+		</File>
 	</Files>
 	<Globals>
 	</Globals>
--- a/icu4c/source/tools/gennorm/gennorm.c
+++ b/icu4c/source/tools/gennorm/gennorm.c
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2001-2005, International Business Machines
+*   Copyright (C) 2001-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -61,7 +61,8 @@ enum {
    UNICODE_VERSION,
    ICUDATADIR,
    CSOURCE,
-    STORE_FLAGS
+    STORE_FLAGS,
+    WRITE_NORM2
 };

 static UOption options[]={
@ -74,7 +75,8 @@ static UOption options[]={
    UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
    UOPTION_ICUDATADIR,
    UOPTION_DEF("csource", 'C', UOPT_NO_ARG),
-    UOPTION_DEF("prune", 'p', UOPT_REQUIRES_ARG)
+    UOPTION_DEF("prune", 'p', UOPT_REQUIRES_ARG),
+    UOPTION_DEF("write-norm2", '\1', UOPT_NO_ARG)
 };

 extern int
@ -140,6 +142,8 @@ main(int argc, char* argv[]) {
            "\t                    to the source file basenames before opening;\n"
            "\t                    'gennorm new' will read UnicodeData-new.txt etc.\n",
            u_getDataDirectory());
+        fprintf(stderr,
+            "\t--write-norm2      write nfc.txt and nfkc.txt files for gennorm2\n");
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

@ -243,7 +247,7 @@ main(int argc, char* argv[]) {
    /* prepare the filename beginning with the source dir */
    uprv_strcpy(filename, srcDir);
    basename=filename+uprv_strlen(filename);
-    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
+    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR && *(basename-1)!=U_FILE_ALT_SEP_CHAR) {
        *basename++=U_FILE_SEP_CHAR;
    }

@ -286,6 +290,10 @@ main(int argc, char* argv[]) {

    /* process parsed data */
    if(U_SUCCESS(errorCode)) {
+        if(options[WRITE_NORM2].doesOccur) {
+            writeNorm2(destDir);
+        }
+
        processData();

        /* write the properties data file */
--- a/icu4c/source/tools/gennorm/gennorm.h
+++ b/icu4c/source/tools/gennorm/gennorm.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2005, International Business Machines
+*   Copyright (C) 1999-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -83,6 +83,9 @@ setCompositionExclusion(uint32_t code);
 U_CFUNC void
 setFNC(uint32_t c, UChar *s);

+extern void
+writeNorm2(const char *dataDir);
+
 extern void
 processData(void);

--- a/icu4c/source/tools/gennorm/gennorm.vcproj
+++ b/icu4c/source/tools/gennorm/gennorm.vcproj
@ -389,33 +389,18 @@
 	<References>
 	</References>
 	<Files>
-		<Filter
-			Name="Source Files"
-			Filter="c;cpp;rc"
+		<File
+			RelativePath=".\gennorm.c"
 			>
-			<File
-				RelativePath=".\gennorm.c"
-				>
-			</File>
-			<File
-				RelativePath=".\store.c"
-				>
-			</File>
-		</Filter>
-		<Filter
-			Name="Header Files"
-			Filter="h"
+		</File>
+		<File
+			RelativePath=".\gennorm.h"
 			>
-			<File
-				RelativePath=".\gennorm.h"
-				>
-			</File>
-		</Filter>
-		<Filter
-			Name="Resource Files"
-			Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+		</File>
+		<File
+			RelativePath=".\store.c"
 			>
-		</Filter>
+		</File>
 	</Files>
 	<Globals>
 	</Globals>
--- a/icu4c/source/tools/gennorm/store.c
+++ b/icu4c/source/tools/gennorm/store.c
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2009, International Business Machines
+*   Copyright (C) 1999-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -106,11 +106,13 @@ static UToolMemory *normMem, *utf32Mem, *extraMem, *combiningTriplesMem;

 static Norm *norms;

+#if GENNORM_OBSOLETE
 /*
 * set a flag for each code point that was seen in decompositions -
 * avoid to decompose ones that have not been used before
 */
 static uint32_t haveSeenFlags[256];
+#endif

 /* set of characters with NFD_QC=No (i.e., those with canonical decompositions) */
 static USet *nfdQCNoSet;
@ -192,8 +194,10 @@ init() {
    /* allocate UTF-32 string memory */
    utf32Mem=utm_open("gennorm UTF-32 strings", 30000, 30000, 4);

+#if GENNORM_OBSOLETE
    /* reset all "have seen" flags */
    uprv_memset(haveSeenFlags, 0, sizeof(haveSeenFlags));
+#endif

    /* open an empty set */
    nfdQCNoSet=uset_open(1, 0);
@ -289,6 +293,7 @@ enumTrie(EnumTrieFn *fn, void *context) {
    return count;
 }

+#if GENNORM_OBSOLETE
 static void
 setHaveSeenString(const uint32_t *s, int32_t length) {
    uint32_t c;
@ -301,6 +306,7 @@ setHaveSeenString(const uint32_t *s, int32_t length) {
 }

 #define HAVE_SEEN(c) (haveSeenFlags[((c)>>5)&0xff]&(1<<((c)&0x1f)))
+#endif

 /* handle combining data ---------------------------------------------------- */

@ -410,6 +416,7 @@ findCombiningCP(uint32_t code, UBool isLead) {
    return 0xffff;
 }

+#if GENNORM_OBSOLETE
 static void
 addCombiningTriple(uint32_t lead, uint32_t trail, uint32_t combined) {
    CombiningTriple *triple;
@ -434,6 +441,7 @@ addCombiningTriple(uint32_t lead, uint32_t trail, uint32_t combined) {
    triple->trail=trail;
    triple->combined=combined;
 }
+#endif

 static int
 compareTriples(const void *l, const void *r) {
@ -560,6 +568,7 @@ processCombining() {

 /* processing incoming normalization data ----------------------------------- */

+#if GENNORM_OBSOLETE
 /*
 * Decompose Hangul syllables algorithmically and fill a pseudo-Norm struct.
 * c must be a Hangul syllable code point.
@ -594,6 +603,7 @@ getHangulDecomposition(uint32_t c, Norm *pHangulNorm, uint32_t hangulBuffer[3])
        pHangulNorm->lenNFKD=length;
    }
 }
+#endif

 /*
 * decompose the one decomposition further, may generate two decompositions
@ -601,6 +611,20 @@ getHangulDecomposition(uint32_t c, Norm *pHangulNorm, uint32_t hangulBuffer[3])
 */
 static void
 decompStoreNewNF(uint32_t code, Norm *norm) {
+#if !GENNORM_OBSOLETE
+    /* always allocate the original string */
+    uint32_t *s32;
+    uint8_t length;
+    if((length=norm->lenNFD)!=0) {
+        s32=utm_allocN(utf32Mem, norm->lenNFD);
+        uprv_memcpy(s32, norm->nfd, norm->lenNFD*4);
+        norm->nfd=s32;
+    } else if((length=norm->lenNFKD)!=0) {
+        s32=utm_allocN(utf32Mem, norm->lenNFKD);
+        uprv_memcpy(s32, norm->nfkd, norm->lenNFKD*4);
+        norm->nfkd=s32;
+    }
+#else
    uint32_t nfd[40], nfkd[40], hangulBuffer[3];
    Norm hangulNorm;

@ -695,8 +719,10 @@ decompStoreNewNF(uint32_t code, Norm *norm) {
        norm->nfkd=s32;
        setHaveSeenString(nfkd, lenNFKD);
    }
+#endif
 }

+#if GENNORM_OBSOLETE
 typedef struct DecompSingle {
    uint32_t c;
    Norm *norm;
@ -800,6 +826,7 @@ decompWithSingleFn(void *context, uint32_t code, Norm *norm) {
        norm->nfkd=s32;
    }
 }
+#endif

 /*
 * process the data for one code point listed in UnicodeData;
@ -807,7 +834,9 @@ decompWithSingleFn(void *context, uint32_t code, Norm *norm) {
 */
 extern void
 storeNorm(uint32_t code, Norm *norm) {
+#if GENNORM_OBSOLETE
    DecompSingle decompSingle;
+#endif
    Norm *p;

    if(DO_NOT_STORE(UGENNORM_STORE_COMPAT)) {
@ -826,6 +855,7 @@ storeNorm(uint32_t code, Norm *norm) {
        /* decompose this one decomposition further, may generate two decompositions */
        decompStoreNewNF(code, norm);

+#if GENNORM_OBSOLETE
        /* has this code point been used in previous decompositions? */
        if(HAVE_SEEN(code)) {
            /* use this decomposition to decompose other decompositions further */
@ -833,6 +863,7 @@ storeNorm(uint32_t code, Norm *norm) {
            decompSingle.norm=norm;
            enumTrie(decompWithSingleFn, &decompSingle);
        }
+#endif
    }

    /* store the data */
@ -1815,6 +1846,144 @@ getFoldingAuxOffset(uint32_t data) {

 #endif /* #if !UCONFIG_NO_NORMALIZATION */

+/*
+ * Write the combining class of every code point to f, one line per run of
+ * consecutive code points that share the same non-zero value:
+ *   "XXXX:cc" for a single code point, "XXXX..YYYY:cc" for a range.
+ * Runs with combining class 0 are not written.
+ * The value is norms[i].udataCC, where i is what utrie_get32(normTrie, ...)
+ * returns for the code point; i==0 or isInBlockZero is treated as class 0.
+ */
+static void
+writeAllCC(FILE *f) {
+    uint32_t i;
+    UChar32 prevCode, code;
+    uint8_t prevCC, cc;
+    UBool isInBlockZero;
+
+    fprintf(f, "# Canonical_Combining_Class (ccc) values\n");
+    prevCode=0;
+    prevCC=0;
+    /* iterate one past U+10FFFF so that a run ending at U+10FFFF gets flushed */
+    for(code=0; code<=0x110000;) {
+        if(code==0x110000) {
+            /*
+             * Sentinel iteration: there is no trie lookup here, so set the
+             * flag explicitly rather than reusing the previous iteration's value.
+             */
+            cc=0;
+            isInBlockZero=FALSE;
+        } else {
+            i=utrie_get32(normTrie, code, &isInBlockZero);
+            if(i==0 || isInBlockZero) {
+                cc=0;
+            } else {
+                cc=norms[i].udataCC;
+            }
+        }
+        if(prevCC!=cc) {
+            /* the value changed: write the run [prevCode, code-1] if it was non-zero */
+            if(prevCC!=0) {
+                /* same signed type as prevCode, to avoid a signed/unsigned comparison */
+                UChar32 lastCode=code-1;
+                if(prevCode==lastCode) {
+                    fprintf(f, "%04lX:%d\n", (long)lastCode, prevCC);
+                } else {
+                    fprintf(f, "%04lX..%04lX:%d\n",
+                            (long)prevCode, (long)lastCode, prevCC);
+                }
+            }
+            prevCode=code;
+            prevCC=cc;
+        }
+        if(isInBlockZero) {
+            /* skip the rest of this all-zero trie data block */
+            code+=UTRIE_DATA_BLOCK_LENGTH;
+        } else {
+            ++code;
+        }
+    }
+}
+
+/*
+ * Does the Norm entry that normTrie yields for this code point
+ * have a non-empty nfd or nfkd string?
+ */
+static UBool
+hasMapping(uint32_t code) {
+    const Norm *entry=&norms[utrie_get32(normTrie, code, NULL)];
+    if(entry->lenNFD!=0) {
+        return TRUE;
+    }
+    return entry->lenNFKD!=0;
+}
+
+/*
+ * Is the mapping of this code point a one-way mapping?
+ *
+ * A canonical decomposition is a one-way mapping if
+ * - it does not map to exactly two code points
+ * - the code has ccc!=0
+ * - the code has the Composition_Exclusion property
+ * - its non-starter (second code point) decomposes
+ * - its starter (first code point) has a one-way mapping;
+ *   this is checked by following the chain of first code points
+ *
+ * If there is no canonical decomposition, then the result is TRUE only if
+ * withCompat is set and there is a compatibility decomposition.
+ */
+static UBool
+hasOneWayMapping(uint32_t code, UBool withCompat) {
+    const Norm *entry=norms+utrie_get32(normTrie, code, NULL);
+
+    while(entry->lenNFD!=0) {
+        /* check the length first: nfd[1] is only read for two-code point mappings */
+        if(entry->lenNFD!=2) {
+            return TRUE;
+        }
+        if(entry->udataCC!=0) {
+            return TRUE;
+        }
+        if((entry->combiningFlags&0x80)!=0) {
+            return TRUE;
+        }
+        if(hasMapping(entry->nfd[1])) {
+            return TRUE;
+        }
+        /* continue with the starter */
+        entry=norms+utrie_get32(normTrie, entry->nfd[0], NULL);
+    }
+    return withCompat && entry->lenNFKD!=0;
+}
+
+/*
+ * Write the decomposition mappings of all code points to f, one per line:
+ * the code point, a separator character, and the space-separated code points
+ * of the mapping, all as upper-case hex with at least four digits.
+ *
+ * The nfd string is written if there is one, with separator '>' if
+ * hasOneWayMapping(code, withCompat) and '=' otherwise.
+ * Otherwise, if withCompat is set, the nfkd string is written with separator '>'.
+ * Code points with neither get no line.
+ */
+static void
+writeAllMappings(FILE *f, UBool withCompat) {
+    uint32_t i, code;
+    UBool isInBlockZero;
+
+    if(withCompat) {
+        fprintf(f, "\n# Canonical and compatibility decomposition mappings\n");
+    } else {
+        fprintf(f, "\n# Canonical decomposition mappings\n");
+    }
+    for(code=0; code<=0x10ffff;) {
+        i=utrie_get32(normTrie, code, &isInBlockZero);
+        if(isInBlockZero) {
+            /* skip this whole trie data block */
+            code+=UTRIE_DATA_BLOCK_LENGTH;
+        } else {
+            if(i!=0) {
+                /*
+                 * Initialize the mapping pointer and the separator so that they are
+                 * never indeterminate when neither branch below selects a mapping;
+                 * length is 0 in that case and nothing is written.
+                 */
+                uint32_t *s32=NULL;
+                uint8_t length;
+                char separator='=';
+                if((length=norms[i].lenNFD)!=0) {
+                    s32=norms[i].nfd;
+                    if(hasOneWayMapping(code, withCompat)) {
+                        separator='>';
+                    }
+                } else if(withCompat && (length=norms[i].lenNFKD)!=0) {
+                    s32=norms[i].nfkd;
+                    separator='>';
+                }
+                if(length!=0) {
+                    uint8_t j;
+                    fprintf(f, "%04lX%c", (long)code, separator);
+                    for(j=0; j<length; ++j) {
+                        if(j!=0) {
+                            fputc(' ', f);
+                        }
+                        fprintf(f, "%04lX", (long)s32[j]);
+                    }
+                    fputc('\n', f);
+                }
+            }
+            ++code;
+        }
+    }
+}
+
+/*
+ * Create the text file path/filename via usrc_createTextData() and write the
+ * combining classes followed by the mappings into it.
+ * Exits the program with U_FILE_ACCESS_ERROR if the file cannot be created.
+ */
+static void
+writeNorm2TextFile(const char *path, const char *filename, UBool withCompat) {
+    FILE *out;
+
+    out=usrc_createTextData(path, filename);
+    if(out!=NULL) {
+        writeAllCC(out);
+        writeAllMappings(out, withCompat);
+        fclose(out);
+        return;
+    }
+    exit(U_FILE_ACCESS_ERROR);
+}
+
+/*
+ * Write the two text files for gennorm2 into dataDir:
+ * nfc.txt without, then nfkc.txt with, the compatibility mappings.
+ */
+extern void
+writeNorm2(const char *dataDir) {
+    static const struct {
+        const char *filename;
+        UBool withCompat;
+    } outputs[]={
+        { "nfc.txt", FALSE },
+        { "nfkc.txt", TRUE }
+    };
+    int32_t k;
+
+    for(k=0; k<(int32_t)(sizeof(outputs)/sizeof(outputs[0])); ++k) {
+        writeNorm2TextFile(dataDir, outputs[k].filename, outputs[k].withCompat);
+    }
+}
+
 extern void
 generateData(const char *dataDir, UBool csource) {
    static uint8_t normTrieBlock[100000], fcdTrieBlock[100000], auxTrieBlock[100000];
--- a/icu4c/source/tools/gennorm2/Makefile.in
+++ b/icu4c/source/tools/gennorm2/Makefile.in
@ -0,0 +1,82 @@
+## Makefile.in for ICU - tools/gennorm2
+## Copyright (c) 2009-2010, International Business Machines Corporation and
+## others. All Rights Reserved.
+## Steven R. Loomis/Markus W. Scherer
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = tools/gennorm2
+
+TARGET_STUB_NAME = gennorm2
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS)
+
+## Target information
+TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
+
+ifneq ($(top_builddir),$(top_srcdir))
+CPPFLAGS += -I$(top_builddir)/common
+endif
+CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
+LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
+
+OBJECTS = gennorm2.o n2builder.o
+
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local	\
+distclean distclean-local dist dist-local check check-local install-man
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET)
+
+## The install commands below are commented out:
+## this target builds $(TARGET) via all-local but installs nothing.
+install-local: all-local
+#	$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
+#	$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
+
+dist-local:
+
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(TARGET) $(OBJECTS)
+
+distclean-local: clean-local
+	$(RMV) Makefile
+
+check-local: all-local
+
+Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+	$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
+	$(POST_BUILD_STEP)
+
+
+## Include the dependency files $(DEPS) if no goal was given on the command line,
+## or if at least one of the given goals does not match %clean
+## (for example clean or distclean). The leading '-' ignores missing files.
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
--- a/icu4c/source/tools/gennorm2/gennorm2.cpp
+++ b/icu4c/source/tools/gennorm2/gennorm2.cpp
@ -0,0 +1,258 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  gennorm2.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009nov25
+*   created by: Markus W. Scherer
+*
+*   This program reads text files that define Unicode normalization,
+*   parses them, and builds a binary data file.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+#include "unicode/utypes.h"
+#include "unicode/errorcode.h"
+#include "unicode/localpointer.h"
+#include "unicode/putil.h"
+#include "unicode/uchar.h"
+#include "unicode/unistr.h"
+#include "n2builder.h"
+#include "normalizer2impl.h"
+#include "toolutil.h"
+#include "uoptions.h"
+#include "uparse.h"
+
+#if UCONFIG_NO_NORMALIZATION
+#include "unewdata.h"
+#endif
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))  // element count; only meaningful for a true array, not a pointer
+
+U_NAMESPACE_BEGIN
+// Tool-wide flags, set in main() from the command line; n2builder.h declares them extern.
+UBool beVerbose=FALSE, haveCopyright=TRUE;
+// Defines LocalStdioFilePointer for FILE with fclose; presumably closes the file when the wrapper is destroyed (macro from unicode/localpointer.h).
+U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
+
+#if !UCONFIG_NO_NORMALIZATION
+void parseFile(FILE *f, Normalizer2DataBuilder &builder);  // defined after main()
+#endif
+
+/* -------------------------------------------------------------------------- */
+// Indexes into options[] below; the enumerator order must match the initializer order.
+enum {
+    HELP_H,
+    HELP_QUESTION_MARK,
+    VERBOSE,
+    COPYRIGHT,
+    SOURCEDIR,
+    OUTPUT_FILENAME,
+    UNICODE_VERSION
+};
+// Command-line option descriptors; main() presets some values and passes the table to u_parseArgs().
+static UOption options[]={
+    UOPTION_HELP_H,
+    UOPTION_HELP_QUESTION_MARK,
+    UOPTION_VERBOSE,
+    UOPTION_COPYRIGHT,
+    UOPTION_SOURCEDIR,
+    UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG),  // -o / --output <filename>; main() treats it as mandatory
+    UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG)  // -u / --unicode <version>; main() presets U_UNICODE_VERSION
+};
+
+extern "C" int
+main(int argc, char* argv[]) {  // Tool entry point; the returned UErrorCode value becomes the process exit status.
+    U_MAIN_INIT_ARGS(argc, argv);
+
+    /* preset defaults, then parse; afterwards argc<0 flags a bad argument at argv[-argc], otherwise argv[1..argc-1] are used as input files */
+    options[SOURCEDIR].value="";
+    options[UNICODE_VERSION].value=U_UNICODE_VERSION;
+    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[HELP_H]), options);
+
+    /* error handling, printing usage message */
+    if(argc<0) {
+        fprintf(stderr,
+            "error in command line argument \"%s\"\n",
+            argv[-argc]);
+    }
+    if(!options[OUTPUT_FILENAME].doesOccur) {  // -o is mandatory: force the usage/error path below
+        argc=-1;
+    }
+    if( argc<2 ||
+        options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur
+    ) {
+        /*
+         * Broken into chunks because the C89 standard says the minimum
+         * required supported string length is 509 bytes.
+         */
+        fprintf(stderr,
+            "Usage: %s [-options] infiles+ -o outputfilename\n"
+            "\n"
+            "Reads the infiles with normalization data and\n"
+            "creates a binary file (outputfilename) with the data.\n"
+            "\n",
+            argv[0]);
+        fprintf(stderr,
+            "Options:\n"
+            "\t-h or -? or --help  this usage text\n"
+            "\t-v or --verbose     verbose output\n"
+            "\t-c or --copyright   include a copyright notice\n"
+            "\t-u or --unicode     Unicode version, followed by the version like 5.2.0\n");
+        fprintf(stderr,
+            "\t-s or --sourcedir   source directory, followed by the path\n"
+            "\t-o or --output      output filename\n");
+        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;  // error only for a bad argument or missing -o; help or "no input files" returns U_ZERO_ERROR
+    }
+
+    beVerbose=options[VERBOSE].doesOccur;
+    haveCopyright=options[COPYRIGHT].doesOccur;
+
+    IcuToolErrorCode errorCode("gennorm2/main()");  // its destructor reports a still-pending failure and exits (see toolutil.cpp)
+
+#if UCONFIG_NO_NORMALIZATION
+    // Normalization is compiled out: write a dummy output file and report "unsupported".
+    fprintf(stderr,
+        "gennorm2 writes a dummy binary data file "
+        "because UCONFIG_NO_NORMALIZATION is set, \n"
+        "see icu/source/common/unicode/uconfig.h\n");
+    udata_createDummy(NULL, NULL, options[OUTPUT_FILENAME].value, errorCode);
+    return U_UNSUPPORTED_ERROR;
+
+#else
+
+    LocalPointer<Normalizer2DataBuilder> builder(new Normalizer2DataBuilder(errorCode));
+    errorCode.assertSuccess();
+
+    builder->setUnicodeVersion(options[UNICODE_VERSION].value);
+
+    // prepare the filename beginning with the source dir; pathLength marks where each input file name gets appended
+    std::string filename(options[SOURCEDIR].value);
+    int32_t pathLength=filename.length();
+    if( pathLength>0 &&
+        filename[pathLength-1]!=U_FILE_SEP_CHAR &&
+        filename[pathLength-1]!=U_FILE_ALT_SEP_CHAR
+    ) {
+        filename.push_back(U_FILE_SEP_CHAR);
+        pathLength=filename.length();
+    }
+    // Parse every input file into the same builder, in command-line order.
+    for(int i=1; i<argc; ++i) {
+        printf("gennorm2: processing %s\n", argv[i]);
+        filename.append(argv[i]);
+        LocalStdioFilePointer f(fopen(filename.c_str(), "r"));
+        if(f==NULL) {
+            fprintf(stderr, "gennorm2 error: unable to open %s\n", filename.c_str());
+            exit(U_FILE_ACCESS_ERROR);
+        }
+        builder->setOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS);  // NOTE(review): re-set for each file; presumably lets a file override data from earlier files - confirm in n2builder.cpp
+        parseFile(f.getAlias(), *builder);
+        filename.erase(pathLength);  // drop this file's name, keep the directory prefix
+    }
+
+    builder->writeBinaryFile(options[OUTPUT_FILENAME].value);
+
+    return errorCode.get();
+
+#endif
+}
+
+#if !UCONFIG_NO_NORMALIZATION
+
+void parseFile(FILE *f, Normalizer2DataBuilder &builder) {  // Reads data lines from f and applies each to builder; exits the process on the first line it cannot parse.
+    IcuToolErrorCode errorCode("gennorm2/parseFile()");
+    char line[300];  // NOTE(review): fgets() returns a longer input line in pieces, and each piece is then parsed as its own line
+    uint32_t startCP, endCP;
+    while(NULL!=fgets(line, (int)sizeof(line), f)) {
+        char *comment=(char *)strchr(line, '#');
+        if(comment!=NULL) {
+            *comment=0;  // cut the line at '#': the rest is a comment
+        }
+        u_rtrim(line);
+        if(line[0]==0) {
+            continue;  // skip empty and comment-only lines
+        }
+        if(line[0]=='*') {
+            continue;  // reserved syntax
+        }
+        const char *delimiter;
+        int32_t rangeLength=
+            u_parseCodePointRangeAnyTerminator(line, &startCP, &endCP, &delimiter, errorCode);  // presumably leaves delimiter at the first character after the range - confirm in uparse.h
+        if(errorCode.isFailure()) {
+            fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line);
+            exit(errorCode.reset());
+        }
+        delimiter=u_skipWhitespace(delimiter);
+        if(*delimiter==':') {  // "range : number" sets the combining class (ccc) for every code point in the range
+            const char *s=u_skipWhitespace(delimiter+1);
+            char *end;
+            unsigned long value=strtoul(s, &end, 10);
+            if(end<=s || *u_skipWhitespace(end)!=0 || value>=0xff) {  // need at least one digit, nothing but whitespace after it, and value<=254
+                fprintf(stderr, "gennorm2 error: parsing ccc from %s\n", line);
+                exit(U_PARSE_ERROR);
+            }
+            for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
+                builder.setCC(c, (uint8_t)value);
+            }
+            continue;
+        }
+        if(*delimiter=='-') {  // "range -" removes the mapping for every code point in the range
+            if(*u_skipWhitespace(delimiter+1)!=0) {  // nothing may follow the '-'
+                fprintf(stderr, "gennorm2 error: parsing remove-mapping %s\n", line);
+                exit(U_PARSE_ERROR);
+            }
+            for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
+                builder.removeMapping(c);
+            }
+            continue;
+        }
+        if(*delimiter=='=' || *delimiter=='>') {  // '=' is a round-trip mapping, '>' a one-way mapping
+            UChar uchars[Normalizer2Impl::MAPPING_LENGTH_MASK];  // the array size caps the mapping length in UChars
+            int32_t length=u_parseString(delimiter+1, uchars, LENGTHOF(uchars), NULL, errorCode);
+            if(errorCode.isFailure()) {
+                fprintf(stderr, "gennorm2 error: parsing mapping string from %s\n", line);
+                exit(errorCode.reset());
+            }
+            UnicodeString mapping(FALSE, uchars, length);  // NOTE(review): this constructor form presumably aliases uchars instead of copying - confirm in unistr.h
+            if(*delimiter=='=') {
+                if(rangeLength!=1) {  // round-trip mappings are accepted for a single code point only
+                    fprintf(stderr,
+                            "gennorm2 error: round-trip mapping for more than 1 code point on %s\n",
+                            line);
+                    exit(U_PARSE_ERROR);
+                }
+                builder.setRoundTripMapping((UChar32)startCP, mapping);
+            } else {
+                for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
+                    builder.setOneWayMapping(c, mapping);
+                }
+            }
+            continue;
+        }
+        fprintf(stderr, "gennorm2 error: unrecognized data line %s\n", line);  // none of ':', '-', '=', '>' followed the range
+        exit(U_PARSE_ERROR);
+    }
+}
+
+#endif // !UCONFIG_NO_NORMALIZATION
+
+U_NAMESPACE_END
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
--- a/icu4c/source/tools/gennorm2/gennorm2.vcproj
+++ b/icu4c/source/tools/gennorm2/gennorm2.vcproj
@ -0,0 +1,409 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="gennorm2"
+	ProjectGUID="{C7891A65-80AB-4245-912E-5F1E17B0E6C4}"
+	RootNamespace="gennorm2"
+	Keyword="Win32Proj"
+	TargetFrameworkVersion="196613"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+		<Platform
+			Name="x64"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory=".\x86\Release"
+			IntermediateDirectory=".\x86\Release"
+			ConfigurationType="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+				CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
+				Outputs="..\..\..\bin\$(TargetFileName)"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\..\common;..\toolutil"
+				PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
+				Optimization="2"
+				EnableIntrinsicFunctions="true"
+				StringPooling="true"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				DisableLanguageExtensions="true"
+				TreatWChar_tAsBuiltInType="true"
+				PrecompiledHeaderFile=".\x86\Release\gennorm2.pch"
+				AssemblerListingLocation=".\x86\Release\"
+				ObjectFile=".\x86\Release\"
+				ProgramDataBaseFileName=".\x86\Release\"
+				WarningLevel="3"
+				DebugInformationFormat="3"
+				SuppressStartupBanner="true"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile=".\x86\Release\gennorm2.exe"
+				LinkIncremental="1"
+				SuppressStartupBanner="true"
+				ProgramDatabaseFile=".\x86\Release\gennorm2.pdb"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="1"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory=".\x86\Debug"
+			IntermediateDirectory=".\x86\Debug"
+			ConfigurationType="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+				CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
+				Outputs="..\..\..\bin\$(TargetFileName)"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\..\common;..\toolutil"
+				PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
+				EnableIntrinsicFunctions="true"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				StringPooling="true"
+				RuntimeLibrary="3"
+				BufferSecurityCheck="true"
+				DisableLanguageExtensions="true"
+				TreatWChar_tAsBuiltInType="true"
+				PrecompiledHeaderFile=".\x86\Debug\gennorm2.pch"
+				AssemblerListingLocation=".\x86\Debug\"
+				ObjectFile=".\x86\Debug\"
+				ProgramDataBaseFileName=".\x86\Debug\"
+				BrowseInformation="1"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				DebugInformationFormat="4"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile=".\x86\Debug\gennorm2.exe"
+				LinkIncremental="2"
+				SuppressStartupBanner="true"
+				GenerateDebugInformation="true"
+				ProgramDatabaseFile=".\x86\Debug\gennorm2.pdb"
+				SubSystem="1"
+				TargetMachine="1"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|x64"
+			OutputDirectory=".\x64\Release"
+			IntermediateDirectory=".\x64\Release"
+			ConfigurationType="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+				CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
+				Outputs="..\..\..\bin64\$(TargetFileName)"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\..\common;..\toolutil"
+				PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
+				Optimization="2"
+				EnableIntrinsicFunctions="true"
+				StringPooling="true"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				DisableLanguageExtensions="true"
+				TreatWChar_tAsBuiltInType="true"
+				PrecompiledHeaderFile=".\x64\Release\gennorm2.pch"
+				AssemblerListingLocation=".\x64\Release\"
+				ObjectFile=".\x64\Release\"
+				ProgramDataBaseFileName=".\x64\Release\"
+				WarningLevel="3"
+				DebugInformationFormat="3"
+				SuppressStartupBanner="true"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile=".\x64\Release\gennorm2.exe"
+				LinkIncremental="1"
+				SuppressStartupBanner="true"
+				ProgramDatabaseFile=".\x64\Release\gennorm2.pdb"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="17"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|x64"
+			OutputDirectory=".\x64\Debug"
+			IntermediateDirectory=".\x64\Debug"
+			ConfigurationType="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+				CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
+				Outputs="..\..\..\bin64\$(TargetFileName)"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\..\common;..\toolutil"
+				PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
+				EnableIntrinsicFunctions="true"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				StringPooling="true"
+				RuntimeLibrary="3"
+				BufferSecurityCheck="true"
+				DisableLanguageExtensions="true"
+				TreatWChar_tAsBuiltInType="true"
+				PrecompiledHeaderFile=".\x64\Debug\gennorm2.pch"
+				AssemblerListingLocation=".\x64\Debug\"
+				ObjectFile=".\x64\Debug\"
+				ProgramDataBaseFileName=".\x64\Debug\"
+				BrowseInformation="1"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				DebugInformationFormat="4"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile=".\x64\Debug\gennorm2.exe"
+				LinkIncremental="2"
+				SuppressStartupBanner="true"
+				GenerateDebugInformation="true"
+				ProgramDatabaseFile=".\x64\Debug\gennorm2.pdb"
+				SubSystem="1"
+				TargetMachine="17"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<File
+			RelativePath=".\gennorm2.cpp"
+			>
+		</File>
+		<File
+			RelativePath=".\n2builder.cpp"
+			>
+		</File>
+		<File
+			RelativePath=".\n2builder.h"
+			>
+		</File>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
--- a/icu4c/source/tools/gennorm2/n2builder.cpp
+++ b/icu4c/source/tools/gennorm2/n2builder.cpp
--- a/icu4c/source/tools/gennorm2/n2builder.h
+++ b/icu4c/source/tools/gennorm2/n2builder.h
@ -0,0 +1,113 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  n2builder.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009nov25
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __N2BUILDER_H__
+#define __N2BUILDER_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/errorcode.h"
+#include "unicode/unistr.h"
+#include "normalizer2impl.h"  // for IX_COUNT
+#include "toolutil.h"
+#include "utrie2.h"
+
+U_NAMESPACE_BEGIN
+
+extern UBool beVerbose, haveCopyright;
+
+struct Norm;
+
+class BuilderReorderingBuffer;
+class ExtraDataWriter;
+
+class Normalizer2DataBuilder {  // Collects ccc values and mappings per code point and writes them to a binary file; driven by gennorm2.cpp.
+public:
+    Normalizer2DataBuilder(UErrorCode &errorCode);
+    ~Normalizer2DataBuilder();
+    // Modes for setOverrideHandling(); their exact effect is implemented in n2builder.cpp (not visible in this header).
+    enum OverrideHandling {
+        OVERRIDE_NONE,
+        OVERRIDE_ANY,
+        OVERRIDE_PREVIOUS
+    };
+
+    void setOverrideHandling(OverrideHandling oh);
+    // Per-code-point setters; gennorm2's parseFile() calls them for the code points named on each data line.
+    void setCC(UChar32 c, uint8_t cc);
+    void setOneWayMapping(UChar32 c, const UnicodeString &m);
+    void setRoundTripMapping(UChar32 c, const UnicodeString &m);
+    void removeMapping(UChar32 c);
+
+    void setUnicodeVersion(const char *v);  // v is a version string such as "5.2.0" (see the gennorm2 usage text)
+
+    void writeBinaryFile(const char *filename);
+
+private:
+    friend class CompositionBuilder;
+    friend class Decomposer;
+    friend class ExtraDataWriter;
+    friend class Norm16Writer;
+
+    // Not copyable: the copy constructor and assignment operator are declared private.
+    Normalizer2DataBuilder(const Normalizer2DataBuilder &other);
+    Normalizer2DataBuilder &operator=(const Normalizer2DataBuilder &other);
+
+    Norm *allocNorm();
+    Norm *getNorm(UChar32 c);
+    Norm *createNorm(UChar32 c);
+    Norm *checkNormForMapping(Norm *p, UChar32 c);  // check for permitted overrides
+
+    const Norm &getNormRef(UChar32 c) const;
+    uint8_t getCC(UChar32 c) const;
+    UBool combinesWithCCBetween(const Norm &norm, uint8_t lowCC, uint8_t highCC) const;
+    UChar32 combine(const Norm &norm, UChar32 trail) const;
+
+    void addComposition(UChar32 start, UChar32 end, uint32_t value);
+    UBool decompose(UChar32 start, UChar32 end, uint32_t value);
+    void reorder(Norm *p, BuilderReorderingBuffer &buffer);
+    UBool hasNoCompBoundaryAfter(BuilderReorderingBuffer &buffer);
+    void setHangulData();
+    void writeMapping(UChar32 c, const Norm *p, UnicodeString &dataString);
+    void writeCompositions(UChar32 c, const Norm *p, UnicodeString &dataString);
+    void writeExtraData(UChar32 c, uint32_t value, ExtraDataWriter &writer);
+    int32_t getCenterNoNoDelta() {  // derived from indexes[IX_MIN_MAYBE_YES], so only meaningful once that index has been set
+        return indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]-Normalizer2Impl::MAX_DELTA-1;
+    }
+    void writeNorm16(UChar32 start, UChar32 end, uint32_t value);
+    void processData();
+    // NOTE(review): the roles of the members below are not visible in this header - see n2builder.cpp.
+    UTrie2 *normTrie;
+    UToolMemory *normMem;
+    Norm *norms;
+
+    int32_t phase;
+    OverrideHandling overrideHandling;  // set via setOverrideHandling()
+
+    int32_t indexes[Normalizer2Impl::IX_COUNT];
+    UTrie2 *norm16Trie;
+    UnicodeString extraData;
+
+    UVersionInfo unicodeVersion;  // set via setUnicodeVersion()
+};
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_NORMALIZATION
+
+#endif  // __N2BUILDER_H__
--- a/icu4c/source/tools/genpname/data.h
+++ b/icu4c/source/tools/genpname/data.h
--- a/icu4c/source/tools/genpname/genpname.vcproj
+++ b/icu4c/source/tools/genpname/genpname.vcproj
@ -389,29 +389,14 @@
 	<References>
 	</References>
 	<Files>
-		<Filter
-			Name="Source Files"
-			Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+		<File
+			RelativePath=".\data.h"
 			>
-			<File
-				RelativePath=".\genpname.cpp"
-				>
-			</File>
-		</Filter>
-		<Filter
-			Name="Header Files"
-			Filter="h;hpp;hxx;hm;inl"
+		</File>
+		<File
+			RelativePath=".\genpname.cpp"
 			>
-			<File
-				RelativePath=".\data.h"
-				>
-			</File>
-		</Filter>
-		<Filter
-			Name="Resource Files"
-			Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
-			>
-		</Filter>
+		</File>
 	</Files>
 	<Globals>
 	</Globals>
--- a/icu4c/source/tools/toolutil/swapimpl.cpp
+++ b/icu4c/source/tools/toolutil/swapimpl.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2005-2009, International Business Machines
+*   Copyright (C) 2005-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -50,6 +50,7 @@
 #include "ucol_swp.h"
 #include "ucnv_bld.h"
 #include "unormimp.h"
+#include "normalizer2impl.h"
 #include "sprpimpl.h"
 #include "propname.h"
 #include "rbbidata.h"
@ -619,6 +620,7 @@ static const struct {

 #if !UCONFIG_NO_NORMALIZATION
    { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap },         /* dataFormat="Norm" */
+    { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap },        /* dataFormat="Nrm2" */
 #endif
 #if !UCONFIG_NO_COLLATION
    { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap },          /* dataFormat="UCol" */
--- a/icu4c/source/tools/toolutil/toolutil.cpp
+++ b/icu4c/source/tools/toolutil/toolutil.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2009, International Business Machines
+*   Copyright (C) 1999-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -21,11 +21,6 @@
 #include <stdio.h>
 #include <sys/stat.h>
 #include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "toolutil.h"
-#include "unicode/ucal.h"

 #ifdef U_WINDOWS
 #   define VC_EXTRALEAN
@ -42,6 +37,27 @@
 #endif
 #include <errno.h>

+#include "unicode/errorcode.h"
+#include "unicode/putil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "toolutil.h"
+#include "unicode/ucal.h"
+
+U_NAMESPACE_BEGIN
+
+IcuToolErrorCode::~IcuToolErrorCode() {
+    // If a failure is still set at destruction, report it and exit. Safe because our handleFailure() does not throw exceptions.
+    if(isFailure()) { handleFailure(); }
+}
+
+void IcuToolErrorCode::handleFailure() const {  // Prints the location string and the error name to stderr, then terminates the process.
+    fprintf(stderr, "error at %s: %s\n", location, errorName());  // location is the string given to the constructor (see toolutil.h)
+    exit(errorCode);  // the UErrorCode value becomes the process exit status
+}
+
+U_NAMESPACE_END
+
 static int32_t currentYear = -1;

 U_CAPI int32_t U_EXPORT2 getCurrentYear() {
@ -235,6 +251,7 @@ utm_hasCapacity(UToolMemory *mem, int32_t capacity) {
            fprintf(stderr, "error: %s - out of memory\n", mem->name);
            exit(U_MEMORY_ALLOCATION_ERROR);
        }
+        mem->capacity=newCapacity;
    }

    return TRUE;
@ -242,9 +259,11 @@ utm_hasCapacity(UToolMemory *mem, int32_t capacity) {

 U_CAPI void * U_EXPORT2
 utm_alloc(UToolMemory *mem) {
-    char *p=(char *)mem->array+mem->idx*mem->size;
-    int32_t newIndex=mem->idx+1;
+    char *p=NULL;
+    int32_t oldIndex=mem->idx;
+    int32_t newIndex=oldIndex+1;
    if(utm_hasCapacity(mem, newIndex)) {
+        p=(char *)mem->array+oldIndex*mem->size;
        mem->idx=newIndex;
        uprv_memset(p, 0, mem->size);
    }
@ -253,9 +272,11 @@ utm_alloc(UToolMemory *mem) {

 U_CAPI void * U_EXPORT2
 utm_allocN(UToolMemory *mem, int32_t n) {
-    char *p=(char *)mem->array+mem->idx*mem->size;
-    int32_t newIndex=mem->idx+n;
+    char *p=NULL;
+    int32_t oldIndex=mem->idx;
+    int32_t newIndex=oldIndex+n;
    if(utm_hasCapacity(mem, newIndex)) {
+        p=(char *)mem->array+oldIndex*mem->size;
        mem->idx=newIndex;
        uprv_memset(p, 0, n*mem->size);
    }
--- a/icu4c/source/tools/toolutil/toolutil.h
+++ b/icu4c/source/tools/toolutil/toolutil.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2009, International Business Machines
+*   Copyright (C) 1999-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -21,6 +21,33 @@

 #include "unicode/utypes.h"

+#ifdef XP_CPLUSPLUS
+
+#include "unicode/errorcode.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * ErrorCode subclass for use in ICU command-line tools.
+ * The destructor calls handleFailure() which calls exit(errorCode) when isFailure().
+ */
+class U_TOOLUTIL_API IcuToolErrorCode : public ErrorCode {
+public:
+    /**
+     * @param loc A short string describing where the IcuToolErrorCode is used.
+     */
+    IcuToolErrorCode(const char *loc) : location(loc) {}
+    virtual ~IcuToolErrorCode();  // calls handleFailure() if a failure is still set (see toolutil.cpp)
+protected:
+    virtual void handleFailure() const;  // prints location and the error name to stderr, then exits with the error code
+private:
+    const char *location;  // pointer is stored, not copied: the caller's string must outlive this object
+};
+
+U_NAMESPACE_END
+
+#endif
+
 /*
 * For Windows, a path/filename may be the short (8.3) version
 * of the "real", long one. In this case, the short one
--- a/icu4c/source/tools/toolutil/toolutil.vcproj
+++ b/icu4c/source/tools/toolutil/toolutil.vcproj
@ -407,261 +407,246 @@
 	<References>
 	</References>
 	<Files>
-		<Filter
-			Name="Source Files"
-			Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+		<File
+			RelativePath=".\filestrm.c"
 			>
-			<File
-				RelativePath=".\filestrm.c"
-				>
-			</File>
-			<File
-				RelativePath=".\filetools.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\flagparser.c"
-				>
-			</File>
-			<File
-				RelativePath=".\package.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\pkg_genc.c"
-				>
-				<FileConfiguration
-					Name="Release|Win32"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Debug|Win32"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Release|x64"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Debug|x64"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-			</File>
-			<File
-				RelativePath=".\pkg_gencmn.c"
-				>
-				<FileConfiguration
-					Name="Release|Win32"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Debug|Win32"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Release|x64"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Debug|x64"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-			</File>
-			<File
-				RelativePath=".\pkg_icu.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\pkgitems.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\swapimpl.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\toolutil.c"
-				>
-				<FileConfiguration
-					Name="Release|Win32"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Debug|Win32"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Release|x64"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Debug|x64"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						DisableLanguageExtensions="false"
-					/>
-				</FileConfiguration>
-			</File>
-			<File
-				RelativePath=".\ucbuf.c"
-				>
-			</File>
-			<File
-				RelativePath=".\ucm.c"
-				>
-			</File>
-			<File
-				RelativePath=".\ucmstate.c"
-				>
-			</File>
-			<File
-				RelativePath=".\unewdata.c"
-				>
-			</File>
-			<File
-				RelativePath=".\uoptions.c"
-				>
-			</File>
-			<File
-				RelativePath=".\uparse.c"
-				>
-			</File>
-			<File
-				RelativePath=".\writesrc.c"
-				>
-			</File>
-			<File
-				RelativePath=".\xmlparser.cpp"
-				>
-			</File>
-		</Filter>
-		<Filter
-			Name="Header Files"
-			Filter="h;hpp;hxx;hm;inl"
+		</File>
+		<File
+			RelativePath=".\filestrm.h"
 			>
-			<File
-				RelativePath=".\filestrm.h"
-				>
-			</File>
-			<File
-				RelativePath=".\filetools.h"
-				>
-			</File>
-			<File
-				RelativePath=".\flagparser.h"
-				>
-			</File>
-			<File
-				RelativePath=".\package.h"
-				>
-			</File>
-			<File
-				RelativePath=".\pkg_genc.h"
-				>
-			</File>
-			<File
-				RelativePath=".\pkg_gencmn.h"
-				>
-			</File>
-			<File
-				RelativePath=".\pkg_icu.h"
-				>
-			</File>
-			<File
-				RelativePath=".\pkg_imp.h"
-				>
-			</File>
-			<File
-				RelativePath=".\platform_xopen_source_extended.h"
-				>
-			</File>
-			<File
-				RelativePath=".\swapimpl.h"
-				>
-			</File>
-			<File
-				RelativePath=".\toolutil.h"
-				>
-			</File>
-			<File
-				RelativePath=".\ucbuf.h"
-				>
-			</File>
-			<File
-				RelativePath=".\ucm.h"
-				>
-			</File>
-			<File
-				RelativePath=".\unewdata.h"
-				>
-			</File>
-			<File
-				RelativePath=".\uoptions.h"
-				>
-			</File>
-			<File
-				RelativePath=".\uparse.h"
-				>
-			</File>
-			<File
-				RelativePath=".\writesrc.h"
-				>
-			</File>
-			<File
-				RelativePath=".\xmlparser.h"
-				>
-			</File>
-		</Filter>
-		<Filter
-			Name="Resource Files"
-			Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+		</File>
+		<File
+			RelativePath=".\filetools.cpp"
 			>
-		</Filter>
+		</File>
+		<File
+			RelativePath=".\filetools.h"
+			>
+		</File>
+		<File
+			RelativePath=".\flagparser.c"
+			>
+		</File>
+		<File
+			RelativePath=".\flagparser.h"
+			>
+		</File>
+		<File
+			RelativePath=".\package.cpp"
+			>
+		</File>
+		<File
+			RelativePath=".\package.h"
+			>
+		</File>
+		<File
+			RelativePath=".\pkg_genc.c"
+			>
+			<FileConfiguration
+				Name="Release|Win32"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+			<FileConfiguration
+				Name="Debug|Win32"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+			<FileConfiguration
+				Name="Release|x64"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+			<FileConfiguration
+				Name="Debug|x64"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+		</File>
+		<File
+			RelativePath=".\pkg_genc.h"
+			>
+		</File>
+		<File
+			RelativePath=".\pkg_gencmn.c"
+			>
+			<FileConfiguration
+				Name="Release|Win32"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+			<FileConfiguration
+				Name="Debug|Win32"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+			<FileConfiguration
+				Name="Release|x64"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+			<FileConfiguration
+				Name="Debug|x64"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+		</File>
+		<File
+			RelativePath=".\pkg_gencmn.h"
+			>
+		</File>
+		<File
+			RelativePath=".\pkg_icu.cpp"
+			>
+		</File>
+		<File
+			RelativePath=".\pkg_icu.h"
+			>
+		</File>
+		<File
+			RelativePath=".\pkg_imp.h"
+			>
+		</File>
+		<File
+			RelativePath=".\pkgitems.cpp"
+			>
+		</File>
+		<File
+			RelativePath=".\platform_xopen_source_extended.h"
+			>
+		</File>
+		<File
+			RelativePath=".\swapimpl.cpp"
+			>
+		</File>
+		<File
+			RelativePath=".\swapimpl.h"
+			>
+		</File>
+		<File
+			RelativePath=".\toolutil.cpp"
+			>
+			<FileConfiguration
+				Name="Release|Win32"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+			<FileConfiguration
+				Name="Debug|Win32"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+			<FileConfiguration
+				Name="Release|x64"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+			<FileConfiguration
+				Name="Debug|x64"
+				>
+				<Tool
+					Name="VCCLCompilerTool"
+					DisableLanguageExtensions="false"
+				/>
+			</FileConfiguration>
+		</File>
+		<File
+			RelativePath=".\toolutil.h"
+			>
+		</File>
+		<File
+			RelativePath=".\ucbuf.c"
+			>
+		</File>
+		<File
+			RelativePath=".\ucbuf.h"
+			>
+		</File>
+		<File
+			RelativePath=".\ucm.c"
+			>
+		</File>
+		<File
+			RelativePath=".\ucm.h"
+			>
+		</File>
+		<File
+			RelativePath=".\ucmstate.c"
+			>
+		</File>
+		<File
+			RelativePath=".\unewdata.c"
+			>
+		</File>
+		<File
+			RelativePath=".\unewdata.h"
+			>
+		</File>
+		<File
+			RelativePath=".\uoptions.c"
+			>
+		</File>
+		<File
+			RelativePath=".\uoptions.h"
+			>
+		</File>
+		<File
+			RelativePath=".\uparse.c"
+			>
+		</File>
+		<File
+			RelativePath=".\uparse.h"
+			>
+		</File>
+		<File
+			RelativePath=".\writesrc.c"
+			>
+		</File>
+		<File
+			RelativePath=".\writesrc.h"
+			>
+		</File>
+		<File
+			RelativePath=".\xmlparser.cpp"
+			>
+		</File>
+		<File
+			RelativePath=".\xmlparser.h"
+			>
+		</File>
 	</Files>
 	<Globals>
 	</Globals>
--- a/icu4c/source/tools/toolutil/unewdata.c
+++ b/icu4c/source/tools/toolutil/unewdata.c
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999,2008, International Business Machines
+*   Copyright (C) 1999-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -14,6 +14,7 @@
 *   created by: Markus W. Scherer
 */

+#include <stdio.h>
 #include "unicode/utypes.h"
 #include "unicode/putil.h"
 #include "unicode/ustring.h"
@ -162,6 +163,33 @@ udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode) {
    return fileLength;
 }

+/*
+ * dummy UDataInfo cf. udata.h
+ *
+ * Placeholder header information passed to udata_create() by udata_createDummy().
+ * The endianness, charset family and UChar size come from the platform macros;
+ * dataFormat, formatVersion and dataVersion are all zeros.
+ * The initializers are positional, so their order has to follow the
+ * UDataInfo struct declaration.
+ */
+static const UDataInfo dummyDataInfo = {
+    sizeof(UDataInfo),
+    0,
+
+    U_IS_BIG_ENDIAN,
+    U_CHARSET_FAMILY,
+    U_SIZEOF_UCHAR,
+    0,
+
+    { 0, 0, 0, 0 },                 /* dummy dataFormat */
+    { 0, 0, 0, 0 },                 /* dummy formatVersion */
+    { 0, 0, 0, 0 }                  /* dummy dataVersion */
+};
+
+/*
+ * Create a data file from dummyDataInfo and finish it right away,
+ * without writing anything in between.
+ * Does nothing if *pErrorCode already indicates a failure.
+ * If creating or finishing the file fails, report the error on stderr
+ * and terminate the process with the error code as the exit status.
+ */
+U_CAPI void U_EXPORT2
+udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode) {
+    UNewDataMemory *pData;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
+
+    pData=udata_create(dir, type, name, &dummyDataInfo, NULL, pErrorCode);
+    udata_finish(pData, pErrorCode);
+    if(U_SUCCESS(*pErrorCode)) {
+        return;
+    }
+
+    fprintf(stderr, "error %s writing dummy data file %s" U_FILE_SEP_STRING "%s.%s\n",
+            u_errorName(*pErrorCode), dir, name, type);
+    exit(*pErrorCode);
+}
+
 U_CAPI void U_EXPORT2
 udata_write8(UNewDataMemory *pData, uint8_t byte) {
    if(pData!=NULL && pData->file!=NULL) {
--- a/icu4c/source/tools/toolutil/unewdata.h
+++ b/icu4c/source/tools/toolutil/unewdata.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2000, International Business Machines
+*   Copyright (C) 1999-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -66,6 +66,10 @@ udata_create(const char *dir, const char *type, const char *name,
 U_CAPI uint32_t U_EXPORT2
 udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode);

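+/**
+ * @memo Write a dummy data file: the file is created with a placeholder
+ * UDataInfo and finished immediately, without writing any data in between.
+ * Does nothing if *pErrorCode already indicates a failure.
+ * If writing fails, an error message is printed to stderr and exit() is called.
+ */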
+U_CAPI void U_EXPORT2
+udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode);
+
 /** @memo Write an 8-bit byte to the file. */
 U_CAPI void U_EXPORT2
 udata_write8(UNewDataMemory *pData, uint8_t byte);
--- a/icu4c/source/tools/toolutil/uparse.c
+++ b/icu4c/source/tools/toolutil/uparse.c
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2000-2009, International Business Machines
+*   Copyright (C) 2000-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -81,7 +81,7 @@ u_parseDelimitedFile(const char *filename, char delimiter,
    char *start, *limit;
    int32_t i, length;

-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+    if(U_FAILURE(*pErrorCode)) {
        return;
    }

@ -193,7 +193,7 @@ u_parseCodePoints(const char *s,
    uint32_t value;
    int32_t count;

-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {
@ -242,7 +242,7 @@ u_parseString(const char *s,
    uint32_t value;
    int32_t destLength;

-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {
@ -275,15 +275,16 @@ u_parseString(const char *s,
        }

        /* store the first code point */
-        if(destLength==0 && pFirst!=NULL) {
+        if(pFirst!=NULL) {
            *pFirst=value;
+            pFirst=NULL;
        }

        /* append it to the destination array */
-        if((destLength+UTF_CHAR_LENGTH(value))<=destCapacity) {
-            UTF_APPEND_CHAR_UNSAFE(dest, destLength, value);
+        if((destLength+U16_LENGTH(value))<=destCapacity) {
+            U16_APPEND_UNSAFE(dest, destLength, value);
        } else {
-            destLength+=UTF_CHAR_LENGTH(value);
+            destLength+=U16_LENGTH(value);
        }

        /* go to the following characters */
@ -293,13 +294,14 @@ u_parseString(const char *s,

 /* read a range like start or start..end */
 U_CAPI int32_t U_EXPORT2
-u_parseCodePointRange(const char *s,
-                      uint32_t *pStart, uint32_t *pEnd,
-                      UErrorCode *pErrorCode) {
+u_parseCodePointRangeAnyTerminator(const char *s,
+                                   uint32_t *pStart, uint32_t *pEnd,
+                                   const char **terminator,
+                                   UErrorCode *pErrorCode) {
    char *end;
    uint32_t value;

-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s==NULL || pStart==NULL || pEnd==NULL) {
@ -307,15 +309,10 @@ u_parseCodePointRange(const char *s,
        return 0;
    }

-    s=u_skipWhitespace(s);
-    if(*s==';' || *s==0) {
-        *pErrorCode=U_PARSE_ERROR;
-        return 0;
-    }
-
    /* read the start code point */
+    s=u_skipWhitespace(s);
    value=(uint32_t)uprv_strtoul(s, &end, 16);
-    if(end<=s || (!IS_INV_WHITESPACE(*end) && *end!='.' && *end!=';' && *end!=0) || value>=0x110000) {
+    if(end<=s || value>=0x110000) {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }
@ -323,19 +320,15 @@ u_parseCodePointRange(const char *s,

    /* is there a "..end"? */
    s=u_skipWhitespace(end);
-    if(*s==';' || *s==0) {
+    if(*s!='.' || s[1]!='.') {
+        *terminator=end;
        return 1;
    }
-
-    if(*s!='.' || s[1]!='.') {
-        *pErrorCode=U_PARSE_ERROR;
-        return 0;
-    }
-    s+=2;
+    s=u_skipWhitespace(s+2);

    /* read the end code point */
    value=(uint32_t)uprv_strtoul(s, &end, 16);
-    if(end<=s || (!IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {
+    if(end<=s || value>=0x110000) {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }
@ -347,14 +340,25 @@ u_parseCodePointRange(const char *s,
        return 0;
    }

-    /* no garbage after that? */
-    s=u_skipWhitespace(end);
-    if(*s==';' || *s==0) {
-        return value-*pStart+1;
-    } else {
-        *pErrorCode=U_PARSE_ERROR;
-        return 0;
+    *terminator=end;
+    return value-*pStart+1;
+}
+
+/*
+ * Parse a range like start or start..end via
+ * u_parseCodePointRangeAnyTerminator(), then require that only white space
+ * separates the range from a ';' or from the end of the string.
+ * Anything else after the range sets U_PARSE_ERROR and returns 0.
+ */
+U_CAPI int32_t U_EXPORT2
+u_parseCodePointRange(const char *s,
+                      uint32_t *pStart, uint32_t *pEnd,
+                      UErrorCode *pErrorCode) {
+    const char *rest;
+    int32_t count;
+
+    count=u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &rest, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        /* rest is only set when the range was parsed successfully */
+        return count;
+    }
+
+    rest=u_skipWhitespace(rest);
+    if(*rest==';' || *rest==0) {
+        return count;
+    }
+
+    /* garbage after the range */
+    *pErrorCode=U_PARSE_ERROR;
+    return 0;
+}

 U_CAPI int32_t U_EXPORT2
--- a/icu4c/source/tools/toolutil/uparse.h
+++ b/icu4c/source/tools/toolutil/uparse.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2000-2009, International Business Machines
+*   Copyright (C) 2000-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -126,6 +126,16 @@ u_parseCodePointRange(const char *s,
                      uint32_t *pStart, uint32_t *pEnd,
                      UErrorCode *pErrorCode);

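+/**
+ * Same as u_parseCodePointRange() but the text after the range is not checked:
+ * the range may be followed by any character, not only by ';' or NUL.
+ * On success, *terminator is set to the position right after the last parsed
+ * code point; this may be white space that precedes the actual terminating
+ * character. The terminator pointer itself must not be NULL.
+ */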
+U_CAPI int32_t U_EXPORT2
+u_parseCodePointRangeAnyTerminator(const char *s,
+                                   uint32_t *pStart, uint32_t *pEnd,
+                                   const char **terminator,
+                                   UErrorCode *pErrorCode);

 U_CAPI int32_t U_EXPORT2
 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
--- a/icu4c/source/tools/toolutil/writesrc.c
+++ b/icu4c/source/tools/toolutil/writesrc.c
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2005-2008, International Business Machines
+*   Copyright (C) 2005-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -24,8 +24,8 @@
 #include "cstring.h"
 #include "writesrc.h"

-U_CAPI FILE * U_EXPORT2
-usrc_create(const char *path, const char *filename) {
+static FILE *
+usrc_createWithHeader(const char *path, const char *filename, const char *header) {
    char buffer[1024];
    const char *p;
    char *q;
@ -55,19 +55,7 @@ usrc_create(const char *path, const char *filename) {
        lt=localtime(&t);
        strftime(year, sizeof(year), "%Y", lt);
        strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt);
-        fprintf(
-            f,
-            "/*\n"
-            " * Copyright (C) 1999-%s, International Business Machines\n"
-            " * Corporation and others.  All Rights Reserved.\n"
-            " *\n"
-            " * file name: %s\n"
-            " *\n"
-            " * machine-generated on: %s\n"
-            " */\n\n",
-            year,
-            filename,
-            buffer);
+        fprintf(f, header, year, filename, buffer);
    } else {
        fprintf(
            stderr,
@ -77,6 +65,33 @@ usrc_create(const char *path, const char *filename) {
    return f;
 }

+/*
+ * Create a source file that starts with the ICU copyright notice
+ * wrapped in a C/Java-style block comment.
+ */
+U_CAPI FILE * U_EXPORT2
+usrc_create(const char *path, const char *filename) {
+    /* format string; its %s fields are: year, file name, generation date */
+    static const char blockCommentHeader[]=
+        "/*\n"
+        " * Copyright (C) 1999-%s, International Business Machines\n"
+        " * Corporation and others.  All Rights Reserved.\n"
+        " *\n"
+        " * file name: %s\n"
+        " *\n"
+        " * machine-generated on: %s\n"
+        " */\n\n";
+
+    return usrc_createWithHeader(path, filename, blockCommentHeader);
+}
+
+/*
+ * Create a text data file that starts with the ICU copyright notice
+ * written as # comment lines.
+ */
+U_CAPI FILE * U_EXPORT2
+usrc_createTextData(const char *path, const char *filename) {
+    /* format string; its %s fields are: year, file name, generation date */
+    static const char hashCommentHeader[]=
+        "# Copyright (C) 1999-%s, International Business Machines\n"
+        "# Corporation and others.  All Rights Reserved.\n"
+        "#\n"
+        "# file name: %s\n"
+        "#\n"
+        "# machine-generated on: %s\n"
+        "#\n\n";
+
+    return usrc_createWithHeader(path, filename, hashCommentHeader);
+}
+
 U_CAPI void U_EXPORT2
 usrc_writeArray(FILE *f,
                const char *prefix,
--- a/icu4c/source/tools/toolutil/writesrc.h
+++ b/icu4c/source/tools/toolutil/writesrc.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2005-2008, International Business Machines
+*   Copyright (C) 2005-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -25,10 +25,18 @@

 /**
 * Create a source text file and write a header comment with the ICU copyright.
+ * Writes a C/Java-style comment.
 */
 U_CAPI FILE * U_EXPORT2
 usrc_create(const char *path, const char *filename);

+/**
+ * Create a source text file and write a header comment with the ICU copyright.
+ * Writes the comment with # lines, as used in scripts and text data.
+ */
+U_CAPI FILE * U_EXPORT2
+usrc_createTextData(const char *path, const char *filename);
+
 /**
 * Write the contents of an array of 8/16/32-bit words.
 * The prefix and postfix are optional (can be NULL) and are written first/last.