ICU-13177 Merging trunk to branch

X-SVN-Rev: 40460
2025-04-06 14:05:32 +00:00 · 2017-09-26 09:33:44 +00:00 · 2017-09-26 09:33:44 +00:00 · c09ca5d6b9
commit c09ca5d6b9
parent 0c9a9cf832 75495acb8f
407 changed files with 13072 additions and 12173 deletions
--- a/icu4c/LICENSE
+++ b/icu4c/LICENSE
@ -1,3 +1,6 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
 COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)

 Copyright © 1991-2017 Unicode, Inc. All rights reserved.
@ -131,7 +134,7 @@ property of their respective owners.
 #  ---------COPYING.libtabe ---- BEGIN--------------------
 #
 #  /*
- #   * Copyrighy (c) 1999 TaBE Project.
+ #   * Copyright (c) 1999 TaBE Project.
 #   * Copyright (c) 1999 Pai-Hsiang Hsiao.
 #   * All rights reserved.
 #   *
--- a/icu4c/readme.html
+++ b/icu4c/readme.html
@ -630,6 +630,14 @@
        (via -D or uconfig.h, as above)
        and include those header files explicitly that you actually need.<br />
        Note: The ICU test suites cannot be compiled with this setting.</li>
+      <li><b>utf_old.h:</b>
+        All of utf_old.h is deprecated or obsolete.<br />
+        Beginning with ICU 60,
+        you should define <code>U_HIDE_OBSOLETE_UTF_OLD_H</code> to 1
+        (via -D or uconfig.h, as above).
+        Use of any of these macros should be replaced as noted
+        in the comments for the obsolete macro.<br />
+        Note: The ICU test suites <i>can</i> be compiled with this setting.</li>
      <li><b>.dat file:</b> By default, the ICU data is built into
        a shared library (DLL). This is convenient because it requires no
        install-time or runtime configuration,
--- a/icu4c/source/Doxyfile.in
+++ b/icu4c/source/Doxyfile.in
@ -194,7 +194,7 @@ EXPAND_ONLY_PREDEF     = YES
 SEARCH_INCLUDES        = YES
 INCLUDE_PATH           = 
 INCLUDE_FILE_PATTERNS  = 
-PREDEFINED             = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END=  U_NO_THROW=\  "U_NAMESPACE_BEGIN=namespace icu{" "U_NAMESPACE_END=}" U_SHOW_CPLUSPLUS_API=1 U_DEFINE_LOCAL_OPEN_POINTER()= U_IN_DOXYGEN=1 U_OVERRIDE=override U_FINAL=final UCONFIG_ENABLE_PLUGINS=1 U_CHAR16_IS_TYPEDEF=0 U_CPLUSPLUS_VERSION=11 U_WCHAR_IS_UTF16
+PREDEFINED             = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV_FPTR= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END=  U_NO_THROW=\  "U_NAMESPACE_BEGIN=namespace icu{" "U_NAMESPACE_END=}" U_SHOW_CPLUSPLUS_API=1 U_DEFINE_LOCAL_OPEN_POINTER()= U_IN_DOXYGEN=1 U_OVERRIDE= U_FINAL=final UCONFIG_ENABLE_PLUGINS=1 U_CHAR16_IS_TYPEDEF=0 U_CPLUSPLUS_VERSION=11 U_WCHAR_IS_UTF16 U_NOEXCEPT=
 EXPAND_AS_DEFINED      = 
 SKIP_FUNCTION_MACROS   = YES
 #---------------------------------------------------------------------------
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@ -89,7 +89,7 @@ ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_
 resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
 ucurr.o \
 messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \
-bytestream.o stringpiece.o \
+bytestream.o stringpiece.o bytesinkutil.o \
 stringtriebuilder.o bytestriebuilder.o \
 bytestrie.o bytestrieiterator.o \
 ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
@ -104,7 +104,7 @@ patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwr
 uscript.o uscript_props.o usc_impl.o unames.o \
 utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
 uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o filteredbrk.o \
-rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
+rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o rbbi_cache.o \
 serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
 uidna.o usprep.o uts46.o punycode.o \
 util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \
--- a/icu4c/source/common/bmpset.cpp
+++ b/icu4c/source/common/bmpset.cpp
@ -28,7 +28,7 @@ U_NAMESPACE_BEGIN

 BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
        list(parentList), listLength(parentListLength) {
-    uprv_memset(asciiBytes, 0, sizeof(asciiBytes));
+    uprv_memset(latin1Contains, 0, sizeof(latin1Contains));
    uprv_memset(table7FF, 0, sizeof(table7FF));
    uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));

@ -45,14 +45,16 @@ BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
        list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
    }
    list4kStarts[0x11]=listLength-1;
+    containsFFFD=containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10]);

    initBits();
    overrideIllegal();
 }

 BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
+        containsFFFD(otherBMPSet.containsFFFD),
        list(newParentList), listLength(newParentListLength) {
-    uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes));
+    uprv_memcpy(latin1Contains, otherBMPSet.latin1Contains, sizeof(latin1Contains));
    uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
    uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
    uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
@ -120,7 +122,7 @@ void BMPSet::initBits() {
    UChar32 start, limit;
    int32_t listIndex=0;

-    // Set asciiBytes[].
+    // Set latin1Contains[].
    do {
        start=list[listIndex++];
        if(listIndex<listLength) {
@ -128,13 +130,30 @@ void BMPSet::initBits() {
        } else {
            limit=0x110000;
        }
-        if(start>=0x80) {
+        if(start>=0x100) {
            break;
        }
        do {
-            asciiBytes[start++]=1;
-        } while(start<limit && start<0x80);
-    } while(limit<=0x80);
+            latin1Contains[start++]=1;
+        } while(start<limit && start<0x100);
+    } while(limit<=0x100);
+
+    // Find the first range overlapping with (or after) 80..FF again,
+    // to include them in table7FF as well.
+    for(listIndex=0;;) {
+        start=list[listIndex++];
+        if(listIndex<listLength) {
+            limit=list[listIndex++];
+        } else {
+            limit=0x110000;
+        }
+        if(limit>0x80) {
+            if(start<0x80) {
+                start=0x80;
+            }
+            break;
+        }
+    }

    // Set table7FF[].
    while(start<0x800) {
@ -204,19 +223,14 @@ void BMPSet::initBits() {
 * for faster validity checking at runtime.
 * No need to set 0 values where they were reset to 0 in the constructor
 * and not modified by initBits().
- * (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
+ * (table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
 * Need to set 0 values for surrogates D800..DFFF.
 */
 void BMPSet::overrideIllegal() {
    uint32_t bits, mask;
    int32_t i;

-    if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
-        // contains(FFFD)==TRUE
-        for(i=0x80; i<0xc0; ++i) {
-            asciiBytes[i]=1;
-        }
-
+    if(containsFFFD) {
        bits=3;                 // Lead bytes 0xC0 and 0xC1.
        for(i=0; i<64; ++i) {
            table7FF[i]|=bits;
@ -233,7 +247,6 @@ void BMPSet::overrideIllegal() {
            bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
        }
    } else {
-        // contains(FFFD)==FALSE
        mask=~(0x10001<<0xd);   // Lead byte 0xED.
        for(i=32; i<64; ++i) {  // Second half of 4k block.
            bmpBlockBits[i]&=mask;
@ -277,8 +290,8 @@ int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {

 UBool
 BMPSet::contains(UChar32 c) const {
-    if((uint32_t)c<=0x7f) {
-        return (UBool)asciiBytes[c];
+    if((uint32_t)c<=0xff) {
+        return (UBool)latin1Contains[c];
    } else if((uint32_t)c<=0x7ff) {
        return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
    } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
@ -314,8 +327,8 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
        // span
        do {
            c=*s;
-            if(c<=0x7f) {
-                if(!asciiBytes[c]) {
+            if(c<=0xff) {
+                if(!latin1Contains[c]) {
                    break;
                }
            } else if(c<=0x7ff) {
@ -354,8 +367,8 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
        // span not
        do {
            c=*s;
-            if(c<=0x7f) {
-                if(asciiBytes[c]) {
+            if(c<=0xff) {
+                if(latin1Contains[c]) {
                    break;
                }
            } else if(c<=0x7ff) {
@ -403,8 +416,8 @@ BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondi
        // span
        for(;;) {
            c=*(--limit);
-            if(c<=0x7f) {
-                if(!asciiBytes[c]) {
+            if(c<=0xff) {
+                if(!latin1Contains[c]) {
                    break;
                }
            } else if(c<=0x7ff) {
@ -446,8 +459,8 @@ BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondi
        // span not
        for(;;) {
            c=*(--limit);
-            if(c<=0x7f) {
-                if(asciiBytes[c]) {
+            if(c<=0xff) {
+                if(latin1Contains[c]) {
                    break;
                }
            } else if(c<=0x7ff) {
@ -497,22 +510,22 @@ const uint8_t *
 BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
    const uint8_t *limit=s+length;
    uint8_t b=*s;
-    if((int8_t)b>=0) {
+    if(U8_IS_SINGLE(b)) {
        // Initial all-ASCII span.
        if(spanCondition) {
            do {
-                if(!asciiBytes[b] || ++s==limit) {
+                if(!latin1Contains[b] || ++s==limit) {
                    return s;
                }
                b=*s;
-            } while((int8_t)b>=0);
+            } while(U8_IS_SINGLE(b));
        } else {
            do {
-                if(asciiBytes[b] || ++s==limit) {
+                if(latin1Contains[b] || ++s==limit) {
                    return s;
                }
                b=*s;
-            } while((int8_t)b>=0);
+            } while(U8_IS_SINGLE(b));
        }
        length=(int32_t)(limit-s);
    }
@ -540,20 +553,20 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
            // single trail byte, check for preceding 3- or 4-byte lead byte
            if(length>=2 && (b=*(limit-2))>=0xe0) {
                limit-=2;
-                if(asciiBytes[0x80]!=spanCondition) {
+                if(containsFFFD!=spanCondition) {
                    limit0=limit;
                }
            } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
                // 4-byte lead byte with only two trail bytes
                limit-=3;
-                if(asciiBytes[0x80]!=spanCondition) {
+                if(containsFFFD!=spanCondition) {
                    limit0=limit;
                }
            }
        } else {
            // lead byte with no trail bytes
            --limit;
-            if(asciiBytes[0x80]!=spanCondition) {
+            if(containsFFFD!=spanCondition) {
                limit0=limit;
            }
        }
@ -563,26 +576,26 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi

    while(s<limit) {
        b=*s;
-        if(b<0xc0) {
-            // ASCII; or trail bytes with the result of contains(FFFD).
+        if(U8_IS_SINGLE(b)) {
+            // ASCII
            if(spanCondition) {
                do {
-                    if(!asciiBytes[b]) {
+                    if(!latin1Contains[b]) {
                        return s;
                    } else if(++s==limit) {
                        return limit0;
                    }
                    b=*s;
-                } while(b<0xc0);
+                } while(U8_IS_SINGLE(b));
            } else {
                do {
-                    if(asciiBytes[b]) {
+                    if(latin1Contains[b]) {
                        return s;
                    } else if(++s==limit) {
                        return limit0;
                    }
                    b=*s;
-                } while(b<0xc0);
+                } while(U8_IS_SINGLE(b));
            }
        }
        ++s;  // Advance past the lead byte.
@ -619,7 +632,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
                UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
                if( (   (0x10000<=c && c<=0x10ffff) ?
                            containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
-                            asciiBytes[0x80]
+                            containsFFFD
                    ) != spanCondition
                ) {
                    return s-1;
@ -627,8 +640,9 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
                s+=3;
                continue;
            }
-        } else /* 0xc0<=b<0xe0 */ {
+        } else {
            if( /* handle U+0000..U+07FF inline */
+                b>=0xc0 &&
                (t1=(uint8_t)(*s-0x80)) <= 0x3f
            ) {
                if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
@ -642,7 +656,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
        // Give an illegal sequence the same value as the result of contains(FFFD).
        // Handle each byte of an illegal sequence separately to simplify the code;
        // no need to optimize error handling.
-        if(asciiBytes[0x80]!=spanCondition) {
+        if(containsFFFD!=spanCondition) {
            return s-1;
        }
    }
@ -667,26 +681,26 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon

    do {
        b=s[--length];
-        if((int8_t)b>=0) {
+        if(U8_IS_SINGLE(b)) {
            // ASCII sub-span
            if(spanCondition) {
                do {
-                    if(!asciiBytes[b]) {
+                    if(!latin1Contains[b]) {
                        return length+1;
                    } else if(length==0) {
                        return 0;
                    }
                    b=s[--length];
-                } while((int8_t)b>=0);
+                } while(U8_IS_SINGLE(b));
            } else {
                do {
-                    if(asciiBytes[b]) {
+                    if(latin1Contains[b]) {
                        return length+1;
                    } else if(length==0) {
                        return 0;
                    }
                    b=s[--length];
-                } while((int8_t)b>=0);
+                } while(U8_IS_SINGLE(b));
            }
        }

--- a/icu4c/source/common/bmpset.h
+++ b/icu4c/source/common/bmpset.h
@ -28,11 +28,12 @@ U_NAMESPACE_BEGIN
 * Helper class for frozen UnicodeSets, implements contains() and span()
 * optimized for BMP code points. Structured to be UTF-8-friendly.
 *
- * ASCII: Look up bytes.
+ * Latin-1: Look up bytes.
 * 2-byte characters: Bits organized vertically.
 * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
 *                    with mixed for illegal ranges.
- * Supplementary characters: Call contains() on the parent set.
+ * Supplementary characters: Binary search over
+ * the supplementary part of the parent set's inversion list.
 */
 class BMPSet : public UMemory {
 public:
@ -96,12 +97,12 @@ private:
    inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;

    /*
-     * One byte per ASCII character, or trail byte in lead position.
-     * 0 or 1 for ASCII characters.
-     * The value for trail bytes is the result of contains(FFFD)
-     * for faster validity checking at runtime.
+     * One byte 0 or 1 per Latin-1 character.
     */
-    UBool asciiBytes[0xc0];
+    UBool latin1Contains[0x100];
+
+    /* TRUE if contains(U+FFFD). */
+    UBool containsFFFD;

    /*
     * One bit per code point from U+0000..U+07FF.
--- a/icu4c/source/common/brkeng.cpp
+++ b/icu4c/source/common/brkeng.cpp
@ -11,9 +11,6 @@

 #if !UCONFIG_NO_BREAK_ITERATION

-#include "brkeng.h"
-#include "cmemory.h"
-#include "dictbe.h"
 #include "unicode/uchar.h"
 #include "unicode/uniset.h"
 #include "unicode/chariter.h"
@ -24,6 +21,10 @@
 #include "unicode/uscript.h"
 #include "unicode/ucharstrie.h"
 #include "unicode/bytestrie.h"
+
+#include "brkeng.h"
+#include "cmemory.h"
+#include "dictbe.h"
 #include "charstr.h"
 #include "dictionarydata.h"
 #include "mutex.h"
@ -80,23 +81,15 @@ UnhandledEngine::handles(UChar32 c, int32_t breakType) const {

 int32_t
 UnhandledEngine::findBreaks( UText *text,
-                                 int32_t startPos,
-                                 int32_t endPos,
-                                 UBool reverse,
-                                 int32_t breakType,
-                                 UStack &/*foundBreaks*/ ) const {
+                             int32_t /* startPos */,
+                             int32_t endPos,
+                             int32_t breakType,
+                             UVector32 &/*foundBreaks*/ ) const {
    if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
        UChar32 c = utext_current32(text); 
-        if (reverse) {
-            while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
-                c = utext_previous32(text);
-            }
-        }
-        else {
-            while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
-                utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
-                c = utext_current32(text);
-            }
+        while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
+            utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
+            c = utext_current32(text);
        }
    }
    return 0;
--- a/icu4c/source/common/brkeng.h
+++ b/icu4c/source/common/brkeng.h
@ -19,6 +19,7 @@ U_NAMESPACE_BEGIN

 class UnicodeSet;
 class UStack;
+class UVector32;
 class DictionaryMatcher;

 /*******************************************************************
@ -67,18 +68,15 @@ class LanguageBreakEngine : public UMemory {
  * is capable of handling.
  * @param startPos The start of the run within the supplied text.
  * @param endPos The end of the run within the supplied text.
-  * @param reverse Whether the caller is looking for breaks in a reverse
-  * direction.
  * @param breakType The type of break desired, or -1.
-  * @param foundBreaks An allocated C array of the breaks found, if any
+  * @param foundBreaks A Vector of int32_t to receive the breaks.
  * @return The number of breaks found.
  */
  virtual int32_t findBreaks( UText *text,
                              int32_t startPos,
                              int32_t endPos,
-                              UBool reverse,
                              int32_t breakType,
-                              UStack &foundBreaks ) const = 0;
+                              UVector32 &foundBreaks ) const = 0;

 };

@ -192,8 +190,6 @@ class UnhandledEngine : public LanguageBreakEngine {
  * is capable of handling.
  * @param startPos The start of the run within the supplied text.
  * @param endPos The end of the run within the supplied text.
-  * @param reverse Whether the caller is looking for breaks in a reverse
-  * direction.
  * @param breakType The type of break desired, or -1.
  * @param foundBreaks An allocated C array of the breaks found, if any
  * @return The number of breaks found.
@ -201,9 +197,8 @@ class UnhandledEngine : public LanguageBreakEngine {
  virtual int32_t findBreaks( UText *text,
                              int32_t startPos,
                              int32_t endPos,
-                              UBool reverse,
                              int32_t breakType,
-                              UStack &foundBreaks ) const;
+                              UVector32 &foundBreaks ) const;

 /**
  * <p>Tell the engine to handle a particular character and break type.</p>
--- a/icu4c/source/common/bytesinkutil.cpp
+++ b/icu4c/source/common/bytesinkutil.cpp
@ -0,0 +1,123 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// bytesinkutil.cpp
+// created: 2017sep14 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "unicode/edits.h"
+#include "unicode/stringoptions.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "bytesinkutil.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+UBool  // Appends (s16, s16Length) to sink as UTF-8 and records one replace edit; FALSE on error.
+ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
+                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return FALSE; }  // do nothing if the caller already has an error
+    char scratch[200];  // passed to GetAppendBuffer() as its scratch argument
+    int32_t s8Length = 0;  // total number of UTF-8 bytes appended so far
+    for (int32_t i = 0; i < s16Length;) {  // one iteration per buffer obtained from the sink
+        int32_t capacity;
+        int32_t desiredCapacity = s16Length - i;  // remaining UTF-16 code units
+        if (desiredCapacity < (INT32_MAX / 3)) {  // scale the request without int32_t overflow
+            desiredCapacity *= 3;  // max 3 UTF-8 bytes per UTF-16 code unit
+        } else if (desiredCapacity < (INT32_MAX / 2)) {
+            desiredCapacity *= 2;
+        } else {
+            desiredCapacity = INT32_MAX;
+        }
+        char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
+                                            scratch, UPRV_LENGTHOF(scratch), &capacity);
+        capacity -= U8_MAX_LENGTH - 1;  // so that j < capacity leaves room for U8_MAX_LENGTH bytes at j
+        int32_t j = 0;  // NOTE(review): if capacity <= 0 here, i never advances; assumes the sink returns >= U8_MAX_LENGTH
+        for (; i < s16Length && j < capacity;) {
+            UChar32 c;
+            U16_NEXT_UNSAFE(s16, i, c);  // _UNSAFE macros: s16 is not validated here
+            U8_APPEND_UNSAFE(buffer, j, c);
+        }
+        if (j > (INT32_MAX - s8Length)) {  // s8Length + j would overflow int32_t
+            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return FALSE;
+        }
+        sink.Append(buffer, j);
+        s8Length += j;
+    }
+    if (edits != nullptr) {  // edits is optional
+        edits->addReplace(length, s8Length);  // (length) old units replaced by s8Length UTF-8 bytes
+    }
+    return TRUE;
+}
+
+UBool  // Pointer-range overload: uses (limit - s) as the old length and delegates to the overload above.
+ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
+                           const char16_t *s16, int32_t s16Length,
+                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return FALSE; }  // do nothing if the caller already has an error
+    if ((limit - s) > INT32_MAX) {  // the range length must fit the int32_t length parameter
+        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return FALSE;
+    }
+    return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
+}
+
+void  // Encodes c as UTF-8, records a replace edit for (length) old units, and appends the bytes to sink.
+ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
+    char s8[U8_MAX_LENGTH];  // local buffer for the encoded code point
+    int32_t s8Length = 0;
+    U8_APPEND_UNSAFE(s8, s8Length, c);  // _UNSAFE: c is not validated here
+    if (edits != nullptr) {  // edits is optional
+        edits->addReplace(length, s8Length);
+    }
+    sink.Append(s8, s8Length);
+}
+
+namespace {  // file-local helpers used by appendTwoBytes() below
+
+// See unicode/utf8.h U8_APPEND_UNSAFE().
+inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }  // 0xc0 | bits above the low 6
+inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }  // 0x80 | low 6 bits
+
+}  // namespace
+
+void  // Appends c to sink as a lead byte plus a trail byte; does not take or record Edits.
+ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
+    U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8
+    char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) };  // lead byte, then trail byte
+    sink.Append(s8, 2);
+}
+
+UBool  // Records (length) unchanged bytes in edits and copies them to sink unless omitted by options.
+ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length,
+                              ByteSink &sink, uint32_t options, Edits *edits,
+                              UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return FALSE; }  // do nothing if the caller already has an error
+    if (length > 0) {  // zero or negative length: nothing recorded or appended, still returns TRUE
+        if (edits != nullptr) {  // edits is optional
+            edits->addUnchanged(length);
+        }
+        if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {  // copy the bytes only when the omit flag is not set
+            sink.Append(reinterpret_cast<const char *>(s), length);
+        }
+    }
+    return TRUE;
+}
+
+UBool  // Pointer-range overload: uses (limit - s) as the length and delegates to the overload above.
+ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
+                              ByteSink &sink, uint32_t options, Edits *edits,
+                              UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return FALSE; }  // do nothing if the caller already has an error
+    if ((limit - s) > INT32_MAX) {  // the range length must fit the int32_t length parameter
+        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return FALSE;
+    }
+    return appendUnchanged(s, (int32_t)(limit - s), sink, options, edits, errorCode);
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/common/bytesinkutil.h
+++ b/icu4c/source/common/bytesinkutil.h
@ -0,0 +1,53 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// bytesinkutil.h
+// created: 2017sep14 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "unicode/edits.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+class ByteSink;
+class Edits;
+
+class U_COMMON_API ByteSinkUtil {  // static helpers that append UTF-8 to a ByteSink and optionally record Edits
+public:
+    ByteSinkUtil() = delete;  // all static
+
+    /** (length) bytes were mapped to valid (s16, s16Length); appends s16 as UTF-8, records a replace edit. */
+    static UBool appendChange(int32_t length,
+                              const char16_t *s16, int32_t s16Length,
+                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);
+
+    /** The bytes at [s, limit[ were mapped to valid (s16, s16Length). Fails if limit - s exceeds INT32_MAX. */
+    static UBool appendChange(const uint8_t *s, const uint8_t *limit,
+                              const char16_t *s16, int32_t s16Length,
+                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);
+
+    /** (length) bytes were mapped/changed to valid code point c. */
+    static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);
+
+    /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
+    static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
+                                       ByteSink &sink, Edits *edits = nullptr) {
+        appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);  // old length is the pointer difference
+    }
+
+    /** Append the two-byte character (U+0080..U+07FF). */
+    static void appendTwoBytes(UChar32 c, ByteSink &sink);
+    /** (length) bytes at s are unchanged: recorded in edits; appended unless U_OMIT_UNCHANGED_TEXT is set. */
+    static UBool appendUnchanged(const uint8_t *s, int32_t length,
+                                 ByteSink &sink, uint32_t options, Edits *edits,
+                                 UErrorCode &errorCode);
+    /** Same for the unchanged bytes at [s, limit[. Fails if limit - s exceeds INT32_MAX. */
+    static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
+                                 ByteSink &sink, uint32_t options, Edits *edits,
+                                 UErrorCode &errorCode);
+};
+
+U_NAMESPACE_END
--- a/icu4c/source/common/bytestream.cpp
+++ b/icu4c/source/common/bytestream.cpp
@ -45,6 +45,12 @@ void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
  if (n <= 0) {
    return;
  }
+  if (n > (INT32_MAX - appended_)) {
+    // TODO: Report as integer overflow, not merely buffer overflow.
+    appended_ = INT32_MAX;
+    overflowed_ = TRUE;
+    return;
+  }
  appended_ += n;
  int32_t available = capacity_ - size_;
  if (n > available) {
--- a/icu4c/source/common/common.vcxproj
+++ b/icu4c/source/common/common.vcxproj
@ -268,6 +268,8 @@
    </ClCompile>
    <ClCompile Include="rbbitblb.cpp">
    </ClCompile>
+    <ClCompile Include="rbbi_cache.cpp">
+    </ClCompile>
    <ClCompile Include="dictionarydata.cpp" />
    <ClCompile Include="ubrk.cpp" />
    <ClCompile Include="ucol_swp.cpp">
@ -445,6 +447,7 @@
    </ClCompile>
    <ClCompile Include="usprep.cpp" />
    <ClCompile Include="appendable.cpp" />
+    <ClCompile Include="bytesinkutil.cpp" />
    <ClCompile Include="bytestream.cpp" />
    <ClCompile Include="bytestrie.cpp" />
    <ClCompile Include="bytestriebuilder.cpp" />
@ -572,6 +575,7 @@
    <ClInclude Include="rbbiscan.h" />
    <ClInclude Include="rbbisetb.h" />
    <ClInclude Include="rbbitblb.h" />
+    <ClInclude Include="rbbi_cache.h" />
    <ClInclude Include="dictionarydata.h" />
    <CustomBuild Include="unicode\ubrk.h">
      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
@ -1478,6 +1482,7 @@
 </Command>
      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
    </CustomBuild>
+    <ClInclude Include="bytesinkutil.h" />
    <CustomBuild Include="unicode\bytestream.h">
      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
 </Command>
--- a/icu4c/source/common/common.vcxproj.filters
+++ b/icu4c/source/common/common.vcxproj.filters
@ -97,6 +97,9 @@
    <ClCompile Include="rbbitblb.cpp">
      <Filter>break iteration</Filter>
    </ClCompile>
+    <ClCompile Include="rbbi_cache.cpp">
+      <Filter>break iteration</Filter>
+    </ClCompile>
    <ClCompile Include="ubrk.cpp">
      <Filter>break iteration</Filter>
    </ClCompile>
@ -460,6 +463,9 @@
    <ClCompile Include="usprep.cpp">
      <Filter>sprep</Filter>
    </ClCompile>
+    <ClCompile Include="bytesinkutil.cpp">
+      <Filter>strings</Filter>
+    </ClCompile>
    <ClCompile Include="bytestream.cpp">
      <Filter>strings</Filter>
    </ClCompile>
@ -636,6 +642,9 @@
    <ClInclude Include="rbbitblb.h">
      <Filter>break iteration</Filter>
    </ClInclude>
+    <ClInclude Include="rbbi_cache.h">
+      <Filter>break iteration</Filter>
+    </ClInclude>
    <ClInclude Include="ubrkimpl.h">
      <Filter>break iteration</Filter>
    </ClInclude>
@ -861,6 +870,9 @@
    <ClInclude Include="sprpimpl.h">
      <Filter>sprep</Filter>
    </ClInclude>
+    <ClInclude Include="bytesinkutil.h">
+      <Filter>strings</Filter>
+    </ClInclude>
    <ClInclude Include="charstr.h">
      <Filter>strings</Filter>
    </ClInclude>
--- a/icu4c/source/common/common_uwp.vcxproj
+++ b/icu4c/source/common/common_uwp.vcxproj
@ -299,6 +299,8 @@
    </ClCompile>
    <ClCompile Include="rbbitblb.cpp">
    </ClCompile>
+    <ClCompile Include="rbbi_cache.cpp">
+    </ClCompile>
    <ClCompile Include="dictionarydata.cpp" />
    <ClCompile Include="ubrk.cpp" />
    <ClCompile Include="ucol_swp.cpp">
@ -452,6 +454,7 @@
    </ClCompile>
    <ClCompile Include="usprep.cpp" />
    <ClCompile Include="appendable.cpp" />
+    <ClCompile Include="bytesinkutil.cpp" />
    <ClCompile Include="bytestream.cpp" />
    <ClCompile Include="bytestrie.cpp" />
    <ClCompile Include="bytestriebuilder.cpp" />
@ -529,6 +532,7 @@
    <ClInclude Include="rbbiscan.h" />
    <ClInclude Include="rbbisetb.h" />
    <ClInclude Include="rbbitblb.h" />
+    <ClInclude Include="rbbi_cache.h" />
    <ClInclude Include="dictionarydata.h" />
    <CustomBuild Include="unicode\ubrk.h">
      <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
@ -894,6 +898,7 @@
 </Command>
      <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
    </CustomBuild>
+    <ClInclude Include="bytesinkutil.h" />
    <CustomBuild Include="unicode\bytestream.h">
      <Command>copy "%(FullPath)" ..\..\include\unicode
 </Command>
--- a/icu4c/source/common/dictbe.cpp
+++ b/icu4c/source/common/dictbe.cpp
@ -46,9 +46,9 @@ int32_t
 DictionaryBreakEngine::findBreaks( UText *text,
                                 int32_t startPos,
                                 int32_t endPos,
-                                 UBool reverse,
                                 int32_t breakType,
-                                 UStack &foundBreaks ) const {
+                                 UVector32 &foundBreaks ) const {
+    (void)startPos;            // TODO: remove this param?
    int32_t result = 0;

    // Find the span of characters included in the set.
@ -60,34 +60,12 @@ DictionaryBreakEngine::findBreaks( UText *text,
    int32_t rangeStart;
    int32_t rangeEnd;
    UChar32 c = utext_current32(text);
-    if (reverse) {
-        UBool   isDict = fSet.contains(c);
-        while((current = (int32_t)utext_getNativeIndex(text)) > startPos && isDict) {
-            c = utext_previous32(text);
-            isDict = fSet.contains(c);
-        }
-        if (current < startPos) {
-            rangeStart = startPos;
-        } else {
-            rangeStart = current;
-            if (!isDict) {
-                utext_next32(text);
-                rangeStart = (int32_t)utext_getNativeIndex(text);
-            }
-        }
-        // rangeEnd = start + 1;
-        utext_setNativeIndex(text, start);
-        utext_next32(text);
-        rangeEnd = (int32_t)utext_getNativeIndex(text);
-    }
-    else {
-        while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
-            utext_next32(text);         // TODO:  recast loop for postincrement
-            c = utext_current32(text);
-        }
-        rangeStart = start;
-        rangeEnd = current;
+    while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
+        utext_next32(text);         // TODO:  recast loop for postincrement
+        c = utext_current32(text);
    }
+    rangeStart = start;
+    rangeEnd = current;
    if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
        result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
        utext_setNativeIndex(text, current);
@ -248,7 +226,7 @@ int32_t
 ThaiBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
-                                                UStack &foundBreaks ) const {
+                                                UVector32 &foundBreaks ) const {
    utext_setNativeIndex(text, rangeStart);
    utext_moveIndex32(text, THAI_MIN_WORD_SPAN);
    if (utext_getNativeIndex(text) >= rangeEnd) {
@ -487,7 +465,7 @@ int32_t
 LaoBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
-                                                UStack &foundBreaks ) const {
+                                                UVector32 &foundBreaks ) const {
    if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
        return 0;       // Not enough characters for two words
    }
@ -680,7 +658,7 @@ int32_t
 BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
-                                                UStack &foundBreaks ) const {
+                                                UVector32 &foundBreaks ) const {
    if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
        return 0;       // Not enough characters for two words
    }
@ -885,7 +863,7 @@ int32_t
 KhmerBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
-                                                UStack &foundBreaks ) const {
+                                                UVector32 &foundBreaks ) const {
    if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
        return 0;       // Not enough characters for two words
    }
@ -1110,9 +1088,9 @@ static inline uint32_t getKatakanaCost(int32_t wordLength){
    return (wordLength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordLength];
 }

-static inline bool isKatakana(uint16_t value) {
-    return (value >= 0x30A1u && value <= 0x30FEu && value != 0x30FBu) ||
-            (value >= 0xFF66u && value <= 0xFF9fu);
+static inline bool isKatakana(UChar32 value) {
+    return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) ||
+            (value >= 0xFF66 && value <= 0xFF9f);
 }


@ -1128,14 +1106,14 @@ static inline int32_t utext_i32_flag(int32_t bitIndex) {
 * @param text A UText representing the text
 * @param rangeStart The start of the range of dictionary characters
 * @param rangeEnd The end of the range of dictionary characters
- * @param foundBreaks Output of C array of int32_t break positions, or 0
+ * @param foundBreaks vector<int32> to receive the break positions
 * @return The number of breaks found
 */
 int32_t 
 CjkBreakEngine::divideUpDictionaryRange( UText *inText,
        int32_t rangeStart,
        int32_t rangeEnd,
-        UStack &foundBreaks ) const {
+        UVector32 &foundBreaks ) const {
    if (rangeStart >= rangeEnd) {
        return 0;
    }
--- a/icu4c/source/common/dictbe.h
+++ b/icu4c/source/common/dictbe.h
@ -15,6 +15,7 @@
 #include "unicode/utext.h"

 #include "brkeng.h"
+#include "uvectr32.h"

 U_NAMESPACE_BEGIN

@ -84,21 +85,18 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
   *
   * @param text A UText representing the text. The iterator is left at
   * the end of the run of characters which the engine is capable of handling 
-   * that starts from the first (or last) character in the range.
+   * that starts from the first character in the range.
   * @param startPos The start of the run within the supplied text.
   * @param endPos The end of the run within the supplied text.
-   * @param reverse Whether the caller is looking for breaks in a reverse
-   * direction.
   * @param breakType The type of break desired, or -1.
-   * @param foundBreaks An allocated C array of the breaks found, if any
+   * @param foundBreaks vector of int32_t to receive the break positions
   * @return The number of breaks found.
   */
  virtual int32_t findBreaks( UText *text,
                              int32_t startPos,
                              int32_t endPos,
-                              UBool reverse,
                              int32_t breakType,
-                              UStack &foundBreaks ) const;
+                              UVector32 &foundBreaks ) const;

 protected:

@ -128,7 +126,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
  virtual int32_t divideUpDictionaryRange( UText *text,
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
-                                           UStack &foundBreaks ) const = 0;
+                                           UVector32 &foundBreaks ) const = 0;

 };

@ -185,7 +183,7 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
  virtual int32_t divideUpDictionaryRange( UText *text,
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
-                                           UStack &foundBreaks ) const;
+                                           UVector32 &foundBreaks ) const;

 };

@ -241,7 +239,7 @@ class LaoBreakEngine : public DictionaryBreakEngine {
  virtual int32_t divideUpDictionaryRange( UText *text,
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
-                                           UStack &foundBreaks ) const;
+                                           UVector32 &foundBreaks ) const;

 };

@ -297,7 +295,7 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
  virtual int32_t divideUpDictionaryRange( UText *text, 
                                           int32_t rangeStart, 
                                           int32_t rangeEnd, 
-                                           UStack &foundBreaks ) const; 
+                                           UVector32 &foundBreaks ) const; 
 
 }; 
 
@ -353,7 +351,7 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
  virtual int32_t divideUpDictionaryRange( UText *text, 
                                           int32_t rangeStart, 
                                           int32_t rangeEnd, 
-                                           UStack &foundBreaks ) const; 
+                                           UVector32 &foundBreaks ) const; 
 
 }; 
 
@ -417,7 +415,7 @@ class CjkBreakEngine : public DictionaryBreakEngine {
  virtual int32_t divideUpDictionaryRange( UText *text,
          int32_t rangeStart,
          int32_t rangeEnd,
-          UStack &foundBreaks ) const;
+          UVector32 &foundBreaks ) const;

 };

--- a/icu4c/source/common/edits.cpp
+++ b/icu4c/source/common/edits.cpp
@ -17,10 +17,10 @@ namespace {
 const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
 const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;

-// 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units.
-// No length change.
-const int32_t MAX_SHORT_WIDTH = 6;
-const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff;
+// 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
+const int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6;
+const int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7;
+const int32_t SHORT_CHANGE_NUM_MASK = 0x1ff;
 const int32_t MAX_SHORT_CHANGE = 0x6fff;

 // 0111mmmmmmnnnnnn records a replacement of m text units with n.
@ -138,20 +138,6 @@ void Edits::addUnchanged(int32_t unchangedLength) {

 void Edits::addReplace(int32_t oldLength, int32_t newLength) {
    if(U_FAILURE(errorCode_)) { return; }
-    if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
-        // Replacement of short oldLength text units by same-length new text.
-        // Merge into previous short-replacement record, if any.
-        ++numChanges;
-        int32_t last = lastUnit();
-        if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
-                (last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
-            setLastUnit(last + 1);
-            return;
-        }
-        append(oldLength << 12);
-        return;
-    }
-
    if(oldLength < 0 || newLength < 0) {
        errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
        return;
@ -171,6 +157,21 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
        delta += newDelta;
    }

+    if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
+            newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
+        // Merge into previous same-lengths short-replacement record, if any.
+        int32_t u = (oldLength << 12) | (newLength << 9);
+        int32_t last = lastUnit();
+        if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
+                (last & ~SHORT_CHANGE_NUM_MASK) == u &&
+                (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
+            setLastUnit(last + 1);
+            return;
+        }
+        append(u);
+        return;
+    }
+
    int32_t head = 0x7000;
    if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
        head |= oldLength << 6;
@ -396,7 +397,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
 Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
        array(a), index(0), length(len), remaining(0),
        onlyChanges_(oc), coarse(crs),
-        changed(FALSE), oldLength_(0), newLength_(0),
+        dir(0), changed(FALSE), oldLength_(0), newLength_(0),
        srcIndex(0), replIndex(0), destIndex(0) {}

 int32_t Edits::Iterator::readLength(int32_t head) {
@ -418,7 +419,7 @@ int32_t Edits::Iterator::readLength(int32_t head) {
    }
 }

-void Edits::Iterator::updateIndexes() {
+void Edits::Iterator::updateNextIndexes() {
    srcIndex += oldLength_;
    if (changed) {
        replIndex += newLength_;
@ -426,22 +427,52 @@ void Edits::Iterator::updateIndexes() {
    destIndex += newLength_;
 }

+void Edits::Iterator::updatePreviousIndexes() {
+    srcIndex -= oldLength_;
+    if (changed) {
+        replIndex -= newLength_;
+    }
+    destIndex -= newLength_;
+}
+
 UBool Edits::Iterator::noNext() {
-    // No change beyond the string.
+    // No change before or beyond the string.
+    dir = 0;
    changed = FALSE;
    oldLength_ = newLength_ = 0;
    return FALSE;
 }

 UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
+    // Forward iteration: Update the string indexes to the limit of the current span,
+    // and post-increment-read array units to assemble a new span.
+    // Leaves the array index one after the last unit of that span.
    if (U_FAILURE(errorCode)) { return FALSE; }
    // We have an errorCode in case we need to start guarding against integer overflows.
    // It is also convenient for caller loops if we bail out when an error was set elsewhere.
-    updateIndexes();
-    if (remaining > 0) {
-        // Fine-grained iterator: Continue a sequence of equal-length changes.
-        --remaining;
-        return TRUE;
+    if (dir > 0) {
+        updateNextIndexes();
+    } else {
+        if (dir < 0) {
+            // Turn around from previous() to next().
+            // Post-increment-read the same span again.
+            if (remaining > 0) {
+                // Fine-grained iterator:
+                // Stay on the current one of a sequence of compressed changes.
+                ++index;  // next() rests on the index after the sequence unit.
+                dir = 1;
+                return TRUE;
+            }
+        }
+        dir = 1;
+    }
+    if (remaining >= 1) {
+        // Fine-grained iterator: Continue a sequence of compressed changes.
+        if (remaining > 1) {
+            --remaining;
+            return TRUE;
+        }
+        remaining = 0;
    }
    if (index >= length) {
        return noNext();
@ -457,7 +488,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
        }
        newLength_ = oldLength_;
        if (onlyChanges) {
-            updateIndexes();
+            updateNextIndexes();
            if (index >= length) {
                return noNext();
            }
@ -469,14 +500,19 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
    }
    changed = TRUE;
    if (u <= MAX_SHORT_CHANGE) {
+        int32_t oldLen = u >> 12;
+        int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
+        int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
        if (coarse) {
-            int32_t w = u >> 12;
-            int32_t len = (u & 0xfff) + 1;
-            oldLength_ = newLength_ = len * w;
+            oldLength_ = num * oldLen;
+            newLength_ = num * newLen;
        } else {
-            // Split a sequence of equal-length changes that was compressed into one unit.
-            oldLength_ = newLength_ = u >> 12;
-            remaining = u & 0xfff;
+            // Split a sequence of changes that was compressed into one unit.
+            oldLength_ = oldLen;
+            newLength_ = newLen;
+            if (num > 1) {
+                remaining = num;  // This is the first of two or more changes.
+            }
            return TRUE;
        }
    } else {
@ -491,22 +527,127 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
    while (index < length && (u = array[index]) > MAX_UNCHANGED) {
        ++index;
        if (u <= MAX_SHORT_CHANGE) {
-            int32_t w = u >> 12;
-            int32_t len = (u & 0xfff) + 1;
-            len = len * w;
-            oldLength_ += len;
-            newLength_ += len;
+            int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
+            oldLength_ += (u >> 12) * num;
+            newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
        } else {
            U_ASSERT(u <= 0x7fff);
-            int32_t oldLen = readLength((u >> 6) & 0x3f);
-            int32_t newLen = readLength(u & 0x3f);
-            oldLength_ += oldLen;
-            newLength_ += newLen;
+            oldLength_ += readLength((u >> 6) & 0x3f);
+            newLength_ += readLength(u & 0x3f);
        }
    }
    return TRUE;
 }

+UBool Edits::Iterator::previous(UErrorCode &errorCode) {
+    // Backward iteration: Pre-decrement-read array units to assemble a new span,
+    // then update the string indexes to the start of that span.
+    // Leaves the array index on the head unit of that span.
+    if (U_FAILURE(errorCode)) { return FALSE; }
+    // We have an errorCode in case we need to start guarding against integer overflows.
+    // It is also convenient for caller loops if we bail out when an error was set elsewhere.
+    if (dir >= 0) {
+        if (dir > 0) {
+            // Turn around from next() to previous().
+            // Set the string indexes to the span limit and
+            // pre-decrement-read the same span again.
+            if (remaining > 0) {
+                // Fine-grained iterator:
+                // Stay on the current one of a sequence of compressed changes.
+                --index;  // previous() rests on the sequence unit.
+                dir = -1;
+                return TRUE;
+            }
+            updateNextIndexes();
+        }
+        dir = -1;
+    }
+    if (remaining > 0) {
+        // Fine-grained iterator: Continue a sequence of compressed changes.
+        int32_t u = array[index];
+        U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
+        if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
+            ++remaining;
+            updatePreviousIndexes();
+            return TRUE;
+        }
+        remaining = 0;
+    }
+    if (index <= 0) {
+        return noNext();
+    }
+    int32_t u = array[--index];
+    if (u <= MAX_UNCHANGED) {
+        // Combine adjacent unchanged ranges.
+        changed = FALSE;
+        oldLength_ = u + 1;
+        while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
+            --index;
+            oldLength_ += u + 1;
+        }
+        newLength_ = oldLength_;
+        // No need to handle onlyChanges as long as previous() is called only from findIndex().
+        updatePreviousIndexes();
+        return TRUE;
+    }
+    changed = TRUE;
+    if (u <= MAX_SHORT_CHANGE) {
+        int32_t oldLen = u >> 12;
+        int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
+        int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
+        if (coarse) {
+            oldLength_ = num * oldLen;
+            newLength_ = num * newLen;
+        } else {
+            // Split a sequence of changes that was compressed into one unit.
+            oldLength_ = oldLen;
+            newLength_ = newLen;
+            if (num > 1) {
+                remaining = 1;  // This is the last of two or more changes.
+            }
+            updatePreviousIndexes();
+            return TRUE;
+        }
+    } else {
+        if (u <= 0x7fff) {
+            // The change is encoded in u alone.
+            oldLength_ = readLength((u >> 6) & 0x3f);
+            newLength_ = readLength(u & 0x3f);
+        } else {
+            // Back up to the head of the change, read the lengths,
+            // and reset the index to the head again.
+            U_ASSERT(index > 0);
+            while ((u = array[--index]) > 0x7fff) {}
+            U_ASSERT(u > MAX_SHORT_CHANGE);
+            int32_t headIndex = index++;
+            oldLength_ = readLength((u >> 6) & 0x3f);
+            newLength_ = readLength(u & 0x3f);
+            index = headIndex;
+        }
+        if (!coarse) {
+            updatePreviousIndexes();
+            return TRUE;
+        }
+    }
+    // Combine adjacent changes.
+    while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
+        --index;
+        if (u <= MAX_SHORT_CHANGE) {
+            int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
+            oldLength_ += (u >> 12) * num;
+            newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
+        } else if (u <= 0x7fff) {
+            // Read the lengths, and reset the index to the head again.
+            int32_t headIndex = index++;
+            oldLength_ += readLength((u >> 6) & 0x3f);
+            newLength_ += readLength(u & 0x3f);
+            index = headIndex;
+        }
+    }
+    updatePreviousIndexes();
+    return TRUE;
+}
+
 int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode) || i < 0) { return -1; }
    int32_t spanStart, spanLength;
@ -518,7 +659,44 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
        spanLength = newLength_;
    }
    if (i < spanStart) {
+        if (i >= (spanStart / 2)) {
+            // Search backwards.
+            for (;;) {
+                UBool hasPrevious = previous(errorCode);
+                U_ASSERT(hasPrevious);  // because i>=0 and the first span starts at 0
+                (void)hasPrevious;  // avoid unused-variable warning
+                spanStart = findSource ? srcIndex : destIndex;
+                if (i >= spanStart) {
+                    // The index is in the current span.
+                    return 0;
+                }
+                if (remaining > 0) {
+                    // Is the index in one of the remaining compressed edits?
+                    // spanStart is the start of the current span, first of the remaining ones.
+                    spanLength = findSource ? oldLength_ : newLength_;
+                    int32_t u = array[index];
+                    U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
+                    int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
+                    int32_t len = num * spanLength;
+                    if (i >= (spanStart - len)) {
+                        int32_t n = ((spanStart - i - 1) / spanLength) + 1;
+                        // 1 <= n <= num
+                        srcIndex -= n * oldLength_;
+                        replIndex -= n * newLength_;
+                        destIndex -= n * newLength_;
+                        remaining += n;
+                        return 0;
+                    }
+                    // Skip all of these edits at once.
+                    srcIndex -= num * oldLength_;
+                    replIndex -= num * newLength_;
+                    destIndex -= num * newLength_;
+                    remaining = 0;
+                }
+            }
+        }
        // Reset the iterator to the start.
+        dir = 0;
        index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
    } else if (i < (spanStart + spanLength)) {
        // The index is in the current span.
@ -536,21 +714,21 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
            // The index is in the current span.
            return 0;
        }
-        if (remaining > 0) {
+        if (remaining > 1) {
            // Is the index in one of the remaining compressed edits?
-            // spanStart is the start of the current span, before the remaining ones.
-            int32_t len = (remaining + 1) * spanLength;
+            // spanStart is the start of the current span, first of the remaining ones.
+            int32_t len = remaining * spanLength;
            if (i < (spanStart + len)) {
-                int32_t n = (i - spanStart) / spanLength;  // 1 <= n <= remaining
-                len = n * spanLength;
-                srcIndex += len;
-                replIndex += len;
-                destIndex += len;
+                int32_t n = (i - spanStart) / spanLength;  // 1 <= n <= remaining - 1
+                srcIndex += n * oldLength_;
+                replIndex += n * newLength_;
+                destIndex += n * newLength_;
                remaining -= n;
                return 0;
            }
            // Make next() skip all of these edits at once.
-            oldLength_ = newLength_ = len;
+            oldLength_ *= remaining;
+            newLength_ *= remaining;
            remaining = 0;
        }
    }
--- a/icu4c/source/common/filteredbrk.cpp
+++ b/icu4c/source/common/filteredbrk.cpp
@ -694,7 +694,7 @@ FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
 }

 FilteredBreakIteratorBuilder *
-FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) {
+FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
  if(U_FAILURE(status)) return NULL;
  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
  return (U_SUCCESS(status))? ret.orphan(): NULL;
--- a/icu4c/source/common/filterednormalizer2.cpp
+++ b/icu4c/source/common/filterednormalizer2.cpp
@ -22,11 +22,11 @@

 #include "unicode/edits.h"
 #include "unicode/normalizer2.h"
+#include "unicode/stringoptions.h"
 #include "unicode/uniset.h"
 #include "unicode/unistr.h"
 #include "unicode/unorm.h"
 #include "cpputils.h"
-#include "ustr_imp.h"  // U_EDITS_NO_RESET

 U_NAMESPACE_BEGIN

--- a/icu4c/source/common/hash.h
+++ b/icu4c/source/common/hash.h
@ -33,6 +33,8 @@ class U_COMMON_API Hashtable : public UMemory {

    inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);

+    inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);
+
 public:
    /**
     * Construct a hashtable
@ -41,6 +43,14 @@ public:
    */
    Hashtable(UBool ignoreKeyCase, UErrorCode& status);

+    /**
+     * Construct a hashtable
+     * @param ignoreKeyCase If true, keys are case insensitive.
+     * @param size initial size allocation
+     * @param status Error code
+    */
+    Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
+
    /**
     * Construct a hashtable
     * @param keyComp Comparator for comparing the keys
@ -76,9 +86,9 @@ public:
    int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);

    void* get(const UnicodeString& key) const;
-    
+
    int32_t geti(const UnicodeString& key) const;
-    
+
    void* remove(const UnicodeString& key);

    int32_t removei(const UnicodeString& key);
@ -92,9 +102,9 @@ public:
     * @see uhash_nextElement
     */
    const UHashElement* nextElement(int32_t& pos) const;
-    
+
    UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
-    
+
    UValueComparator* setValueComparator(UValueComparator* valueComp);

    UBool equals(const Hashtable& that) const;
@ -107,7 +117,7 @@ private:
 * Implementation
 ********************************************************************/

-inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp, 
+inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
                            UValueComparator *valueComp, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
@ -119,10 +129,23 @@ inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
    }
 }

-inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, 
+inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
+                                UValueComparator *valueComp, int32_t size, UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
+    if (U_SUCCESS(status)) {
+        hash = &hashObj;
+        uhash_setKeyDeleter(hash, uprv_deleteUObject);
+    }
+}
+
+inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
                 UErrorCode& status) : hash(0) {
    init( uhash_hashUnicodeString, keyComp, valueComp, status);
 }
+
 inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
 : hash(0)
 {
@ -134,6 +157,17 @@ inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
            status);
 }

+inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
+ : hash(0)
+{
+    initSize(ignoreKeyCase ? uhash_hashCaselessUnicodeString
+                        : uhash_hashUnicodeString,
+            ignoreKeyCase ? uhash_compareCaselessUnicodeString
+                        : uhash_compareUnicodeString,
+            NULL, size,
+            status);
+}
+
 inline Hashtable::Hashtable(UErrorCode& status)
 : hash(0)
 {
@ -200,7 +234,7 @@ inline void Hashtable::removeAll(void) {
 inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
    return uhash_setKeyComparator(hash, keyComp);
 }
-    
+
 inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
    return uhash_setValueComparator(hash, valueComp);
 }
--- a/icu4c/source/common/locdispnames.cpp
+++ b/icu4c/source/common/locdispnames.cpp
@ -542,7 +542,7 @@ uloc_getDisplayName(const char *locale,
            return 0;
        }
        separator = (const UChar *)p0 + subLen;
-        sepLen = p1 - separator;
+        sepLen = static_cast<int32_t>(p1 - separator);
    }

    if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
@ -558,8 +558,8 @@ uloc_getDisplayName(const char *locale,
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
-        sub0Pos=p0-pattern;
-        sub1Pos=p1-pattern;
+        sub0Pos = static_cast<int32_t>(p0-pattern);
+        sub1Pos = static_cast<int32_t>(p1-pattern);
        if (sub1Pos < sub0Pos) { /* a very odd pattern */
            int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
            langi=1;
--- a/icu4c/source/common/locdspnm.cpp
+++ b/icu4c/source/common/locdspnm.cpp
@ -54,7 +54,7 @@ static int32_t ncat(char *buffer, uint32_t buflen, ...) {
  *p = 0;
  va_end(args);

-  return p - buffer;
+  return static_cast<int32_t>(p - buffer);
 }

 U_NAMESPACE_BEGIN
--- a/icu4c/source/common/norm2_nfc_data.h
+++ b/icu4c/source/common/norm2_nfc_data.h
@ -300,21 +300,21 @@ static const uint16_t norm2_nfc_data_trieIndex[9776]={
 1,1,1,1,0x864,0x198d,1,1,1,1,1,1,0x868,0x1993,1,0x86c,
 0x1999,1,1,1,1,1,1,1,0xfc0e,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,0xffcc,0xffb8,0xffcc,
-0xffcc,1,1,1,0x29dd,0x29e3,0x29e9,0x29ef,0x29f5,0x29fb,0x2a01,0x2a07,1,1,1,1,
+0xffcc,1,1,1,0x29dc,0x29e2,0x29e8,0x29ee,0x29f4,0x29fa,0x2a00,0x2a06,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,0xfe0e,1,0xfc00,1,1,1,1,1,
 1,1,1,0x870,1,1,1,0x199f,0x19a5,0xfe12,1,1,1,1,1,1,
-1,1,1,0xfc00,1,1,1,1,0x2a0d,0x2a13,1,0x2a19,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2a1f,
-1,1,0x2a25,1,1,1,1,1,0xfe0e,1,1,1,1,1,1,1,
+1,1,1,0xfc00,1,1,1,1,0x2a0c,0x2a12,1,0x2a18,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2a1e,
+1,1,0x2a24,1,1,1,1,1,0xfe0e,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,
-1,1,1,1,1,0x2a2b,0x2a31,0x2a37,1,1,0x2a3d,1,1,1,1,1,
+1,1,1,1,1,0x2a2a,0x2a30,0x2a36,1,1,0x2a3c,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,0xfe0e,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x878,
 0x19ab,1,1,0x19b1,0x19b7,0xfe12,1,1,1,1,1,1,1,1,0xfc00,0xfc00,
-1,1,1,1,0x2a43,0x2a49,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0x2a42,0x2a48,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,0x884,1,0x19bd,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc00,1,
@ -342,7 +342,7 @@ static const uint16_t norm2_nfc_data_trieIndex[9776]={
 1,1,1,0x2a4f,1,1,1,1,1,1,1,1,1,0x2a55,1,1,
 1,1,0x2a5b,1,1,1,1,0x2a61,1,1,1,1,0x2a67,1,1,1,
 1,1,1,1,1,1,1,1,1,0x2a6d,1,1,1,1,1,1,
-1,0xff02,0xff04,0x3c40,0xff08,0x3c48,0x2a73,1,0x2a79,1,0xff04,0xff04,0xff04,0xff04,1,1,
+1,0xff02,0xff04,0x3c40,0xff08,0x3c48,0x2a72,1,0x2a78,1,0xff04,0xff04,0xff04,0xff04,1,1,
 0xff04,0x3c50,0xffcc,0xffcc,0xfe12,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,
 1,1,1,0x2a7f,1,1,1,1,1,1,1,1,1,0x2a85,1,1,
 1,1,0x2a8b,1,1,1,1,0x2a91,1,1,1,1,0x2a97,1,1,1,
@ -406,15 +406,15 @@ static const uint16_t norm2_nfc_data_trieIndex[9776]={
 0x21ef,0x21f9,0x2203,0x220d,0x10d8,0x10e6,0x2217,0x2221,0x222b,0x2235,1,1,0x10f4,0x1102,0x223f,0x2249,
 0x2253,0x225d,1,1,0x1110,0x1122,0x2267,0x2271,0x227b,0x2285,0x228f,0x2299,1,0x1134,1,0x22a3,
 1,0x22ad,1,0x22b7,0x1146,0x115c,0x1174,0x1182,0x1190,0x119e,0x11ac,0x11ba,0x11c6,0x11dc,0x11f4,0x1202,
-0x1210,0x121e,0x122c,0x123a,0x1246,0x3b8e,0x22bf,0x3b97,0x1250,0x3b9e,0x22c5,0x3ba7,0x22cb,0x3baf,0x22d1,0x3bb7,
+0x1210,0x121e,0x122c,0x123a,0x1246,0x3b8e,0x22bf,0x3b96,0x1250,0x3b9e,0x22c5,0x3ba6,0x22cb,0x3bae,0x22d1,0x3bb6,
 0x125a,0x3bbe,1,1,0x22d8,0x22e2,0x22f1,0x2301,0x2311,0x2321,0x2331,0x2341,0x234c,0x2356,0x2365,0x2375,
 0x2385,0x2395,0x23a5,0x23b5,0x23c0,0x23ca,0x23d9,0x23e9,0x23f9,0x2409,0x2419,0x2429,0x2434,0x243e,0x244d,0x245d,
 0x246d,0x247d,0x248d,0x249d,0x24a8,0x24b2,0x24c1,0x24d1,0x24e1,0x24f1,0x2501,0x2511,0x251c,0x2526,0x2535,0x2545,
-0x2555,0x2565,0x2575,0x2585,0x258f,0x2595,0x259d,0x25a4,0x25ad,1,0x1264,0x25b7,0x25bf,0x25c5,0x25cb,0x3bc7,
-0x25d0,1,0x2aa2,0x8f0,1,0x25d7,0x25df,0x25e6,0x25ef,1,0x126e,0x25f9,0x2601,0x3bcf,0x2607,0x3bd7,
-0x260c,0x2613,0x2619,0x261f,0x2625,0x262b,0x2633,0x3be1,1,1,0x263b,0x2643,0x264b,0x2651,0x2657,0x3beb,
-1,0x265d,0x2663,0x2669,0x266f,0x2675,0x267d,0x3bf5,0x2685,0x268b,0x2691,0x2699,0x26a1,0x26a7,0x26ad,0x3bff,
-0x26b3,0x26b9,0x3c07,0x2aa7,1,1,0x26c1,0x26c8,0x26d1,1,0x1278,0x26db,0x26e3,0x3c0f,0x26e9,0x3c17,
+0x2555,0x2565,0x2575,0x2585,0x258f,0x2595,0x259d,0x25a4,0x25ad,1,0x1264,0x25b7,0x25bf,0x25c5,0x25cb,0x3bc6,
+0x25d0,1,0x2aa2,0x8f0,1,0x25d7,0x25df,0x25e6,0x25ef,1,0x126e,0x25f9,0x2601,0x3bce,0x2607,0x3bd6,
+0x260c,0x2613,0x2619,0x261f,0x2625,0x262b,0x2633,0x3be0,1,1,0x263b,0x2643,0x264b,0x2651,0x2657,0x3bea,
+1,0x265d,0x2663,0x2669,0x266f,0x2675,0x267d,0x3bf4,0x2685,0x268b,0x2691,0x2699,0x26a1,0x26a7,0x26ad,0x3bfe,
+0x26b3,0x26b9,0x3c06,0x2aa7,1,1,0x26c1,0x26c8,0x26d1,1,0x1278,0x26db,0x26e3,0x3c0e,0x26e9,0x3c16,
 0x26ee,0x2aab,0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc,0xfe02,0xfe02,0xfe02,0xffcc,
@ -512,10 +512,10 @@ static const uint16_t norm2_nfc_data_trieIndex[9776]={
 0x311b,0x3009,0x311f,0x3123,0x3127,0x312b,0x312f,0x3011,0x2f09,0x3133,0x3015,0x3137,0x3019,0x313b,0x2ae1,0x313f,
 0x3145,0x314b,0x3151,0x3155,0x3159,0x315d,0x3163,0x3169,0x316f,0x3173,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0x3177,0xfe34,0x317d,1,1,1,1,
-1,1,1,1,1,1,0x3183,0x3189,0x3191,0x319b,0x31a3,0x31a9,0x31af,0x31b5,0x31bb,0x31c1,
-0x31c7,0x31cd,0x31d3,1,0x31d9,0x31df,0x31e5,0x31eb,0x31f1,1,0x31f7,1,0x31fd,0x3203,1,0x3209,
-0x320f,1,0x3215,0x321b,0x3221,0x3227,0x322d,0x3233,0x3239,0x323f,0x3245,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0x3176,0xfe34,0x317c,1,1,1,1,
+1,1,1,1,1,1,0x3182,0x3188,0x3190,0x319a,0x31a2,0x31a8,0x31ae,0x31b4,0x31ba,0x31c0,
+0x31c6,0x31cc,0x31d2,1,0x31d8,0x31de,0x31e4,0x31ea,0x31f0,1,0x31f6,1,0x31fc,0x3202,1,0x3208,
+0x320e,1,0x3214,0x321a,0x3220,0x3226,0x322c,0x3232,0x3238,0x323e,0x3244,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,
 0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@ -560,13 +560,13 @@ static const uint16_t norm2_nfc_data_trieIndex[9776]={
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,0xfe02,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0x324b,0x3255,0x3269,0x3281,0x3299,0x32b1,0x32c9,0xffb0,0xffb0,0xfe02,
+1,1,1,1,1,1,0x324a,0x3254,0x3268,0x3280,0x3298,0x32b0,0x32c8,0xffb0,0xffb0,0xfe02,
 0xfe02,0xfe02,1,1,1,0xffc4,0xffb0,0xffb0,0xffb0,0xffb0,0xffb0,1,1,1,1,1,
 1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc,
 0xffcc,0xffcc,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,
-1,1,1,1,1,1,1,1,1,1,1,0x32d7,0x32e1,0x32f5,0x330d,0x3325,
-0x333d,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0x32d6,0x32e0,0x32f4,0x330c,0x3324,
+0x333c,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
--- a/icu4c/source/common/norm2allmodes.h
+++ b/icu4c/source/common/norm2allmodes.h
@ -20,10 +20,10 @@

 #include "unicode/edits.h"
 #include "unicode/normalizer2.h"
+#include "unicode/stringoptions.h"
 #include "unicode/unistr.h"
 #include "cpputils.h"
 #include "normalizer2impl.h"
-#include "ustr_imp.h"  // U_EDITS_NO_RESET

 U_NAMESPACE_BEGIN

@ -226,14 +226,14 @@ public:
 private:
    virtual void
    normalize(const UChar *src, const UChar *limit,
-              ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
        impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
    }
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.

    void
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
-                  Edits *edits, UErrorCode &errorCode) const override {
+                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
        if (U_FAILURE(errorCode)) {
            return;
        }
@ -249,12 +249,12 @@ private:
    virtual void
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
                       UnicodeString &safeMiddle,
-                       ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
    }

    virtual UBool
-    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
        if(U_FAILURE(errorCode)) {
            return FALSE;
        }
@ -271,7 +271,7 @@ private:
        return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
    }
    virtual UBool
-    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
+    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
        if(U_FAILURE(errorCode)) {
            return FALSE;
        }
@ -279,7 +279,7 @@ private:
        return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
    }
    virtual UNormalizationCheckResult
-    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
        if(U_FAILURE(errorCode)) {
            return UNORM_MAYBE;
        }
@ -293,20 +293,20 @@ private:
        return qcResult;
    }
    virtual const UChar *
-    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const override {
+    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
    }
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
-    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
+    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
        return impl.getCompQuickCheck(impl.getNorm16(c));
    }
-    virtual UBool hasBoundaryBefore(UChar32 c) const override {
+    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
        return impl.hasCompBoundaryBefore(c);
    }
-    virtual UBool hasBoundaryAfter(UChar32 c) const override {
+    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
        return impl.hasCompBoundaryAfter(c, onlyContiguous);
    }
-    virtual UBool isInert(UChar32 c) const override {
+    virtual UBool isInert(UChar32 c) const U_OVERRIDE {
        return impl.isCompInert(c, onlyContiguous);
    }

--- a/icu4c/source/common/normalizer2.cpp
+++ b/icu4c/source/common/normalizer2.cpp
@ -22,6 +22,7 @@

 #include "unicode/edits.h"
 #include "unicode/normalizer2.h"
+#include "unicode/stringoptions.h"
 #include "unicode/unistr.h"
 #include "unicode/unorm.h"
 #include "cstring.h"
@ -30,7 +31,6 @@
 #include "normalizer2impl.h"
 #include "uassert.h"
 #include "ucln_cmn.h"
-#include "ustr_imp.h"  // U_EDITS_NO_RESET

 using icu::Normalizer2Impl;

@ -85,7 +85,7 @@ class NoopNormalizer2 : public Normalizer2 {
    virtual UnicodeString &
    normalize(const UnicodeString &src,
              UnicodeString &dest,
-              UErrorCode &errorCode) const override {
+              UErrorCode &errorCode) const U_OVERRIDE {
        if(U_SUCCESS(errorCode)) {
            if(&dest!=&src) {
                dest=src;
@ -97,7 +97,7 @@ class NoopNormalizer2 : public Normalizer2 {
    }
    virtual void
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
-                  Edits *edits, UErrorCode &errorCode) const override {
+                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
        if(U_SUCCESS(errorCode)) {
            if (edits != nullptr) {
                if ((options & U_EDITS_NO_RESET) == 0) {
@ -115,7 +115,7 @@ class NoopNormalizer2 : public Normalizer2 {
    virtual UnicodeString &
    normalizeSecondAndAppend(UnicodeString &first,
                             const UnicodeString &second,
-                             UErrorCode &errorCode) const override {
+                             UErrorCode &errorCode) const U_OVERRIDE {
        if(U_SUCCESS(errorCode)) {
            if(&first!=&second) {
                first.append(second);
@ -128,7 +128,7 @@ class NoopNormalizer2 : public Normalizer2 {
    virtual UnicodeString &
    append(UnicodeString &first,
           const UnicodeString &second,
-           UErrorCode &errorCode) const override {
+           UErrorCode &errorCode) const U_OVERRIDE {
        if(U_SUCCESS(errorCode)) {
            if(&first!=&second) {
                first.append(second);
@ -139,29 +139,29 @@ class NoopNormalizer2 : public Normalizer2 {
        return first;
    }
    virtual UBool
-    getDecomposition(UChar32, UnicodeString &) const override {
+    getDecomposition(UChar32, UnicodeString &) const U_OVERRIDE {
        return FALSE;
    }
-    // No need to override the default getRawDecomposition().
+    // No need to override the default getRawDecomposition().
    virtual UBool
-    isNormalized(const UnicodeString &, UErrorCode &errorCode) const override {
+    isNormalized(const UnicodeString &, UErrorCode &errorCode) const U_OVERRIDE {
        return U_SUCCESS(errorCode);
    }
    virtual UBool
-    isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const override {
+    isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const U_OVERRIDE {
        return U_SUCCESS(errorCode);
    }
    virtual UNormalizationCheckResult
-    quickCheck(const UnicodeString &, UErrorCode &) const override {
+    quickCheck(const UnicodeString &, UErrorCode &) const U_OVERRIDE {
        return UNORM_YES;
    }
    virtual int32_t
-    spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const override {
+    spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const U_OVERRIDE {
        return s.length();
    }
-    virtual UBool hasBoundaryBefore(UChar32) const override { return TRUE; }
-    virtual UBool hasBoundaryAfter(UChar32) const override { return TRUE; }
-    virtual UBool isInert(UChar32) const override { return TRUE; }
+    virtual UBool hasBoundaryBefore(UChar32) const U_OVERRIDE { return TRUE; }
+    virtual UBool hasBoundaryAfter(UChar32) const U_OVERRIDE { return TRUE; }
+    virtual UBool isInert(UChar32) const U_OVERRIDE { return TRUE; }
 };

 NoopNormalizer2::~NoopNormalizer2() {}
--- a/icu4c/source/common/normalizer2impl.cpp
+++ b/icu4c/source/common/normalizer2impl.cpp
@ -28,6 +28,7 @@
 #include "unicode/ustring.h"
 #include "unicode/utf16.h"
 #include "unicode/utf8.h"
+#include "bytesinkutil.h"
 #include "cmemory.h"
 #include "mutex.h"
 #include "normalizer2impl.h"
@ -129,60 +130,6 @@ int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) {
    return -1;
 }

-/** The bytes at [src, nextSrc[ were mapped to valid (s16, s16Length). */
-UBool
-appendChange(const uint8_t *src, const uint8_t *nextSrc,
-             const char16_t *s16, int32_t s16Length,
-             ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
-    U_ASSERT(U_SUCCESS(errorCode));
-    U_ASSERT((nextSrc - src) <= INT32_MAX);  // ensured by caller
-    char scratch[200];
-    int32_t s8Length = 0;
-    for (int32_t i = 0; i < s16Length;) {
-        int32_t capacity;
-        int32_t desiredCapacity = s16Length - i;
-        if (desiredCapacity < (INT32_MAX / 3)) {
-            desiredCapacity *= 3;  // max 3 UTF-8 bytes per UTF-16 code unit
-        } else if (desiredCapacity < (INT32_MAX / 2)) {
-            desiredCapacity *= 2;
-        } else {
-            desiredCapacity = INT32_MAX;
-        }
-        char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
-                                            scratch, UPRV_LENGTHOF(scratch), &capacity);
-        capacity -= U8_MAX_LENGTH - 1;
-        int32_t j = 0;
-        for (; i < s16Length && j < capacity;) {
-            UChar32 c;
-            U16_NEXT_UNSAFE(s16, i, c);
-            U8_APPEND_UNSAFE(buffer, j, c);
-        }
-        if (j > (INT32_MAX - s8Length)) {
-            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
-            return FALSE;
-        }
-        sink.Append(buffer, j);
-        s8Length += j;
-    }
-    if (edits != nullptr) {
-        edits->addReplace((int32_t)(nextSrc - src), s8Length);
-    }
-    return TRUE;
-}
-
-/** The few bytes at [src, nextSrc[ were mapped to valid code point c. */
-void
-appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
-                ByteSink &sink, Edits *edits) {
-    char buffer[U8_MAX_LENGTH];
-    int32_t length = 0;
-    U8_APPEND_UNSAFE(buffer, length, c);
-    if (edits != nullptr) {
-        edits->addReplace((int32_t)(nextSrc - src), length);
-    }
-    sink.Append(buffer, length);
-}
-
 void
 appendCodePointDelta(const uint8_t *cpStart, const uint8_t *cpLimit, int32_t delta,
                     ByteSink &sink, Edits *edits) {
@ -214,27 +161,6 @@ appendCodePointDelta(const uint8_t *cpStart, const uint8_t *cpLimit, int32_t del
    sink.Append(buffer, length);
 }

-UBool
-appendUnchanged(const uint8_t *s, const uint8_t *limit,
-                ByteSink &sink, uint32_t options, Edits *edits,
-                UErrorCode &errorCode) {
-    U_ASSERT(U_SUCCESS(errorCode));
-    if ((limit - s) > INT32_MAX) {
-        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
-        return FALSE;
-    }
-    int32_t length = (int32_t)(limit - s);
-    if (length > 0) {
-        if (edits != nullptr) {
-            edits->addUnchanged(length);
-        }
-        if ((options & U_OMIT_UNCHANGED_TEXT) ==0) {
-            sink.Append(reinterpret_cast<const char *>(s), length);
-        }
-    }
-    return TRUE;
-}
-
 }  // namespace

 // ReorderingBuffer -------------------------------------------------------- ***
@ -1851,7 +1777,8 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
        for (;;) {
            if (src == limit) {
                if (prevBoundary != limit && sink != nullptr) {
-                    appendUnchanged(prevBoundary, limit, *sink, options, edits, errorCode);
+                    ByteSinkUtil::appendUnchanged(prevBoundary, limit,
+                                                  *sink, options, edits, errorCode);
                }
                return TRUE;
            }
@ -1884,7 +1811,8 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
                if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
                        hasCompBoundaryBefore(src, limit)) {
                    if (prevBoundary != prevSrc &&
-                            !appendUnchanged(prevBoundary, prevSrc, *sink, options, edits, errorCode)) {
+                            !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+                                                           *sink, options, edits, errorCode)) {
                        break;
                    }
                    appendCodePointDelta(prevSrc, src, getAlgorithmicDelta(norm16), *sink, edits);
@ -1896,13 +1824,14 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
                if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
                        hasCompBoundaryBefore(src, limit)) {
                    if (prevBoundary != prevSrc &&
-                            !appendUnchanged(prevBoundary, prevSrc, *sink, options, edits, errorCode)) {
+                            !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+                                                           *sink, options, edits, errorCode)) {
                        break;
                    }
                    const uint16_t *mapping = getMapping(norm16);
                    int32_t length = *mapping++ & MAPPING_LENGTH_MASK;
-                    if (!appendChange(prevSrc, src, (const UChar *)mapping, length,
-                                      *sink, edits, errorCode)) {
+                    if (!ByteSinkUtil::appendChange(prevSrc, src, (const UChar *)mapping, length,
+                                                    *sink, edits, errorCode)) {
                        break;
                    }
                    prevBoundary = src;
@ -1915,7 +1844,8 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
                if (hasCompBoundaryBefore(src, limit) ||
                        hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) {
                    if (prevBoundary != prevSrc &&
-                            !appendUnchanged(prevBoundary, prevSrc, *sink, options, edits, errorCode)) {
+                            !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+                                                           *sink, options, edits, errorCode)) {
                        break;
                    }
                    if (edits != nullptr) {
@ -1955,10 +1885,11 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
                            Hangul::JAMO_T_COUNT + t;
                        prevSrc -= 3;  // Replace the Jamo L as well.
                        if (prevBoundary != prevSrc &&
-                                !appendUnchanged(prevBoundary, prevSrc, *sink, options, edits, errorCode)) {
+                                !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+                                                               *sink, options, edits, errorCode)) {
                            break;
                        }
-                        appendCodePoint(prevSrc, src, syllable, *sink, edits);
+                        ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
                        prevBoundary = src;
                        continue;
                    }
@ -1979,10 +1910,11 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
                UChar32 syllable = prev + getJamoTMinusBase(prevSrc, src);
                prevSrc -= 3;  // Replace the Hangul LV as well.
                if (prevBoundary != prevSrc &&
-                        !appendUnchanged(prevBoundary, prevSrc, *sink, options, edits, errorCode)) {
+                        !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+                                                       *sink, options, edits, errorCode)) {
                    break;
                }
-                appendCodePoint(prevSrc, src, syllable, *sink, edits);
+                ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
                prevBoundary = src;
                continue;
            }
@ -2006,7 +1938,8 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
                for (;;) {
                    if (src == limit) {
                        if (sink != nullptr) {
-                            appendUnchanged(prevBoundary, limit, *sink, options, edits, errorCode);
+                            ByteSinkUtil::appendUnchanged(prevBoundary, limit,
+                                                          *sink, options, edits, errorCode);
                        }
                        return TRUE;
                    }
@ -2070,11 +2003,12 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
                return FALSE;
            }
            if (prevBoundary != prevSrc &&
-                    !appendUnchanged(prevBoundary, prevSrc, *sink, options, edits, errorCode)) {
+                    !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+                                                   *sink, options, edits, errorCode)) {
                break;
            }
-            if (!appendChange(prevSrc, src, buffer.getStart(), buffer.length(),
-                              *sink, edits, errorCode)) {
+            if (!ByteSinkUtil::appendChange(prevSrc, src, buffer.getStart(), buffer.length(),
+                                            *sink, edits, errorCode)) {
                break;
            }
            prevBoundary = src;
--- a/icu4c/source/common/putil.cpp
+++ b/icu4c/source/common/putil.cpp
@ -675,6 +675,16 @@ extern U_IMPORT char *U_TZNAME[];

 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
 /* These platforms are likely to use Olson timezone IDs. */
+/* common targets of the symbolic link at TZDEFAULT are:
+ * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
+ * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu, SuSe Linux
+ * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
+ * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
+ * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
+ * To avoid checking lots of paths, just check that the target path
+ * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
+ */
+
 #define CHECK_LOCALTIME_LINK 1
 #if U_PLATFORM_IS_DARWIN_BASED
 #include <tzfile.h>
@ -682,12 +692,12 @@ extern U_IMPORT char *U_TZNAME[];
 #elif U_PLATFORM == U_PF_SOLARIS
 #define TZDEFAULT       "/etc/localtime"
 #define TZZONEINFO      "/usr/share/lib/zoneinfo/"
-#define TZZONEINFO2     "../usr/share/lib/zoneinfo/"
 #define TZ_ENV_CHECK    "localtime"
 #else
 #define TZDEFAULT       "/etc/localtime"
 #define TZZONEINFO      "/usr/share/zoneinfo/"
 #endif
+#define TZZONEINFOTAIL  "/zoneinfo/"
 #if U_HAVE_DIRENT_H
 #define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
@ -1131,24 +1141,15 @@ uprv_tzname(int n)
        */
        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
        if (0 < ret) {
-            int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
+            int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
            gTimeZoneBuffer[ret] = 0;
-            if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
-                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
+            char *  tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
+            
+            if (tzZoneInfoTailPtr != NULL
+                && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
            {
-                return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
+                return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
            }
-#if U_PLATFORM == U_PF_SOLARIS
-            else
-            {
-                tzZoneInfoLen = uprv_strlen(TZZONEINFO2);
-                if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0
-                                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
-                {
-                    return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
-                }
-            }
-#endif
        } else {
 #if defined(SEARCH_TZFILE)
            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
--- a/icu4c/source/common/rbbi.cpp
+++ b/icu4c/source/common/rbbi.cpp
--- a/icu4c/source/common/rbbi_cache.cpp
+++ b/icu4c/source/common/rbbi_cache.cpp
@ -0,0 +1,622 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// file: rbbi_cache.cpp
+
+#include "unicode/utypes.h"
+#include "unicode/ubrk.h"
+#include "unicode/rbbi.h"
+
+#include "rbbi_cache.h"
+
+#include "brkeng.h"
+#include "cmemory.h"
+#include "rbbidata.h"
+#include "uassert.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * DictionaryCache implementation
+ */
+
+RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
+        fBI(bi), fBreaks(NULL), fPositionInCache(-1),
+        fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
+    fBreaks = new UVector32(status);
+}
+
+RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
+    delete fBreaks;
+    fBreaks = NULL;
+}
+
+void RuleBasedBreakIterator::DictionaryCache::reset() {
+    fPositionInCache = -1;
+    fStart = 0;
+    fLimit = 0;
+    fFirstRuleStatusIndex = 0;
+    fOtherRuleStatusIndex = 0;
+    fBreaks->removeAllElements();
+}
+
+UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
+    if (fromPos >= fLimit || fromPos < fStart) {
+        fPositionInCache = -1;
+        return FALSE;
+    }
+
+    // Sequential iteration, move from previous boundary to the following
+
+    int32_t r = 0;
+    if (fPositionInCache >= 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) {
+        ++fPositionInCache;
+        if (fPositionInCache >= fBreaks->size()) {
+            fPositionInCache = -1;
+            return FALSE;
+        }
+        r = fBreaks->elementAti(fPositionInCache);
+        U_ASSERT(r > fromPos);
+        *result = r;
+        *statusIndex = fOtherRuleStatusIndex;
+        return TRUE;
+    }
+
+    // Random indexing. Linear search for the boundary following the given position.
+
+    for (fPositionInCache = 0; fPositionInCache < fBreaks->size(); ++fPositionInCache) {
+        r= fBreaks->elementAti(fPositionInCache);
+        if (r > fromPos) {
+            *result = r;
+            *statusIndex = fOtherRuleStatusIndex;
+            return TRUE;
+        }
+    }
+    U_ASSERT(FALSE);
+    fPositionInCache = -1;
+    return FALSE;
+}
+
+
+UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
+    if (fromPos <= fStart || fromPos > fLimit) {
+        fPositionInCache = -1;
+        return FALSE;
+    }
+
+    if (fromPos == fLimit) {
+        fPositionInCache = fBreaks->size() - 1;
+        if (fPositionInCache >= 0) {
+            U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos);
+        }
+    }
+
+    int32_t r;
+    if (fPositionInCache > 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) {
+        --fPositionInCache;
+        r = fBreaks->elementAti(fPositionInCache);
+        U_ASSERT(r < fromPos);
+        *result = r;
+        *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
+        return TRUE;
+    }
+
+    if (fPositionInCache == 0) {
+        fPositionInCache = -1;
+        return FALSE;
+    }
+
+    for (fPositionInCache = fBreaks->size()-1; fPositionInCache >= 0; --fPositionInCache) {
+        r = fBreaks->elementAti(fPositionInCache);
+        if (r < fromPos) {
+            *result = r;
+            *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
+            return TRUE;
+        }
+    }
+    U_ASSERT(FALSE);
+    fPositionInCache = -1;
+    return FALSE;
+}
+
+void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
+                                       int32_t firstRuleStatus, int32_t otherRuleStatus) {
+    if ((endPos - startPos) <= 1) {
+        return;
+    }
+
+    reset();
+    fFirstRuleStatusIndex = firstRuleStatus;
+    fOtherRuleStatusIndex = otherRuleStatus;
+
+    int32_t rangeStart = startPos;
+    int32_t rangeEnd = endPos;
+
+    uint16_t    category;
+    int32_t     current;
+    UErrorCode  status = U_ZERO_ERROR;
+    int32_t     foundBreakCount = 0;
+    UText      *text = fBI->fText;
+
+    // Loop through the text, looking for ranges of dictionary characters.
+    // For each span, find the appropriate break engine, and ask it to find
+    // any breaks within the span.
+
+    utext_setNativeIndex(text, rangeStart);
+    UChar32     c = utext_current32(text);
+    category = UTRIE2_GET16(fBI->fData->fTrie, c);
+
+    while(U_SUCCESS(status)) {
+        while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
+            utext_next32(text);           // TODO: cleaner loop structure.
+            c = utext_current32(text);
+            category = UTRIE2_GET16(fBI->fData->fTrie, c);
+        }
+        if (current >= rangeEnd) {
+            break;
+        }
+
+        // We now have a dictionary character. Get the appropriate language object
+        // to deal with it.
+        const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
+
+        // Ask the language object if there are any breaks. It will add them to the cache and
+        // leave the text pointer on the other side of its range, ready to search for the next one.
+        if (lbe != NULL) {
+            foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks);
+        }
+
+        // Reload the loop variables for the next go-round
+        c = utext_current32(text);
+        category = UTRIE2_GET16(fBI->fData->fTrie, c);
+    }
+
+    // If we found breaks, ensure that the first and last entries are
+    // the original starting and ending position. And initialize the
+    // cache iteration position to the first entry.
+
+    // printf("foundBreakCount = %d\n", foundBreakCount);
+    if (foundBreakCount > 0) {
+        U_ASSERT(foundBreakCount == fBreaks->size());
+        if (startPos < fBreaks->elementAti(0)) {
+            // The dictionary did not place a boundary at the start of the segment of text.
+            // Add one now. This should not commonly happen, but it would be easy for interactions
+            // of the rules for dictionary segments and the break engine implementations to
+            // inadvertently cause it. Cover it here, just in case.
+            fBreaks->insertElementAt(startPos, 0, status);
+        }
+        if (endPos > fBreaks->peeki()) {
+            fBreaks->push(endPos, status);
+        }
+        fPositionInCache = 0;
+        // Note: Dictionary matching may extend beyond the original limit.
+        fStart = fBreaks->elementAti(0);
+        fLimit = fBreaks->peeki();
+    } else {
+        // there were no language-based breaks, even though the segment contained
+        // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
+        // for this range will fail, and the calling code will fall back to the rule based boundaries.
+    }
+}
+
+
+/*
+ *   BreakCache implementation
+ */
+
+// Build a cache attached to the break iterator 'bi'.
+// 'status' is handed to the side buffer's constructor; the circular buffer is
+// then primed by reset() with its default arguments (position 0, rule status 0).
+RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status)
+        : fBI(bi),
+          fSideBuffer(status) {
+    reset();
+}
+
+
+// Destructor. The body is intentionally empty: the boundary and status arrays and
+// the side buffer are all value members of the cache object.
+RuleBasedBreakIterator::BreakCache::~BreakCache() {
+}
+
+
+// Throw away everything in the cache and restart it with exactly one entry:
+// the boundary 'pos', tagged with the rule status index 'ruleStatus'.
+void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
+    // The lone entry lives in slot 0, which is at once the first slot, the last
+    // slot and the current iteration slot of the circular buffer.
+    fStartBufIdx = fEndBufIdx = fBufIdx = 0;
+    fBoundaries[0] = fTextIdx = pos;
+    fStatuses[0] = (uint16_t)ruleStatus;
+}
+
+
+// Publish the cache's current iteration state (text position and rule status index)
+// to the owning break iterator, clear its "done" flag, and return the position.
+int32_t  RuleBasedBreakIterator::BreakCache::current() {
+    RuleBasedBreakIterator &owner = *fBI;
+    owner.fDone = FALSE;
+    owner.fRuleStatusIndex = fStatuses[fBufIdx];
+    owner.fPosition = fTextIdx;
+    return fTextIdx;
+}
+
+
+// Move the iteration state to the boundary following startPos.
+// Nothing happens if 'status' already indicates a failure, or if startPos can
+// neither be found in the cache nor brought into it.
+void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Try the cheap checks first: already there, then already cached, and only
+    // then (re)populate the cache around the requested position.
+    const UBool positioned =
+            (startPos == fTextIdx) || seek(startPos) || populateNear(startPos, status);
+    if (!positioned) {
+        return;
+    }
+
+    // startPos is in the cache. Do a next() from that position.
+    // TODO: there is an awkward set of interactions with bi->fDone here.
+    //       seek() does not clear it; it can't because of interactions with populateNear().
+    //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
+    //       So it is cleared here, for the case where seek() succeeded on an iterator
+    //       that had previously run off the end.
+    fBI->fDone = false;
+    next();
+}
+
+
+// Move the iteration state to the boundary preceding startPos.
+// Nothing happens if 'status' already indicates a failure, or if startPos can
+// neither be found in the cache nor brought into it.
+void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (startPos != fTextIdx && !seek(startPos) && !populateNear(startPos, status)) {
+        // The requested position could not be located in, or added to, the cache.
+        return;
+    }
+
+    if (startPos == fTextIdx) {
+        // The cache sits exactly on startPos; the answer is one boundary back.
+        previous(status);
+        return;
+    }
+
+    // seek() leaves the BreakCache positioned at the preceding boundary
+    //        if the requested position is between two boundaries.
+    // current() pushes the BreakCache position out to the BreakIterator itself.
+    U_ASSERT(startPos > fTextIdx);
+    current();
+}
+
+
+/*
+ * Out-of-line part of BreakCache::next(), taken when the iteration position is
+ * already on the last cached boundary, so the cache has to be extended first.
+ * The break iterator is marked done when no further boundary could be added.
+ */
+void RuleBasedBreakIterator::BreakCache::nextOL() {
+    const UBool boundaryAdded = populateFollowing();
+    fBI->fDone = !boundaryAdded;
+    fBI->fPosition = fTextIdx;
+    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
+}
+
+
+// Step the iteration state back by one boundary and push the result out to the
+// break iterator. If the cache position did not move (nothing precedes it), the
+// break iterator is marked done.
+void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    const int32_t startingBufIdx = fBufIdx;
+    if (fBufIdx != fStartBufIdx) {
+        // Fast path: the preceding boundary is already in the cache.
+        fBufIdx = modChunkSize(fBufIdx - 1);
+        fTextIdx = fBoundaries[fBufIdx];
+    } else {
+        // Sitting on the first cached boundary. Prepend to the cache.
+        populatePreceding(status);
+    }
+    fBI->fDone = (fBufIdx == startingBufIdx);
+    fBI->fPosition = fTextIdx;
+    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
+}
+
+
+// Position the cache iterator on 'pos' if it is a cached boundary, or on the
+// closest cached boundary preceding 'pos' otherwise.
+// Returns FALSE, leaving the iteration position untouched, when 'pos' lies outside
+// the range of boundaries currently held by the circular buffer; TRUE otherwise.
+// Only the cache's own position (fBufIdx, fTextIdx) is updated; the state of the
+// break iterator is not touched here.
+UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
+    if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
+        return FALSE;
+    }
+    if (pos == fBoundaries[fStartBufIdx]) {
+        // Common case: seek(0), from BreakIterator::first()
+        fBufIdx = fStartBufIdx;
+        fTextIdx = fBoundaries[fBufIdx];
+        return TRUE;
+    }
+    if (pos == fBoundaries[fEndBufIdx]) {
+        fBufIdx = fEndBufIdx;
+        fTextIdx = fBoundaries[fBufIdx];
+        return TRUE;
+    }
+    
+    // Binary search for the first cached boundary that is greater than pos.
+    // min and max are buffer slot indices, not text positions. When the live part of
+    // the circular buffer wraps around the end of the array, min > max; CACHE_SIZE is
+    // then added before halving so that the midpoint falls inside the live region,
+    // and modChunkSize() folds it back into the array.
+    // Invariant: fBoundaries[max] > pos. It holds initially because the cases
+    // pos == first and pos == last boundary have already returned above.
+    int32_t min = fStartBufIdx;
+    int32_t max = fEndBufIdx;
+    while (min != max) {
+        int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
+        probe = modChunkSize(probe);
+        if (fBoundaries[probe] > pos) {
+            max = probe;
+        } else {
+            min = modChunkSize(probe + 1);
+        }
+    }
+    U_ASSERT(fBoundaries[max] > pos);
+    // The slot just before max holds the last boundary that is <= pos.
+    fBufIdx = modChunkSize(max - 1);
+    fTextIdx = fBoundaries[fBufIdx];
+    U_ASSERT(fTextIdx <= pos);
+    return TRUE;
+}
+
+
+// Make sure the cache holds boundaries around 'position', and leave the cache
+// iteration position on 'position' if it is a boundary, or on the boundary
+// preceding it otherwise.
+//
+// Must only be called for a position outside the range of boundaries currently
+// cached (see the assertion below); seek() handles positions inside that range.
+//
+// Returns FALSE if 'status' indicates a failure on entry, or if the cache could
+// not be extended far enough to cover 'position'.
+UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return FALSE;
+    }
+    U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
+
+    // Find a boundary somewhere in the vicinity of the requested position.
+    // Depending on the safe rules and the text data, it could be either before, at, or after
+    // the requested position.
+
+
+    // If the requested position is not near already cached positions, clear the existing cache,
+    // find a near-by boundary and begin new cache contents there.
+    // ("Near" means within 15 of either end of the cached range.)
+
+    if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
+        int32_t aBoundary = 0;
+        int32_t ruleStatusIndex = 0;
+        // TODO: check for position == length of text. Although may still need to back up to get rule status.
+        if (position > 20) {
+            // Back up with handlePrevious(), then move forward with handleNext() from there
+            // to obtain a boundary together with its rule status.
+            int32_t backupPos = fBI->handlePrevious(position);
+            fBI->fPosition = backupPos;
+            aBoundary = fBI->handleNext();                // Ignore dictionary, just finding a rule based boundary.
+            ruleStatusIndex = fBI->fRuleStatusIndex;
+        }
+        // For positions <= 20 the cache simply restarts from boundary 0 with status 0.
+        reset(aBoundary, ruleStatusIndex);               // Reset cache to hold aBoundary as a single starting point.
+    }
+    
+    // Fill in boundaries between existing cache content and the new requested position.
+
+    if (fBoundaries[fEndBufIdx] < position) {
+        // The last position in the cache precedes the requested position.
+        // Add following position(s) to the cache.
+        while (fBoundaries[fEndBufIdx] < position) {
+            if (!populateFollowing()) {
+                U_ASSERT(false);
+                return false;
+            }
+        }
+        fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
+        fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
+        while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
+            previous(status);
+        }
+        return true;
+    }
+
+    if (fBoundaries[fStartBufIdx] > position) {
+        // The first position in the cache is beyond the requested position.
+        // back up more until we get a boundary <= the requested position.
+        while (fBoundaries[fStartBufIdx] > position) {
+            if (!populatePreceding(status)) {
+                // Nothing could be prepended (for example, status turned into a failure
+                // while collecting boundaries). The cache start will not move any more,
+                // so give up instead of looping forever.
+                return FALSE;
+            }
+        }
+        fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
+        fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
+        while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
+            next();
+        }
+        if (fTextIdx > position) {
+            // If position is not itself a boundary, the next() loop above will overshoot.
+            // Back up one, leaving cache position at the boundary preceding the requested position.
+            previous(status);
+        }
+        return true;
+    }
+
+    U_ASSERT(fTextIdx == position);
+    return true;
+}
+
+
+
+// Append one or more boundaries after the last boundary currently in the cache.
+// The iteration position is moved to the first newly added boundary.
+// Returns FALSE, adding nothing, when handleNext() reports UBRK_DONE for the
+// last cached boundary; TRUE otherwise.
+UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
+    int32_t fromPosition = fBoundaries[fEndBufIdx];
+    int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
+    int32_t pos = 0;
+    int32_t ruleStatusIdx = 0;
+
+    // First choice: a boundary the dictionary cache already holds.
+    if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
+        addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
+        return TRUE;
+    }
+
+    // Otherwise run the rules forward from the last cached boundary.
+    fBI->fPosition = fromPosition;
+    pos = fBI->handleNext();
+    if (pos == UBRK_DONE) {
+        return FALSE;
+    }
+
+    ruleStatusIdx = fBI->fRuleStatusIndex;
+    if (fBI->fDictionaryCharCount > 0) {
+        // The text segment obtained from the rules includes dictionary characters.
+        // Subdivide it, with subdivided results going into the dictionary cache.
+        fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
+        if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
+            addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
+            return TRUE;
+            // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
+            //       But be careful with interactions with populateNear().
+        }
+    }
+
+    // Rule based segment did not include dictionary characters.
+    // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
+    //    meaning that we didn't take the return, above.
+    // Add its end point to the cache.
+    addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
+
+    // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
+    //    (Subsequent calls to BreakIterator::next() will take the fast path, getting cached results.)
+    // The look-ahead stops at the end of the text, or at the first segment containing
+    // dictionary characters; that segment is not added here.
+    //
+    for (int count=0; count<6; ++count) {
+        pos = fBI->handleNext();
+        if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
+            break;
+        }
+        // These extra boundaries must not move the iteration position set above.
+        addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
+    }
+
+    return TRUE;
+}
+
+
+// Prepend one or more boundaries before the first boundary currently in the cache.
+// The iteration position is moved to the boundary immediately preceding the old
+// first boundary.
+// Returns FALSE, adding nothing, if 'status' indicates a failure on entry or the
+// first cached boundary is already at position 0. Otherwise returns TRUE if at
+// least one boundary was added.
+UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return FALSE;
+    }
+
+    int32_t fromPosition = fBoundaries[fStartBufIdx];
+    if (fromPosition == 0) {
+        return FALSE;
+    }
+
+    int32_t position = 0;
+    int32_t positionStatusIdx = 0;
+
+    // First choice: a boundary the dictionary cache already holds.
+    if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
+        addPreceding(position, positionStatusIdx, UpdateCachePosition);
+        return TRUE;
+    }
+
+    int32_t backupPosition = fromPosition;
+
+    // Find a boundary somewhere preceding the first already-cached boundary.
+    // Each attempt backs up by a further 30 and then uses handlePrevious() / handleNext()
+    // to land on a boundary; the start of the text (0) is used when backing up reaches it.
+    do {
+        backupPosition = backupPosition - 30;
+        if (backupPosition <= 0) {
+            backupPosition = 0;
+        } else {
+            backupPosition = fBI->handlePrevious(backupPosition);
+        }
+        if (backupPosition == UBRK_DONE || backupPosition == 0) {
+            position = 0;
+            positionStatusIdx = 0;
+        } else {
+            fBI->fPosition = backupPosition;  // TODO: pass starting position in a clearer way.
+            position = fBI->handleNext();
+            positionStatusIdx = fBI->fRuleStatusIndex;
+
+        }
+    } while (position >= fromPosition);
+
+    // Find boundaries between the one we just located and the first already-cached boundary.
+    // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer.
+    // The side buffer stores each boundary as a pair of elements: position first, then status index.
+
+    fSideBuffer.removeAllElements();
+    fSideBuffer.addElement(position, status);
+    fSideBuffer.addElement(positionStatusIdx, status);
+
+    do {
+        int32_t prevPosition = fBI->fPosition = position;
+        int32_t prevStatusIdx = positionStatusIdx;
+        position = fBI->handleNext();
+        positionStatusIdx = fBI->fRuleStatusIndex;
+        if (position == UBRK_DONE) {
+            break;
+        }
+
+        UBool segmentHandledByDictionary = FALSE;
+        if (fBI->fDictionaryCharCount != 0) {
+            // Segment from the rules includes dictionary characters.
+            // Subdivide it, with subdivided results going into the dictionary cache.
+            int32_t dictSegEndPosition = position;
+            fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
+            // Walk the dictionary boundaries of this segment, collecting those that
+            // precede the first already-cached boundary.
+            while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
+                segmentHandledByDictionary = true;
+                U_ASSERT(position > prevPosition);
+                if (position >= fromPosition) {
+                    break;
+                }
+                U_ASSERT(position <= dictSegEndPosition);
+                fSideBuffer.addElement(position, status);
+                fSideBuffer.addElement(positionStatusIdx, status);
+                prevPosition = position;
+            }
+            U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
+        }
+            
+        // Plain rule based boundary (or the dictionary produced nothing for the segment).
+        if (!segmentHandledByDictionary && position < fromPosition) {
+            fSideBuffer.addElement(position, status);
+            fSideBuffer.addElement(positionStatusIdx, status);
+        }
+    } while (position < fromPosition);
+
+    // Move boundaries from the side buffer to the main circular buffer.
+    // Popping yields them from last collected to first collected, i.e. nearest to the
+    // old cache start first. The first one popped becomes the new iteration position.
+    UBool success = FALSE;
+    if (!fSideBuffer.isEmpty()) {
+        positionStatusIdx = fSideBuffer.popi();
+        position = fSideBuffer.popi();
+        addPreceding(position, positionStatusIdx, UpdateCachePosition);
+        success = TRUE;
+    }
+
+    while (!fSideBuffer.isEmpty()) {
+        positionStatusIdx = fSideBuffer.popi();
+        position = fSideBuffer.popi();
+        if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
+            // No space in circular buffer to hold a new preceding result while
+            // also retaining the current cache (iteration) position.
+            // Bailing out is safe; the cache will refill again if needed.
+            break;
+        }
+    }
+      
+    return success;
+}
+
+
+// Append a boundary after the last boundary currently in the circular buffer.
+//   position       text position of the new boundary; must be beyond the current last boundary.
+//   ruleStatusIdx  rule status index for the new boundary; must fit in 16 bits.
+//   update         UpdateCachePosition moves the iteration position to the new boundary,
+//                  RetainCachePosition leaves it where it was.
+// When the buffer is full, the oldest entries at the start are dropped to make room.
+void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
+    U_ASSERT(position > fBoundaries[fEndBufIdx]);
+    U_ASSERT(ruleStatusIdx <= UINT16_MAX);
+    int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
+    if (nextIdx == fStartBufIdx) {
+        // Buffer is full: give up the six oldest entries in one go.
+        fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
+    }
+    fBoundaries[nextIdx] = position;
+    // Statuses are stored as uint16_t; narrow explicitly (range asserted above),
+    // as reset() does, rather than relying on an implicit conversion.
+    fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
+    fEndBufIdx = nextIdx;
+    if (update == UpdateCachePosition) {
+        // Set current position to the newly added boundary.
+        fBufIdx = nextIdx;
+        fTextIdx = position;
+    } else {
+        // Retaining the original cache position.
+        // Check if the added boundary wraps around the buffer, and would over-write the original position.
+        // It's the responsibility of callers of this function to not add too many.
+        U_ASSERT(nextIdx != fBufIdx);
+    }
+}
+
+// Prepend a boundary before the first boundary currently in the circular buffer.
+//   position       text position of the new boundary; must precede the current first boundary.
+//   ruleStatusIdx  rule status index for the new boundary; must fit in 16 bits.
+//   update         UpdateCachePosition moves the iteration position to the new boundary,
+//                  RetainCachePosition leaves it where it was.
+// When the buffer is full, the entry at the end is dropped to make room.
+// Returns false, changing nothing, if that entry is the current iteration position
+// and the caller asked for the position to be retained; true otherwise.
+bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
+    U_ASSERT(position < fBoundaries[fStartBufIdx]);
+    U_ASSERT(ruleStatusIdx <= UINT16_MAX);
+    int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
+    if (nextIdx == fEndBufIdx) {
+        if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
+            // Failure. The insertion of the new boundary would claim the buffer position that is the
+            // current iteration position. And we also want to retain the current iteration position.
+            // (The buffer is already completely full of entries that precede the iteration position.)
+            return false;
+        }
+        fEndBufIdx = modChunkSize(fEndBufIdx - 1);
+    }
+    fBoundaries[nextIdx] = position;
+    // Statuses are stored as uint16_t; narrow explicitly (range asserted above),
+    // as reset() does, rather than relying on an implicit conversion.
+    fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
+    fStartBufIdx = nextIdx;
+    if (update == UpdateCachePosition) {
+        fBufIdx = nextIdx;
+        fTextIdx = position;
+    }
+    return true;
+}
+
+
+// Debugging aid: print the current iteration position, followed by every cached
+// boundary (buffer slot and text position) from the first slot through the last.
+void RuleBasedBreakIterator::BreakCache::dumpCache() {
+    printf("fTextIdx:%d   fBufIdx:%d\n", fTextIdx, fBufIdx);
+    int32_t slot = fStartBufIdx;
+    while (TRUE) {
+        printf("%d  %d\n", slot, fBoundaries[slot]);
+        if (slot == fEndBufIdx) {
+            return;
+        }
+        slot = modChunkSize(slot + 1);
+    }
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/common/rbbi_cache.h
+++ b/icu4c/source/common/rbbi_cache.h
@ -0,0 +1,199 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// file: rbbi_cache.h
+//
+#ifndef RBBI_CACHE_H
+#define RBBI_CACHE_H
+
+#include "unicode/utypes.h"
+
+#include "unicode/rbbi.h"
+#include "unicode/uobject.h"
+
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+/* DictionaryCache  stores the boundaries obtained from a run of dictionary characters.
+ *                 Dictionary boundaries are moved first to this cache, then from here
+ *                 to the main BreakCache, where they may inter-leave with non-dictionary
+ *                 boundaries. The public BreakIterator API always fetches directly
+ *                 from the main BreakCache, not from here.
+ *
+ *                 In common situations, the number of boundaries in a single dictionary run
+ *                 should be quite small; it will be terminated by punctuation, spaces,
+ *                 or any other non-dictionary characters. The main BreakCache may end
+ *                 up with boundaries from multiple dictionary based runs.
+ *
+ *                 The boundaries are stored in a simple ArrayList (vector), with the
+ *                 assumption that they will be accessed sequentially.
+ */                 
+class RuleBasedBreakIterator::DictionaryCache: public UMemory {
+  public:
+     DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status);
+     ~DictionaryCache();
+
+     // NOTE(review): presumably empties the cache; the implementation is not in this header.
+     void reset();
+
+     // Look up the cached dictionary boundary following / preceding fromPos.
+     // NOTE(review): judging by the callers in BreakCache, a TRUE return means a boundary
+     // was found and *pos and *statusIndex were filled in; confirm against the implementation.
+     UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
+     UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
+
+    /**
+     * Populate the cache with the dictionary based boundaries within a region of text.
+     * @param startPos  The start position of a range of text
+     * @param endPos    The end position of a range of text
+     * @param firstRuleStatus The rule status index that applies to the break at startPos
+     * @param otherRuleStatus The rule status index that applies to boundaries other than startPos
+     * @internal
+     */
+    void populateDictionary(int32_t startPos, int32_t endPos,
+                         int32_t firstRuleStatus, int32_t otherRuleStatus);
+
+
+
+    RuleBasedBreakIterator *fBI;                // The break iterator this cache belongs to.
+    
+    UVector32          *fBreaks;                // A vector containing the boundaries.
+    int32_t             fPositionInCache;       // Index in fBreaks of last boundary returned by following()
+                                                //    or preceding(). Optimizes sequential access.
+    int32_t             fStart;                 // Text position of first boundary in cache.
+    int32_t             fLimit;                 // Last boundary in cache. Which is the limit of the
+                                                //    text segment being handled by the dictionary.
+    int32_t             fFirstRuleStatusIndex;  // Rule status info for first boundary.
+    int32_t             fOtherRuleStatusIndex;  // Rule status info for 2nd through last boundaries.
+};
+
+
+/*
+ * class BreakCache
+ *
+ * Cache of break boundary positions and rule status values.
+ * Break iterator API functions, next(), previous(), etc., will use cached results
+ * when possible, and otherwise cache new results as they are obtained.
+ *
+ * Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
+ *
+ * The cache is implemented as a single circular buffer.
+ */
+
+class RuleBasedBreakIterator::BreakCache: public UMemory {
+  public:
+                BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status);
+    virtual     ~BreakCache();
+
+    // Discard the cache contents and restart with the single boundary 'pos'.
+    void        reset(int32_t pos = 0, int32_t ruleStatus = 0);
+
+    // Advance to the next boundary. Fast path: the boundary is already cached.
+    // Otherwise the out-of-line nextOL() extends the cache first.
+    void        next() {    if (fBufIdx == fEndBufIdx) {
+                                nextOL();
+                            } else {
+                                fBufIdx = modChunkSize(fBufIdx + 1);
+                                fTextIdx = fBI->fPosition = fBoundaries[fBufIdx];
+                                fBI->fRuleStatusIndex = fStatuses[fBufIdx];
+                            }
+                };
+
+
+    // Out-of-line part of next(), used when the cache has no following boundary yet.
+    void        nextOL();
+
+    // Move the iteration state back by one boundary.
+    void        previous(UErrorCode &status);
+
+    // Move the iteration state to the position following the startPosition.
+    // Input position must be pinned to the input length.
+    void        following(int32_t startPosition, UErrorCode &status);
+
+    // Move the iteration state to the position preceding the startPosition.
+    void        preceding(int32_t startPosition, UErrorCode &status);
+
+    /*
+     * Update the state of the public BreakIterator (fBI) to reflect the
+     * current state of the break iterator cache (this).
+     */
+    int32_t     current();
+
+    /**
+     * Add boundaries to the cache near the specified position.
+     * The given position need not be a boundary itself.
+     * The input position must be within the range of the text, and
+     * on a code point boundary.
+     * If the requested position is a break boundary, leave the iteration
+     * position on it.
+     * If the requested position is not a boundary, leave the iteration
+     * position on the preceding boundary and include both the
+     * preceding and following boundaries in the cache.
+     * Additional boundaries, either preceding or following, may be added
+     * to the cache as a side effect.
+     *
+     * Return FALSE if the operation failed.
+     */
+    UBool populateNear(int32_t position, UErrorCode &status);
+
+    /**
+     *  Add boundary(s) to the cache following the current last boundary.
+     *  Return FALSE if at the end of the text, and no more boundaries can be added.
+     *  Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
+     */
+    UBool populateFollowing();
+
+    /**
+     *  Add one or more boundaries to the cache preceding the first currently cached boundary.
+     *  Leave the iteration position on the first added boundary.
+     *  Return false if no boundaries could be added (if at the start of the text.)
+     */
+    UBool populatePreceding(UErrorCode &status);
+
+    // Tells addFollowing() / addPreceding() whether the cache iteration position
+    // should stay where it is or move to the newly added boundary.
+    enum UpdatePositionValues {
+        RetainCachePosition = 0,
+        UpdateCachePosition = 1
+    };
+
+    /*
+     * Add the boundary following the current position.
+     * The current position can be left as it was, or changed to the newly added boundary,
+     * as specified by the update parameter.
+     */
+    void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
+
+
+    /*
+     * Add the boundary preceding the current position.
+     * The current position can be left as it was, or changed to the newly added boundary,
+     * as specified by the update parameter.
+     * Returns false if the boundary could not be added because doing so would
+     * overwrite the current position while it is to be retained.
+     */
+    bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
+
+    /**
+     *  Set the cache position to the specified position, or, if the position
+     *  falls between two cached boundaries, to the preceding boundary.
+     *  Fails if the requested position is outside of the range of boundaries currently held by the cache.
+     *  The startPosition must be on a code point boundary.
+     *
+     *  Return TRUE if successful, FALSE if the specified position is after
+     *  the last cached boundary or before the first.
+     */
+    UBool                   seek(int32_t startPosition);
+
+    // Debugging aid: print the cache contents.
+    void dumpCache();
+
+  private:
+    // Wrap a buffer index into the range [0, CACHE_SIZE). Relies on CACHE_SIZE being a power of two.
+    static inline int32_t   modChunkSize(int index) { return index & (CACHE_SIZE - 1); };
+
+    // Size of the circular cache buffer.
+    static constexpr int32_t CACHE_SIZE = 128;
+    static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");
+
+    RuleBasedBreakIterator *fBI;           // The break iterator this cache belongs to.
+    int32_t                 fStartBufIdx;  // Buffer index of the first cached boundary.
+    int32_t                 fEndBufIdx;    // Buffer index of the last cached boundary; inclusive
+
+    int32_t                 fTextIdx;      // Text position of the current iteration position.
+    int32_t                 fBufIdx;       // Buffer index of the current iteration position.
+
+    int32_t                 fBoundaries[CACHE_SIZE];   // Boundary text positions.
+    uint16_t                fStatuses[CACHE_SIZE];     // Rule status index for each boundary.
+
+    UVector32               fSideBuffer;   // Scratch storage used by populatePreceding().
+};
+
+U_NAMESPACE_END
+
+#endif // RBBI_CACHE_H
--- a/icu4c/source/common/rbbidata.cpp
+++ b/icu4c/source/common/rbbidata.cpp
@ -14,7 +14,7 @@
 #include "unicode/utypes.h"
 #include "rbbidata.h"
 #include "rbbirb.h"
-#include "utrie.h"
+#include "utrie2.h"
 #include "udatamem.h"
 #include "cmemory.h"
 #include "cstring.h"
@ -83,11 +83,11 @@ void RBBIDataWrapper::init0() {
    fReverseTable = NULL;
    fSafeFwdTable = NULL;
    fSafeRevTable = NULL;
-    fRuleSource = NULL;
+    fRuleSource   = NULL;
    fRuleStatusTable = NULL;
-    fTrie = NULL;
-    fUDataMem = NULL;
-    fRefCount = 0;
+    fTrie         = NULL;
+    fUDataMem     = NULL;
+    fRefCount     = 0;
    fDontFreeData = TRUE;
 }

@ -118,6 +118,14 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
        fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
    }

+    // Rule Compatibility Hacks
+    //    If a rule set includes reverse rules but does not explicitly include safe reverse rules,
+    //    the reverse rules are to be treated as safe reverse rules.
+
+    if (fSafeRevTable == NULL && fReverseTable != NULL) {
+        fSafeRevTable = fReverseTable;
+        fReverseTable = NULL;
+    }

    fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
                                      (uint8_t *)data + fHeader->fTrie,
--- a/icu4c/source/common/rbbidata.h
+++ b/icu4c/source/common/rbbidata.h
@ -184,11 +184,11 @@ public:
    /* number of int32_t values in the rule status table.   Used to sanity check indexing */
    int32_t             fStatusMaxIdx;

-    UTrie2              *fTrie;
+    UTrie2             *fTrie;

 private:
    u_atomic_int32_t    fRefCount;
-    UDataMemory  *fUDataMem;
+    UDataMemory        *fUDataMem;
    UnicodeString       fRuleString;
    UBool               fDontFreeData;

--- a/icu4c/source/common/rbbirb.cpp
+++ b/icu4c/source/common/rbbirb.cpp
@ -24,16 +24,16 @@
 #include "unicode/uchriter.h"
 #include "unicode/parsepos.h"
 #include "unicode/parseerr.h"
+
 #include "cmemory.h"
 #include "cstring.h"
-
 #include "rbbirb.h"
 #include "rbbinode.h"
-
 #include "rbbiscan.h"
 #include "rbbisetb.h"
 #include "rbbitblb.h"
 #include "rbbidata.h"
+#include "uassert.h"


 U_NAMESPACE_BEGIN
@ -164,8 +164,13 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
    int32_t statusTableSize   = align8(fRuleStatusVals->size() * sizeof(int32_t));
    int32_t rulesSize         = align8((strippedRules.length()+1) * sizeof(UChar));

-    int32_t         totalSize = headerSize + forwardTableSize + reverseTableSize
-                                + safeFwdTableSize + safeRevTableSize 
+    (void)safeFwdTableSize;
+
+    int32_t         totalSize = headerSize
+                                + forwardTableSize 
+                                + /* reverseTableSize */ 0
+                                + /* safeFwdTableSize */ 0
+                                + (safeRevTableSize ? safeRevTableSize : reverseTableSize)
                                + statusTableSize + trieSize + rulesSize;

    RBBIDataHeader  *data     = (RBBIDataHeader *)uprv_malloc(totalSize);
@ -184,16 +189,38 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
    data->fLength           = totalSize;
    data->fCatCount         = fSetBuilder->getNumCharCategories();

+    // Only save the forward table and the safe reverse table,
+    // because these are the only ones used at run-time.
+    //
+    // For the moment, we still build the other tables if they are present in the rule source files,
+    // for backwards compatibility. Old rule files need to work, and this is the simplest approach.
+    //
+    // Additional backwards compatibility consideration: if no safe rules are provided, consider the
+    // reverse rules to actually be the safe reverse rules.
+
    data->fFTable        = headerSize;
    data->fFTableLen     = forwardTableSize;
-    data->fRTable        = data->fFTable  + forwardTableSize;
-    data->fRTableLen     = reverseTableSize;
-    data->fSFTable       = data->fRTable  + reverseTableSize;
-    data->fSFTableLen    = safeFwdTableSize;
-    data->fSRTable       = data->fSFTable + safeFwdTableSize;
-    data->fSRTableLen    = safeRevTableSize;

-    data->fTrie          = data->fSRTable + safeRevTableSize;
+    // Do not save Reverse Table.
+    data->fRTable        = data->fFTable  + forwardTableSize;
+    data->fRTableLen     = 0;
+
+    // Do not save the Safe Forward table.
+    data->fSFTable       = data->fRTable + 0;
+    data->fSFTableLen    = 0;
+
+    data->fSRTable       = data->fSFTable + 0;
+    if (safeRevTableSize > 0) {
+        data->fSRTableLen    = safeRevTableSize;
+    } else if (reverseTableSize > 0) {
+        data->fSRTableLen    = reverseTableSize;
+    } else {
+        U_ASSERT(FALSE);    // Rule build should have failed for lack of a reverse table
+                            // before reaching this point.
+    }
+        
+
+    data->fTrie          = data->fSRTable + data->fSRTableLen;
    data->fTrieLen       = fSetBuilder->getTrieSize();
    data->fStatusTable   = data->fTrie    + trieSize;
    data->fStatusTableLen= statusTableSize;
@ -203,9 +230,14 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
    uprv_memset(data->fReserved, 0, sizeof(data->fReserved));

    fForwardTables->exportTable((uint8_t *)data + data->fFTable);
-    fReverseTables->exportTable((uint8_t *)data + data->fRTable);
-    fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
-    fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
+    // fReverseTables->exportTable((uint8_t *)data + data->fRTable);
+    // fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
+    if (safeRevTableSize > 0) {
+        fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
+    } else {
+        fReverseTables->exportTable((uint8_t *)data + data->fSRTable);
+    }
+
    fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);

    int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
--- a/icu4c/source/common/rbbirb.h
+++ b/icu4c/source/common/rbbirb.h
@ -15,6 +15,9 @@
 #define RBBIRB_H

 #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
 #include "unicode/uobject.h"
 #include "unicode/rbbi.h"
 #include "unicode/uniset.h"
@ -207,6 +210,9 @@ struct RBBISetTableEl {
 #endif

 U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
 #endif


--- a/icu4c/source/common/rbbiscan.cpp
+++ b/icu4c/source/common/rbbiscan.cpp
@ -47,6 +47,7 @@
 //
 //------------------------------------------------------------------------------
 static const UChar gRuleSet_rule_char_pattern[]       = {
+ // Characters that may appear as literals in patterns without escaping or quoting.
 //   [    ^      [    \     p     {      Z     }     \     u    0      0    2      0
    0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30,
 //   -    \      u    0     0     7      f     ]     -     [    \      p
@ -558,6 +559,10 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
                fRB->fDefaultTree   = &fRB->fSafeRevTree;
            } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
                fRB->fLookAheadHardBreak = TRUE;
+            } else if (opt == UNICODE_STRING("quoted_literals_only", 20)) {
+                fRuleSets[kRuleSet_rule_char-128].clear();
+            } else if (opt == UNICODE_STRING("unquoted_literals",  17)) {
+                fRuleSets[kRuleSet_rule_char-128].applyPattern(UnicodeString(gRuleSet_rule_char_pattern), *fRB->fStatus);
            } else {
                error(U_BRK_UNRECOGNIZED_OPTION);
            }
--- a/icu4c/source/common/rbbisetb.cpp
+++ b/icu4c/source/common/rbbisetb.cpp
@ -250,12 +250,17 @@ void RBBISetBuilder::build() {
    // Build the Trie table for mapping UChar32 values to the corresponding
    //   range group number
    //
-    fTrie = utrie2_open(0,       //  Initial value for all code points
-                        0,       //  errorValue
+    fTrie = utrie2_open(0,       //  Initial value for all code points.
+                        0,       //  Error value for out-of-range input.
                        fStatus);

-    for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
-        utrie2_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar, rlRange->fNum, TRUE, fStatus);
+    for (rlRange = fRangeList; rlRange!=0 && U_SUCCESS(*fStatus); rlRange=rlRange->fNext) {
+        utrie2_setRange32(fTrie,
+                          rlRange->fStartChar,     // Range start
+                          rlRange->fEndChar,       // Range end (inclusive)
+                          rlRange->fNum,           // value for range
+                          TRUE,                    // Overwrite previously written values
+                          fStatus);
    }
 }

@ -265,7 +270,10 @@ void RBBISetBuilder::build() {
 //  getTrieSize()    Return the size that will be required to serialize the Trie.
 //
 //-----------------------------------------------------------------------------------
-int32_t RBBISetBuilder::getTrieSize() /*const*/ {
+int32_t RBBISetBuilder::getTrieSize()  {
+    if (U_FAILURE(*fStatus)) {
+        return 0;
+    }
    utrie2_freeze(fTrie, UTRIE2_16_VALUE_BITS, fStatus);
    fTrieSize  = utrie2_serialize(fTrie,
                                  NULL,                // Buffer
--- a/icu4c/source/common/rbbisetb.h
+++ b/icu4c/source/common/rbbisetb.h
@ -13,6 +13,9 @@
 #define RBBISETB_H

 #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
 #include "unicode/uobject.h"
 #include "rbbirb.h"
 #include "utrie2.h"
@ -108,8 +111,8 @@ private:

    RangeDescriptor       *fRangeList;      // Head of the linked list of RangeDescriptors

-    UTrie2               *fTrie;            // The mapping TRIE that is the end result of processing
-    uint32_t              fTrieSize;        //  the Unicode Sets.
+    UTrie2                *fTrie;           // The mapping TRIE that is the end result of processing
+    uint32_t               fTrieSize;       //  the Unicode Sets.

    // Groups correspond to character categories -
    //       groups of ranges that are in the same original UnicodeSets.
@ -128,4 +131,7 @@ private:


 U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
 #endif
--- a/icu4c/source/common/ucasemap.cpp
+++ b/icu4c/source/common/ucasemap.cpp
@ -20,8 +20,11 @@

 #include "unicode/utypes.h"
 #include "unicode/brkiter.h"
+#include "unicode/bytestream.h"
 #include "unicode/casemap.h"
 #include "unicode/edits.h"
+#include "unicode/stringoptions.h"
+#include "unicode/stringpiece.h"
 #include "unicode/ubrk.h"
 #include "unicode/uloc.h"
 #include "unicode/ustring.h"
@ -32,6 +35,7 @@
 #include "unicode/utf.h"
 #include "unicode/utf8.h"
 #include "unicode/utf16.h"
+#include "bytesinkutil.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "uassert.h"
@ -39,27 +43,6 @@
 #include "ucasemap_imp.h"
 #include "ustr_imp.h"

-U_NAMESPACE_BEGIN
-
-namespace {
-
-// TODO: share with UTF-16? inline in ucasemap_imp.h?
-int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
-                                   Edits *edits, UErrorCode &errorCode) {
-    if (U_SUCCESS(errorCode)) {
-        if (destIndex > destCapacity) {
-            errorCode = U_BUFFER_OVERFLOW_ERROR;
-        } else if (edits != NULL) {
-            edits->copyErrorTo(errorCode);
-        }
-    }
-    return destIndex;
-}
-
-}  // namespace
-
-U_NAMESPACE_END
-
 U_NAMESPACE_USE

 /* UCaseMap service object -------------------------------------------------- */
@ -150,152 +133,39 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {

 /* TODO(markus): Move to a new, separate utf8case.cpp file. */

+namespace {
+
 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
-static inline int32_t
-appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
-             int32_t result, const UChar *s,
-             int32_t cpLength, uint32_t options, icu::Edits *edits) {
-    UChar32 c;
-    int32_t length;
-    UErrorCode errorCode;
+inline UBool
+appendResult(int32_t cpLength, int32_t result, const UChar *s,
+             ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
+    U_ASSERT(U_SUCCESS(errorCode));

    /* decode the result */
    if(result<0) {
        /* (not) original code point */
        if(edits!=NULL) {
            edits->addUnchanged(cpLength);
-            if(options & U_OMIT_UNCHANGED_TEXT) {
-                return destIndex;
-            }
        }
-        c=~result;
-        if(destIndex<destCapacity && c<=0x7f) {  // ASCII slightly-fastpath
-            dest[destIndex++]=(uint8_t)c;
-            return destIndex;
+        if((options & U_OMIT_UNCHANGED_TEXT) == 0) {
+            ByteSinkUtil::appendCodePoint(cpLength, ~result, sink);
        }
-        length=cpLength;
    } else {
        if(result<=UCASE_MAX_STRING_LENGTH) {
            // string: "result" is the UTF-16 length
-            if(result==0) {
-                length=0;
-            } else {
-                errorCode=U_ZERO_ERROR;
-                if(destIndex<destCapacity) {
-                    u_strToUTF8((char *)(dest+destIndex), destCapacity-destIndex, &length,
-                                s, result, &errorCode);
-                } else {
-                    u_strToUTF8(NULL, 0, &length, s, result, &errorCode);
-                }
-                if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
-                    return -1;
-                }
-                if(length>(INT32_MAX-destIndex)) {
-                    return -1;  // integer overflow
-                }
-            }
-            if(edits!=NULL) {
-                edits->addReplace(cpLength, length);
-            }
-            // We might have an overflow, but we know the actual length.
-            return destIndex+length;
-        } else if(destIndex<destCapacity && result<=0x7f) {  // ASCII slightly-fastpath
-            dest[destIndex++]=(uint8_t)result;
-            if(edits!=NULL) {
-                edits->addReplace(cpLength, 1);
-            }
-            return destIndex;
+            return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode);
        } else {
-            c=result;
-            length=U8_LENGTH(c);
-            if(edits!=NULL) {
-                edits->addReplace(cpLength, length);
-            }
+            ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits);
        }
    }
-    // c>=0 single code point
-    if(length>(INT32_MAX-destIndex)) {
-        return -1;  // integer overflow
-    }
-
-    if(destIndex<destCapacity) {
-        /* append the result */
-        UBool isError=FALSE;
-        U8_APPEND(dest, destIndex, destCapacity, c, isError);
-        if(isError) {
-            /* overflow, nothing written */
-            destIndex+=length;
-        }
-    } else {
-        /* preflight */
-        destIndex+=length;
-    }
-    return destIndex;
-}
-
-static inline int32_t
-appendASCII(uint8_t *dest, int32_t destIndex, int32_t destCapacity, uint8_t c) {
-    if(destIndex<destCapacity) {
-        dest[destIndex]=c;
-    } else if(destIndex==INT32_MAX) {
-        return -1;  // integer overflow
-    }
-    return destIndex+1;
+    return TRUE;
 }

 // See unicode/utf8.h U8_APPEND_UNSAFE().
-static inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
-static inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
+inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
+inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }

-static inline int32_t
-appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar32 c) {
-    U_ASSERT(0x370 <= c && c <= 0x3ff);  // 2-byte UTF-8, main Greek block
-    if(2>(INT32_MAX-destIndex)) {
-        return -1;  // integer overflow
-    }
-    int32_t limit=destIndex+2;
-    if(limit<=destCapacity) {
-        dest+=destIndex;
-        dest[0]=getTwoByteLead(c);
-        dest[1]=getTwoByteTrail(c);
-    }
-    return limit;
-}
-
-static inline int32_t
-appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, const char *s) {
-    if(2>(INT32_MAX-destIndex)) {
-        return -1;  // integer overflow
-    }
-    int32_t limit=destIndex+2;
-    if(limit<=destCapacity) {
-        dest+=destIndex;
-        dest[0]=(uint8_t)s[0];
-        dest[1]=(uint8_t)s[1];
-    }
-    return limit;
-}
-
-static inline int32_t
-appendUnchanged(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
-                const uint8_t *s, int32_t length, uint32_t options, icu::Edits *edits) {
-    if(length>0) {
-        if(edits!=NULL) {
-            edits->addUnchanged(length);
-            if(options & U_OMIT_UNCHANGED_TEXT) {
-                return destIndex;
-            }
-        }
-        if(length>(INT32_MAX-destIndex)) {
-            return -1;  // integer overflow
-        }
-        if((destIndex+length)<=destCapacity) {
-            uprv_memcpy(dest+destIndex, s, length);
-        }
-        destIndex+=length;
-    }
-    return destIndex;
-}
+}  // namespace

 static UChar32 U_CALLCONV
 utf8_caseContextIterator(void *context, int8_t dir) {
@ -333,17 +203,15 @@ utf8_caseContextIterator(void *context, int8_t dir) {
 * Case-maps [srcStart..srcLimit[ but takes
 * context [0..srcLength[ into account.
 */
-static int32_t
+static void
 _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
-         uint8_t *dest, int32_t destCapacity,
         const uint8_t *src, UCaseContext *csc,
         int32_t srcStart, int32_t srcLimit,
-         icu::Edits *edits,
+         icu::ByteSink &sink, icu::Edits *edits,
         UErrorCode &errorCode) {
    /* case mapping loop */
    int32_t srcIndex=srcStart;
-    int32_t destIndex=0;
-    while(srcIndex<srcLimit) {
+    while (U_SUCCESS(errorCode) && srcIndex<srcLimit) {
        int32_t cpStart;
        csc->cpStart=cpStart=srcIndex;
        UChar32 c;
@ -351,45 +219,32 @@ _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
        csc->cpLimit=srcIndex;
        if(c<0) {
            // Malformed UTF-8.
-            destIndex=appendUnchanged(dest, destIndex, destCapacity,
-                                      src+cpStart, srcIndex-cpStart, options, edits);
-            if(destIndex<0) {
-                errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                return 0;
-            }
-            continue;
-        }
-        const UChar *s;
-        c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
-        destIndex = appendResult(dest, destIndex, destCapacity, c, s,
-                                 srcIndex - cpStart, options, edits);
-        if (destIndex < 0) {
-            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
-            return 0;
+            ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
+                                          sink, options, edits, errorCode);
+        } else {
+            const UChar *s;
+            c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
+            appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
        }
    }
-
-    return destIndex;
 }

 #if !UCONFIG_NO_BREAK_ITERATION

-U_CFUNC int32_t U_CALLCONV
+U_CFUNC void U_CALLCONV
 ucasemap_internalUTF8ToTitle(
        int32_t caseLocale, uint32_t options, BreakIterator *iter,
-        uint8_t *dest, int32_t destCapacity,
        const uint8_t *src, int32_t srcLength,
-        icu::Edits *edits,
+        ByteSink &sink, icu::Edits *edits,
        UErrorCode &errorCode) {
    if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
-        return 0;
+        return;
    }

    /* set up local variables */
    UCaseContext csc=UCASECONTEXT_INITIALIZER;
    csc.p=(void *)src;
    csc.limit=srcLength;
-    int32_t destIndex=0;
    int32_t prev=0;
    UBool isFirstIndex=TRUE;

@ -434,11 +289,9 @@ ucasemap_internalUTF8ToTitle(
                    U8_NEXT(src, titleLimit, index, c);
                }
                if (prev < titleStart) {
-                    destIndex=appendUnchanged(dest, destIndex, destCapacity,
-                                              src+prev, titleStart-prev, options, edits);
-                    if(destIndex<0) {
-                        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                        return 0;
+                    if (!ByteSinkUtil::appendUnchanged(src+prev, titleStart-prev,
+                                                       sink, options, edits, errorCode)) {
+                        return;
                    }
                }
            }
@ -450,16 +303,15 @@ ucasemap_internalUTF8ToTitle(
                    csc.cpLimit=titleLimit;
                    const UChar *s;
                    c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
-                    destIndex=appendResult(dest, destIndex, destCapacity, c, s,
-                                           titleLimit-titleStart, options, edits);
+                    if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) {
+                        return;
+                    }
                } else {
                    // Malformed UTF-8.
-                    destIndex=appendUnchanged(dest, destIndex, destCapacity,
-                                              src+titleStart, titleLimit-titleStart, options, edits);
-                }
-                if(destIndex<0) {
-                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                    return 0;
+                    if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart,
+                                                       sink, options, edits, errorCode)) {
+                        return;
+                    }
                }

                /* Special case Dutch IJ titlecasing */
@ -467,22 +319,13 @@ ucasemap_internalUTF8ToTitle(
                        caseLocale == UCASE_LOC_DUTCH &&
                        (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
                    if (src[titleStart+1] == 0x006A) {
-                        destIndex=appendASCII(dest, destIndex, destCapacity, 0x004A);
-                        if(destIndex<0) {
-                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                            return 0;
-                        }
-                        if(edits!=NULL) {
-                            edits->addReplace(1, 1);
-                        }
+                        ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
                        titleLimit++;
                    } else if (src[titleStart+1] == 0x004A) {
                        // Keep the capital J from getting lowercased.
-                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
-                                                  src+titleStart+1, 1, options, edits);
-                        if(destIndex<0) {
-                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                            return 0;
+                        if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
+                                                           sink, options, edits, errorCode)) {
+                            return;
                        }
                        titleLimit++;
                    }
@ -492,26 +335,18 @@ ucasemap_internalUTF8ToTitle(
                if(titleLimit<index) {
                    if((options&U_TITLECASE_NO_LOWERCASE)==0) {
                        /* Normal operation: Lowercase the rest of the word. */
-                        destIndex+=
-                            _caseMap(
-                                caseLocale, options, ucase_toFullLower,
-                                dest+destIndex, destCapacity-destIndex,
-                                src, &csc,
-                                titleLimit, index,
-                                edits, errorCode);
-                        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
-                            errorCode=U_ZERO_ERROR;
-                        }
+                        _caseMap(caseLocale, options, ucase_toFullLower,
+                                 src, &csc,
+                                 titleLimit, index,
+                                 sink, edits, errorCode);
                        if(U_FAILURE(errorCode)) {
-                            return destIndex;
+                            return;
                        }
                    } else {
                        /* Optionally just copy the rest of the word unchanged. */
-                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
-                                                  src+titleLimit, index-titleLimit, options, edits);
-                        if(destIndex<0) {
-                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                            return 0;
+                        if (!ByteSinkUtil::appendUnchanged(src+titleLimit, index-titleLimit,
+                                                           sink, options, edits, errorCode)) {
+                            return;
                        }
                    }
                }
@ -520,8 +355,6 @@ ucasemap_internalUTF8ToTitle(

        prev=index;
    }
-
-    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
 }

 #endif
@ -546,12 +379,10 @@ UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
 }

 // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
-int32_t toUpper(uint32_t options,
-                uint8_t *dest, int32_t destCapacity,
-                const uint8_t *src, int32_t srcLength,
-                Edits *edits,
-                UErrorCode &errorCode) {
-    int32_t destIndex=0;
+void toUpper(uint32_t options,
+             const uint8_t *src, int32_t srcLength,
+             ByteSink &sink, Edits *edits,
+             UErrorCode &errorCode) {
    uint32_t state = 0;
    for (int32_t i = 0; i < srcLength;) {
        int32_t nextIndex = i;
@ -627,8 +458,10 @@ int32_t toUpper(uint32_t options,
                }
            }

-            UBool change = TRUE;
-            if (edits != NULL) {
+            UBool change;
+            if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
+                change = TRUE;  // common, simple usage
+            } else {
                // Find out first whether we are changing the text.
                U_ASSERT(0x370 <= upper && upper <= 0x3ff);  // 2-byte UTF-8, main Greek block
                change = (i + 2) > nextIndex ||
@ -664,143 +497,141 @@ int32_t toUpper(uint32_t options,
            }

            if (change) {
-                destIndex=appendTwoBytes(dest, destIndex, destCapacity, upper);
-                if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
-                    destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0308");  // restore or add a dialytika
+                ByteSinkUtil::appendTwoBytes(upper, sink);
+                if ((data & HAS_EITHER_DIALYTIKA) != 0) {
+                    sink.Append(u8"\u0308", 2);  // restore or add a dialytika
                }
-                if (destIndex >= 0 && addTonos) {
-                    destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0301");
+                if (addTonos) {
+                    sink.Append(u8"\u0301", 2);
                }
-                while (destIndex >= 0 && numYpogegrammeni > 0) {
-                    destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0399");
+                while (numYpogegrammeni > 0) {
+                    sink.Append(u8"\u0399", 2);
                    --numYpogegrammeni;
                }
-                if(destIndex<0) {
-                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                    return 0;
-                }
            }
        } else if(c>=0) {
            const UChar *s;
            c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
-            destIndex = appendResult(dest, destIndex, destCapacity, c, s,
-                                     nextIndex - i, options, edits);
-            if (destIndex < 0) {
-                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
-                return 0;
+            if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) {
+                return;
            }
        } else {
            // Malformed UTF-8.
-            destIndex=appendUnchanged(dest, destIndex, destCapacity,
-                                      src+i, nextIndex-i, options, edits);
-            if(destIndex<0) {
-                errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                return 0;
+            if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i,
+                                               sink, options, edits, errorCode)) {
+                return;
            }
        }
        i = nextIndex;
        state = nextState;
    }
-
-    return destIndex;
 }

 }  // namespace GreekUpper
 U_NAMESPACE_END

-static int32_t U_CALLCONV
+static void U_CALLCONV
 ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
-                             uint8_t *dest, int32_t destCapacity,
                             const uint8_t *src, int32_t srcLength,
-                             icu::Edits *edits,
+                             icu::ByteSink &sink, icu::Edits *edits,
                             UErrorCode &errorCode) {
    UCaseContext csc=UCASECONTEXT_INITIALIZER;
    csc.p=(void *)src;
    csc.limit=srcLength;
-    int32_t destIndex = _caseMap(
+    _caseMap(
        caseLocale, options, ucase_toFullLower,
-        dest, destCapacity,
        src, &csc, 0, srcLength,
-        edits, errorCode);
-    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
+        sink, edits, errorCode);
 }

-static int32_t U_CALLCONV
+static void U_CALLCONV
 ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
-                             uint8_t *dest, int32_t destCapacity,
                             const uint8_t *src, int32_t srcLength,
-                             icu::Edits *edits,
+                             icu::ByteSink &sink, icu::Edits *edits,
                             UErrorCode &errorCode) {
-    int32_t destIndex;
    if (caseLocale == UCASE_LOC_GREEK) {
-        destIndex = GreekUpper::toUpper(options, dest, destCapacity,
-                                        src, srcLength, edits, errorCode);
+        GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode);
    } else {
        UCaseContext csc=UCASECONTEXT_INITIALIZER;
        csc.p=(void *)src;
        csc.limit=srcLength;
-        destIndex = _caseMap(
+        _caseMap(
            caseLocale, options, ucase_toFullUpper,
-            dest, destCapacity,
            src, &csc, 0, srcLength,
-            edits, errorCode);
+            sink, edits, errorCode);
    }
-    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
 }

-static int32_t U_CALLCONV
+static void U_CALLCONV
 ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
-                          uint8_t *dest, int32_t destCapacity,
                          const uint8_t *src, int32_t srcLength,
-                          icu::Edits *edits,
+                          icu::ByteSink &sink, icu::Edits *edits,
                          UErrorCode &errorCode) {
    /* case mapping loop */
    int32_t srcIndex = 0;
-    int32_t destIndex = 0;
-    while (srcIndex < srcLength) {
+    while (U_SUCCESS(errorCode) && srcIndex < srcLength) {
        int32_t cpStart = srcIndex;
        UChar32 c;
        U8_NEXT(src, srcIndex, srcLength, c);
        if(c<0) {
            // Malformed UTF-8.
-            destIndex=appendUnchanged(dest, destIndex, destCapacity,
-                                      src+cpStart, srcIndex-cpStart, options, edits);
-            if(destIndex<0) {
-                errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                return 0;
-            }
-            continue;
-        }
-        const UChar *s;
-        c = ucase_toFullFolding(c, &s, options);
-        destIndex = appendResult(dest, destIndex, destCapacity, c, s,
-                                 srcIndex - cpStart, options, edits);
-        if (destIndex < 0) {
-            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
-            return 0;
+            ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
+                                          sink, options, edits, errorCode);
+        } else {
+            const UChar *s;
+            c = ucase_toFullFolding(c, &s, options);
+            appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
        }
    }
-
-    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
 }

-U_CFUNC int32_t
+void
 ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
-                 uint8_t *dest, int32_t destCapacity,
-                 const uint8_t *src, int32_t srcLength,
+                 const char *src, int32_t srcLength,
+                 UTF8CaseMapper *stringCaseMapper,
+                 icu::ByteSink &sink, icu::Edits *edits,
+                 UErrorCode &errorCode) {
+    /* check argument values */
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    if ((src == nullptr && srcLength != 0) || srcLength < -1) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // Get the string length.
+    if (srcLength == -1) {
+        srcLength = (int32_t)uprv_strlen((const char *)src);
+    }
+
+    if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
+        edits->reset();
+    }
+    stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
+                     (const uint8_t *)src, srcLength, sink, edits, errorCode);
+    sink.Flush();
+    if (U_SUCCESS(errorCode)) {
+        if (edits != nullptr) {
+            edits->copyErrorTo(errorCode);
+        }
+    }
+}
+
+int32_t
+ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+                 char *dest, int32_t destCapacity,
+                 const char *src, int32_t srcLength,
                 UTF8CaseMapper *stringCaseMapper,
                 icu::Edits *edits,
                 UErrorCode &errorCode) {
-    int32_t destLength;
-
    /* check argument values */
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    if( destCapacity<0 ||
        (dest==NULL && destCapacity>0) ||
-        src==NULL ||
-        srcLength<-1
+        (src==NULL && srcLength!=0) || srcLength<-1
    ) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
@ -820,12 +651,21 @@ ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_P
        return 0;
    }

+    CheckedArrayByteSink sink(dest, destCapacity);
    if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
        edits->reset();
    }
-    destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
-                                dest, destCapacity, src, srcLength, edits, errorCode);
-    return u_terminateChars((char *)dest, destCapacity, destLength, &errorCode);
+    stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
+                     (const uint8_t *)src, srcLength, sink, edits, errorCode);
+    sink.Flush();
+    if (U_SUCCESS(errorCode)) {
+        if (sink.Overflowed()) {
+            errorCode = U_BUFFER_OVERFLOW_ERROR;
+        } else if (edits != nullptr) {
+            edits->copyErrorTo(errorCode);
+        }
+    }
+    return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode);
 }

 /* public API functions */
@ -837,8 +677,8 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
                     UErrorCode *pErrorCode) {
    return ucasemap_mapUTF8(
        csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
-        (uint8_t *)dest, destCapacity,
-        (const uint8_t *)src, srcLength,
+        dest, destCapacity,
+        src, srcLength,
        ucasemap_internalUTF8ToLower, NULL, *pErrorCode);
 }

@ -849,8 +689,8 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
                     UErrorCode *pErrorCode) {
    return ucasemap_mapUTF8(
        csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
-        (uint8_t *)dest, destCapacity,
-        (const uint8_t *)src, srcLength,
+        dest, destCapacity,
+        src, srcLength,
        ucasemap_internalUTF8ToUpper, NULL, *pErrorCode);
 }

@ -861,13 +701,43 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
                      UErrorCode *pErrorCode) {
    return ucasemap_mapUTF8(
        UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
-        (uint8_t *)dest, destCapacity,
-        (const uint8_t *)src, srcLength,
+        dest, destCapacity,
+        src, srcLength,
        ucasemap_internalUTF8Fold, NULL, *pErrorCode);
 }

 U_NAMESPACE_BEGIN

+void CaseMap::utf8ToLower(
+        const char *locale, uint32_t options,
+        StringPiece src, ByteSink &sink, Edits *edits,
+        UErrorCode &errorCode) {
+    ucasemap_mapUTF8(
+        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
+        src.data(), src.length(),
+        ucasemap_internalUTF8ToLower, sink, edits, errorCode);
+}
+
+void CaseMap::utf8ToUpper(
+        const char *locale, uint32_t options,
+        StringPiece src, ByteSink &sink, Edits *edits,
+        UErrorCode &errorCode) {
+    ucasemap_mapUTF8(
+        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
+        src.data(), src.length(),
+        ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
+}
+
+void CaseMap::utf8Fold(
+        uint32_t options,
+        StringPiece src, ByteSink &sink, Edits *edits,
+        UErrorCode &errorCode) {
+    ucasemap_mapUTF8(
+        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
+        src.data(), src.length(),
+        ucasemap_internalUTF8Fold, sink, edits, errorCode);
+}
+
 int32_t CaseMap::utf8ToLower(
        const char *locale, uint32_t options,
        const char *src, int32_t srcLength,
@ -875,8 +745,8 @@ int32_t CaseMap::utf8ToLower(
        UErrorCode &errorCode) {
    return ucasemap_mapUTF8(
        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
-        (uint8_t *)dest, destCapacity,
-        (const uint8_t *)src, srcLength,
+        dest, destCapacity,
+        src, srcLength,
        ucasemap_internalUTF8ToLower, edits, errorCode);
 }

@ -887,8 +757,8 @@ int32_t CaseMap::utf8ToUpper(
        UErrorCode &errorCode) {
    return ucasemap_mapUTF8(
        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
-        (uint8_t *)dest, destCapacity,
-        (const uint8_t *)src, srcLength,
+        dest, destCapacity,
+        src, srcLength,
        ucasemap_internalUTF8ToUpper, edits, errorCode);
 }

@ -899,8 +769,8 @@ int32_t CaseMap::utf8Fold(
        UErrorCode &errorCode) {
    return ucasemap_mapUTF8(
        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
-        (uint8_t *)dest, destCapacity,
-        (const uint8_t *)src, srcLength,
+        dest, destCapacity,
+        src, srcLength,
        ucasemap_internalUTF8Fold, edits, errorCode);
 }

--- a/icu4c/source/common/ucasemap_imp.h
+++ b/icu4c/source/common/ucasemap_imp.h
@ -73,6 +73,8 @@ uprv_haveProperties(UErrorCode *pErrorCode);

 U_NAMESPACE_BEGIN

+class ByteSink;
+
 /** Returns TRUE if the options are valid. Otherwise FALSE, and sets an error. */
 inline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return FALSE; }
@ -207,39 +209,43 @@ ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITE
 * UTF-8 version of UStringCaseMapper.
 * All error checking must be done.
 * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
- * src and dest must not overlap.
 */
-typedef int32_t U_CALLCONV
+typedef void U_CALLCONV
 UTF8CaseMapper(int32_t caseLocale, uint32_t options,
 #if !UCONFIG_NO_BREAK_ITERATION
               icu::BreakIterator *iter,
 #endif
-               uint8_t *dest, int32_t destCapacity,
               const uint8_t *src, int32_t srcLength,
-               icu::Edits *edits,
+               icu::ByteSink &sink, icu::Edits *edits,
               UErrorCode &errorCode);

 #if !UCONFIG_NO_BREAK_ITERATION

 /** Implements UTF8CaseMapper. */
-U_CFUNC int32_t U_CALLCONV
+U_CFUNC void U_CALLCONV
 ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
        icu::BreakIterator *iter,
-        uint8_t *dest, int32_t destCapacity,
        const uint8_t *src, int32_t srcLength,
-        icu::Edits *edits,
+        icu::ByteSink &sink, icu::Edits *edits,
        UErrorCode &errorCode);

 #endif

+void  // ByteSink-output overload: takes (src, srcLength) plus a UTF8CaseMapper, writes through sink, returns nothing.
+ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
+                 const char *src, int32_t srcLength,
+                 UTF8CaseMapper *stringCaseMapper,
+                 icu::ByteSink &sink, icu::Edits *edits,
+                 UErrorCode &errorCode);
+
 /**
 * Implements argument checking and buffer handling
 * for UTF-8 string case mapping as a common function.
 */
-U_CFUNC int32_t
+int32_t
 ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
-                 uint8_t *dest, int32_t destCapacity,
-                 const uint8_t *src, int32_t srcLength,
+                 char *dest, int32_t destCapacity,
+                 const char *src, int32_t srcLength,
                 UTF8CaseMapper *stringCaseMapper,
                 icu::Edits *edits,
                 UErrorCode &errorCode);
--- a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp
+++ b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp
@ -31,6 +31,29 @@

 U_NAMESPACE_BEGIN

+void CaseMap::utf8ToTitle(  // ByteSink overload: titlecases src via ucasemap_internalUTF8ToTitle; output goes to sink.
+        const char *locale, uint32_t options, BreakIterator *iter,
+        StringPiece src, ByteSink &sink, Edits *edits,
+        UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {  // do nothing if the caller already has an error
+        return;
+    }
+    UText utext = UTEXT_INITIALIZER;
+    utext_openUTF8(&utext, src.data(), src.length(), &errorCode);  // wrap the source bytes so the break iterator can read them
+    LocalPointer<BreakIterator> ownedIter;  // presumably receives an iterator created by the helper below so it is released on return -- TODO confirm
+    iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
+    if (iter == nullptr) {  // no usable iterator: close the UText and give up (NOTE(review): presumably errorCode is set here -- confirm)
+        utext_close(&utext);
+        return;
+    }
+    iter->setText(&utext, errorCode);
+    ucasemap_mapUTF8(
+        ustrcase_getCaseLocale(locale), options, iter,
+        src.data(), src.length(),
+        ucasemap_internalUTF8ToTitle, sink, edits, errorCode);
+    utext_close(&utext);  // utext is closed on every path after it was opened
+}
+
 int32_t CaseMap::utf8ToTitle(
        const char *locale, uint32_t options, BreakIterator *iter,
        const char *src, int32_t srcLength,
@ -50,8 +73,8 @@ int32_t CaseMap::utf8ToTitle(
    iter->setText(&utext, errorCode);
    int32_t length=ucasemap_mapUTF8(
        ustrcase_getCaseLocale(locale), options, iter,
-        (uint8_t *)dest, destCapacity,
-        (const uint8_t *)src, srcLength,
+        dest, destCapacity,
+        src, srcLength,
        ucasemap_internalUTF8ToTitle, edits, errorCode);
    utext_close(&utext);
    return length;
@ -101,8 +124,8 @@ ucasemap_utf8ToTitle(UCaseMap *csm,
    csm->iter->setText(&utext, *pErrorCode);
    int32_t length=ucasemap_mapUTF8(
            csm->caseLocale, csm->options, csm->iter,
-            (uint8_t *)dest, destCapacity,
-            (const uint8_t *)src, srcLength,
+            dest, destCapacity,
+            src, srcLength,
            ucasemap_internalUTF8ToTitle, NULL, *pErrorCode);
    utext_close(&utext);
    return length;
--- a/icu4c/source/common/ucnv_u16.cpp
+++ b/icu4c/source/common/ucnv_u16.cpp
@ -1323,9 +1323,17 @@ _UTF16GetName(const UConverter *cnv) {
 U_CDECL_END
 extern const UConverterSharedData _UTF16Data;

-#define IS_UTF16BE(cnv) ((cnv)->sharedData==&_UTF16BEData)
-#define IS_UTF16LE(cnv) ((cnv)->sharedData==&_UTF16LEData)
-#define IS_UTF16(cnv) ((cnv)->sharedData==&_UTF16Data || (cnv)->sharedData==&_UTF16v2Data)
+static inline bool IS_UTF16BE(const UConverter *cnv) {  // replaces the former macro of the same name
+    return ((cnv)->sharedData == &_UTF16BEData);  // pointer identity against the _UTF16BEData shared-data object
+}
+
+static inline bool IS_UTF16LE(const UConverter *cnv) {  // replaces the former macro of the same name
+    return ((cnv)->sharedData == &_UTF16LEData);  // pointer identity against the _UTF16LEData shared-data object
+}
+
+static inline bool IS_UTF16(const UConverter *cnv) {  // replaces the former macro of the same name
+    return ((cnv)->sharedData==&_UTF16Data) || ((cnv)->sharedData == &_UTF16v2Data);  // true for either _UTF16Data or _UTF16v2Data
+}

 U_CDECL_BEGIN
 static void U_CALLCONV
--- a/icu4c/source/common/ucnv_u8.cpp
+++ b/icu4c/source/common/ucnv_u8.cpp
@ -31,6 +31,7 @@
 #include "ucnv_bld.h"
 #include "ucnv_cnv.h"
 #include "cmemory.h"
+#include "ustr_imp.h"

 /* Prototypes --------------------------------------------------------------- */

@ -44,51 +45,13 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args

 /* UTF-8 -------------------------------------------------------------------- */

-/* UTF-8 Conversion DATA
- *   for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9
- */
-/*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/
 #define MAXIMUM_UCS2            0x0000FFFF
-#define MAXIMUM_UTF             0x0010FFFF
-#define MAXIMUM_UCS4            0x7FFFFFFF
-#define HALF_SHIFT              10
-#define HALF_BASE               0x0010000
-#define HALF_MASK               0x3FF
-#define SURROGATE_HIGH_START    0xD800
-#define SURROGATE_HIGH_END      0xDBFF
-#define SURROGATE_LOW_START     0xDC00
-#define SURROGATE_LOW_END       0xDFFF

-/* -SURROGATE_LOW_START + HALF_BASE */
-#define SURROGATE_LOW_BASE      9216
-
-static const uint32_t offsetsFromUTF8[7] = {0,
+static const uint32_t offsetsFromUTF8[5] = {0,
  (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080,
-  (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080
+  (uint32_t) 0x03C82080
 };

-/* END OF UTF-8 Conversion DATA */
-
-static const int8_t bytesFromUTF8[256] = {
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
-};
-
-/*
- * Starting with Unicode 3.0.1:
- * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N];
- * byte sequences with more than 4 bytes are illegal in UTF-8,
- * which is tested with impossible values for them
- */
-static const uint32_t
-utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
-
 static UBool hasCESU8Data(const UConverter *cnv)
 {
 #if UCONFIG_ONLY_HTML_CONVERSION
@ -127,7 +90,7 @@ static void  U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
    while (mySource < sourceLimit && myTarget < targetLimit)
    {
        ch = *(mySource++);
-        if (ch < 0x80)        /* Simple case */
+        if (U8_IS_SINGLE(ch))        /* Simple case */
        {
            *(myTarget++) = (UChar) ch;
        }
@ -135,7 +98,7 @@ static void  U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
        {
            /* store the first char */
            toUBytes[0] = (char)ch;
-            inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */
+            inBytes = U8_COUNT_BYTES_NON_ASCII(ch); /* lookup current sequence length */
            i = 1;

 morebytes:
@ -144,7 +107,8 @@ morebytes:
                if (mySource < sourceLimit)
                {
                    toUBytes[i] = (char) (ch2 = *mySource);
-                    if (!U8_IS_TRAIL(ch2))
+                    if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
+                            !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
                    {
                        break; /* i < inBytes */
                    }
@ -162,24 +126,12 @@ morebytes:
                }
            }

-            /* Remove the accumulated high bits */
-            ch -= offsetsFromUTF8[inBytes];
-
-            /*
-             * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
-             * - use only trail bytes after a lead byte (checked above)
-             * - use the right number of trail bytes for a given lead byte
-             * - encode a code point <= U+10ffff
-             * - use the fewest possible number of bytes for their code points
-             * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
-             *
-             * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
-             * There are no irregular sequences any more.
-             * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
-             */
-            if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
-                (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
+            // In CESU-8, only surrogates, not supplementary code points, are encoded directly.
+            if (i == inBytes && (!isCESU8 || i <= 3))
            {
+                /* Remove the accumulated high bits */
+                ch -= offsetsFromUTF8[inBytes];
+
                /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
                if (ch <= MAXIMUM_UCS2) 
                {
@ -189,9 +141,8 @@ morebytes:
                else
                {
                    /* write out the surrogates */
-                    ch -= HALF_BASE;
-                    *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
-                    ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
+                    *(myTarget++) = U16_LEAD(ch);
+                    ch = U16_TRAIL(ch);
                    if (myTarget < targetLimit)
                    {
                        *(myTarget++) = (UChar)ch;
@ -256,7 +207,7 @@ static void  U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
    while (mySource < sourceLimit && myTarget < targetLimit)
    {
        ch = *(mySource++);
-        if (ch < 0x80)        /* Simple case */
+        if (U8_IS_SINGLE(ch))        /* Simple case */
        {
            *(myTarget++) = (UChar) ch;
            *(myOffsets++) = offsetNum++;
@ -264,7 +215,7 @@ static void  U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
        else
        {
            toUBytes[0] = (char)ch;
-            inBytes = bytesFromUTF8[ch];
+            inBytes = U8_COUNT_BYTES_NON_ASCII(ch);
            i = 1;

 morebytes:
@ -273,7 +224,8 @@ morebytes:
                if (mySource < sourceLimit)
                {
                    toUBytes[i] = (char) (ch2 = *mySource);
-                    if (!U8_IS_TRAIL(ch2))
+                    if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
+                            !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
                    {
                        break; /* i < inBytes */
                    }
@ -290,24 +242,12 @@ morebytes:
                }
            }

-            /* Remove the accumulated high bits */
-            ch -= offsetsFromUTF8[inBytes];
-
-            /*
-             * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
-             * - use only trail bytes after a lead byte (checked above)
-             * - use the right number of trail bytes for a given lead byte
-             * - encode a code point <= U+10ffff
-             * - use the fewest possible number of bytes for their code points
-             * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
-             *
-             * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
-             * There are no irregular sequences any more.
-             * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
-             */
-            if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
-                (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
+            // In CESU-8, only surrogates, not supplementary code points, are encoded directly.
+            if (i == inBytes && (!isCESU8 || i <= 3))
            {
+                /* Remove the accumulated high bits */
+                ch -= offsetsFromUTF8[inBytes];
+
                /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
                if (ch <= MAXIMUM_UCS2) 
                {
@ -318,10 +258,9 @@ morebytes:
                else
                {
                    /* write out the surrogates */
-                    ch -= HALF_BASE;
-                    *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
+                    *(myTarget++) = U16_LEAD(ch);
                    *(myOffsets++) = offsetNum;
-                    ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
+                    ch = U16_TRAIL(ch);
                    if (myTarget < targetLimit)
                    {
                        *(myTarget++) = (UChar)ch;
@ -616,10 +555,9 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
    UConverter *cnv;
    const uint8_t *sourceInitial;
    const uint8_t *source;
-    uint16_t extraBytesToWrite;
    uint8_t myByte;
    UChar32 ch;
-    int8_t i, isLegalSequence;
+    int8_t i;

    /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */

@ -633,14 +571,14 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
    }

    myByte = (uint8_t)*(source++);
-    if (myByte < 0x80)
+    if (U8_IS_SINGLE(myByte))
    {
        args->source = (const char *)source;
        return (UChar32)myByte;
    }

-    extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte];
-    if (extraBytesToWrite == 0) {
+    uint16_t countTrailBytes = U8_COUNT_TRAIL_BYTES(myByte);
+    if (countTrailBytes == 0) {
        cnv->toUBytes[0] = myByte;
        cnv->toULength = 1;
        *err = U_ILLEGAL_CHAR_FOUND;
@ -649,15 +587,17 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
    }

    /*The byte sequence is longer than the buffer area passed*/
-    if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit)
+    if (((const char *)source + countTrailBytes) > args->sourceLimit)
    {
        /* check if all of the remaining bytes are trail bytes */
+        uint16_t extraBytesToWrite = countTrailBytes + 1;
        cnv->toUBytes[0] = myByte;
        i = 1;
        *err = U_TRUNCATED_CHAR_FOUND;
        while(source < (const uint8_t *)args->sourceLimit) {
-            if(U8_IS_TRAIL(myByte = *source)) {
-                cnv->toUBytes[i++] = myByte;
+            uint8_t b = *source;
+            if(icu::UTF8::isValidTrail(myByte, b, i, extraBytesToWrite)) {
+                cnv->toUBytes[i++] = b;
                ++source;
            } else {
                /* error even before we run out of input */
@ -670,81 +610,28 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
        return 0xffff;
    }

-    isLegalSequence = 1;
    ch = myByte << 6;
-    switch(extraBytesToWrite)
-    {     
-      /* note: code falls through cases! (sic)*/ 
-    case 6:
-        ch += (myByte = *source);
-        ch <<= 6;
-        if (!U8_IS_TRAIL(myByte))
-        {
-            isLegalSequence = 0;
-            break;
+    if(countTrailBytes == 2) {
+        uint8_t t1 = *source, t2;
+        if(U8_IS_VALID_LEAD3_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source)) {
+            args->source = (const char *)(source + 1);
+            return (((ch + t1) << 6) + t2) - offsetsFromUTF8[3];
        }
-        ++source;
-        U_FALLTHROUGH;
-    case 5:
-        ch += (myByte = *source);
-        ch <<= 6;
-        if (!U8_IS_TRAIL(myByte))
-        {
-            isLegalSequence = 0;
-            break;
+    } else if(countTrailBytes == 1) {
+        uint8_t t1 = *source;
+        if(U8_IS_TRAIL(t1)) {
+            args->source = (const char *)(source + 1);
+            return (ch + t1) - offsetsFromUTF8[2];
        }
-        ++source;
-        U_FALLTHROUGH;
-    case 4:
-        ch += (myByte = *source);
-        ch <<= 6;
-        if (!U8_IS_TRAIL(myByte))
-        {
-            isLegalSequence = 0;
-            break;
+    } else {  // countTrailBytes == 3
+        uint8_t t1 = *source, t2, t3;
+        if(U8_IS_VALID_LEAD4_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source) &&
+                U8_IS_TRAIL(t3 = *++source)) {
+            args->source = (const char *)(source + 1);
+            return (((((ch + t1) << 6) + t2) << 6) + t3) - offsetsFromUTF8[4];
        }
-        ++source;
-        U_FALLTHROUGH;
-    case 3:
-        ch += (myByte = *source);
-        ch <<= 6;
-        if (!U8_IS_TRAIL(myByte))
-        {
-            isLegalSequence = 0;
-            break;
-        }
-        ++source;
-        U_FALLTHROUGH;
-    case 2:
-        ch += (myByte = *source);
-        if (!U8_IS_TRAIL(myByte))
-        {
-            isLegalSequence = 0;
-            break;
-        }
-        ++source;
-    };
-    ch -= offsetsFromUTF8[extraBytesToWrite];
-    args->source = (const char *)source;
-
-    /*
-     * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
-     * - use only trail bytes after a lead byte (checked above)
-     * - use the right number of trail bytes for a given lead byte
-     * - encode a code point <= U+10ffff
-     * - use the fewest possible number of bytes for their code points
-     * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
-     *
-     * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
-     * There are no irregular sequences any more.
-     */
-    if (isLegalSequence &&
-        (uint32_t)ch <= MAXIMUM_UTF &&
-        (uint32_t)ch >= utf8_minChar32[extraBytesToWrite] &&
-        !U_IS_SURROGATE(ch)
-    ) {
-        return ch; /* return the code point */
    }
+    args->source = (const char *)source;

    for(i = 0; sourceInitial < source; ++i) {
        cnv->toUBytes[i] = *sourceInitial++;
@ -757,14 +644,6 @@ U_CDECL_END

 /* UTF-8-from-UTF-8 conversion functions ------------------------------------ */

-/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
-static const UChar32
-utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
-
-/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
-static const UChar32
-utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
-
 U_CDECL_BEGIN
 /* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */
 static void U_CALLCONV
@ -812,39 +691,35 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
        *pErrorCode=U_USING_DEFAULT_WARNING;
        return;
    } else {
-        /*
-         * Use a single counter for source and target, counting the minimum of
-         * the source length and the target capacity.
-         * As a result, the source length is checked only once per multi-byte
-         * character instead of twice.
-         *
-         * Make sure that the last byte sequence is complete, or else
-         * stop just before it.
-         * (The longest legal byte sequence has 3 trail bytes.)
-         * Count oldToULength (number of source bytes from a previous buffer)
-         * into the source length but reduce the source index by toULimit
-         * while going back over trail bytes in order to not go back into
-         * the bytes that will be read for finishing a partial
-         * sequence from the previous buffer.
-         * Let the standard converter handle edge cases.
-         */
-        int32_t i;
-
+        // Use a single counter for source and target, counting the minimum of
+        // the source length and the target capacity.
+        // Let the standard converter handle edge cases.
        if(count>targetCapacity) {
            count=targetCapacity;
        }

-        i=0;
-        while(i<3 && i<(count-toULimit)) {
-            b=source[count-oldToULength-i-1];
-            if(U8_IS_TRAIL(b)) {
-                ++i;
-            } else {
-                if(i<U8_COUNT_TRAIL_BYTES(b)) {
-                    /* stop converting before the lead byte if there are not enough trail bytes for it */
-                    count-=i+1;
+        // The conversion loop checks count>0 only once per 1/2/3-byte character.
+        // If the buffer ends with a truncated 2- or 3-byte sequence,
+        // then we reduce the count to stop before that,
+        // and collect the remaining bytes after the conversion loop.
+        {
+            // Do not go back into the bytes that will be read for finishing a partial
+            // sequence from the previous buffer.
+            int32_t length=count-toULimit;
+            if(length>0) {
+                uint8_t b1=source[count-oldToULength-1];  // last byte of the count-limited window; count may have been clamped to targetCapacity, so sourceLimit-1 can lie beyond it
+                if(U8_IS_SINGLE(b1)) {
+                    // common ASCII character
+                } else if(U8_IS_TRAIL(b1) && length>=2) {
+                    uint8_t b2=source[count-oldToULength-2];  // second-to-last byte of the same window (index is >=0 because length>=2)
+                    if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+                        // truncated 3-byte sequence
+                        count-=2;
+                    }
+                } else if(0xc2<=b1 && b1<0xf0) {
+                    // truncated 2- or 3-byte sequence
+                    --count;
                 }
-                break;
             }
         }
    }
@ -859,17 +734,17 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
    /* conversion loop */
    while(count>0) {
        b=*source++;
-        if((int8_t)b>=0) {
+        if(U8_IS_SINGLE(b)) {
            /* convert ASCII */
            *target++=b;
            --count;
            continue;
        } else {
-            if(b>0xe0) {
-                if( /* handle U+1000..U+D7FF inline */
-                    (t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) ||
-                                               (b==0xed && (t1 <= 0x9f))) &&
-                    (t2=source[1]) >= 0x80 && t2 <= 0xbf
+            if(b>=0xe0) {
+                if( /* handle U+0800..U+FFFF inline */
+                    b<0xf0 &&
+                    U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
+                    U8_IS_TRAIL(t2=source[1])
                ) {
                    source+=2;
                    *target++=b;
@ -878,10 +753,10 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                    count-=3;
                    continue;
                }
-            } else if(b<0xe0) {
+            } else {
                if( /* handle U+0080..U+07FF inline */
                    b>=0xc2 &&
-                    (t1=*source) >= 0x80 && t1 <= 0xbf
+                    U8_IS_TRAIL(t1=*source)
                ) {
                    ++source;
                    *target++=b;
@ -889,30 +764,18 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                    count-=2;
                    continue;
                }
-            } else if(b==0xe0) {
-                if( /* handle U+0800..U+0FFF inline */
-                    (t1=source[0]) >= 0xa0 && t1 <= 0xbf &&
-                    (t2=source[1]) >= 0x80 && t2 <= 0xbf
-                ) {
-                    source+=2;
-                    *target++=b;
-                    *target++=t1;
-                    *target++=t2;
-                    count-=3;
-                    continue;
-                }
            }

            /* handle "complicated" and error cases, and continuing partial characters */
            oldToULength=0;
            toULength=1;
-            toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
+            toULimit=U8_COUNT_BYTES_NON_ASCII(b);
            c=b;
 moreBytes:
            while(toULength<toULimit) {
                if(source<sourceLimit) {
                    b=*source;
-                    if(U8_IS_TRAIL(b)) {
+                    if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
                        ++source;
                        ++toULength;
                        c=(c<<6)+b;
@ -934,18 +797,7 @@ moreBytes:
                }
            }

-            if( toULength==toULimit &&      /* consumed all trail bytes */
-                (toULength==3 || toULength==2) &&             /* BMP */
-                (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
-                (c<=0xd7ff || 0xe000<=c)    /* not a surrogate */
-            ) {
-                /* legal byte sequence for BMP code point */
-            } else if(
-                toULength==toULimit && toULength==4 &&
-                (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
-            ) {
-                /* legal byte sequence for supplementary code point */
-            } else {
+            if(toULength!=toULimit) {
                /* error handling: illegal UTF-8 byte sequence */
                source-=(toULength-oldToULength);
                while(oldToULength<toULength) {
@ -979,7 +831,7 @@ moreBytes:
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        } else {
            b=*source;
-            toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
+            toULimit=U8_COUNT_BYTES(b);
            if(toULimit>(sourceLimit-source)) {
                /* collect a truncated byte sequence */
                toULength=0;
--- a/icu4c/source/common/ucnvlat1.cpp
+++ b/icu4c/source/common/ucnvlat1.cpp
@ -23,6 +23,7 @@
 #include "unicode/utf8.h"
 #include "ucnv_bld.h"
 #include "ucnv_cnv.h"
+#include "ustr_imp.h"

 /* control optimizations according to the platform */
 #define LATIN1_UNROLL_FROM_UNICODE 1
@ -374,7 +375,7 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
    while(source<sourceLimit) {
        if(targetCapacity>0) {
            b=*source++;
-            if((int8_t)b>=0) {
+            if(U8_IS_SINGLE(b)) {
                /* convert ASCII */
                *target++=(uint8_t)b;
                --targetCapacity;
@ -409,7 +410,7 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
    if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
        utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
        utf8->toULength=1;
-        utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1;
+        utf8->mode=U8_COUNT_BYTES(b);
    }

    /* write back the updated pointers */
--- a/icu4c/source/common/ucnvmbcs.cpp
+++ b/icu4c/source/common/ucnvmbcs.cpp
@ -59,6 +59,7 @@
 #include "cmemory.h"
 #include "cstring.h"
 #include "umutex.h"
+#include "ustr_imp.h"

 /* control optimizations according to the platform */
 #define MBCS_UNROLL_SINGLE_TO_BMP 1
@ -5011,13 +5012,9 @@ ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,

 /* MBCS-from-UTF-8 conversion functions ------------------------------------- */

-/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
-static const UChar32
-utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
-
 /* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
 static const UChar32
-utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
+utf8_offsets[5]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };

 static void U_CALLCONV
 ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
@ -5075,28 +5072,27 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
        toULength=oldToULength=toULimit=0;
    }

-    /*
-     * Make sure that the last byte sequence before sourceLimit is complete
-     * or runs into a lead byte.
-     * Do not go back into the bytes that will be read for finishing a partial
-     * sequence from the previous buffer.
-     * In the conversion loop compare source with sourceLimit only once
-     * per multi-byte character.
-     */
+    // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
+    // If the buffer ends with a truncated 2- or 3-byte sequence,
+    // then we reduce the sourceLimit to before that,
+    // and collect the remaining bytes after the conversion loop.
    {
-        int32_t i, length;
-
-        length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
-        for(i=0; i<3 && i<length;) {
-            b=*(sourceLimit-i-1);
-            if(U8_IS_TRAIL(b)) {
-                ++i;
-            } else {
-                if(i<U8_COUNT_TRAIL_BYTES(b)) {
-                    /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
-                    sourceLimit-=i+1;
+        // Do not go back into the bytes that will be read for finishing a partial
+        // sequence from the previous buffer.
+        int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
+        if(length>0) {
+            uint8_t b1=*(sourceLimit-1);
+            if(U8_IS_SINGLE(b1)) {
+                // common ASCII character
+            } else if(U8_IS_TRAIL(b1) && length>=2) {
+                uint8_t b2=*(sourceLimit-2);
+                if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+                    // truncated 3-byte sequence
+                    sourceLimit-=2;
                }
-                break;
+            } else if(0xc2<=b1 && b1<0xf0) {
+                // truncated 2- or 3-byte sequence
+                --sourceLimit;
            }
        }
    }
@ -5130,7 +5126,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
    while(source<sourceLimit) {
        if(targetCapacity>0) {
            b=*source++;
-            if((int8_t)b>=0) {
+            if(U8_IS_SINGLE(b)) {
                /* convert ASCII */
                if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
                    *target++=(uint8_t)b;
@ -5185,7 +5181,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                    /* handle "complicated" and error cases, and continuing partial characters */
                    oldToULength=0;
                    toULength=1;
-                    toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
+                    toULimit=U8_COUNT_BYTES_NON_ASCII(b);
                    c=b;
 moreBytes:
                    while(toULength<toULimit) {
@ -5198,7 +5194,7 @@ moreBytes:
                         */
                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
                            b=*source;
-                            if(U8_IS_TRAIL(b)) {
+                            if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
                                ++source;
                                ++toULength;
                                c=(c<<6)+b;
@ -5220,22 +5216,18 @@ moreBytes:
                        }
                    }

-                    if( toULength==toULimit &&      /* consumed all trail bytes */
-                        (toULength==3 || toULength==2) &&             /* BMP */
-                        (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
-                        (c<=0xd7ff || 0xe000<=c)    /* not a surrogate */
-                    ) {
-                        value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
-                    } else if(
-                        toULength==toULimit && toULength==4 &&
-                        (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
-                    ) {
-                        /* supplementary code point */
-                        if(!hasSupplementary) {
-                            /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
-                            value=0;
-                        } else {
+                    if(toULength==toULimit) {
+                        c-=utf8_offsets[toULength];
+                        if(toULength<=3) {  /* BMP */
                            value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+                        } else {
+                            /* supplementary code point */
+                            if(!hasSupplementary) {
+                                /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+                                value=0;
+                            } else {
+                                value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+                            }
                        }
                    } else {
                        /* error handling: illegal UTF-8 byte sequence */
@ -5310,7 +5302,7 @@ moreBytes:
            source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
        c=utf8->toUBytes[0]=b=*source++;
        toULength=1;
-        toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
+        toULimit=U8_COUNT_BYTES(b);
        while(source<sourceLimit) {
            utf8->toUBytes[toULength++]=b=*source++;
            c=(c<<6)+b;
@ -5375,28 +5367,27 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
        toULength=oldToULength=toULimit=0;
    }

-    /*
-     * Make sure that the last byte sequence before sourceLimit is complete
-     * or runs into a lead byte.
-     * Do not go back into the bytes that will be read for finishing a partial
-     * sequence from the previous buffer.
-     * In the conversion loop compare source with sourceLimit only once
-     * per multi-byte character.
-     */
+    // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
+    // If the buffer ends with a truncated 2- or 3-byte sequence,
+    // then we reduce the sourceLimit to before that,
+    // and collect the remaining bytes after the conversion loop.
    {
-        int32_t i, length;
-
-        length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
-        for(i=0; i<3 && i<length;) {
-            b=*(sourceLimit-i-1);
-            if(U8_IS_TRAIL(b)) {
-                ++i;
-            } else {
-                if(i<U8_COUNT_TRAIL_BYTES(b)) {
-                    /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
-                    sourceLimit-=i+1;
+        // Do not go back into the bytes that will be read for finishing a partial
+        // sequence from the previous buffer.
+        int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
+        if(length>0) {
+            uint8_t b1=*(sourceLimit-1);
+            if(U8_IS_SINGLE(b1)) {
+                // common ASCII character
+            } else if(U8_IS_TRAIL(b1) && length>=2) {
+                uint8_t b2=*(sourceLimit-2);
+                if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+                    // truncated 3-byte sequence
+                    sourceLimit-=2;
                }
-                break;
+            } else if(0xc2<=b1 && b1<0xf0) {
+                // truncated 2- or 3-byte sequence
+                --sourceLimit;
            }
        }
    }
@ -5412,7 +5403,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
    while(source<sourceLimit) {
        if(targetCapacity>0) {
            b=*source++;
-            if((int8_t)b>=0) {
+            if(U8_IS_SINGLE(b)) {
                /* convert ASCII */
                if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
                    *target++=b;
@ -5426,13 +5417,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                    }
                }
            } else {
-                if(b>0xe0) {
-                    if( /* handle U+1000..U+D7FF inline */
-                        (((t1=(uint8_t)(source[0]-0x80), b<0xed) && (t1 <= 0x3f)) ||
-                                                        (b==0xed && (t1 <= 0x1f))) &&
+                if(b>=0xe0) {
+                    if( /* handle U+0800..U+D7FF inline */
+                        b<=0xed &&  // do not assume maxFastUChar>0xd7ff
+                        U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
                        (t2=(uint8_t)(source[1]-0x80)) <= 0x3f
                    ) {
-                        c=((b&0xf)<<6)|t1;
+                        c=((b&0xf)<<6)|(t1&0x3f);
                        source+=2;
                        value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
                        if(value==0) {
@ -5442,7 +5433,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                    } else {
                        c=-1;
                    }
-                } else if(b<0xe0) {
+                } else {
                    if( /* handle U+0080..U+07FF inline */
                        b>=0xc2 &&
                        (t1=(uint8_t)(*source-0x80)) <= 0x3f
@ -5457,15 +5448,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                    } else {
                        c=-1;
                    }
-                } else {
-                    c=-1;
                }

                if(c<0) {
                    /* handle "complicated" and error cases, and continuing partial characters */
                    oldToULength=0;
                    toULength=1;
-                    toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
+                    toULimit=U8_COUNT_BYTES_NON_ASCII(b);
                    c=b;
 moreBytes:
                    while(toULength<toULimit) {
@ -5478,7 +5467,7 @@ moreBytes:
                         */
                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
                            b=*source;
-                            if(U8_IS_TRAIL(b)) {
+                            if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
                                ++source;
                                ++toULength;
                                c=(c<<6)+b;
@ -5500,22 +5489,18 @@ moreBytes:
                        }
                    }

-                    if( toULength==toULimit &&      /* consumed all trail bytes */
-                        (toULength==3 || toULength==2) &&             /* BMP */
-                        (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
-                        (c<=0xd7ff || 0xe000<=c)    /* not a surrogate */
-                    ) {
-                        stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
-                    } else if(
-                        toULength==toULimit && toULength==4 &&
-                        (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
-                    ) {
-                        /* supplementary code point */
-                        if(!hasSupplementary) {
-                            /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
-                            stage2Entry=0;
-                        } else {
+                    if(toULength==toULimit) {
+                        c-=utf8_offsets[toULength];
+                        if(toULength<=3) {  /* BMP */
                            stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
+                        } else {
+                            /* supplementary code point */
+                            if(!hasSupplementary) {
+                                /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+                                stage2Entry=0;
+                            } else {
+                                stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
+                            }
                        }
                    } else {
                        /* error handling: illegal UTF-8 byte sequence */
@ -5620,7 +5605,7 @@ unassigned:
            source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
        c=utf8->toUBytes[0]=b=*source++;
        toULength=1;
-        toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
+        toULimit=U8_COUNT_BYTES(b);
        while(source<sourceLimit) {
            utf8->toUBytes[toULength++]=b=*source++;
            c=(c<<6)+b;
--- a/icu4c/source/common/uhash.cpp
+++ b/icu4c/source/common/uhash.cpp
@ -79,14 +79,14 @@
 * prime number while being less than a power of two.
 */
 static const int32_t PRIMES[] = {
-    13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
+    7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
    65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
    16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
    1073741789, 2147483647 /*, 4294967291 */
 };

 #define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
-#define DEFAULT_PRIME_INDEX 3
+#define DEFAULT_PRIME_INDEX 4

 /* These ratios are tuned to the PRIMES array such that a resize
 * places the table back into the zone of non-resizing.  That is,
@ -231,7 +231,7 @@ _uhash_allocate(UHashtable *hash,

    emptytok.pointer = NULL; /* Only one of these two is needed */
    emptytok.integer = 0;    /* but we don't know which one. */
-    
+
    limit = p + hash->length;
    while (p < limit) {
        p->key = emptytok;
@ -247,7 +247,7 @@ _uhash_allocate(UHashtable *hash,

 static UHashtable*
 _uhash_init(UHashtable *result,
-              UHashFunction *keyHash, 
+              UHashFunction *keyHash,
              UKeyComparator *keyComp,
              UValueComparator *valueComp,
              int32_t primeIndex,
@ -275,7 +275,7 @@ _uhash_init(UHashtable *result,
 }

 static UHashtable*
-_uhash_create(UHashFunction *keyHash, 
+_uhash_create(UHashFunction *keyHash,
              UKeyComparator *keyComp,
              UValueComparator *valueComp,
              int32_t primeIndex,
@ -415,7 +415,7 @@ _uhash_rehash(UHashtable *hash, UErrorCode *status) {

    if (U_FAILURE(*status)) {
        hash->elements = old;
-        hash->length = oldLength;       
+        hash->length = oldLength;
        return;
    }

@ -536,7 +536,7 @@ _uhash_put(UHashtable *hash,
 ********************************************************************/

 U_CAPI UHashtable* U_EXPORT2
-uhash_open(UHashFunction *keyHash, 
+uhash_open(UHashFunction *keyHash,
           UKeyComparator *keyComp,
           UValueComparator *valueComp,
           UErrorCode *status) {
@ -545,7 +545,7 @@ uhash_open(UHashFunction *keyHash,
 }

 U_CAPI UHashtable* U_EXPORT2
-uhash_openSize(UHashFunction *keyHash, 
+uhash_openSize(UHashFunction *keyHash,
               UKeyComparator *keyComp,
               UValueComparator *valueComp,
               int32_t size,
@ -562,7 +562,7 @@ uhash_openSize(UHashFunction *keyHash,

 U_CAPI UHashtable* U_EXPORT2
 uhash_init(UHashtable *fillinResult,
-           UHashFunction *keyHash, 
+           UHashFunction *keyHash,
           UKeyComparator *keyComp,
           UValueComparator *valueComp,
           UErrorCode *status) {
@ -570,6 +570,22 @@ uhash_init(UHashtable *fillinResult,
    return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
 }

+U_CAPI UHashtable* U_EXPORT2
+uhash_initSize(UHashtable *fillinResult,
+               UHashFunction *keyHash,
+               UKeyComparator *keyComp,
+               UValueComparator *valueComp,
+               int32_t size,
+               UErrorCode *status) {
+
+    // Find the smallest index i for which PRIMES[i] >= size.
+    int32_t i = 0;
+    while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
+        ++i;
+    }
+    return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status);
+}
+
 U_CAPI void U_EXPORT2
 uhash_close(UHashtable *hash) {
    if (hash == NULL) {
@ -604,7 +620,7 @@ uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
    hash->keyComparator = fn;
    return result;
 }
-U_CAPI UValueComparator *U_EXPORT2 
+U_CAPI UValueComparator *U_EXPORT2
 uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
    UValueComparator *result = hash->valueComparator;
    hash->valueComparator = fn;
@ -630,7 +646,7 @@ uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
    UErrorCode status = U_ZERO_ERROR;
    _uhash_internalSetResizePolicy(hash, policy);
    hash->lowWaterMark  = (int32_t)(hash->length * hash->lowWaterRatio);
-    hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);    
+    hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
    _uhash_rehash(hash, &status);
 }

@ -853,7 +869,7 @@ uhash_hashIChars(const UHashTok key) {
    return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
 }

-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
 uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
    int32_t count1, count2, pos, i;

@ -886,14 +902,14 @@ uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
    if(count1!=count2){
        return FALSE;
    }
-    
+
    pos=UHASH_FIRST;
    for(i=0; i<count1; i++){
        const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
        const UHashTok key1 = elem1->key;
        const UHashTok val1 = elem1->value;
        /* here the keys are not compared, instead the key form hash1 is used to fetch
-         * value from hash2. If the hashes are equal then then both hashes should 
+         * value from hash2. If the hashes are equal then both hashes should
         * contain equal values for the same key!
         */
        const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
--- a/icu4c/source/common/uhash.h
+++ b/icu4c/source/common/uhash.h
@ -154,7 +154,7 @@ struct UHashtable {
                                   * If NULL won't do anything */

    /* Size parameters */
-  
+
    int32_t     count;      /* The number of key-value pairs in this table.
                             * 0 <= count <= length.  In practice we
                             * never let count == length (see code). */
@ -162,12 +162,12 @@ struct UHashtable {
                             * and values.  Must be prime. */

    /* Rehashing thresholds */
-    
+
    int32_t     highWaterMark;  /* If count > highWaterMark, rehash */
    int32_t     lowWaterMark;   /* If count < lowWaterMark, rehash */
    float       highWaterRatio; /* 0..1; high water as a fraction of length */
    float       lowWaterRatio;  /* 0..1; low water as a fraction of length */
-    
+
    int8_t      primeIndex;     /* Index into our prime table for length.
                                 * length == PRIMES[primeIndex] */
    UBool       allocated; /* Was this UHashtable allocated? */
@ -190,7 +190,7 @@ U_CDECL_END
 * @return A pointer to a UHashtable, or 0 if an error occurred.
 * @see uhash_openSize
 */
-U_CAPI UHashtable* U_EXPORT2 
+U_CAPI UHashtable* U_EXPORT2
 uhash_open(UHashFunction *keyHash,
           UKeyComparator *keyComp,
           UValueComparator *valueComp,
@ -207,7 +207,7 @@ uhash_open(UHashFunction *keyHash,
 * @return A pointer to a UHashtable, or 0 if an error occurred.
 * @see uhash_open
 */
-U_CAPI UHashtable* U_EXPORT2 
+U_CAPI UHashtable* U_EXPORT2
 uhash_openSize(UHashFunction *keyHash,
               UKeyComparator *keyComp,
               UValueComparator *valueComp,
@ -224,18 +224,37 @@ uhash_openSize(UHashFunction *keyHash,
 * @return A pointer to a UHashtable, or 0 if an error occurred.
 * @see uhash_openSize
 */
-U_CAPI UHashtable* U_EXPORT2 
+U_CAPI UHashtable* U_EXPORT2
 uhash_init(UHashtable *hash,
           UHashFunction *keyHash,
           UKeyComparator *keyComp,
           UValueComparator *valueComp,
           UErrorCode *status);

+/**
+ * Initialize an existing UHashtable with a requested initial capacity.
+ * @param hash The existing UHashtable to initialize.
+ * @param keyHash A pointer to the key hashing function.  Must not be NULL.
+ * @param keyComp A pointer to the function that compares keys.  Must not be NULL.
+ * @param valueComp A pointer to the function that compares values.
+ * @param size The initial capacity of this hash table.
+ * @param status A pointer to a UErrorCode to receive any errors.
+ * @return A pointer to a UHashtable, or 0 if an error occurred.
+ * @see uhash_openSize
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_initSize(UHashtable *hash,
+               UHashFunction *keyHash,
+               UKeyComparator *keyComp,
+               UValueComparator *valueComp,
+               int32_t size,
+               UErrorCode *status);
+
 /**
 * Close a UHashtable, releasing the memory used.
 * @param hash The UHashtable to close. If hash is NULL no operation is performed.
 */
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
 uhash_close(UHashtable *hash);


@ -246,7 +265,7 @@ uhash_close(UHashtable *hash);
 * @param fn the function to be used hash keys; must not be NULL
 * @return the previous key hasher; non-NULL
 */
-U_CAPI UHashFunction *U_EXPORT2 
+U_CAPI UHashFunction *U_EXPORT2
 uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);

 /**
@ -256,7 +275,7 @@ uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
 * @param fn the function to be used compare keys; must not be NULL
 * @return the previous key comparator; non-NULL
 */
-U_CAPI UKeyComparator *U_EXPORT2 
+U_CAPI UKeyComparator *U_EXPORT2
 uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);

 /**
@ -266,7 +285,7 @@ uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
 * @param fn the function to be used compare keys; must not be NULL
 * @return the previous key comparator; non-NULL
 */
-U_CAPI UValueComparator *U_EXPORT2 
+U_CAPI UValueComparator *U_EXPORT2
 uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);

 /**
@ -279,7 +298,7 @@ uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
 * @param fn the function to be used delete keys, or NULL
 * @return the previous key deleter; may be NULL
 */
-U_CAPI UObjectDeleter *U_EXPORT2 
+U_CAPI UObjectDeleter *U_EXPORT2
 uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);

 /**
@ -292,7 +311,7 @@ uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
 * @param fn the function to be used delete values, or NULL
 * @return the previous value deleter; may be NULL
 */
-U_CAPI UObjectDeleter *U_EXPORT2 
+U_CAPI UObjectDeleter *U_EXPORT2
 uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);

 /**
@ -302,7 +321,7 @@ uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
 * @param hash The UHashtable to set
 * @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED}
 */
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
 uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);

 /**
@ -310,7 +329,7 @@ uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
 * @param hash The UHashtable to query.
 * @return The number of key-value pairs stored in hash.
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_count(const UHashtable *hash);

 /**
@ -326,7 +345,7 @@ uhash_count(const UHashtable *hash);
 * @return The previous value, or NULL if none.
 * @see uhash_get
 */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
 uhash_put(UHashtable *hash,
          void *key,
          void *value,
@ -344,7 +363,7 @@ uhash_put(UHashtable *hash,
 * @return The previous value, or NULL if none.
 * @see uhash_get
 */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
 uhash_iput(UHashtable *hash,
           int32_t key,
           void* value,
@ -362,7 +381,7 @@ uhash_iput(UHashtable *hash,
 * @return The previous value, or 0 if none.
 * @see uhash_get
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_puti(UHashtable *hash,
           void* key,
           int32_t value,
@ -380,7 +399,7 @@ uhash_puti(UHashtable *hash,
 * @return The previous value, or 0 if none.
 * @see uhash_get
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_iputi(UHashtable *hash,
           int32_t key,
           int32_t value,
@ -393,8 +412,8 @@ uhash_iputi(UHashtable *hash,
 * @param key A pointer key stored in a hashtable
 * @return The requested item, or NULL if not found.
 */
-U_CAPI void* U_EXPORT2 
-uhash_get(const UHashtable *hash, 
+U_CAPI void* U_EXPORT2
+uhash_get(const UHashtable *hash,
          const void *key);

 /**
@ -404,7 +423,7 @@ uhash_get(const UHashtable *hash,
 * @param key An integer key stored in a hashtable
 * @return The requested item, or NULL if not found.
 */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
 uhash_iget(const UHashtable *hash,
           int32_t key);

@ -415,7 +434,7 @@ uhash_iget(const UHashtable *hash,
 * @param key A pointer key stored in a hashtable
 * @return The requested item, or 0 if not found.
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_geti(const UHashtable *hash,
           const void* key);
 /**
@ -425,7 +444,7 @@ uhash_geti(const UHashtable *hash,
 * @param key An integer key stored in a hashtable
 * @return The requested item, or 0 if not found.
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_igeti(const UHashtable *hash,
           int32_t key);

@ -435,7 +454,7 @@ uhash_igeti(const UHashtable *hash,
 * @param key A key stored in a hashtable
 * @return The item removed, or NULL if not found.
 */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
 uhash_remove(UHashtable *hash,
             const void *key);

@ -445,7 +464,7 @@ uhash_remove(UHashtable *hash,
 * @param key An integer key stored in a hashtable
 * @return The item removed, or NULL if not found.
 */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
 uhash_iremove(UHashtable *hash,
              int32_t key);

@ -455,7 +474,7 @@ uhash_iremove(UHashtable *hash,
 * @param key An key stored in a hashtable
 * @return The item removed, or 0 if not found.
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_removei(UHashtable *hash,
              const void* key);

@ -465,7 +484,7 @@ uhash_removei(UHashtable *hash,
 * @param key An integer key stored in a hashtable
 * @return The item removed, or 0 if not found.
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_iremovei(UHashtable *hash,
               int32_t key);

@ -473,7 +492,7 @@ uhash_iremovei(UHashtable *hash,
 * Remove all items from a UHashtable.
 * @param hash The target UHashtable.
 */
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
 uhash_removeAll(UHashtable *hash);

 /**
@ -487,7 +506,7 @@ uhash_removeAll(UHashtable *hash);
 * @param key A key stored in a hashtable
 * @return a hash element, or NULL if the key is not found.
 */
-U_CAPI const UHashElement* U_EXPORT2 
+U_CAPI const UHashElement* U_EXPORT2
 uhash_find(const UHashtable *hash, const void* key);

 /**
@ -510,7 +529,7 @@ uhash_find(const UHashtable *hash, const void* key);
 * @return a hash element, or NULL if no further key-value pairs
 * exist in the table.
 */
-U_CAPI const UHashElement* U_EXPORT2 
+U_CAPI const UHashElement* U_EXPORT2
 uhash_nextElement(const UHashtable *hash,
                  int32_t *pos);

@ -525,7 +544,7 @@ uhash_nextElement(const UHashtable *hash,
 * modified.
 * @return the value that was removed.
 */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
 uhash_removeElement(UHashtable *hash, const UHashElement* e);

 /********************************************************************
@ -537,7 +556,7 @@ uhash_removeElement(UHashtable *hash, const UHashElement* e);
 * @param i The given integer
 * @return a UHashTok for an integer.
 */
-/*U_CAPI UHashTok U_EXPORT2 
+/*U_CAPI UHashTok U_EXPORT2
 uhash_toki(int32_t i);*/

 /**
@ -545,7 +564,7 @@ uhash_toki(int32_t i);*/
 * @param p The given pointer
 * @return a UHashTok for a pointer.
 */
-/*U_CAPI UHashTok U_EXPORT2 
+/*U_CAPI UHashTok U_EXPORT2
 uhash_tokp(void* p);*/

 /********************************************************************
@ -559,7 +578,7 @@ uhash_tokp(void* p);*/
 * @param key The string (const UChar*) to hash.
 * @return A hash code for the key.
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_hashUChars(const UHashTok key);

 /**
@ -569,7 +588,7 @@ uhash_hashUChars(const UHashTok key);
 * @param key The string (const char*) to hash.
 * @return A hash code for the key.
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_hashChars(const UHashTok key);

 /**
@ -589,7 +608,7 @@ uhash_hashIChars(const UHashTok key);
 * @param key2 The string for comparison
 * @return true if key1 and key2 are equal, return false otherwise.
 */
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
 uhash_compareUChars(const UHashTok key1, const UHashTok key2);

 /**
@ -599,7 +618,7 @@ uhash_compareUChars(const UHashTok key1, const UHashTok key2);
 * @param key2 The string for comparison
 * @return true if key1 and key2 are equal, return false otherwise.
 */
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
 uhash_compareChars(const UHashTok key1, const UHashTok key2);

 /**
@ -609,7 +628,7 @@ uhash_compareChars(const UHashTok key1, const UHashTok key2);
 * @param key2 The string for comparison
 * @return true if key1 and key2 are equal, return false otherwise.
 */
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
 uhash_compareIChars(const UHashTok key1, const UHashTok key2);

 /********************************************************************
@ -621,7 +640,7 @@ uhash_compareIChars(const UHashTok key1, const UHashTok key2);
 * @param key The string (const char*) to hash.
 * @return A hash code for the key.
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_hashUnicodeString(const UElement key);

 /**
@ -630,7 +649,7 @@ uhash_hashUnicodeString(const UElement key);
 * @param key The string (const char*) to hash.
 * @return A hash code for the key.
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_hashCaselessUnicodeString(const UElement key);

 /********************************************************************
@ -642,7 +661,7 @@ uhash_hashCaselessUnicodeString(const UElement key);
 * @param key The string (const char*) to hash.
 * @return A hash code for the key.
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 uhash_hashLong(const UHashTok key);

 /**
@ -651,7 +670,7 @@ uhash_hashLong(const UHashTok key);
 * @param Key2 The integer for comparison
 * @return true if key1 and key2 are equal, return false otherwise
 */
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
 uhash_compareLong(const UHashTok key1, const UHashTok key2);

 /********************************************************************
@ -662,7 +681,7 @@ uhash_compareLong(const UHashTok key1, const UHashTok key2);
 * Deleter for Hashtable objects.
 * @param obj The object to be deleted
 */
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
 uhash_deleteHashtable(void *obj);

 /* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
@ -673,7 +692,7 @@ uhash_deleteHashtable(void *obj);
 * @param hash2
 * @return true if the hashtables are equal and false if not.
 */
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
 uhash_equals(const UHashtable* hash1, const UHashtable* hash2);


--- a/icu4c/source/common/unicode/casemap.h
+++ b/icu4c/source/common/unicode/casemap.h
@ -8,6 +8,7 @@
 #define __CASEMAP_H__

 #include "unicode/utypes.h"
+#include "unicode/stringpiece.h"
 #include "unicode/uobject.h"

 /**
@ -20,6 +21,7 @@ U_NAMESPACE_BEGIN
 #ifndef U_HIDE_DRAFT_API

 class BreakIterator;
+class ByteSink;
 class Edits;

 /**
@ -36,7 +38,7 @@ public:
     * The source string and the destination buffer must not overlap.
     *
     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
-     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     * @param src       The original string.
     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
     * @param dest      A buffer for the result string. The result will be NUL-terminated if
@ -48,7 +50,8 @@ public:
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
-     *                  This function calls edits->reset() first. edits can be NULL.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
     * @param errorCode Reference to an in/out error code value
     *                  which must not indicate a failure before the function call.
     * @return The length of the result string, if successful.
@ -71,7 +74,7 @@ public:
     * The source string and the destination buffer must not overlap.
     *
     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
-     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     * @param src       The original string.
     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
     * @param dest      A buffer for the result string. The result will be NUL-terminated if
@ -83,7 +86,8 @@ public:
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
-     *                  This function calls edits->reset() first. edits can be NULL.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
     * @param errorCode Reference to an in/out error code value
     *                  which must not indicate a failure before the function call.
     * @return The length of the result string, if successful.
@ -112,7 +116,7 @@ public:
     * all others. (This can be modified with options bits.)
     *
     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
-     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
     *                  U_TITLECASE_NO_LOWERCASE,
     *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
     *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
@ -132,7 +136,8 @@ public:
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
-     *                  This function calls edits->reset() first. edits can be NULL.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
     * @param errorCode Reference to an in/out error code value
     *                  which must not indicate a failure before the function call.
     * @return The length of the result string, if successful.
@ -161,7 +166,7 @@ public:
     * The result may be longer or shorter than the original.
     * The source string and the destination buffer must not overlap.
     *
-     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
     *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
     * @param src       The original string.
     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@ -174,7 +179,8 @@ public:
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
-     *                  This function calls edits->reset() first. edits can be NULL.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
     * @param errorCode Reference to an in/out error code value
     *                  which must not indicate a failure before the function call.
     * @return The length of the result string, if successful.
@ -190,6 +196,129 @@ public:
            char16_t *dest, int32_t destCapacity, Edits *edits,
            UErrorCode &errorCode);

+    /**
+     * Lowercases a UTF-8 string and optionally records edits.
+     * Casing is locale-dependent and context-sensitive.
+     * The result may be longer or shorter than the original.
+     *
+     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+     * @param src       The original string.
+     * @param sink      A ByteSink to which the result string is written.
+     *                  sink.Flush() is called at the end.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
+     * @param errorCode Reference to an in/out error code value
+     *                  which must not indicate a failure before the function call.
+     *
+     * @see ucasemap_utf8ToLower
+     * @draft ICU 60
+     */
+    static void utf8ToLower(
+            const char *locale, uint32_t options,
+            StringPiece src, ByteSink &sink, Edits *edits,
+            UErrorCode &errorCode);
+
+    /**
+     * Uppercases a UTF-8 string and optionally records edits.
+     * Casing is locale-dependent and context-sensitive.
+     * The result may be longer or shorter than the original.
+     *
+     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+     * @param src       The original string.
+     * @param sink      A ByteSink to which the result string is written.
+     *                  sink.Flush() is called at the end.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
+     * @param errorCode Reference to an in/out error code value
+     *                  which must not indicate a failure before the function call.
+     *
+     * @see ucasemap_utf8ToUpper
+     * @draft ICU 60
+     */
+    static void utf8ToUpper(
+            const char *locale, uint32_t options,
+            StringPiece src, ByteSink &sink, Edits *edits,
+            UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+    /**
+     * Titlecases a UTF-8 string and optionally records edits.
+     * Casing is locale-dependent and context-sensitive.
+     * The result may be longer or shorter than the original.
+     *
+     * Titlecasing uses a break iterator to find the first characters of words
+     * that are to be titlecased. It titlecases those characters and lowercases
+     * all others. (This can be modified with options bits.)
+     *
+     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+     *                  U_TITLECASE_NO_LOWERCASE,
+     *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+     *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+     * @param iter      A break iterator to find the first characters of words that are to be titlecased.
+     *                  It is set to the source string (setUText())
+     *                  and used one or more times for iteration (first() and next()).
+     *                  If NULL, then a word break iterator for the locale is used
+     *                  (or something equivalent).
+     * @param src       The original string.
+     * @param sink      A ByteSink to which the result string is written.
+     *                  sink.Flush() is called at the end.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
+     * @param errorCode Reference to an in/out error code value
+     *                  which must not indicate a failure before the function call.
+     *
+     * @see ucasemap_utf8ToTitle
+     * @draft ICU 60
+     */
+    static void utf8ToTitle(
+            const char *locale, uint32_t options, BreakIterator *iter,
+            StringPiece src, ByteSink &sink, Edits *edits,
+            UErrorCode &errorCode);
+
+#endif  // UCONFIG_NO_BREAK_ITERATION
+
+    /**
+     * Case-folds a UTF-8 string and optionally records edits.
+     *
+     * Case folding is locale-independent and not context-sensitive,
+     * but there is an option for whether to include or exclude mappings for dotted I
+     * and dotless i that are marked with 'T' in CaseFolding.txt.
+     *
+     * The result may be longer or shorter than the original.
+     *
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+     * @param src       The original string.
+     * @param sink      A ByteSink to which the result string is written.
+     *                  sink.Flush() is called at the end.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
+     * @param errorCode Reference to an in/out error code value
+     *                  which must not indicate a failure before the function call.
+     *
+     * @see ucasemap_utf8FoldCase
+     * @draft ICU 60
+     */
+    static void utf8Fold(
+            uint32_t options,
+            StringPiece src, ByteSink &sink, Edits *edits,
+            UErrorCode &errorCode);
+
    /**
     * Lowercases a UTF-8 string and optionally records edits.
     * Casing is locale-dependent and context-sensitive.
@ -197,7 +326,7 @@ public:
     * The source string and the destination buffer must not overlap.
     *
     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
-     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     * @param src       The original string.
     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
     * @param dest      A buffer for the result string. The result will be NUL-terminated if
@ -209,7 +338,8 @@ public:
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
-     *                  This function calls edits->reset() first. edits can be NULL.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
     * @param errorCode Reference to an in/out error code value
     *                  which must not indicate a failure before the function call.
     * @return The length of the result string, if successful.
@ -219,7 +349,7 @@ public:
     * @see ucasemap_utf8ToLower
     * @draft ICU 59
     */
-     static int32_t utf8ToLower(
+    static int32_t utf8ToLower(
            const char *locale, uint32_t options,
            const char *src, int32_t srcLength,
            char *dest, int32_t destCapacity, Edits *edits,
@ -232,7 +362,7 @@ public:
     * The source string and the destination buffer must not overlap.
     *
     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
-     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     * @param src       The original string.
     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
     * @param dest      A buffer for the result string. The result will be NUL-terminated if
@ -244,7 +374,8 @@ public:
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
-     *                  This function calls edits->reset() first. edits can be NULL.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
     * @param errorCode Reference to an in/out error code value
     *                  which must not indicate a failure before the function call.
     * @return The length of the result string, if successful.
@ -273,7 +404,7 @@ public:
     * all others. (This can be modified with options bits.)
     *
     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
-     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
     *                  U_TITLECASE_NO_LOWERCASE,
     *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
     *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
@ -293,7 +424,8 @@ public:
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
-     *                  This function calls edits->reset() first. edits can be NULL.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
     * @param errorCode Reference to an in/out error code value
     *                  which must not indicate a failure before the function call.
     * @return The length of the result string, if successful.
@ -321,7 +453,7 @@ public:
     * The result may be longer or shorter than the original.
     * The source string and the destination buffer must not overlap.
     *
-     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT,
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
     *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
     * @param src       The original string.
     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@ -334,7 +466,8 @@ public:
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
-     *                  This function calls edits->reset() first. edits can be NULL.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
     * @param errorCode Reference to an in/out error code value
     *                  which must not indicate a failure before the function call.
     * @return The length of the result string, if successful.
--- a/icu4c/source/common/unicode/edits.h
+++ b/icu4c/source/common/unicode/edits.h
@ -148,7 +148,7 @@ public:
        Iterator() :
                array(nullptr), index(0), length(0),
                remaining(0), onlyChanges_(FALSE), coarse(FALSE),
-                changed(FALSE), oldLength_(0), newLength_(0),
+                dir(0), changed(FALSE), oldLength_(0), newLength_(0),
                srcIndex(0), replIndex(0), destIndex(0) {}
        /**
         * Copy constructor.
@ -306,17 +306,22 @@ public:
        Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);

        int32_t readLength(int32_t head);
-        void updateIndexes();
+        void updateNextIndexes();
+        void updatePreviousIndexes();
        UBool noNext();
        UBool next(UBool onlyChanges, UErrorCode &errorCode);
+        UBool previous(UErrorCode &errorCode);
        /** @return -1: error or i<0; 0: found; 1: i>=string length */
        int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);

        const uint16_t *array;
        int32_t index, length;
+        // 0 if we are not within compressed equal-length changes.
+        // Otherwise the number of remaining changes, including the current one.
        int32_t remaining;
        UBool onlyChanges_, coarse;

+        int8_t dir;  // iteration direction: back(<0), initial(0), forward(>0)
        UBool changed;
        int32_t oldLength_, newLength_;
        int32_t srcIndex, replIndex, destIndex;
--- a/icu4c/source/common/unicode/filteredbrk.h
+++ b/icu4c/source/common/unicode/filteredbrk.h
@ -55,14 +55,26 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
   */
  static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);

+  /**
+   * This function has been deprecated in favor of createEmptyInstance, which has
+   * identical behavior.
+   * @param status The error code.
+   * @return the new builder
+   * @deprecated ICU 60 use createEmptyInstance instead
+   * @see createEmptyInstance()
+   */
+  static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) {
+    return createEmptyInstance(status);
+  }
+
  /**
   * Construct an empty FilteredBreakIteratorBuilder.
   * In this state, it will not suppress any segment boundaries.
   * @param status The error code.
   * @return the new builder
-   * @stable ICU 56
+   * @draft ICU 60
   */
-  static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
+  static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);

  /**
   * Suppress a certain string from being the end of a segment.
@ -89,6 +101,17 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
   */
  virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;

+  /**
+   * This function has been deprecated in favor of wrapIteratorWithFilter().
+   * The behavior is identical.
+   * @param adoptBreakIterator the break iterator to adopt
+   * @param status error code
+   * @return the new BreakIterator, owned by the caller.
+   * @deprecated ICU 60 use wrapIteratorWithFilter() instead
+   * @see wrapIteratorWithFilter()
+   */
+  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
+
  /**
   * Wrap (adopt) an existing break iterator in a new filtered instance.
   * The resulting BreakIterator is owned by the caller.
@ -96,12 +119,15 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
   * Note that the adoptBreakIterator is adopted by the new BreakIterator
   * and should no longer be used by the caller.
   * The FilteredBreakIteratorBuilder may be reused.
+   * This function is an alias for build().
   * @param adoptBreakIterator the break iterator to adopt
   * @param status error code
   * @return the new BreakIterator, owned by the caller.
-   * @stable ICU 56
+   * @draft ICU 60
   */
-  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
+  inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
+    return build(adoptBreakIterator, status);
+  }

 protected:
  /**
--- a/icu4c/source/common/unicode/locid.h
+++ b/icu4c/source/common/unicode/locid.h
@ -88,7 +88,7 @@ class UnicodeString;
 * <P>
 * The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
 * The Variant codes are vendor and browser-specific.
- * For example, use REVISED for a langauge's revised script orthography, and POSIX for POSIX.
+ * For example, use REVISED for a language's revised script orthography, and POSIX for POSIX.
 * Where there are two variants, separate them with an underscore, and
 * put the most important one first. For
 * example, a Traditional Spanish collation might be referenced, with
--- a/icu4c/source/common/unicode/normalizer2.h
+++ b/icu4c/source/common/unicode/normalizer2.h
@ -228,14 +228,15 @@ public:
     * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
     * Otherwise currently converts to & from UTF-16 and does not support edits.
     *
-     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     * @param src       Source UTF-8 string.
     * @param sink      A ByteSink to which the normalized UTF-8 result string is written.
     *                  sink.Flush() is called at the end.
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
-     *                  This function calls edits->reset() first. edits can be nullptr.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
@ -534,7 +535,7 @@ public:
    virtual UnicodeString &
    normalize(const UnicodeString &src,
              UnicodeString &dest,
-              UErrorCode &errorCode) const override;
+              UErrorCode &errorCode) const U_OVERRIDE;

    /**
     * Normalizes a UTF-8 string and optionally records how source substrings
@ -545,14 +546,15 @@ public:
     * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
     * Otherwise currently converts to & from UTF-16 and does not support edits.
     *
-     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT.
+     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     * @param src       Source UTF-8 string.
     * @param sink      A ByteSink to which the normalized UTF-8 result string is written.
     *                  sink.Flush() is called at the end.
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
-     *                  This function calls edits->reset() first. edits can be nullptr.
+     *                  This function calls edits->reset() first unless
+     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
@ -561,7 +563,7 @@ public:
     */
    virtual void
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
-                  Edits *edits, UErrorCode &errorCode) const override;
+                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;

    /**
     * Appends the normalized form of the second string to the first string
@ -580,7 +582,7 @@ public:
    virtual UnicodeString &
    normalizeSecondAndAppend(UnicodeString &first,
                             const UnicodeString &second,
-                             UErrorCode &errorCode) const override;
+                             UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Appends the second string to the first string
     * (merging them at the boundary) and returns the first string.
@ -598,7 +600,7 @@ public:
    virtual UnicodeString &
    append(UnicodeString &first,
           const UnicodeString &second,
-           UErrorCode &errorCode) const override;
+           UErrorCode &errorCode) const U_OVERRIDE;

    /**
     * Gets the decomposition mapping of c.
@ -612,7 +614,7 @@ public:
     * @stable ICU 4.6
     */
    virtual UBool
-    getDecomposition(UChar32 c, UnicodeString &decomposition) const override;
+    getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;

    /**
     * Gets the raw decomposition mapping of c.
@ -626,7 +628,7 @@ public:
     * @stable ICU 49
     */
    virtual UBool
-    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override;
+    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;

    /**
     * Performs pairwise composition of a & b and returns the composite if there is one.
@ -639,7 +641,7 @@ public:
     * @stable ICU 49
     */
    virtual UChar32
-    composePair(UChar32 a, UChar32 b) const override;
+    composePair(UChar32 a, UChar32 b) const U_OVERRIDE;

    /**
     * Gets the combining class of c.
@ -650,7 +652,7 @@ public:
     * @stable ICU 49
     */
    virtual uint8_t
-    getCombiningClass(UChar32 c) const override;
+    getCombiningClass(UChar32 c) const U_OVERRIDE;

    /**
     * Tests if the string is normalized.
@ -664,7 +666,7 @@ public:
     * @stable ICU 4.4
     */
    virtual UBool
-    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override;
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Tests if the UTF-8 string is normalized.
     * Internally, in cases where the quickCheck() method would return "maybe"
@ -687,7 +689,7 @@ public:
     * @draft ICU 60
     */
    virtual UBool
-    isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const override;
+    isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Tests if the string is normalized.
     * For details see the Normalizer2 base class documentation.
@ -700,7 +702,7 @@ public:
     * @stable ICU 4.4
     */
    virtual UNormalizationCheckResult
-    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override;
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Returns the end of the normalized substring of the input string.
     * For details see the Normalizer2 base class documentation.
@ -713,7 +715,7 @@ public:
     * @stable ICU 4.4
     */
    virtual int32_t
-    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override;
+    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;

    /**
     * Tests if the character always has a normalization boundary before it,
@ -723,7 +725,7 @@ public:
     * @return TRUE if c has a normalization boundary before it
     * @stable ICU 4.4
     */
-    virtual UBool hasBoundaryBefore(UChar32 c) const override;
+    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;

    /**
     * Tests if the character always has a normalization boundary after it,
@ -733,7 +735,7 @@ public:
     * @return TRUE if c has a normalization boundary after it
     * @stable ICU 4.4
     */
-    virtual UBool hasBoundaryAfter(UChar32 c) const override;
+    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;

    /**
     * Tests if the character is normalization-inert.
@ -742,7 +744,7 @@ public:
     * @return TRUE if c is normalization-inert
     * @stable ICU 4.4
     */
-    virtual UBool isInert(UChar32 c) const override;
+    virtual UBool isInert(UChar32 c) const U_OVERRIDE;
 private:
    UnicodeString &
    normalize(const UnicodeString &src,
--- a/icu4c/source/common/unicode/platform.h
+++ b/icu4c/source/common/unicode/platform.h
@ -830,6 +830,16 @@ namespace std {
 #    define U_CALLCONV U_EXPORT2
 #endif

+/**
+ * \def U_CALLCONV_FPTR
+ * Similar to U_CALLCONV, but only used on function pointers.
+ * @internal
+ */
+#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
+#    define U_CALLCONV_FPTR U_CALLCONV
+#else
+#    define U_CALLCONV_FPTR
+#endif
 /* @} */

 #endif
--- a/icu4c/source/common/unicode/rbbi.h
+++ b/icu4c/source/common/unicode/rbbi.h
@ -31,21 +31,14 @@
 #include "unicode/schriter.h"
 #include "unicode/uchriter.h"

-
 U_NAMESPACE_BEGIN

 /** @internal */
-struct RBBIDataHeader;
-class  RuleBasedBreakIteratorTables;
-class  BreakIterator;
-class  RBBIDataWrapper;
-class  UStack;
 class  LanguageBreakEngine;
+struct RBBIDataHeader;
+class  RBBIDataWrapper;
 class  UnhandledEngine;
-struct RBBIStateTable;
-
-
-
+class  UStack;

 /**
 *
@ -94,19 +87,36 @@ private:
     */
    RBBIDataWrapper    *fData;

-    /** Index of the Rule {tag} values for the most recent match.
+    /** 
+     *  The iteration state - current position, rule status for the current position,
+     *                        and whether the iterator ran off the end, yielding UBRK_DONE.
+     *                        Current position is pinned to be 0 <= position <= text.length.
+     *                        Current position is always set to a boundary.
     *  @internal
    */
-    int32_t             fLastRuleStatusIndex;
+    /**
+      * The current position of the iterator. Pinned, 0 <= fPosition <= text.length.
+      * Never has the value UBRK_DONE (-1).
+      */
+    int32_t         fPosition;

    /**
-     * Rule tag value valid flag.
-     * Some iterator operations don't intrinsically set the correct tag value.
-     * This flag lets us lazily compute the value if we are ever asked for it.
-     * @internal
-     */
-    UBool               fLastStatusIndexValid;
+      * Rule status index for the current position. TODO: document in full.
+      */
+    int32_t         fRuleStatusIndex;

+    /**
+      * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
+      */
+    UBool           fDone;
+
+    /**
+     *   Cache of previously determined boundary positions.
+     */
+  public:    // TODO: debug, return to private.
+    class BreakCache;
+    BreakCache         *fBreakCache;
+  private:
    /**
     * Counter for the number of characters encountered with the "dictionary"
     *   flag set.
@ -115,26 +125,11 @@ private:
    uint32_t            fDictionaryCharCount;

    /**
-     * When a range of characters is divided up using the dictionary, the break
-     * positions that are discovered are stored here, preventing us from having
-     * to use either the dictionary or the state table again until the iterator
-     * leaves this range of text. Has the most impact for line breaking.
-     * @internal
+     *  Cache of boundary positions within a region of text that has been
+     *  sub-divided by dictionary based breaking.
     */
-    int32_t*            fCachedBreakPositions;
-
-    /**
-     * The number of elements in fCachedBreakPositions
-     * @internal
-     */
-    int32_t             fNumCachedBreakPositions;
-
-    /**
-     * if fCachedBreakPositions is not null, this indicates which item in the
-     * cache the current iteration position refers to
-     * @internal
-     */
-    int32_t             fPositionInCache;
+    class DictionaryCache;
+    DictionaryCache *fDictionaryCache;

    /**
     *
@ -177,13 +172,11 @@ private:
     */
    RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);

-
+    /** @internal */
    friend class RBBIRuleBuilder;
    /** @internal */
    friend class BreakIterator;

-
-
 public:

    /** Default constructor.  Creates an empty shell of an iterator, with no
@ -467,7 +460,10 @@ public:
    virtual UBool isBoundary(int32_t offset);

    /**
-     * Returns the current iteration position.
+     * Returns the current iteration position. Note that UBRK_DONE is never
+     * returned from this function; if iteration has run to the end of a
+     * string, current() will return the length of the string while
+     * next() will return UBRK_DONE.
     * @return The current iteration position.
     * @stable ICU 2.0
     */
@ -499,6 +495,7 @@ public:
     * Note: this function is not thread safe.  It should not have been
     *       declared const, and the const remains only for compatibility
     *       reasons.  (The function is logically const, but not bit-wise const).
+     *   TODO: check this. Probably thread safe now.
     * <p>
     * @return the status from the break rule that determined the most recently
     * returned break position.
@ -658,46 +655,31 @@ private:
      * Common initialization function, used by constructors and bufferClone.
      * @internal
      */
-    void init();
+    void init(UErrorCode &status);

    /**
-     * This method backs the iterator back up to a "safe position" in the text.
-     * This is a position that we know, without any context, must be a break position.
-     * The various calling methods then iterate forward from this safe position to
-     * the appropriate position to return.  (For more information, see the description
-     * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
-     * @param statetable state table used of moving backwards
+     * Iterate backwards from an arbitrary position in the input text using the Safe Reverse rules.
+     * This locates a "Safe Position" from which the forward break rules
+     * will operate correctly. A Safe Position is not necessarily a boundary itself.
+     *
+     * @param fromPosition the position in the input text to begin the iteration.
     * @internal
     */
-    int32_t handlePrevious(const RBBIStateTable *statetable);
+    int32_t handlePrevious(int32_t fromPosition);

    /**
-     * This method is the actual implementation of the next() method.  All iteration
-     * vectors through here.  This method initializes the state machine to state 1
-     * and advances through the text character by character until we reach the end
-     * of the text or the state machine transitions to state 0.  We update our return
-     * value every time the state machine passes through a possible end state.
-     * @param statetable state table used of moving forwards
+     * Find a rule-based boundary by running the state machine.
+     * Input
+     *    fPosition:            the position in the text to begin from.
+     * Output
+     *    fPosition:            the boundary following the starting position.
+     *    fDictionaryCharCount: the number of dictionary characters encountered.
+     *                          If > 0, the segment will be further subdivided.
+     *    fRuleStatusIndex:     info from the state table indicating which rules caused the boundary.
+     *
     * @internal
     */
-    int32_t handleNext(const RBBIStateTable *statetable);
-
-
-    /**
-     * This is the function that actually implements dictionary-based
-     * breaking.  Covering at least the range from startPos to endPos,
-     * it checks for dictionary characters, and if it finds them determines
-     * the appropriate object to deal with them. It may cache found breaks in
-     * fCachedBreakPositions as it goes. It may well also look at text outside
-     * the range startPos to endPos.
-     * If going forward, endPos is the normal Unicode break result, and
-     * if goind in reverse, startPos is the normal Unicode break result
-     * @param startPos  The start position of a range of text
-     * @param endPos    The end position of a range of text
-     * @param reverse   The call is for the reverse direction
-     * @internal
-     */
-    int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
+    int32_t handleNext();


    /**
@ -708,11 +690,12 @@ private:
     */
    const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);

+  public:
    /**
-     *  @internal
+     *   Debugging function only.
+     *   @internal
     */
-    void makeRuleStatusValid();
-
+     void dumpCache();
 };

 //------------------------------------------------------------------------------
--- a/icu4c/source/common/unicode/stringoptions.h
+++ b/icu4c/source/common/unicode/stringoptions.h
@ -134,6 +134,17 @@
 */
 #define U_TITLECASE_ADJUST_TO_CASED 0x400

+/**
+ * Option for string transformation functions to not first reset the Edits object.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @draft ICU 60
+ */
+#define U_EDITS_NO_RESET 0x2000
+
 /**
 * Omit unchanged text when recording how source substrings
 * relate to changed and unchanged result substrings.
@ -182,7 +193,6 @@
 // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
 // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
 // ustr_imp.h #define _STRNCMP_STYLE 0x1000
-// ustr_imp.h #define U_EDITS_NO_RESET 0x2000
 // unormcmp.cpp #define _COMPARE_EQUIV 0x80000

 #endif  // __STRINGOPTIONS_H__
--- a/icu4c/source/common/unicode/ubrk.h
+++ b/icu4c/source/common/unicode/ubrk.h
@ -230,7 +230,8 @@ typedef enum USentenceBreakTag {
 * @param locale The locale specifying the text-breaking conventions. Note that
 * locale keys such as "lb" and "ss" may be used to modify text break behavior,
 * see general discussion of BreakIterator C API.
- * @param text The text to be iterated over.
+ * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
+ *        used to specify the text to be iterated.
 * @param textLength The number of characters in text, or -1 if null-terminated.
 * @param status A UErrorCode to receive any errors.
 * @return A UBreakIterator for the specified locale.
--- a/icu4c/source/common/unicode/uclean.h
+++ b/icu4c/source/common/unicode/uclean.h
@ -149,7 +149,7 @@ typedef void  U_CALLCONV UMemFreeFn (const void *context, void *mem);
 *  @system
 */  
 U_STABLE void U_EXPORT2 
-u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV a, UMemReallocFn * U_CALLCONV r, UMemFreeFn * U_CALLCONV f, 
+u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f, 
                    UErrorCode *status);

 U_CDECL_END
--- a/icu4c/source/common/unicode/utext.h
+++ b/icu4c/source/common/unicode/utext.h
@ -768,7 +768,7 @@ utext_extract(UText *ut,
  */
 #define UTEXT_SETNATIVEINDEX(ut, ix)                       \
    { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
-      if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
+      if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
          (ut)->chunkOffset=(int32_t)__offset; \
      } else { \
          utext_setNativeIndex((ut), (ix)); } }
--- a/icu4c/source/common/unicode/utf.h
+++ b/icu4c/source/common/unicode/utf.h
@ -23,9 +23,6 @@
 * This file defines macros for checking whether a code point is
 * a surrogate or a non-character etc.
 *
- * The UChar and UChar32 data types for Unicode code units and code points
- * are defined in umachine.h because they can be machine-dependent.
- *
 * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h
 * and itself includes utf8.h and utf16.h after some
 * common definitions.
@ -50,11 +47,11 @@
 * but are optimized for the much more frequently occurring BMP code points.
 *
 * umachine.h defines UChar to be an unsigned 16-bit integer.
- * Where available, UChar is defined to be a char16_t
- * or a wchar_t (if that is an unsigned 16-bit type), otherwise uint16_t.
+ * Since ICU 59, ICU uses char16_t in C++, UChar only in C,
+ * and defines UChar=char16_t by default. See the UChar API docs for details.
 *
 * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
- * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Unicode code point (Unicode scalar value, 0..0x10ffff) and U_SENTINEL (-1).
 * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
 * the definition of UChar. For details see the documentation for UChar32 itself.
 *
@ -63,11 +60,20 @@
 * For actual Unicode character properties see uchar.h.
 *
 * By default, string operations must be done with error checking in case
- * a string is not well-formed UTF-16.
- * The macros will detect if a surrogate code unit is unpaired
+ * a string is not well-formed UTF-16 or UTF-8.
+ *
+ * The U16_ macros detect if a surrogate code unit is unpaired
 * (lead unit without trail unit or vice versa) and just return the unit itself
 * as the code point.
 *
+ * The U8_ macros detect illegal byte sequences and return a negative value.
+ * Starting with ICU 60, the observable length of a single illegal byte sequence
+ * skipped by one of these macros follows the Unicode 6+ recommendation
+ * which is consistent with the W3C Encoding Standard.
+ *
+ * There are ..._OR_FFFD versions of both U16_ and U8_ macros
+ * that return U+FFFD for illegal code unit sequences.
+ *
 * The regular "safe" macros require that the initial, passed-in string index
 * is within bounds. They only check the index when they read more than one
 * code unit. This is usually done with code similar to the following loop:
@ -91,10 +97,7 @@
 * The performance differences are much larger here because UTF-8 provides so
 * many opportunities for malformed sequences.
 * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
- * and are fast, while the safe UTF-8 macros call functions for all but the
- * trivial (ASCII) cases.
- * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common
- * characters inline as well.)
+ * and are fast, while the safe UTF-8 macros call functions for some complicated cases.
 *
 * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
 * code point values (0..U+10ffff). They are indicated with negative values instead.
@ -126,8 +129,7 @@
 */
 #define U_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
-     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
-     (uint32_t)(c)<=0x10ffff)
+     ((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff)

 /**
 * Is c a Unicode code point value (0..U+10ffff)
@ -148,9 +150,7 @@
 */
 #define U_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
-        ((uint32_t)(c)>0xdfff && \
-         (uint32_t)(c)<=0x10ffff && \
-         !U_IS_UNICODE_NONCHAR(c)))
+        (0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c)))

 /**
 * Is this code point a BMP code point (U+0000..U+ffff)?
--- a/icu4c/source/common/unicode/utf16.h
+++ b/icu4c/source/common/unicode/utf16.h
@ -185,8 +185,8 @@
 *
 * The length can be negative for a NUL-terminated string.
 *
- * If the offset points to a single, unpaired surrogate, then that itself
- * will be returned as the code point.
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to that unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
@ -213,6 +213,53 @@
    } \
 }

+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @draft ICU 60
+ */
+#define U16_GET_OR_FFFD(s, start, i, length, c) { \
+    (c)=(s)[i]; \
+    if(U16_IS_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if(U16_IS_SURROGATE_LEAD(c)) { \
+            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+            } else { \
+                (c)=0xfffd; \
+            } \
+        } else { \
+            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+            } else { \
+                (c)=0xfffd; \
+            } \
+        } \
+    } \
+}
+
+#endif  // U_HIDE_DRAFT_API
+
 /* definitions with forward iteration --------------------------------------- */

 /**
@ -253,8 +300,7 @@
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate or
- * to a single, unpaired lead surrogate, then that itself
- * will be returned as the code point.
+ * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
@ -274,6 +320,44 @@
    } \
 }

+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then c is set to U+FFFD.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @draft ICU 60
+ */
+#define U16_NEXT_OR_FFFD(s, i, length, c) { \
+    (c)=(s)[(i)++]; \
+    if(U16_IS_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+            ++(i); \
+            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+        } else { \
+            (c)=0xfffd; \
+        } \
+    } \
+}
+
+#endif  // U_HIDE_DRAFT_API
+
 /**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
@ -481,8 +565,7 @@
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate or behind a single, unpaired
- * trail surrogate, then that itself
- * will be returned as the code point.
+ * trail surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
@ -502,6 +585,43 @@
    } \
 }

+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then c is set to U+FFFD.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @draft ICU 60
+ */
+#define U16_PREV_OR_FFFD(s, start, i, c) { \
+    (c)=(s)[--(i)]; \
+    if(U16_IS_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+            --(i); \
+            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+        } else { \
+            (c)=0xfffd; \
+        } \
+    } \
+}
+
+#endif  // U_HIDE_DRAFT_API
+
 /**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
--- a/icu4c/source/common/unicode/utf8.h
+++ b/icu4c/source/common/unicode/utf8.h
@ -41,34 +41,24 @@

 /* internal definitions ----------------------------------------------------- */

-
-
 /**
 * Counts the trail bytes for a UTF-8 lead byte.
- * Returns 0 for 0..0xbf as well as for 0xfe and 0xff.
+ * Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 *
- * Note: Beginning with ICU 50, the implementation uses a multi-condition expression
- * which was shown in 2012 (on x86-64) to compile to fast, branch-free code.
- * leadByte is evaluated multiple times.
- *
- * The pre-ICU 50 implementation used the exported array utf8_countTrailBytes:
- * #define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[leadByte])
- * leadByte was evaluated exactly once.
- *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @internal
 */
 #define U8_COUNT_TRAIL_BYTES(leadByte) \
-    ((uint8_t)(leadByte)<0xf0 ? \
-        ((uint8_t)(leadByte)>=0xc0)+((uint8_t)(leadByte)>=0xe0) : \
-        (uint8_t)(leadByte)<0xfe ? 3+((uint8_t)(leadByte)>=0xf8)+((uint8_t)(leadByte)>=0xfc) : 0)
+    (U8_IS_LEAD(leadByte) ? \
+        ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)

 /**
 * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
- * The maximum supported lead byte is 0xf4 corresponding to U+10FFFF.
+ * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
 * leadByte might be evaluated multiple times.
 *
 * This is internal since it is not meant to be called directly by external clients;
@ -78,7 +68,7 @@
 * @internal
 */
 #define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
-    (((leadByte)>=0xc0)+((leadByte)>=0xe0)+((leadByte)>=0xf0))
+    (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))

 /**
 * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
@ -89,6 +79,40 @@
 */
 #define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

+/**
+ * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * Lead byte E0..EF bits 3..0 are used as byte index,
+ * first trail byte bits 7..5 are used as bit index into that byte.
+ * @see U8_IS_VALID_LEAD3_AND_T1
+ * @internal
+ */
+#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
+
+/**
+ * Internal 3-byte UTF-8 validity check.
+ * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
+
+/**
+ * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * First trail byte bits 7..4 are used as byte index,
+ * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
+ * @see U8_IS_VALID_LEAD4_AND_T1
+ * @internal
+ */
+#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
+
+/**
+ * Internal 4-byte UTF-8 validity check.
+ * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
+
 /**
 * Function for handling "next code point" with error-checking.
 *
@ -148,20 +172,21 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
 #define U8_IS_SINGLE(c) (((c)&0x80)==0)

 /**
- * Is this code unit (byte) a UTF-8 lead byte?
+ * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
 * @param c 8-bit code unit (byte)
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
-#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
+#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32)
+// 0x32=0xf4-0xc2

 /**
- * Is this code unit (byte) a UTF-8 trail byte?
+ * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
 * @param c 8-bit code unit (byte)
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
-#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
+#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40)

 /**
 * How many code units (bytes) are used for the UTF-8 encoding
@ -289,7 +314,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
 */
 #define U8_NEXT_UNSAFE(s, i, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
-    if((c)>=0x80) { \
+    if(!U8_IS_SINGLE(c)) { \
        if((c)<0xe0) { \
            (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
        } else if((c)<0xf0) { \
@ -325,22 +350,19 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
 */
 #define U8_NEXT(s, i, length, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
-    if((c)>=0x80) { \
+    if(!U8_IS_SINGLE(c)) { \
        uint8_t __t1, __t2; \
-        if( /* handle U+1000..U+CFFF inline */ \
-            (0xe0<(c) && (c)<=0xec) && \
-            (((i)+1)<(length) || (length)<0) && \
-            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
-            (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
-        ) { \
-            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
-            (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
+        if( /* handle U+0800..U+FFFF inline */ \
+                (0xe0<=(c) && (c)<0xf0) && \
+                (((i)+1)<(length) || (length)<0) && \
+                U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
+                (__t2=(s)[(i)+1]-0x80)<=0x3f) { \
+            (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
            (i)+=2; \
        } else if( /* handle U+0080..U+07FF inline */ \
-            ((c)<0xe0 && (c)>=0xc2) && \
-            ((i)!=(length)) && \
-            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
-        ) { \
+                ((c)<0xe0 && (c)>=0xc2) && \
+                ((i)!=(length)) && \
+                (__t1=(s)[i]-0x80)<=0x3f) { \
            (c)=(((c)&0x1f)<<6)|__t1; \
            ++(i); \
        } else { \
@ -376,22 +398,19 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
 */
 #define U8_NEXT_OR_FFFD(s, i, length, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
-    if((c)>=0x80) { \
+    if(!U8_IS_SINGLE(c)) { \
        uint8_t __t1, __t2; \
-        if( /* handle U+1000..U+CFFF inline */ \
-            (0xe0<(c) && (c)<=0xec) && \
-            (((i)+1)<(length) || (length)<0) && \
-            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
-            (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
-        ) { \
-            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
-            (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
+        if( /* handle U+0800..U+FFFF inline */ \
+                (0xe0<=(c) && (c)<0xf0) && \
+                (((i)+1)<(length) || (length)<0) && \
+                U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
+                (__t2=(s)[(i)+1]-0x80)<=0x3f) { \
+            (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
            (i)+=2; \
        } else if( /* handle U+0080..U+07FF inline */ \
-            ((c)<0xe0 && (c)>=0xc2) && \
-            ((i)!=(length)) && \
-            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
-        ) { \
+                ((c)<0xe0 && (c)>=0xc2) && \
+                ((i)!=(length)) && \
+                (__t1=(s)[i]-0x80)<=0x3f) { \
            (c)=(((c)&0x1f)<<6)|__t1; \
            ++(i); \
        } else { \
@ -476,7 +495,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
 * @stable ICU 2.4
 */
 #define U8_FWD_1_UNSAFE(s, i) { \
-    (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((uint8_t)(s)[i]); \
+    (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
 }

 /**
@ -493,15 +512,24 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
 * @stable ICU 2.4
 */
 #define U8_FWD_1(s, i, length) { \
-    uint8_t __b=(uint8_t)(s)[(i)++]; \
-    if(U8_IS_LEAD(__b)) { \
-        uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
-        if((i)+__count>(length) && (length)>=0) { \
-            __count=(uint8_t)((length)-(i)); \
-        } \
-        while(__count>0 && U8_IS_TRAIL((s)[i])) { \
-            ++(i); \
-            --__count; \
+    uint8_t __b=(s)[(i)++]; \
+    if(U8_IS_LEAD(__b) && (i)!=(length)) { \
+        uint8_t __t1=(s)[i]; \
+        if((0xe0<=__b && __b<0xf0)) { \
+            if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
+                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
+                ++(i); \
+            } \
+        } else if(__b<0xe0) { \
+            if(U8_IS_TRAIL(__t1)) { \
+                ++(i); \
+            } \
+        } else /* __b>=0xf0 */ { \
+            if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
+                    ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
+                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
+                ++(i); \
+            } \
        } \
    } \
 }
@ -615,7 +643,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
-            __b=(uint8_t)(s)[--(i)]; \
+            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                U8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
@ -651,7 +679,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
 */
 #define U8_PREV(s, start, i, c) { \
    (c)=(uint8_t)(s)[--(i)]; \
-    if((c)>=0x80) { \
+    if(!U8_IS_SINGLE(c)) { \
        (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
    } \
 }
@ -682,7 +710,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
 */
 #define U8_PREV_OR_FFFD(s, start, i, c) { \
    (c)=(uint8_t)(s)[--(i)]; \
-    if((c)>=0x80) { \
+    if(!U8_IS_SINGLE(c)) { \
        (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \
    } \
 }
--- a/icu4c/source/common/unicode/utf_old.h
+++ b/icu4c/source/common/unicode/utf_old.h
@ -145,7 +145,22 @@
 #ifndef __UTF_OLD_H__
 #define __UTF_OLD_H__

-#ifndef U_HIDE_DEPRECATED_API
+/**
+ * \def U_HIDE_OBSOLETE_UTF_OLD_H
+ *
+ * Hides the obsolete definitions in unicode/utf_old.h.
+ * Recommended to be set to 1 at compile time to make sure
+ * the long-deprecated macros are no longer used.
+ *
+ * For reasons for the deprecation see the utf_old.h file comments.
+ *
+ * @internal
+ */
+#ifndef U_HIDE_OBSOLETE_UTF_OLD_H
+#   define U_HIDE_OBSOLETE_UTF_OLD_H 0
+#endif
+
+#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H

 #include "unicode/utf.h"
 #include "unicode/utf8.h"
@ -1184,7 +1199,6 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[];    /* U_IMPORT2? */ /*U_I
 */
 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)

-#endif /* U_HIDE_DEPRECATED_API */
+#endif  // !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H

 #endif
-
--- a/icu4c/source/common/unisetspan.cpp
+++ b/icu4c/source/common/unisetspan.cpp
@ -502,7 +502,7 @@ spanOneBack(const UnicodeSet &set, const UChar *s, int32_t length) {
 static inline int32_t
 spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
    UChar32 c=*s;
-    if((int8_t)c>=0) {
+    if(U8_IS_SINGLE(c)) {
        return set.contains(c) ? 1 : -1;
    }
    // Take advantage of non-ASCII fastpaths in U8_NEXT_OR_FFFD().
@ -514,7 +514,7 @@ spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
 static inline int32_t
 spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
    UChar32 c=s[length-1];
-    if((int8_t)c>=0) {
+    if(U8_IS_SINGLE(c)) {
        return set.contains(c) ? 1 : -1;
    }
    int32_t i=length-1;
@ -1006,11 +1006,9 @@ int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpa
                    // Try to match if the increment is not listed already.
                    // Match at code point boundaries. (The UTF-8 strings were converted
                    // from UTF-16 and are guaranteed to be well-formed.)
-                    if( !U8_IS_TRAIL(s[pos-overlap]) &&
-                        !offsets.containsOffset(inc) &&
-                        matches8(s+pos-overlap, s8, length8)
-                        
-                    ) {
+                    if(!U8_IS_TRAIL(s[pos-overlap]) &&
+                            !offsets.containsOffset(inc) &&
+                            matches8(s+pos-overlap, s8, length8)) {
                        if(inc==rest) {
                            return length;  // Reached the end of the string.
                        }
@ -1052,11 +1050,10 @@ int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpa
                    // Try to match if the string is longer or starts earlier.
                    // Match at code point boundaries. (The UTF-8 strings were converted
                    // from UTF-16 and are guaranteed to be well-formed.)
-                    if( !U8_IS_TRAIL(s[pos-overlap]) &&
-                        (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ inc>maxInc) &&
-                        matches8(s+pos-overlap, s8, length8)
-                        
-                    ) {
+                    if(!U8_IS_TRAIL(s[pos-overlap]) &&
+                            (overlap>maxOverlap ||
+                                /* redundant overlap==maxOverlap && */ inc>maxInc) &&
+                            matches8(s+pos-overlap, s8, length8)) {
                        maxInc=inc;  // Longest match from earliest start.
                        maxOverlap=overlap;
                        break;
--- a/icu4c/source/common/ustr_imp.h
+++ b/icu4c/source/common/ustr_imp.h
@ -18,6 +18,7 @@
 #define __USTR_IMP_H__

 #include "unicode/utypes.h"
+#include "unicode/utf8.h"

 /**
 * Internal option for unorm_cmpEquivFold() for strncmp style.
@ -25,11 +26,6 @@
 */
 #define _STRNCMP_STYLE 0x1000

-/**
- * Internal option for string transformation functions to not first reset the Edits object.
- */
-#define U_EDITS_NO_RESET 0x2000
-
 /**
 * Compare two strings in code point order or code unit order.
 * Works in strcmp style (both lengths -1),
@ -86,4 +82,62 @@ u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorC
 U_CAPI int32_t U_EXPORT2
 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);

+/**
+ * Counts the bytes of any whole valid sequence for a UTF-8 lead byte.
+ * Returns 1 for ASCII 0..0x7f.
+ * Returns 0 for 0x80..0xc1 as well as for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @return 0..4
+ */
+#define U8_COUNT_BYTES(leadByte) \
+    (U8_IS_SINGLE(leadByte) ? 1 : U8_COUNT_BYTES_NON_ASCII(leadByte))
+
+/**
+ * Counts the bytes of any whole valid sequence for a UTF-8 lead byte.
+ * Returns 0 for 0x00..0xc1 as well as for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @return 0 or 2..4
+ */
+#define U8_COUNT_BYTES_NON_ASCII(leadByte) \
+    (U8_IS_LEAD(leadByte) ? ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+2 : 0)
+
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+class UTF8 {
+public:
+    UTF8() = delete;  // all static
+
+    /**
+     * Is t a valid UTF-8 trail byte?
+     *
+     * @param prev Must be the preceding lead byte if i==1 and length>=3;
+     *             otherwise ignored.
+     * @param t The i-th byte following the lead byte.
+     * @param i The index (1..3) of byte t in the byte sequence. 0<i<length
+     * @param length The length (2..4) of the byte sequence according to the lead byte.
+     * @return TRUE if t is a valid trail byte in this context.
+     */
+    static inline UBool isValidTrail(int32_t prev, uint8_t t, int32_t i, int32_t length) {
+        // The first trail byte after a 3- or 4-byte lead byte
+        // needs to be validated together with its lead byte.
+        if (length <= 2 || i > 1) {
+            return U8_IS_TRAIL(t);
+        } else if (length == 3) {
+            return U8_IS_VALID_LEAD3_AND_T1(prev, t);
+        } else {  // length == 4
+            return U8_IS_VALID_LEAD4_AND_T1(prev, t);
+        }
+    }
+};
+
+U_NAMESPACE_END
+
+#endif  // __cplusplus
+
 #endif
--- a/icu4c/source/common/ustr_titlecase_brkiter.cpp
+++ b/icu4c/source/common/ustr_titlecase_brkiter.cpp
@ -43,28 +43,28 @@ U_NAMESPACE_BEGIN
 class WholeStringBreakIterator : public BreakIterator {
 public:
    WholeStringBreakIterator() : BreakIterator(), length(0) {}
-    ~WholeStringBreakIterator() override;
-    UBool operator==(const BreakIterator&) const override;
-    BreakIterator *clone() const override;
+    ~WholeStringBreakIterator() U_OVERRIDE;
+    UBool operator==(const BreakIterator&) const U_OVERRIDE;
+    BreakIterator *clone() const U_OVERRIDE;
    static UClassID U_EXPORT2 getStaticClassID();
-    UClassID getDynamicClassID() const override;
-    CharacterIterator &getText() const override;
-    UText *getUText(UText *fillIn, UErrorCode &errorCode) const override;
-    void  setText(const UnicodeString &text) override;
-    void  setText(UText *text, UErrorCode &errorCode) override;
-    void  adoptText(CharacterIterator* it) override;
-    int32_t first() override;
-    int32_t last() override;
-    int32_t previous() override;
-    int32_t next() override;
-    int32_t current() const override;
-    int32_t following(int32_t offset) override;
-    int32_t preceding(int32_t offset) override;
-    UBool isBoundary(int32_t offset) override;
-    int32_t next(int32_t n) override;
+    UClassID getDynamicClassID() const U_OVERRIDE;
+    CharacterIterator &getText() const U_OVERRIDE;
+    UText *getUText(UText *fillIn, UErrorCode &errorCode) const U_OVERRIDE;
+    void  setText(const UnicodeString &text) U_OVERRIDE;
+    void  setText(UText *text, UErrorCode &errorCode) U_OVERRIDE;
+    void  adoptText(CharacterIterator* it) U_OVERRIDE;
+    int32_t first() U_OVERRIDE;
+    int32_t last() U_OVERRIDE;
+    int32_t previous() U_OVERRIDE;
+    int32_t next() U_OVERRIDE;
+    int32_t current() const U_OVERRIDE;
+    int32_t following(int32_t offset) U_OVERRIDE;
+    int32_t preceding(int32_t offset) U_OVERRIDE;
+    UBool isBoundary(int32_t offset) U_OVERRIDE;
+    int32_t next(int32_t n) U_OVERRIDE;
    BreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize,
-                                     UErrorCode &errorCode) override;
-    BreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) override;
+                                     UErrorCode &errorCode) U_OVERRIDE;
+    BreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) U_OVERRIDE;

 private:
    int32_t length;
--- a/icu4c/source/common/ustrcase.cpp
+++ b/icu4c/source/common/ustrcase.cpp
@ -24,6 +24,7 @@
 #include "unicode/brkiter.h"
 #include "unicode/casemap.h"
 #include "unicode/edits.h"
+#include "unicode/stringoptions.h"
 #include "unicode/ustring.h"
 #include "unicode/ucasemap.h"
 #include "unicode/ubrk.h"
@ -72,9 +73,9 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
        /* (not) original code point */
        if(edits!=NULL) {
            edits->addUnchanged(cpLength);
-            if(options & U_OMIT_UNCHANGED_TEXT) {
-                return destIndex;
-            }
+        }
+        if(options & U_OMIT_UNCHANGED_TEXT) {
+            return destIndex;
        }
        c=~result;
        if(destIndex<destCapacity && c<=0xffff) {  // BMP slightly-fastpath
@ -149,9 +150,9 @@ appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
    if(length>0) {
        if(edits!=NULL) {
            edits->addUnchanged(length);
-            if(options & U_OMIT_UNCHANGED_TEXT) {
-                return destIndex;
-            }
+        }
+        if(options & U_OMIT_UNCHANGED_TEXT) {
+            return destIndex;
        }
        if(length>(INT32_MAX-destIndex)) {
            return -1;  // integer overflow
@ -933,8 +934,10 @@ int32_t toUpper(uint32_t options,
                }
            }

-            UBool change = TRUE;
-            if (edits != NULL) {
+            UBool change;
+            if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
+                change = TRUE;  // common, simple usage
+            } else {
                // Find out first whether we are changing the text.
                change = src[i] != upper || numYpogegrammeni > 0;
                int32_t i2 = i + 1;
--- a/icu4c/source/common/ustrtrns.cpp
+++ b/icu4c/source/common/ustrtrns.cpp
@ -256,152 +256,6 @@ u_strToUTF32(UChar32 *dest,
            pErrorCode);
 }

-/* for utf8_nextCharSafeBodyTerminated() */
-static const UChar32
-utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
-
-/*
- * Version of utf8_nextCharSafeBody() with the following differences:
- * - checks for NUL termination instead of length
- * - works with pointers instead of indexes
- * - always strict (strict==-1)
- *
- * *ps points to after the lead byte and will be moved to after the last trail byte.
- * c is the lead byte.
- * @return the code point, or U_SENTINEL
- */
-static UChar32
-utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) {
-    const uint8_t *s=*ps;
-    uint8_t trail, illegal=0;
-    uint8_t count=U8_COUNT_TRAIL_BYTES(c);
-    U_ASSERT(count<6);
-    U8_MASK_LEAD_BYTE((c), count);
-    /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
-    switch(count) {
-    /* each branch falls through to the next one */
-    case 5:
-    case 4:
-        /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
-        illegal=1;
-        break;
-    case 3:
-        trail=(uint8_t)(*s++ - 0x80);
-        c=(c<<6)|trail;
-        if(trail>0x3f || c>=0x110) {
-            /* not a trail byte, or code point>0x10ffff (outside Unicode) */
-            illegal=1;
-            break;
-        }
-        U_FALLTHROUGH;
-    case 2:
-        trail=(uint8_t)(*s++ - 0x80);
-        if(trail>0x3f) {
-            /* not a trail byte */
-            illegal=1;
-            break;
-        }
-        c=(c<<6)|trail;
-        U_FALLTHROUGH;
-    case 1:
-        trail=(uint8_t)(*s++ - 0x80);
-        if(trail>0x3f) {
-            /* not a trail byte */
-            illegal=1;
-        }
-        c=(c<<6)|trail;
-        break;
-    case 0:
-        return U_SENTINEL;
-    /* no default branch to optimize switch()  - all values are covered */
-    }
-
-    /* correct sequence - all trail bytes have (b7..b6)==(10)? */
-    /* illegal is also set if count>=4 */
-    if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) {
-        /* error handling */
-        /* don't go beyond this sequence */
-        s=*ps;
-        while(count>0 && U8_IS_TRAIL(*s)) {
-            ++s;
-            --count;
-        }
-        c=U_SENTINEL;
-    }
-    *ps=s;
-    return c;
-}
-
-/*
- * Version of utf8_nextCharSafeBody() with the following differences:
- * - works with pointers instead of indexes
- * - always strict (strict==-1)
- *
- * *ps points to after the lead byte and will be moved to after the last trail byte.
- * c is the lead byte.
- * @return the code point, or U_SENTINEL
- */
-static UChar32
-utf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c) {
-    const uint8_t *s=*ps;
-    uint8_t trail, illegal=0;
-    uint8_t count=U8_COUNT_TRAIL_BYTES(c);
-    if((limit-s)>=count) {
-        U8_MASK_LEAD_BYTE((c), count);
-        /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
-        switch(count) {
-        /* each branch falls through to the next one */
-        case 5:
-        case 4:
-            /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
-            illegal=1;
-            break;
-        case 3:
-            trail=*s++;
-            c=(c<<6)|(trail&0x3f);
-            if(c<0x110) {
-                illegal|=(trail&0xc0)^0x80;
-            } else {
-                /* code point>0x10ffff, outside Unicode */
-                illegal=1;
-                break;
-            }
-            U_FALLTHROUGH;
-        case 2:
-            trail=*s++;
-            c=(c<<6)|(trail&0x3f);
-            illegal|=(trail&0xc0)^0x80;
-            U_FALLTHROUGH;
-        case 1:
-            trail=*s++;
-            c=(c<<6)|(trail&0x3f);
-            illegal|=(trail&0xc0)^0x80;
-            break;
-        case 0:
-            return U_SENTINEL;
-        /* no default branch to optimize switch()  - all values are covered */
-        }
-    } else {
-        illegal=1; /* too few bytes left */
-    }
-
-    /* correct sequence - all trail bytes have (b7..b6)==(10)? */
-    /* illegal is also set if count>=4 */
-    U_ASSERT(illegal || count<UPRV_LENGTHOF(utf8_minLegal));
-    if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) {
-        /* error handling */
-        /* don't go beyond this sequence */
-        s=*ps;
-        while(count>0 && s<limit && U8_IS_TRAIL(*s)) {
-            ++s;
-            --count;
-        }
-        c=U_SENTINEL;
-    }
-    *ps=s;
-    return c;
-}
-
 U_CAPI UChar* U_EXPORT2
 u_strFromUTF8WithSub(UChar *dest,
              int32_t destCapacity,
@ -410,19 +264,10 @@ u_strFromUTF8WithSub(UChar *dest,
              int32_t srcLength,
              UChar32 subchar, int32_t *pNumSubstitutions,
              UErrorCode *pErrorCode){
-    UChar *pDest = dest;
-    UChar *pDestLimit = dest+destCapacity;
-    UChar32 ch;
-    int32_t reqLength = 0;
-    const uint8_t* pSrc = (const uint8_t*) src;
-    uint8_t t1, t2; /* trail bytes */
-    int32_t numSubstitutions;
-
    /* args check */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
+    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
-        
    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
@ -434,7 +279,10 @@ u_strFromUTF8WithSub(UChar *dest,
    if(pNumSubstitutions!=NULL) {
        *pNumSubstitutions=0;
    }
-    numSubstitutions=0;
+    UChar *pDest = dest;
+    UChar *pDestLimit = dest+destCapacity;
+    int32_t reqLength = 0;
+    int32_t numSubstitutions=0;

    /*
     * Inline processing of UTF-8 byte sequences:
@ -455,95 +303,81 @@ u_strFromUTF8WithSub(UChar *dest,
         * The code explicitly checks for NULs only in the lead byte position.
         * A NUL byte in the trail byte position fails the trail byte range check anyway.
         */
-        while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
-            if(ch <= 0x7f){
-                *pDest++=(UChar)ch;
-                ++pSrc;
+        int32_t i;
+        UChar32 c;
+        for(i = 0; (c = (uint8_t)src[i]) != 0 && (pDest < pDestLimit);) {
+            // modified copy of U8_NEXT()
+            ++i;
+            if(U8_IS_SINGLE(c)) {
+                *pDest++=(UChar)c;
            } else {
-                if(ch > 0xe0) {
-                    if( /* handle U+1000..U+CFFF inline */
-                        ch <= 0xec &&
-                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
-                        (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
-                    ) {
-                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
-                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
-                        pSrc += 3;
-                        continue;
-                    }
-                } else if(ch < 0xe0) {
-                    if( /* handle U+0080..U+07FF inline */
-                        ch >= 0xc2 &&
-                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
-                    ) {
-                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
-                        pSrc += 2;
-                        continue;
-                    }
-                }
-
-                /* function call for "complicated" and error cases */
-                ++pSrc; /* continue after the lead byte */
-                ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
-                if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
-                    *pErrorCode = U_INVALID_CHAR_FOUND;
-                    return NULL;
-                } else if(ch<=0xFFFF) {
-                    *(pDest++)=(UChar)ch;
+                uint8_t __t1, __t2;
+                if( /* handle U+0800..U+FFFF inline */
+                        (0xe0<=(c) && (c)<0xf0) &&
+                        U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
+                        (__t2=src[(i)+1]-0x80)<=0x3f) {
+                    *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
+                    i+=2;
+                } else if( /* handle U+0080..U+07FF inline */
+                        ((c)<0xe0 && (c)>=0xc2) &&
+                        (__t1=src[i]-0x80)<=0x3f) {
+                    *pDest++ = (((c)&0x1f)<<6)|__t1;
+                    ++(i);
                } else {
-                    *(pDest++)=U16_LEAD(ch);
-                    if(pDest<pDestLimit) {
-                        *(pDest++)=U16_TRAIL(ch);
+                    /* function call for "complicated" and error cases */
+                    (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1);
+                    if(c<0 && (++numSubstitutions, c = subchar) < 0) {
+                        *pErrorCode = U_INVALID_CHAR_FOUND;
+                        return NULL;
+                    } else if(c<=0xFFFF) {
+                        *(pDest++)=(UChar)c;
                    } else {
-                        reqLength++;
-                        break;
+                        *(pDest++)=U16_LEAD(c);
+                        if(pDest<pDestLimit) {
+                            *(pDest++)=U16_TRAIL(c);
+                        } else {
+                            reqLength++;
+                            break;
+                        }
                    }
                }
            }
        }

        /* Pre-flight the rest of the string. */
-        while((ch = *pSrc) != 0) {
-            if(ch <= 0x7f){
+        while((c = (uint8_t)src[i]) != 0) {
+            // modified copy of U8_NEXT()
+            ++i;
+            if(U8_IS_SINGLE(c)) {
                ++reqLength;
-                ++pSrc;
            } else {
-                if(ch > 0xe0) {
-                    if( /* handle U+1000..U+CFFF inline */
-                        ch <= 0xec &&
-                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
-                        (uint8_t)(pSrc[2] - 0x80) <= 0x3f
-                    ) {
-                        ++reqLength;
-                        pSrc += 3;
-                        continue;
-                    }
-                } else if(ch < 0xe0) {
-                    if( /* handle U+0080..U+07FF inline */
-                        ch >= 0xc2 &&
-                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f
-                    ) {
-                        ++reqLength;
-                        pSrc += 2;
-                        continue;
+                uint8_t __t1, __t2;
+                if( /* handle U+0800..U+FFFF inline */
+                        (0xe0<=(c) && (c)<0xf0) &&
+                        U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
+                        (__t2=src[(i)+1]-0x80)<=0x3f) {
+                    ++reqLength;
+                    i+=2;
+                } else if( /* handle U+0080..U+07FF inline */
+                        ((c)<0xe0 && (c)>=0xc2) &&
+                        (__t1=src[i]-0x80)<=0x3f) {
+                    ++reqLength;
+                    ++(i);
+                } else {
+                    /* function call for "complicated" and error cases */
+                    (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1);
+                    if(c<0 && (++numSubstitutions, c = subchar) < 0) {
+                        *pErrorCode = U_INVALID_CHAR_FOUND;
+                        return NULL;
                    }
+                    reqLength += U16_LENGTH(c);
                }
-
-                /* function call for "complicated" and error cases */
-                ++pSrc; /* continue after the lead byte */
-                ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
-                if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
-                    *pErrorCode = U_INVALID_CHAR_FOUND;
-                    return NULL;
-                }
-                reqLength += U16_LENGTH(ch);
            }
        }
    } else /* srcLength >= 0 */ {
-        const uint8_t *pSrcLimit = pSrc + srcLength;
-        int32_t count;
-
-        /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
+        /* Faster loop without ongoing checking for srcLength and pDestLimit. */
+        int32_t i = 0;
+        UChar32 c;
        for(;;) {
            /*
             * Each iteration of the inner loop progresses by at most 3 UTF-8
@ -551,10 +385,10 @@ u_strFromUTF8WithSub(UChar *dest,
             * For supplementary code points (4 & 2), which are rare,
             * there is an additional adjustment.
             */
-            count = (int32_t)(pDestLimit - pDest);
-            srcLength = (int32_t)((pSrcLimit - pSrc) / 3);
-            if(count > srcLength) {
-                count = srcLength; /* min(remaining dest, remaining src/3) */
+            int32_t count = (int32_t)(pDestLimit - pDest);
+            int32_t count2 = (srcLength - i) / 3;
+            if(count > count2) {
+                count = count2; /* min(remaining dest, remaining src/3) */
            }
            if(count < 3) {
                /*
@ -565,147 +399,123 @@ u_strFromUTF8WithSub(UChar *dest,
            }

            do {
-                ch = *pSrc;
-                if(ch <= 0x7f){
-                    *pDest++=(UChar)ch;
-                    ++pSrc;
+                // modified copy of U8_NEXT()
+                c = (uint8_t)src[i++];
+                if(U8_IS_SINGLE(c)) {
+                    *pDest++=(UChar)c;
                } else {
-                    if(ch > 0xe0) {
-                        if( /* handle U+1000..U+CFFF inline */
-                            ch <= 0xec &&
-                            (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
-                            (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
-                        ) {
-                            /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
-                            *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
-                            pSrc += 3;
-                            continue;
+                    uint8_t __t1, __t2;
+                    if( /* handle U+0800..U+FFFF inline */
+                            (0xe0<=(c) && (c)<0xf0) &&
+                            ((i)+1)<srcLength &&
+                            U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
+                            (__t2=src[(i)+1]-0x80)<=0x3f) {
+                        *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
+                        i+=2;
+                    } else if( /* handle U+0080..U+07FF inline */
+                            ((c)<0xe0 && (c)>=0xc2) &&
+                            ((i)!=srcLength) &&
+                            (__t1=src[i]-0x80)<=0x3f) {
+                        *pDest++ = (((c)&0x1f)<<6)|__t1;
+                        ++(i);
+                    } else {
+                        if(c >= 0xf0 || subchar > 0xffff) {
+                            // We may read up to four bytes and write up to two UChars,
+                            // which we didn't account for when computing count,
+                            // so we adjust it here.
+                            if(--count == 0) {
+                                --i;  // back out byte c
+                                break;
+                            }
                        }
-                    } else if(ch < 0xe0) {
-                        if( /* handle U+0080..U+07FF inline */
-                            ch >= 0xc2 &&
-                            (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
-                        ) {
-                            *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
-                            pSrc += 2;
-                            continue;
-                        }
-                    }

-                    if(ch >= 0xf0 || subchar > 0xffff) {
-                        /*
-                         * We may read up to six bytes and write up to two UChars,
-                         * which we didn't account for with computing count,
-                         * so we adjust it here.
-                         */
-                        if(--count == 0) {
-                            break;
+                        /* function call for "complicated" and error cases */
+                        (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
+                        if(c<0 && (++numSubstitutions, c = subchar) < 0) {
+                            *pErrorCode = U_INVALID_CHAR_FOUND;
+                            return NULL;
+                        } else if(c<=0xFFFF) {
+                            *(pDest++)=(UChar)c;
+                        } else {
+                            *(pDest++)=U16_LEAD(c);
+                            *(pDest++)=U16_TRAIL(c);
                        }
                    }
-
-                    /* function call for "complicated" and error cases */
-                    ++pSrc; /* continue after the lead byte */
-                    ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
-                    if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
-                        *pErrorCode = U_INVALID_CHAR_FOUND;
-                        return NULL;
-                    }else if(ch<=0xFFFF){
-                        *(pDest++)=(UChar)ch;
-                    }else{
-                        *(pDest++)=U16_LEAD(ch);
-                        *(pDest++)=U16_TRAIL(ch);
-                    }
                }
            } while(--count > 0);
        }

-        while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
-            ch = *pSrc;
-            if(ch <= 0x7f){
-                *pDest++=(UChar)ch;
-                ++pSrc;
+        while(i < srcLength && (pDest < pDestLimit)) {
+            // modified copy of U8_NEXT()
+            c = (uint8_t)src[i++];
+            if(U8_IS_SINGLE(c)) {
+                *pDest++=(UChar)c;
            } else {
-                if(ch > 0xe0) {
-                    if( /* handle U+1000..U+CFFF inline */
-                        ch <= 0xec &&
-                        ((pSrcLimit - pSrc) >= 3) &&
-                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
-                        (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
-                    ) {
-                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
-                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
-                        pSrc += 3;
-                        continue;
-                    }
-                } else if(ch < 0xe0) {
-                    if( /* handle U+0080..U+07FF inline */
-                        ch >= 0xc2 &&
-                        ((pSrcLimit - pSrc) >= 2) &&
-                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
-                    ) {
-                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
-                        pSrc += 2;
-                        continue;
-                    }
-                }
-
-                /* function call for "complicated" and error cases */
-                ++pSrc; /* continue after the lead byte */
-                ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
-                if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
-                    *pErrorCode = U_INVALID_CHAR_FOUND;
-                    return NULL;
-                }else if(ch<=0xFFFF){
-                    *(pDest++)=(UChar)ch;
-                }else{
-                    *(pDest++)=U16_LEAD(ch);
-                    if(pDest<pDestLimit){
-                        *(pDest++)=U16_TRAIL(ch);
-                    }else{
-                        reqLength++;
-                        break;
+                uint8_t __t1, __t2;
+                if( /* handle U+0800..U+FFFF inline */
+                        (0xe0<=(c) && (c)<0xf0) &&
+                        ((i)+1)<srcLength &&
+                        U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
+                        (__t2=src[(i)+1]-0x80)<=0x3f) {
+                    *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
+                    i+=2;
+                } else if( /* handle U+0080..U+07FF inline */
+                        ((c)<0xe0 && (c)>=0xc2) &&
+                        ((i)!=srcLength) &&
+                        (__t1=src[i]-0x80)<=0x3f) {
+                    *pDest++ = (((c)&0x1f)<<6)|__t1;
+                    ++(i);
+                } else {
+                    /* function call for "complicated" and error cases */
+                    (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
+                    if(c<0 && (++numSubstitutions, c = subchar) < 0) {
+                        *pErrorCode = U_INVALID_CHAR_FOUND;
+                        return NULL;
+                    } else if(c<=0xFFFF) {
+                        *(pDest++)=(UChar)c;
+                    } else {
+                        *(pDest++)=U16_LEAD(c);
+                        if(pDest<pDestLimit) {
+                            *(pDest++)=U16_TRAIL(c);
+                        } else {
+                            reqLength++;
+                            break;
+                        }
                    }
                }
            }
        }
-        /* do not fill the dest buffer just count the UChars needed */
-        while(pSrc < pSrcLimit){
-            ch = *pSrc;
-            if(ch <= 0x7f){
-                reqLength++;
-                ++pSrc;
-            } else {
-                if(ch > 0xe0) {
-                    if( /* handle U+1000..U+CFFF inline */
-                        ch <= 0xec &&
-                        ((pSrcLimit - pSrc) >= 3) &&
-                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
-                        (uint8_t)(pSrc[2] - 0x80) <= 0x3f
-                    ) {
-                        reqLength++;
-                        pSrc += 3;
-                        continue;
-                    }
-                } else if(ch < 0xe0) {
-                    if( /* handle U+0080..U+07FF inline */
-                        ch >= 0xc2 &&
-                        ((pSrcLimit - pSrc) >= 2) &&
-                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f
-                    ) {
-                        reqLength++;
-                        pSrc += 2;
-                        continue;
-                    }
-                }

-                /* function call for "complicated" and error cases */
-                ++pSrc; /* continue after the lead byte */
-                ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
-                if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
-                    *pErrorCode = U_INVALID_CHAR_FOUND;
-                    return NULL;
+        /* Pre-flight the rest of the string. */
+        while(i < srcLength) {
+            // modified copy of U8_NEXT()
+            c = (uint8_t)src[i++];
+            if(U8_IS_SINGLE(c)) {
+                ++reqLength;
+            } else {
+                uint8_t __t1, __t2;
+                if( /* handle U+0800..U+FFFF inline */
+                        (0xe0<=(c) && (c)<0xf0) &&
+                        ((i)+1)<srcLength &&
+                        U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
+                        (__t2=src[(i)+1]-0x80)<=0x3f) {
+                    ++reqLength;
+                    i+=2;
+                } else if( /* handle U+0080..U+07FF inline */
+                        ((c)<0xe0 && (c)>=0xc2) &&
+                        ((i)!=srcLength) &&
+                        (__t1=src[i]-0x80)<=0x3f) {
+                    ++reqLength;
+                    ++(i);
+                } else {
+                    /* function call for "complicated" and error cases */
+                    (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
+                    if(c<0 && (++numSubstitutions, c = subchar) < 0) {
+                        *pErrorCode = U_INVALID_CHAR_FOUND;
+                        return NULL;
+                    }
+                    reqLength += U16_LENGTH(c);
                }
-                reqLength+=U16_LENGTH(ch);
            }
        }
    }
@ -753,7 +563,7 @@ u_strFromUTF8Lenient(UChar *dest,
    uint8_t* pSrc = (uint8_t*) src;

    /* args check */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
+    if(U_FAILURE(*pErrorCode)){
        return NULL;
    }
        
@ -994,7 +804,7 @@ u_strToUTF8WithSub(char *dest,
    int32_t numSubstitutions;

    /* args check */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
+    if(U_FAILURE(*pErrorCode)){
        return NULL;
    }
        
@ -1266,18 +1076,8 @@ u_strFromJavaModifiedUTF8WithSub(
        int32_t srcLength,
        UChar32 subchar, int32_t *pNumSubstitutions,
        UErrorCode *pErrorCode) {
-    UChar *pDest = dest;
-    UChar *pDestLimit = dest+destCapacity;
-    UChar32 ch;
-    int32_t reqLength = 0;
-    const uint8_t* pSrc = (const uint8_t*) src;
-    const uint8_t *pSrcLimit;
-    int32_t count;
-    uint8_t t1, t2; /* trail bytes */
-    int32_t numSubstitutions;
-
    /* args check */
-    if(U_FAILURE(*pErrorCode)){
+    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
@ -1291,18 +1091,22 @@ u_strFromJavaModifiedUTF8WithSub(
    if(pNumSubstitutions!=NULL) {
        *pNumSubstitutions=0;
    }
-    numSubstitutions=0;
+    UChar *pDest = dest;
+    UChar *pDestLimit = dest+destCapacity;
+    int32_t reqLength = 0;
+    int32_t numSubstitutions=0;

    if(srcLength < 0) {
        /*
         * Transform a NUL-terminated ASCII string.
         * Handle non-ASCII strings with slower code.
         */
-        while(((ch = *pSrc) != 0) && ch <= 0x7f && (pDest < pDestLimit)) {
-            *pDest++=(UChar)ch;
-            ++pSrc;
+        UChar32 c;
+        while(((c = (uint8_t)*src) != 0) && c <= 0x7f && (pDest < pDestLimit)) {
+            *pDest++=(UChar)c;
+            ++src;
        }
-        if(ch == 0) {
+        if(c == 0) {
            reqLength=(int32_t)(pDest - dest);
            if(pDestLength) {
                *pDestLength = reqLength;
@ -1312,33 +1116,38 @@ u_strFromJavaModifiedUTF8WithSub(
            u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
            return dest;
        }
-        srcLength = static_cast<int32_t>(uprv_strlen((const char *)pSrc));
+        srcLength = static_cast<int32_t>(uprv_strlen(src));
    }

-    /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
-    pSrcLimit = (pSrc == NULL) ? NULL : pSrc + srcLength;
+    /* Faster loop without ongoing checking for srcLength and pDestLimit. */
+    UChar32 ch;
+    uint8_t t1, t2;
+    int32_t i = 0;
    for(;;) {
-        count = (int32_t)(pDestLimit - pDest);
-        srcLength = (int32_t)(pSrcLimit - pSrc);
-        if(count >= srcLength && srcLength > 0 && *pSrc <= 0x7f) {
+        int32_t count = (int32_t)(pDestLimit - pDest);
+        int32_t count2 = srcLength - i;
+        if(count >= count2 && srcLength > 0 && U8_IS_SINGLE(*src)) {
            /* fast ASCII loop */
-            const uint8_t *prevSrc = pSrc;
-            int32_t delta;
-            while(pSrc < pSrcLimit && (ch = *pSrc) <= 0x7f) {
-                *pDest++=(UChar)ch;
-                ++pSrc;
+            int32_t start = i;
+            uint8_t b;
+            while(i < srcLength && U8_IS_SINGLE(b = src[i])) {
+                *pDest++=b;
+                ++i;
            }
-            delta = (int32_t)(pSrc - prevSrc);
+            int32_t delta = i - start;
            count -= delta;
-            srcLength -= delta;
+            count2 -= delta;
        }
        /*
         * Each iteration of the inner loop progresses by at most 3 UTF-8
         * bytes and one UChar.
         */
-        srcLength /= 3;
-        if(count > srcLength) {
-            count = srcLength; /* min(remaining dest, remaining src/3) */
+        if(subchar > 0xFFFF) {
+            break;
+        }
+        count2 /= 3;
+        if(count > count2) {
+            count = count2; /* min(remaining dest, remaining src/3) */
        }
        if(count < 3) {
            /*
@ -1348,29 +1157,28 @@ u_strFromJavaModifiedUTF8WithSub(
            break;
        }
        do {
-            ch = *pSrc;
-            if(ch <= 0x7f){
+            ch = (uint8_t)src[i++];
+            if(U8_IS_SINGLE(ch)) {
                *pDest++=(UChar)ch;
-                ++pSrc;
            } else {
                if(ch >= 0xe0) {
                    if( /* handle U+0000..U+FFFF inline */
                        ch <= 0xef &&
-                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
-                        (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
+                        (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f &&
+                        (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f
                    ) {
                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
-                        pSrc += 3;
+                        i += 2;
                        continue;
                    }
                } else {
                    if( /* handle U+0000..U+07FF inline */
                        ch >= 0xc0 &&
-                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
+                        (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f
                    ) {
                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
-                        pSrc += 2;
+                        ++i;
                        continue;
                    }
                }
@ -1383,49 +1191,43 @@ u_strFromJavaModifiedUTF8WithSub(
                     * We need to write two UChars, adjusted count for that,
                     * and ran out of space.
                     */
+                    --i;  // back out byte ch
                    break;
                } else {
                    /* function call for error cases */
-                    ++pSrc; /* continue after the lead byte */
-                    utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
+                    utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
                    ++numSubstitutions;
-                    if(subchar<=0xFFFF) {
-                        *(pDest++)=(UChar)subchar;
-                    } else {
-                        *(pDest++)=U16_LEAD(subchar);
-                        *(pDest++)=U16_TRAIL(subchar);
-                    }
+                    *(pDest++)=(UChar)subchar;
                }
            }
        } while(--count > 0);
    }

-    while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
-        ch = *pSrc;
-        if(ch <= 0x7f){
+    while(i < srcLength && (pDest < pDestLimit)) {
+        ch = (uint8_t)src[i++];
+        if(U8_IS_SINGLE(ch)){
            *pDest++=(UChar)ch;
-            ++pSrc;
        } else {
            if(ch >= 0xe0) {
                if( /* handle U+0000..U+FFFF inline */
                    ch <= 0xef &&
-                    ((pSrcLimit - pSrc) >= 3) &&
-                    (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
-                    (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
+                    (i+1) < srcLength &&
+                    (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f &&
+                    (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f
                ) {
                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
                    *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
-                    pSrc += 3;
+                    i += 2;
                    continue;
                }
            } else {
                if( /* handle U+0000..U+07FF inline */
                    ch >= 0xc0 &&
-                    ((pSrcLimit - pSrc) >= 2) &&
-                    (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
+                    i < srcLength &&
+                    (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f
                ) {
                    *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
-                    pSrc += 2;
+                    ++i;
                    continue;
                }
            }
@ -1435,8 +1237,7 @@ u_strFromJavaModifiedUTF8WithSub(
                return NULL;
            } else {
                /* function call for error cases */
-                ++pSrc; /* continue after the lead byte */
-                utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
+                utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
                ++numSubstitutions;
                if(subchar<=0xFFFF) {
                    *(pDest++)=(UChar)subchar;
@ -1453,32 +1254,31 @@ u_strFromJavaModifiedUTF8WithSub(
        }
    }

-    /* do not fill the dest buffer just count the UChars needed */
-    while(pSrc < pSrcLimit){
-        ch = *pSrc;
-        if(ch <= 0x7f) {
+    /* Pre-flight the rest of the string. */
+    while(i < srcLength) {
+        ch = (uint8_t)src[i++];
+        if(U8_IS_SINGLE(ch)) {
            reqLength++;
-            ++pSrc;
        } else {
            if(ch >= 0xe0) {
                if( /* handle U+0000..U+FFFF inline */
                    ch <= 0xef &&
-                    ((pSrcLimit - pSrc) >= 3) &&
-                    (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
-                    (uint8_t)(pSrc[2] - 0x80) <= 0x3f
+                    (i+1) < srcLength &&
+                    (uint8_t)(src[i] - 0x80) <= 0x3f &&
+                    (uint8_t)(src[i+1] - 0x80) <= 0x3f
                ) {
                    reqLength++;
-                    pSrc += 3;
+                    i += 2;
                    continue;
                }
            } else {
                if( /* handle U+0000..U+07FF inline */
                    ch >= 0xc0 &&
-                    ((pSrcLimit - pSrc) >= 2) &&
-                    (uint8_t)(pSrc[1] - 0x80) <= 0x3f
+                    i < srcLength &&
+                    (uint8_t)(src[i] - 0x80) <= 0x3f
                ) {
                    reqLength++;
-                    pSrc += 2;
+                    ++i;
                    continue;
                }
            }
@ -1488,8 +1288,7 @@ u_strFromJavaModifiedUTF8WithSub(
                return NULL;
            } else {
                /* function call for error cases */
-                ++pSrc; /* continue after the lead byte */
-                utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
+                utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
                ++numSubstitutions;
                reqLength+=U16_LENGTH(ch);
            }
--- a/icu4c/source/common/utext.cpp
+++ b/icu4c/source/common/utext.cpp
@ -847,15 +847,11 @@ U_CDECL_END
 //------------------------------------------------------------------------------

 // Chunk size.
-//     Must be less than 42  (256/6), because of byte mapping from UChar indexes to native indexes.
-//     Worst case there are six UTF-8 bytes per UChar.
-//         obsolete 6 byte form fd + 5 trails maps to fffd
-//         obsolete 5 byte form fc + 4 trails maps to fffd
-//         non-shortest 4 byte forms maps to fffd
-//         normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
-//     mapToUChars array size must allow for the worst case, 6.
-//     This could be brought down to 4, by treating fd and fc as pure illegal,
-//     rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
+//     Must be less than 85 (256/3), because of byte mapping from UChar indexes to native indexes.
+//     Worst case is three native bytes to one UChar.  (Supplementaries are 4 native bytes
+//     to two UChars.)
+//     The longest illegal byte sequence treated as a single error (and converted to U+FFFD)
+//     is a three-byte sequence (truncated four-byte sequence).
 //
 enum { UTF8_TEXT_CHUNK_SIZE=32 };

@ -895,7 +891,7 @@ struct UTF8Buf {
                                                     //  Requires two extra slots,
                                                     //    one for a supplementary starting in the last normal position,
                                                     //    and one for an entry for the buffer limit position.
-    uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*6+6]; // Map native offset from bufNativeStart to
+    uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
                                                     //   correspoding offset in filled part of buf.
    int32_t   align;
 };
--- a/icu4c/source/common/utf_impl.cpp
+++ b/icu4c/source/common/utf_impl.cpp
@ -7,7 +7,7 @@
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
-*   file name:  utf_impl.c
+*   file name:  utf_impl.cpp
 *   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
@ -27,7 +27,6 @@
 #include "unicode/utypes.h"
 #include "unicode/utf.h"
 #include "unicode/utf8.h"
-#include "unicode/utf_old.h"
 #include "uassert.h"

 /*
@ -55,10 +54,6 @@
 * - SUB AX, BX (result)
 * -finish:
 * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB)
- *
- * In Unicode, all UTF-8 byte sequences with more than 4 bytes are illegal;
- * lead bytes above 0xf4 are illegal.
- * We keep them in this table for skipping long ISO 10646-UTF-8 sequences.
 */
 extern "C" U_EXPORT const uint8_t
 utf8_countTrailBytes[256]={
@ -77,24 +72,24 @@ utf8_countTrailBytes[256]={
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    // illegal C0 & C1
+    // 2-byte lead bytes C2..DF
+    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

+    // 3-byte lead bytes E0..EF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    3, 3, 3, 3, 3,
-    3, 3, 3,    /* illegal in Unicode */
-    4, 4, 4, 4, /* illegal in Unicode */
-    5, 5,       /* illegal in Unicode */
-    0, 0        /* illegal bytes 0xfe and 0xff */
+    // 4-byte lead bytes F0..F4
+    // illegal F5..FF
+    3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };

-static const UChar32
-utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
-
 static const UChar32
 utf8_errorValue[6]={
-    UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_2, UTF_ERROR_VALUE, 0x10ffff,
-    0x3ffffff, 0x7fffffff
+    // Same values as UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_2, UTF_ERROR_VALUE,
+    // but without relying on the obsolete unicode/utf_old.h.
+    0x15, 0x9f, 0xffff,
+    0x10ffff
 };

 static UChar32
@ -134,61 +129,59 @@ errorValue(int32_t count, int8_t strict) {
 */
 U_CAPI UChar32 U_EXPORT2
 utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) {
+    // *pi is one after byte c.
    int32_t i=*pi;
-    uint8_t count=U8_COUNT_TRAIL_BYTES(c);
-    U_ASSERT(count <= 5); /* U8_COUNT_TRAIL_BYTES returns value 0...5 */
-    if(i+count<=length || length<0) {
-        uint8_t trail;
-
-        U8_MASK_LEAD_BYTE(c, count);
-        /* support NUL-terminated strings: do not read beyond the first non-trail byte */
-        switch(count) {
-        /* each branch falls through to the next one */
-        case 0:
-            /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
-        case 5:
-        case 4:
-            /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
-            break;
-        case 3:
-            trail=s[i++]-0x80;
-            c=(c<<6)|trail;
-            /* c>=0x110 would result in code point>0x10ffff, outside Unicode */
-            if(c>=0x110 || trail>0x3f) { break; }
-            U_FALLTHROUGH;
-        case 2:
-            trail=s[i++]-0x80;
-            c=(c<<6)|trail;
-            /*
-             * test for a surrogate d800..dfff unless we are lenient:
-             * before the last (c<<6), a surrogate is c=360..37f
-             */
-            if(((c&0xffe0)==0x360 && strict!=-2) || trail>0x3f) { break; }
-            U_FALLTHROUGH;
-        case 1:
-            trail=s[i++]-0x80;
-            c=(c<<6)|trail;
-            if(trail>0x3f) { break; }
-            /* correct sequence - all trail bytes have (b7..b6)==(10) */
-            if(c>=utf8_minLegal[count] &&
-                    /* strict: forbid non-characters like U+fffe */
-                    (strict<=0 || !U_IS_UNICODE_NONCHAR(c))) {
+    // length can be negative for NUL-terminated strings: Read and validate one byte at a time.
+    if(i==length || c>0xf4) {
+        // end of string, or not a lead byte
+    } else if(c>=0xf0) {
+        // Test for 4-byte sequences first because
+        // U8_NEXT() handles shorter valid sequences inline.
+        uint8_t t1=s[i], t2, t3;
+        c&=7;
+        if(U8_IS_VALID_LEAD4_AND_T1(c, t1) &&
+                ++i!=length && (t2=s[i]-0x80)<=0x3f &&
+                ++i!=length && (t3=s[i]-0x80)<=0x3f) {
+            ++i;
+            c=(c<<18)|((t1&0x3f)<<12)|(t2<<6)|t3;
+            // strict: forbid non-characters like U+fffe
+            if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
                *pi=i;
                return c;
            }
-        /* no default branch to optimize switch()  - all values are covered */
        }
-    } else {
-        /* too few bytes left */
-        count=length-i;
-    }
+    } else if(c>=0xe0) {
+        c&=0xf;
+        if(strict!=-2) {
+            uint8_t t1=s[i], t2;
+            if(U8_IS_VALID_LEAD3_AND_T1(c, t1) &&
+                    ++i!=length && (t2=s[i]-0x80)<=0x3f) {
+                ++i;
+                c=(c<<12)|((t1&0x3f)<<6)|t2;
+                // strict: forbid non-characters like U+fffe
+                if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
+                    *pi=i;
+                    return c;
+                }
+            }
+        } else {
+            // strict=-2 -> lenient: allow surrogates
+            uint8_t t1=s[i]-0x80, t2;
+            if(t1<=0x3f && (c>0 || t1>=0x20) &&
+                    ++i!=length && (t2=s[i]-0x80)<=0x3f) {
+                *pi=i+1;
+                return (c<<12)|(t1<<6)|t2;
+            }
+        }
+    } else if(c>=0xc2) {
+        uint8_t t1=s[i]-0x80;
+        if(t1<=0x3f) {
+            *pi=i+1;
+            return ((c-0xc0)<<6)|t1;
+        }
+    }  // else 0x80<=c<0xc2 is not a lead byte

    /* error handling */
-    i=*pi;
-    while(count>0 && U8_IS_TRAIL(s[i])) {
-        ++i;
-        --count;
-    }
    c=errorValue(i-*pi, strict);
    *pi=i;
    return c;
@ -232,7 +225,7 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool
            s+=i;
            offset=0;
            c=utf8_errorValue[length-1];
-            UTF8_APPEND_CHAR_UNSAFE(s, offset, c);
+            U8_APPEND_UNSAFE(s, offset, c);
            i=i+offset;
        }
    }
@ -241,99 +234,99 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool

 U_CAPI UChar32 U_EXPORT2
 utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) {
+    // *pi is the index of byte c.
    int32_t i=*pi;
-    uint8_t b, count=1, shift=6;
-
-    if(!U8_IS_TRAIL(c)) { return errorValue(0, strict); }
-
-    /* extract value bits from the last trail byte */
-    c&=0x3f;
-
-    for(;;) {
-        if(i<=start) {
-            /* no lead byte at all */
-            return errorValue(0, strict);
-        }
-
-        /* read another previous byte */
-        b=s[--i];
-        if((uint8_t)(b-0x80)<0x7e) { /* 0x80<=b<0xfe */
-            if(b&0x40) {
-                /* lead byte, this will always end the loop */
-                uint8_t shouldCount=U8_COUNT_TRAIL_BYTES(b);
-
-                if(count==shouldCount) {
-                    /* set the new position */
-                    *pi=i;
-                    U8_MASK_LEAD_BYTE(b, count);
-                    c|=(UChar32)b<<shift;
-                    if(count>=4 || c>0x10ffff || c<utf8_minLegal[count] || (U_IS_SURROGATE(c) && strict!=-2) || (strict>0 && U_IS_UNICODE_NONCHAR(c))) {
-                        /* illegal sequence or (strict and non-character) */
-                        if(count>=4) {
-                            count=3;
+    if(U8_IS_TRAIL(c) && i>start) {
+        uint8_t b1=s[--i];
+        if(0xc2<=b1 && b1<0xe0) {
+            *pi=i;
+            return ((b1-0xc0)<<6)|(c&0x3f);
+        } else if(U8_IS_TRAIL(b1) && i>start) {
+            // Extract the value bits from the last trail byte.
+            c&=0x3f;
+            uint8_t b2=s[--i];
+            if(0xe0<=b2 && b2<0xf0) {
+                b2&=0xf;
+                if(strict!=-2) {
+                    if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+                        *pi=i;
+                        c=(b2<<12)|((b1&0x3f)<<6)|c;
+                        if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
+                            return c;
+                        } else {
+                            // strict: forbid non-characters like U+fffe
+                            return errorValue(2, strict);
                        }
-                        c=errorValue(count, strict);
-                    } else {
-                        /* exit with correct c */
                    }
                } else {
-                    /* the lead byte does not match the number of trail bytes */
-                    /* only set the position to the lead byte if it would
-                       include the trail byte that we started with */
-                    if(count<shouldCount) {
+                    // strict=-2 -> lenient: allow surrogates
+                    b1-=0x80;
+                    if((b2>0 || b1>=0x20)) {
                        *pi=i;
-                        c=errorValue(count, strict);
-                    } else {
-                        c=errorValue(0, strict);
+                        return (b2<<12)|(b1<<6)|c;
                    }
                }
-                break;
-            } else if(count<5) {
-                /* trail byte */
-                c|=(UChar32)(b&0x3f)<<shift;
-                ++count;
-                shift+=6;
-            } else {
-                /* more than 5 trail bytes is illegal */
-                c=errorValue(0, strict);
-                break;
+            } else if(U8_IS_TRAIL(b2) && i>start) {
+                uint8_t b3=s[--i];
+                if(0xf0<=b3 && b3<=0xf4) {
+                    b3&=7;
+                    if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
+                        *pi=i;
+                        c=(b3<<18)|((b2&0x3f)<<12)|((b1&0x3f)<<6)|c;
+                        if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
+                            return c;
+                        } else {
+                            // strict: forbid non-characters like U+fffe
+                            return errorValue(3, strict);
+                        }
+                    }
+                }
+            } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
+                // Truncated 4-byte sequence.
+                *pi=i;
+                return errorValue(2, strict);
            }
-        } else {
-            /* single-byte character precedes trailing bytes */
-            c=errorValue(0, strict);
-            break;
+        } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
+                (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
+            // Truncated 3- or 4-byte sequence.
+            *pi=i;
+            return errorValue(1, strict);
        }
    }
-    return c;
+    return errorValue(0, strict);
 }

 U_CAPI int32_t U_EXPORT2
 utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) {
-    /* i had been decremented once before the function call */
-    int32_t I=i, Z;
-    uint8_t b;
-
-    /* read at most the 6 bytes s[Z] to s[i], inclusively */
-    if(I-5>start) {
-        Z=I-5;
-    } else {
-        Z=start;
-    }
-
-    /* return I if the sequence starting there is long enough to include i */
-    do {
-        b=s[I];
-        if((uint8_t)(b-0x80)>=0x7e) { /* not 0x80<=b<0xfe */
-            break;
-        } else if(b>=0xc0) {
-            if(U8_COUNT_TRAIL_BYTES(b)>=(i-I)) {
-                return I;
-            } else {
-                break;
+    // Same as utf8_prevCharSafeBody(..., strict=-1) minus assembling code points.
+    int32_t orig_i=i;
+    uint8_t c=s[i];
+    if(U8_IS_TRAIL(c) && i>start) {
+        uint8_t b1=s[--i];
+        if(0xc2<=b1 && b1<0xe0) {
+            return i;
+        } else if(U8_IS_TRAIL(b1) && i>start) {
+            uint8_t b2=s[--i];
+            if(0xe0<=b2 && b2<0xf0) {
+                if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+                    return i;
+                }
+            } else if(U8_IS_TRAIL(b2) && i>start) {
+                uint8_t b3=s[--i];
+                if(0xf0<=b3 && b3<=0xf4) {
+                    if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
+                        return i;
+                    }
+                }
+            } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
+                // Truncated 4-byte sequence.
+                return i;
            }
+        } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
+                (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
+            // Truncated 3- or 4-byte sequence.
+            return i;
        }
-    } while(Z<=--I);
-
-    /* return i itself to be consistent with the FWD_1 macro */
-    return i;
+    }
+    return orig_i;
 }
--- a/icu4c/source/common/utrie2.h
+++ b/icu4c/source/common/utrie2.h
@ -20,6 +20,7 @@
 #define __UTRIE2_H__

 #include "unicode/utypes.h"
+#include "unicode/utf8.h"
 #include "putilimp.h"
 #include "udataswp.h"

@ -54,6 +55,8 @@ typedef struct UTrie UTrie;
 *   is truncated, omitting both the BMP portion and the high range.
 * - There is a special small index for 2-byte UTF-8, and the initial data
 *   entries are designed for fast 1/2-byte UTF-8 lookup.
+ *   Starting with ICU 60, bytes 0xC0 and 0xC1 are not recognized as UTF-8 lead bytes any more at all,
+ *   and the associated 2-byte indexes are unused.
 */

 /**
@ -933,29 +936,29 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
 /** Internal UTF-8 next-post-increment: get the next code point's data. */
 #define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) { \
    uint8_t __lead=(uint8_t)*(src)++; \
-    if(__lead<0xc0) { \
+    if(U8_IS_SINGLE(__lead)) { \
        (result)=(trie)->ascii[__lead]; \
    } else { \
        uint8_t __t1, __t2; \
-        if( /* handle U+0000..U+07FF inline */ \
-            __lead<0xe0 && (src)<(limit) && \
+        if( /* handle U+0800..U+FFFF inline */ \
+            0xe0<=__lead && __lead<0xf0 && ((src)+1)<(limit) && \
+            U8_IS_VALID_LEAD3_AND_T1(__lead, __t1=(uint8_t)*(src)) && \
+            (__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \
+        ) { \
+            (src)+=2; \
+            (result)=(trie)->data[ \
+                ((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \
+                                         ((__t1&0x3f)<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \
+                <<UTRIE2_INDEX_SHIFT)+ \
+                (__t2&UTRIE2_DATA_MASK)]; \
+        } else if( /* handle U+0080..U+07FF inline */ \
+            __lead<0xe0 && __lead>=0xc2 && (src)<(limit) && \
            (__t1=(uint8_t)(*(src)-0x80))<=0x3f \
        ) { \
            ++(src); \
            (result)=(trie)->data[ \
                (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \
                __t1]; \
-        } else if( /* handle U+0000..U+CFFF inline */ \
-            __lead<0xed && ((src)+1)<(limit) && \
-            (__t1=(uint8_t)(*(src)-0x80))<=0x3f && (__lead>0xe0 || __t1>=0x20) && \
-            (__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \
-        ) { \
-            (src)+=2; \
-            (result)=(trie)->data[ \
-                ((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \
-                                         (__t1<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \
-                <<UTRIE2_INDEX_SHIFT)+ \
-                (__t2&UTRIE2_DATA_MASK)]; \
        } else { \
            int32_t __index=utrie2_internalU8NextIndex((trie), __lead, (const uint8_t *)(src), \
                                                                       (const uint8_t *)(limit)); \
@ -968,7 +971,7 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
 /** Internal UTF-8 pre-decrement-previous: get the previous code point's data. */
 #define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) { \
    uint8_t __b=(uint8_t)*--(src); \
-    if(__b<0x80) { \
+    if(U8_IS_SINGLE(__b)) { \
        (result)=(trie)->ascii[__b]; \
    } else { \
        int32_t __index=utrie2_internalU8PrevIndex((trie), __b, (const uint8_t *)(start), \
--- a/icu4c/source/data/brkitr/rules/char.txt
+++ b/icu4c/source/data/brkitr/rules/char.txt
@ -1,4 +1,4 @@
-#
+#
 #   Copyright (C) 2016 and later: Unicode, Inc. and others.
 #   License & terms of use: http://www.unicode.org/copyright.html
 #   Copyright (C) 2002-2016, International Business Machines Corporation and others.
@ -12,6 +12,8 @@
 #      Plus revisions to rule GB 11 from http://unicode.org/cldr/trac/ticket/10088
 #      Plus additional characters introduces with Emoji 5, http://www.unicode.org/reports/tr51/proposed.html

+!!quoted_literals_only;
+
 #
 #  Character Class Definitions.
 #
@ -78,42 +80,6 @@ $Prepend [^$Control $CR $LF];

 ## -------------------------------------------------

-!!reverse;
-$LF $CR;
-($L | $V | $LV | $LVT) $L;
-($V | $T) ($LV | $V);
-$T ($LVT | $T);
-
-# GB 9
-($Extend | $ZWJ)   [^$Control $CR $LF];    #note that this will chain into Regional_Indicator when needed.
-
-# GB 9a
-$SpacingMark [^$Control $CR $LF];
-
-# GB 9b
-[^$Control $CR $LF] $Prepend;
-
-# GB 10
-$E_Modifier $Extend* ($E_Base | $E_Base_GAZ);
-
-# GB 11 Don't break between ZWJ and Glue_After_ZWJ
-($Extended_Pict | $EmojiNRK) $ZWJ $Extend* ($Extended_Pict | $EmojiNRK);
-
-# GB 12-13. Going backwards, we must scan through any number of regional indicators as pairs.
-#
-[{bof} $Extend $ZWJ $SpacingMark] $Regional_Indicator $Regional_Indicator / ($Regional_Indicator $Regional_Indicator)+ [{eof}[^$Regional_Indicator]];
-[{bof} $Extend $ZWJ $SpacingMark]                     $Regional_Indicator / ($Regional_Indicator $Regional_Indicator)+ [{eof}[^$Regional_Indicator]];
-$Regional_Indicator $Regional_Indicator;
-$Regional_Indicator $Prepend;
-
-## -------------------------------------------------
-
 !!safe_reverse;
 $Regional_Indicator $Regional_Indicator;
 ($Extend | $ZWJ | $EmojiNRK | $Extended_Pict)+ .;
-
-## -------------------------------------------------
-
-!!safe_forward;
-$Regional_Indicator $Regional_Indicator;
-($Extend | $ZWJ | $EmojiNRK | $Extended_Pict)+ .;
--- a/icu4c/source/data/brkitr/rules/line.txt
+++ b/icu4c/source/data/brkitr/rules/line.txt
@ -1,4 +1,4 @@
-# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
 # Copyright (c) 2002-2016  International Business Machines Corporation and
 # others. All Rights Reserved.
@ -25,6 +25,7 @@
 #

 !!chain;
+!!quoted_literals_only;

 $AI = [:LineBreak =  Ambiguous:];
 $AL = [:LineBreak =  Alphabetic:];
@ -334,209 +335,6 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;

-#
-#  Reverse Rules.
-#
-## -------------------------------------------------
-
-!!reverse;
-
-#  LB 9 Combining Marks.
-#  Stick together any combining sequences that don't match other rules.
-
-^$CM+ $CAN_CM?;
-
-#
-#  Sequences of the form  (shown forwards)
-#      [CANT_CM]  <break>  [CM]  [whatever]
-#  The CM needs to behave as an AL
-#
-$AL_FOLLOW $CM+ / (
-          [$BK $CR $LF $NL $ZW {eof}] |
-          $SP+ $CM+ $SP |
-          $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to surpress this break.
-                                               #  LB14 says    OP SP* x .
-                                               #    becomes    OP SP* x AL
-                                               #    becomes    OP SP* x CM+ AL_FOLLOW
-                                               #
-                                               # Further note:  the $AL in [$AL {eof}] is only to work around
-                                               #                a rule compiler bug which complains about
-                                               #                empty sets otherwise.
-
-
-# LB 4, 5, 6
-
-$LB4Breaks [$LB4NonBreaks-$CM];
-$LB4Breaks $CM+ $CAN_CM;
-$LF $CR;
-
-
-# LB 7         x SP
-#              x ZW
-[$SP $ZW] [$LB4NonBreaks-$CM];
-[$SP $ZW] $CM+ $CAN_CM;
-
-# LB 8 ZW SP* <break>
-#     TODO: to implement this, we need more than one look-ahead hard break in play at a time.
-#           Requires an engine enhancement.
-#   / $SP* $ZW
-
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)
-#
-($ID | $Extended_Pict | $EmojiNRK) $ZWJ $CM* $CAN_CM?;
-
-
-# LB 9,10  Combining marks.
-#    X   $CM needs to behave like X, where X is not $SP or controls.
-#    $CM not covered by the above needs to behave like $AL
-# Stick together any combining sequences that don't match other rules.
-^$CM+ $CAN_CM;
-
-
-# LB 11
-#
-$WJ $CM* $CAN_CM;
-$WJ      [$LB8NonBreaks-$CM];
-
-     $CANT_CM $CM* $WJ;
-$CAN_CM  $CM* $WJ;
-
-# LB 12a
-#      [^SP BA HY] x GL
-#
-$GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HY]];
-
-# LB 12
-#     GL  x
-#
-$CANT_CM $CM* $GL;
-$CAN_CM $CM* $GL;
-
-
-# LB 13
-$CL $CM+ $CAN_CM;
-$CP $CM+ $CAN_CM;
-$EX $CM+ $CAN_CM;
-$IS $CM+ $CAN_CM;
-$SY $CM+ $CAN_CM;
-
-$CL [$LB8NonBreaks-$CM];
-$CP [$LB8NonBreaks-$CM];
-$EX [$LB8NonBreaks-$CM];
-$IS [$LB8NonBreaks-$CM];
-$SY [$LB8NonBreaks-$CM];
-
-
-# LB 14    OP SP* x
-#
-.   $SP* $CM* $OP;
-$AL_FOLLOW? $CM+ $SP+ $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP+ $CM* $OP
-
-
-# LB 15
-$OP $SP* $CM* $QU;
-
-# LB 16
-$NS $SP* $CM* ($CL | $CP);
-
-# LB 17
-$B2 $SP* $CM* $B2;
-
-# LB 18  break after spaces
-#        Nothing explicit needed here.
-
-
-#
-# LB 19
-#
-$QU $CM* $CAN_CM;                                #   . x QU
-$QU      $LB18NonBreaks;
-
-
-$CAN_CM  $CM* $QU;                               #   QU x .
-     $CANT_CM $CM* $QU;
-
-#
-#  LB 20  Break before and after CB.
-#         nothing needed here.
-#
-
-# LB 21
-($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
-
-[$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
-[^$CB] $CM* $BB;                                      #
-
-# LB21a
-[^$CB] $CM* ($HY | $BA) $CM* $HL;
-
-# LB21b (reverse)
-$HL $CM* $SY;
-
-# LB 22
-$IN $CM* ($ALPlus | $HL);
-$IN $CM* $EX;
-$IN $CM* ($ID | $EB | $EM);
-$IN $CM* $IN;
-$IN $CM* $NU;
-
-# LB 23
-$NU $CM* ($ALPlus | $HL);
-($ALPlus | $HL) $CM* $NU;
-
-# LB23a
-($ID | $EB | $EM) $CM* $PR;
-$PO $CM* ($ID | $EB | $EM);
-
-# LB 24
-($ALPlus | $HL) $CM* ($PR | $PO);
-($PR | $PO) $CM* ($ALPlus | $HL);
-
-
-# LB 25
-($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;
-
-# LB 26
-($H3 | $H2 | $JV | $JL) $CM* $JL;
-($JT | $JV) $CM* ($H2 | $JV);
-$JT $CM* ($H3 | $JT);
-
-# LB 27
-$IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
-$PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
- ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
-
-# LB 28
-($ALPlus | $HL) $CM* ($ALPlus | $HL);
-
-
-# LB 29
-($ALPlus | $HL) $CM* $IS;
-
-# LB 30
-$OP $CM* ($ALPlus | $HL | $NU);
-($ALPlus | $HL | $NU) $CM* $CP;
-
-# LB 30a
-#    Pairs of Regional Indicators.
-#    The following two rules are nearly identical. The first matches only sequences with an odd number of adjacent RIs,
-#    the second with an even number. Stripping away the cruft they look like
-#         [^RI] RI / (RI RI)+ ^RI;
-#         [^RI] RI RI / (RI RI)+ ^RI;
-#
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-
-# In general, adjacent RIs stay together. The hard-break rules, above, overide this, forcing in the boundaries between pairs.
-$RI $CM* $RI;
-
-#    WJ, GL, QU, etc. are classes with rules like "WJ x "   which includes "WJ x RI".
-$RI $CM* ([$WJ $GL $QU $BB] |  (($HY | $BA)$CM* $HL));
-
-
-# LB 30b Do not break between an Emoji Base and an Emoji Modifier
-$EM $CM* $EB;
-

 ## -------------------------------------------------

@ -544,7 +342,6 @@ $EM $CM* $EB;

 # LB 9
 ^$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
-^$CM+ $SP / .;

 # LB 14
 $SP+ $CM* $OP;
@ -571,19 +368,3 @@ $CM* ($HY | $BA) $CM* $HL;
 # For dictionary-based break
 $dictionary $dictionary;

-## -------------------------------------------------
-
-!!safe_forward;
-
-# Skip forward over all character classes that are involved in
-#   rules containing patterns with possibly more than one char
-#   of context.
-#
-#  It might be slightly more efficient to have specific rules
-#  instead of one generic one, but only if we could
-#  turn off rule chaining.  We don't want to move more
-#  than necessary.
-#
-^[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $RI $ZWJ $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $RI $ZWJ $dictionary];
-$dictionary $dictionary;
-
--- a/icu4c/source/data/brkitr/rules/line_fi.txt
+++ b/icu4c/source/data/brkitr/rules/line_fi.txt
@ -1,4 +1,4 @@
-# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
 # Copyright (c) 2002-2016  International Business Machines Corporation and
 # others. All Rights Reserved.
@ -30,6 +30,7 @@
 #

 !!chain;
+!!quoted_literals_only;

 $AI = [:LineBreak =  Ambiguous:];
 $AL = [:LineBreak =  Alphabetic:];
@ -343,220 +344,12 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;

-#
-#  Reverse Rules.
-#
-## -------------------------------------------------
-
-!!reverse;
-
-#  LB 9 Combining Marks.
-#  Stick together any combining sequences that don't match other rules.
-
-^$CM+ $CAN_CM?;
-
-#
-#  Sequences of the form  (shown forwards)
-#      [CANT_CM]  <break>  [CM]  [whatever]
-#  The CM needs to behave as an AL
-#
-$AL_FOLLOW $CM+ / (
-          [$BK $CR $LF $NL $ZW {eof}] |
-          $SP+ $CM+ $SP |
-          $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to surpress this break.
-                                               #  LB14 says    OP SP* x .
-                                               #    becomes    OP SP* x AL
-                                               #    becomes    OP SP* x CM+ AL_FOLLOW
-                                               #
-                                               # Further note:  the $AL in [$AL {eof}] is only to work around
-                                               #                a rule compiler bug which complains about
-                                               #                empty sets otherwise.
-
-
-# LB 4, 5, 6
-
-$LB4Breaks [$LB4NonBreaks-$CM];
-$LB4Breaks $CM+ $CAN_CM;
-$LF $CR;
-
-
-# LB 7         x SP
-#              x ZW
-[$SP $ZW] [$LB4NonBreaks-$CM];
-[$SP $ZW] $CM+ $CAN_CM;
-
-# LB 8 ZW SP* <break>
-#     TODO: to implement this, we need more than one look-ahead hard break in play at a time.
-#           Requires an engine enhancement.
-#   / $SP* $ZW
-
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)
-#
-($ID | $Extended_Pict | $EmojiNRK) $ZWJ $CM* $CAN_CM?;
-
-
-# LB 9,10  Combining marks.
-#    X   $CM needs to behave like X, where X is not $SP or controls.
-#    $CM not covered by the above needs to behave like $AL
-# Stick together any combining sequences that don't match other rules.
-^$CM+ $CAN_CM;
-
-
-# LB 11
-#
-$WJ $CM* $CAN_CM;
-$WJ      [$LB8NonBreaks-$CM];
-
-     $CANT_CM $CM* $WJ;
-$CAN_CM  $CM* $WJ;
-
-# LB 12a
-#      [^SP BA HY] x GL
-#
-$GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HH $HY]];
-
-# LB 12
-#     GL  x
-#
-$CANT_CM $CM* $GL;
-$CAN_CM $CM* $GL;
-
-
-# LB 13
-$CL $CM+ $CAN_CM;
-$CP $CM+ $CAN_CM;
-$EX $CM+ $CAN_CM;
-$IS $CM+ $CAN_CM;
-$SY $CM+ $CAN_CM;
-
-$CL [$LB8NonBreaks-$CM];
-$CP [$LB8NonBreaks-$CM];
-$EX [$LB8NonBreaks-$CM];
-$IS [$LB8NonBreaks-$CM];
-$SY [$LB8NonBreaks-$CM];
-
-
-# LB 14    OP SP* x
-#
-.   $SP* $CM* $OP;
-$AL_FOLLOW? $CM+ $SP+ $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP+ $CM* $OP
-
-
-# LB 15
-$OP $SP* $CM* $QU;
-
-# LB 16
-$NS $SP* $CM* ($CL | $CP);
-
-# LB 17
-$B2 $SP* $CM* $B2;
-
-# LB 18  break after spaces
-#        Nothing explicit needed here.
-
-
-#
-# LB 19
-#
-$QU $CM* $CAN_CM;                                #   . x QU
-$QU      $LB18NonBreaks;
-
-
-$CAN_CM  $CM* $QU;                               #   QU x .
-     $CANT_CM $CM* $QU;
-
-#
-#  LB 20  Break before and after CB.
-#         nothing needed here.
-#
-
-# LB 20.09 added rule for Finnish tailoring
-$AL ($HY | $HH) / $SP;
-
-# LB 21
-($BA | $HH | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
-
-[$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
-[^$CB] $CM* $BB;                                      #
-
-# LB21a
-[^$CB] $CM* ($HY | $BA | $HH) $CM* $HL;
-
-# LB21b (reverse)
-$HL $CM* $SY;
-
-# LB 22
-$IN $CM* ($ALPlus | $HL);
-$IN $CM* $EX;
-$IN $CM* ($ID | $EB | $EM);
-$IN $CM* $IN;
-$IN $CM* $NU;
-
-# LB 23
-$NU $CM* ($ALPlus | $HL);
-($ALPlus | $HL) $CM* $NU;
-
-# LB23a
-($ID | $EB | $EM) $CM* $PR;
-$PO $CM* ($ID | $EB | $EM);
-
-# LB 24
-($ALPlus | $HL) $CM* ($PR | $PO);
-($PR | $PO) $CM* ($ALPlus | $HL);
-
-
-# LB 25
-($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;
-
-# LB 26
-($H3 | $H2 | $JV | $JL) $CM* $JL;
-($JT | $JV) $CM* ($H2 | $JV);
-$JT $CM* ($H3 | $JT);
-
-# LB 27
-$IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
-$PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
- ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
-
-# LB 28
-($ALPlus | $HL) $CM* ($ALPlus | $HL);
-
-
-# LB 29
-($ALPlus | $HL) $CM* $IS;
-
-# LB 30
-$OP $CM* ($ALPlus | $HL | $NU);
-($ALPlus | $HL | $NU) $CM* $CP;
-
-# LB 30a
-#    Pairs of Regional Indicators.
-#    The following two rules are nearly identical. The first matches only sequences with an odd number of adjacent RIs,
-#    the second with an even number. Stripping away the cruft they look like
-#         [^RI] RI / (RI RI)+ ^RI;
-#         [^RI] RI RI / (RI RI)+ ^RI;
-#
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-
-# In general, adjacent RIs stay together. The hard-break rules, above, overide this, forcing in the boundaries between pairs.
-$RI $CM* $RI;
-
-#    WJ, GL, QU, etc. are classes with rules like "WJ x "   which includes "WJ x RI".
-$RI $CM* ([$WJ $GL $QU $BB] |  (($HY | $BA)$CM* $HL));
-
-
-# LB 30b Do not break between an Emoji Base and an Emoji Modifier
-$EM $CM* $EB;
-
-
 ## -------------------------------------------------

 !!safe_reverse;

 # LB 9
 ^$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
-^$CM+ $SP / .;

 # LB 14
 $SP+ $CM* $OP;
@ -582,20 +375,3 @@ $CM* ($HY | $BA | $HH) $CM* $HL;

 # For dictionary-based break
 $dictionary $dictionary;
-
-## -------------------------------------------------
-
-!!safe_forward;
-
-# Skip forward over all character classes that are involved in
-#   rules containing patterns with possibly more than one char
-#   of context.
-#
-#  It might be slightly more efficient to have specific rules
-#  instead of one generic one, but only if we could
-#  turn off rule chaining.  We don't want to move more
-#  than necessary.
-#
-^[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $RI $ZWJ $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $RI $ZWJ $dictionary];
-$dictionary $dictionary;
-
--- a/icu4c/source/data/brkitr/rules/line_loose.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose.txt
@ -1,5 +1,6 @@
-# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
+#
 # Copyright (c) 2002-2016  International Business Machines Corporation and
 # others. All Rights Reserved.
 #
@ -32,6 +33,7 @@
 #

 !!chain;
+!!quoted_literals_only;

 $AI = [:LineBreak =  Ambiguous:];
 $AL = [:LineBreak =  Alphabetic:];
@ -345,212 +347,6 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;

-#
-#  Reverse Rules.
-#
-## -------------------------------------------------
-
-!!reverse;
-
-#  LB 9 Combining Marks.
-#  Stick together any combining sequences that don't match other rules.
-
-^$CM+ $CAN_CM?;
-
-#
-#  Sequences of the form  (shown forwards)
-#      [CANT_CM]  <break>  [CM]  [whatever]
-#  The CM needs to behave as an AL
-#
-$AL_FOLLOW $CM+ / (
-          [$BK $CR $LF $NL $ZW {eof}] |
-          $SP+ $CM+ $SP |
-          $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to surpress this break.
-                                               #  LB14 says    OP SP* x .
-                                               #    becomes    OP SP* x AL
-                                               #    becomes    OP SP* x CM+ AL_FOLLOW
-                                               #
-                                               # Further note:  the $AL in [$AL {eof}] is only to work around
-                                               #                a rule compiler bug which complains about
-                                               #                empty sets otherwise.
-
-
-# LB 4, 5, 6
-
-$LB4Breaks [$LB4NonBreaks-$CM];
-$LB4Breaks $CM+ $CAN_CM;
-$LF $CR;
-
-
-# LB 7         x SP
-#              x ZW
-[$SP $ZW] [$LB4NonBreaks-$CM];
-[$SP $ZW] $CM+ $CAN_CM;
-
-# LB 8 ZW SP* <break>
-#     TODO: to implement this, we need more than one look-ahead hard break in play at a time.
-#           Requires an engine enhancement.
-#   / $SP* $ZW
-
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)
-#
-($ID | $Extended_Pict | $EmojiNRK) $ZWJ $CM* $CAN_CM?;
-
-
-# LB 9,10  Combining marks.
-#    X   $CM needs to behave like X, where X is not $SP or controls.
-#    $CM not covered by the above needs to behave like $AL
-# Stick together any combining sequences that don't match other rules.
-^$CM+ $CAN_CM;
-
-
-# LB 11
-#
-$WJ $CM* $CAN_CM;
-$WJ      [$LB8NonBreaks-$CM];
-
-     $CANT_CM $CM* $WJ;
-$CAN_CM  $CM* $WJ;
-
-# LB 12a
-#      [^SP BA HY] x GL
-#
-$GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HY]];
-
-# LB 12
-#     GL  x
-#
-$CANT_CM $CM* $GL;
-$CAN_CM $CM* $GL;
-
-
-# LB 13
-$CL $CM+ $CAN_CM;
-$CP $CM+ $CAN_CM;
-$EX $CM+ $CAN_CM;
-$IS $CM+ $CAN_CM;
-$SY $CM+ $CAN_CM;
-
-$CL [$LB8NonBreaks-$CM];
-$CP [$LB8NonBreaks-$CM];
-$EX [$LB8NonBreaks-$CM];
-$IS [$LB8NonBreaks-$CM];
-$SY [$LB8NonBreaks-$CM];
-
-
-# LB 14    OP SP* x
-#
-.   $SP* $CM* $OP;
-$AL_FOLLOW? $CM+ $SP+ $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP+ $CM* $OP
-
-
-# LB 15
-$OP $SP* $CM* $QU;
-
-# LB 16
-# Don't include $NSX here
-$NS $SP* $CM* ($CL | $CP);
-
-# LB 17
-$B2 $SP* $CM* $B2;
-
-# LB 18  break after spaces
-#        Nothing explicit needed here.
-
-
-#
-# LB 19
-#
-$QU $CM* $CAN_CM;                                #   . x QU
-$QU      $LB18NonBreaks;
-
-
-$CAN_CM  $CM* $QU;                               #   QU x .
-     $CANT_CM $CM* $QU;
-
-#
-#  LB 20  Break before and after CB.
-#         nothing needed here.
-#
-
-# LB 21
-# Don't include $NSX here
-($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
-
-[$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
-[^$CB] $CM* $BB;                                      #
-
-# LB21a
-[^$CB] $CM* ($HY | $BA) $CM* $HL;
-
-# LB21b (reverse)
-$HL $CM* $SY;
-
-# LB 22
-$IN $CM* ($ALPlus | $HL);
-$IN $CM* $EX;
-$IN $CM* ($ID | $EB | $EM);
-# $IN $CM* $IN; # delete this rule for CSS loose
-$IN $CM* $NU;
-
-# LB 23
-$NU $CM* ($ALPlus | $HL);
-($ALPlus | $HL) $CM* $NU;
-
-# LB23a
-($ID | $EB | $EM) $CM* $PR;
-$PO $CM* ($ID | $EB | $EM);
-
-# LB 24
-($ALPlus | $HL) $CM* ($PR | $PO);
-($PR | $PO) $CM* ($ALPlus | $HL);
-
-
-# LB 25
-($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;
-
-# LB 26
-($H3 | $H2 | $JV | $JL) $CM* $JL;
-($JT | $JV) $CM* ($H2 | $JV);
-$JT $CM* ($H3 | $JT);
-
-# LB 27
-$IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
-$PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
- ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
-
-# LB 28
-($ALPlus | $HL) $CM* ($ALPlus | $HL);
-
-
-# LB 29
-($ALPlus | $HL) $CM* $IS;
-
-# LB 30
-$OP $CM* ($ALPlus | $HL | $NU);
-($ALPlus | $HL | $NU) $CM* $CP;
-
-# LB 30a
-#    Pairs of Regional Indicators.
-#    The following two rules are nearly identical. The first matches only sequences with an odd number of adjacent RIs,
-#    the second with an even number. Stripping away the cruft they look like
-#         [^RI] RI / (RI RI)+ ^RI;
-#         [^RI] RI RI / (RI RI)+ ^RI;
-#
-# Line Loose tailoring: Don't include NSX here.
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-
-# In general, adjacent RIs stay together. The hard-break rules, above, overide this, forcing in the boundaries between pairs.
-$RI $CM* $RI;
-
-#    WJ, GL, QU, etc. are classes with rules like "WJ x "   which includes "WJ x RI".
-$RI $CM* ([$WJ $GL $QU $BB] |  (($HY | $BA)$CM* $HL));
-
-
-# LB 30b Do not break between an Emoji Base and an Emoji Modifier
-$EM $CM* $EB;
-

 ## -------------------------------------------------

@ -558,7 +354,6 @@ $EM $CM* $EB;

 # LB 9
 ^$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
-^$CM+ $SP / .;

 # LB 14
 $SP+ $CM* $OP;
@ -584,20 +379,3 @@ $CM* ($HY | $BA) $CM* $HL;

 # For dictionary-based break
 $dictionary $dictionary;
-
-## -------------------------------------------------
-
-!!safe_forward;
-
-# Skip forward over all character classes that are involved in
-#   rules containing patterns with possibly more than one char
-#   of context.
-#
-#  It might be slightly more efficient to have specific rules
-#  instead of one generic one, but only if we could
-#  turn off rule chaining.  We don't want to move more
-#  than necessary.
-#
-^[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $RI $ZWJ $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $RI $ZWJ $dictionary];
-$dictionary $dictionary;
-
--- a/icu4c/source/data/brkitr/rules/line_loose_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose_cj.txt
@ -1,4 +1,4 @@
-# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
 # Copyright (c) 2002-2016  International Business Machines Corporation and
 # others. All Rights Reserved.
@ -39,6 +39,7 @@
 #

 !!chain;
+!!quoted_literals_only;

 $AI = [:LineBreak =  Ambiguous:];
 $AL = [:LineBreak =  Alphabetic:];
@ -360,226 +361,12 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;

-#
-#  Reverse Rules.
-#
-## -------------------------------------------------
-
-!!reverse;
-
-#  LB 9 Combining Marks.
-#  Stick together any combining sequences that don't match other rules.
-
-^$CM+ $CAN_CM?;
-
-#
-#  Sequences of the form  (shown forwards)
-#      [CANT_CM]  <break>  [CM]  [whatever]
-#  The CM needs to behave as an AL
-#
-$AL_FOLLOW $CM+ / (
-          [$BK $CR $LF $NL $ZW {eof}] |
-          $SP+ $CM+ $SP |
-          $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to surpress this break.
-                                               #  LB14 says    OP SP* x .
-                                               #    becomes    OP SP* x AL
-                                               #    becomes    OP SP* x CM+ AL_FOLLOW
-                                               #
-                                               # Further note:  the $AL in [$AL {eof}] is only to work around
-                                               #                a rule compiler bug which complains about
-                                               #                empty sets otherwise.
-
-
-# LB 4, 5, 6
-
-$LB4Breaks [$LB4NonBreaks-$CM];
-$LB4Breaks $CM+ $CAN_CM;
-$LF $CR;
-
-
-# LB 7         x SP
-#              x ZW
-[$SP $ZW] [$LB4NonBreaks-$CM];
-[$SP $ZW] $CM+ $CAN_CM;
-
-# LB 8 ZW SP* <break>
-#     TODO: to implement this, we need more than one look-ahead hard break in play at a time.
-#           Requires an engine enhancement.
-#   / $SP* $ZW
-
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)
-#
-($ID | $Extended_Pict | $EmojiNRK) $ZWJ $CM* $CAN_CM?;
-
-
-# LB 9,10  Combining marks.
-#    X   $CM needs to behave like X, where X is not $SP or controls.
-#    $CM not covered by the above needs to behave like $AL
-# Stick together any combining sequences that don't match other rules.
-^$CM+ $CAN_CM;
-
-
-# LB 11
-#
-$WJ $CM* $CAN_CM;
-$WJ      [$LB8NonBreaks-$CM];
-
-     $CANT_CM $CM* $WJ;
-$CAN_CM  $CM* $WJ;
-
-# LB 12a
-#      [^SP BA HY] x GL
-#
-$GL $CM* [$LB8NonBreaks-[$CM $SP $BA $BAX $HY]];
-
-# LB 12
-#     GL  x
-#
-$CANT_CM $CM* $GL;
-$CAN_CM $CM* $GL;
-
-
-# LB 13
-# Do not include $EXX here
-$CL $CM+ $CAN_CM;
-$CP $CM+ $CAN_CM;
-$EX $CM+ $CAN_CM;
-$IS $CM+ $CAN_CM;
-$SY $CM+ $CAN_CM;
-
-$CL [$LB8NonBreaks-$CM];
-$CP [$LB8NonBreaks-$CM];
-$EX [$LB8NonBreaks-$CM];
-$IS [$LB8NonBreaks-$CM];
-$SY [$LB8NonBreaks-$CM];
-
-
-# LB 14    OP SP* x
-#
-.   $SP* $CM* $OP;
-$AL_FOLLOW? $CM+ $SP+ $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP+ $CM* $OP
-
-
-# LB 15
-$OP $SP* $CM* $QU;
-
-# LB 16
-# Don't include $NSX here
-$NS $SP* $CM* ($CL | $CP);
-
-# LB 17
-$B2 $SP* $CM* $B2;
-
-# LB 18  break after spaces
-#        Nothing explicit needed here.
-
-
-#
-# LB 19
-#
-$QU $CM* $CAN_CM;                                #   . x QU
-$QU      $LB18NonBreaks;
-
-
-$CAN_CM  $CM* $QU;                               #   QU x .
-     $CANT_CM $CM* $QU;
-
-#
-#  LB 20  Break before and after CB.
-#         nothing needed here.
-#
-
-# LB 21
-# Don't include $BAX or $NSX here
-($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
-
-[$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
-[^$CB] $CM* $BB;                                      #
-
-# LB21a  Don't break after Hebrew + Hyphen.
-([^$CB] $CM*)? ($HY | $BA | $BAX) $CM* $HL;
-
-# LB21b (reverse)
-$HL $CM* $SY;
-
-# LB 22
-$IN $CM* ($ALPlus | $HL);
-$IN $CM* $EX;
-$IN $CM* ($ID | $EB | $EM);
-# $IN $CM* $IN; # delete this rule for CSS loose
-$IN $CM* $NU;
-
-# LB 23
-# Do not include $POX here
-$NU $CM* ($ALPlus | $HL);
-($ALPlus | $HL) $CM* $NU;
-
-# LB23a
-# Do not include $PRX here
-($ID | $EB | $EM) $CM* $PR;
-$PO $CM* ($ID | $EB | $EM);
-
-# LB 24
-# Do not include $PRX here
-($ALPlus | $HL) $CM* ($PR | $PO | $POX);
-($PR | $PO | $POX) $CM* ($ALPlus | $HL);
-
-
-# LB 25
-# Here do not include $POX at the beginning or $PRX at the end
-($CM* ($PR | $PRX | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO | $POX))?;
-
-# LB 26
-($H3 | $H2 | $JV | $JL) $CM* $JL;
-($JT | $JV) $CM* ($H2 | $JV);
-$JT $CM* ($H3 | $JT);
-
-# LB 27
-# Do not include $POX or $PRX here
-$IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
-$PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
- ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
-
-# LB 28
-($ALPlus | $HL) $CM* ($ALPlus | $HL);
-
-
-# LB 29
-($ALPlus | $HL) $CM* $IS;
-
-# LB 30
-$OP $CM* ($ALPlus | $HL | $NU);
-($ALPlus | $HL | $NU) $CM* $CP;
-
-# LB 30a
-#    Pairs of Regional Indicators.
-#    The following two rules are nearly identical. The first matches only sequences with an odd number of adjacent RIs,
-#    the second with an even number. Stripping away the cruft they look like
-#         [^RI] RI / (RI RI)+ ^RI;
-#         [^RI] RI RI / (RI RI)+ ^RI;
-#
-# Line Loose tailoring: Don't include NSX here.
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-
-# In general, adjacent RIs stay together. The hard-break rules, above, overide this, forcing in the boundaries between pairs.
-$RI $CM* $RI;
-
-#    WJ, GL, QU, etc. are classes with rules like "WJ x "   which includes "WJ x RI".
-$RI $CM* ([$WJ $GL $QU $BB] |  (($HY | $BA)$CM* $HL));
-
-
-# LB 30b Do not break between an Emoji Base and an Emoji Modifier
-$EM $CM* $EB;
-
-
 ## -------------------------------------------------

 !!safe_reverse;

 # LB 9
 ^$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
-^$CM+ $SP / .;

 # LB 14
 $SP+ $CM* $OP;
@ -605,20 +392,3 @@ $CM* ($HY | $BA | $BAX) $CM* $HL;

 # For dictionary-based break
 $dictionary $dictionary;
-
-## -------------------------------------------------
-
-!!safe_forward;
-
-# Skip forward over all character classes that are involved in
-#   rules containing patterns with possibly more than one char
-#   of context.
-#
-#  It might be slightly more efficient to have specific rules
-#  instead of one generic one, but only if we could
-#  turn off rule chaining.  We don't want to move more
-#  than necessary.
-#
-^[$CM $OP $QU $CL $CP $B2 $PR $PRX $HY $BA $BAX $SP $RI $ZWJ $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $PRX $HY $BA $BAX $RI $ZWJ $dictionary];
-$dictionary $dictionary;
-
--- a/icu4c/source/data/brkitr/rules/line_loose_fi.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose_fi.txt
@ -1,4 +1,4 @@
-# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
 # Copyright (c) 2002-2016  International Business Machines Corporation and
 # others. All Rights Reserved.
@ -28,6 +28,7 @@
 #

 !!chain;
+!!quoted_literals_only;

 $AI = [:LineBreak =  Ambiguous:];
 $AL = [:LineBreak =  Alphabetic:];
@ -345,215 +346,6 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;

-#
-#  Reverse Rules.
-#
-## -------------------------------------------------
-
-!!reverse;
-
-#  LB 9 Combining Marks.
-#  Stick together any combining sequences that don't match other rules.
-
-^$CM+ $CAN_CM?;
-
-#
-#  Sequences of the form  (shown forwards)
-#      [CANT_CM]  <break>  [CM]  [whatever]
-#  The CM needs to behave as an AL
-#
-$AL_FOLLOW $CM+ / (
-          [$BK $CR $LF $NL $ZW {eof}] |
-          $SP+ $CM+ $SP |
-          $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to surpress this break.
-                                               #  LB14 says    OP SP* x .
-                                               #    becomes    OP SP* x AL
-                                               #    becomes    OP SP* x CM+ AL_FOLLOW
-                                               #
-                                               # Further note:  the $AL in [$AL {eof}] is only to work around
-                                               #                a rule compiler bug which complains about
-                                               #                empty sets otherwise.
-
-
-# LB 4, 5, 6
-
-$LB4Breaks [$LB4NonBreaks-$CM];
-$LB4Breaks $CM+ $CAN_CM;
-$LF $CR;
-
-
-# LB 7         x SP
-#              x ZW
-[$SP $ZW] [$LB4NonBreaks-$CM];
-[$SP $ZW] $CM+ $CAN_CM;
-
-# LB 8 ZW SP* <break>
-#     TODO: to implement this, we need more than one look-ahead hard break in play at a time.
-#           Requires an engine enhancement.
-#   / $SP* $ZW
-
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)
-#
-($ID | $Extended_Pict | $EmojiNRK) $ZWJ $CM* $CAN_CM?;
-
-
-# LB 9,10  Combining marks.
-#    X   $CM needs to behave like X, where X is not $SP or controls.
-#    $CM not covered by the above needs to behave like $AL
-# Stick together any combining sequences that don't match other rules.
-^$CM+ $CAN_CM;
-
-
-# LB 11
-#
-$WJ $CM* $CAN_CM;
-$WJ      [$LB8NonBreaks-$CM];
-
-     $CANT_CM $CM* $WJ;
-$CAN_CM  $CM* $WJ;
-
-# LB 12a
-#      [^SP BA HY] x GL
-#
-$GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HH $HY]];
-
-# LB 12
-#     GL  x
-#
-$CANT_CM $CM* $GL;
-$CAN_CM $CM* $GL;
-
-
-# LB 13
-$CL $CM+ $CAN_CM;
-$CP $CM+ $CAN_CM;
-$EX $CM+ $CAN_CM;
-$IS $CM+ $CAN_CM;
-$SY $CM+ $CAN_CM;
-
-$CL [$LB8NonBreaks-$CM];
-$CP [$LB8NonBreaks-$CM];
-$EX [$LB8NonBreaks-$CM];
-$IS [$LB8NonBreaks-$CM];
-$SY [$LB8NonBreaks-$CM];
-
-
-# LB 14    OP SP* x
-#
-.   $SP* $CM* $OP;
-$AL_FOLLOW? $CM+ $SP+ $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP+ $CM* $OP
-
-
-# LB 15
-$OP $SP* $CM* $QU;
-
-# LB 16
-# Don't include $NSX here
-$NS $SP* $CM* ($CL | $CP);
-
-# LB 17
-$B2 $SP* $CM* $B2;
-
-# LB 18  break after spaces
-#        Nothing explicit needed here.
-
-
-#
-# LB 19
-#
-$QU $CM* $CAN_CM;                                #   . x QU
-$QU      $LB18NonBreaks;
-
-
-$CAN_CM  $CM* $QU;                               #   QU x .
-     $CANT_CM $CM* $QU;
-
-#
-#  LB 20  Break before and after CB.
-#         nothing needed here.
-#
-
-# LB 20.09 added rule for Finnish tailoring
-$AL ($HY | $HH) / $SP;
-
-# LB 21
-# Don't include $NSX here
-($BA | $HH | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
-
-[$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
-[^$CB] $CM* $BB;                                      #
-
-# LB21a
-[^$CB] $CM* ($HY | $BA | $HH) $CM* $HL;
-
-# LB21b (reverse)
-$HL $CM* $SY;
-
-# LB 22
-$IN $CM* ($ALPlus | $HL);
-$IN $CM* $EX;
-$IN $CM* ($ID | $EB | $EM);
-# $IN $CM* $IN; # delete this rule for CSS loose
-$IN $CM* $NU;
-
-# LB 23
-$NU $CM* ($ALPlus | $HL);
-($ALPlus | $HL) $CM* $NU;
-
-# LB23a
-($ID | $EB | $EM) $CM* $PR;
-$PO $CM* ($ID | $EB | $EM);
-
-# LB 24
-($ALPlus | $HL) $CM* ($PR | $PO);
-($PR | $PO) $CM* ($ALPlus | $HL);
-
-
-# LB 25
-($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;
-
-# LB 26
-($H3 | $H2 | $JV | $JL) $CM* $JL;
-($JT | $JV) $CM* ($H2 | $JV);
-$JT $CM* ($H3 | $JT);
-
-# LB 27
-$IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
-$PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
- ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
-
-# LB 28
-($ALPlus | $HL) $CM* ($ALPlus | $HL);
-
-
-# LB 29
-($ALPlus | $HL) $CM* $IS;
-
-# LB 30
-$OP $CM* ($ALPlus | $HL | $NU);
-($ALPlus | $HL | $NU) $CM* $CP;
-
-# LB 30a
-#    Pairs of Regional Indicators.
-#    The following two rules are nearly identical. The first matches only sequences with an odd number of adjacent RIs,
-#    the second with an even number. Stripping away the cruft they look like
-#         [^RI] RI / (RI RI)+ ^RI;
-#         [^RI] RI RI / (RI RI)+ ^RI;
-#
-# Line Loose tailoring: Don't include NSX here.
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-
-# In general, adjacent RIs stay together. The hard-break rules, above, overide this, forcing in the boundaries between pairs.
-$RI $CM* $RI;
-
-#    WJ, GL, QU, etc. are classes with rules like "WJ x "   which includes "WJ x RI".
-$RI $CM* ([$WJ $GL $QU $BB] |  (($HY | $BA)$CM* $HL));
-
-
-# LB 30b Do not break between an Emoji Base and an Emoji Modifier
-$EM $CM* $EB;
-

 ## -------------------------------------------------

@ -561,7 +353,6 @@ $EM $CM* $EB;

 # LB 9
 ^$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
-^$CM+ $SP / .;

 # LB 14
 $SP+ $CM* $OP;
@ -587,20 +378,3 @@ $CM* ($HY | $BA | $HH) $CM* $HL;

 # For dictionary-based break
 $dictionary $dictionary;
-
-## -------------------------------------------------
-
-!!safe_forward;
-
-# Skip forward over all character classes that are involved in
-#   rules containing patterns with possibly more than one char
-#   of context.
-#
-#  It might be slightly more efficient to have specific rules
-#  instead of one generic one, but only if we could
-#  turn off rule chaining.  We don't want to move more
-#  than necessary.
-#
-^[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $RI $ZWJ $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $RI $ZWJ $dictionary];
-$dictionary $dictionary;
-
--- a/icu4c/source/data/brkitr/rules/line_normal.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal.txt
@ -1,4 +1,4 @@
-# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
 # Copyright (c) 2002-2016  International Business Machines Corporation and
 # others. All Rights Reserved.
@ -29,6 +29,7 @@
 #

 !!chain;
+!!quoted_literals_only;

 $AI = [:LineBreak =  Ambiguous:];
 $AL = [:LineBreak =  Alphabetic:];
@ -338,217 +339,12 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;

-#
-#  Reverse Rules.
-#
-## -------------------------------------------------
-
-!!reverse;
-
-#  LB 9 Combining Marks.
-#  Stick together any combining sequences that don't match other rules.
-
-^$CM+ $CAN_CM?;
-
-#
-#  Sequences of the form  (shown forwards)
-#      [CANT_CM]  <break>  [CM]  [whatever]
-#  The CM needs to behave as an AL
-#
-$AL_FOLLOW $CM+ / (
-          [$BK $CR $LF $NL $ZW {eof}] |
-          $SP+ $CM+ $SP |
-          $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to surpress this break.
-                                               #  LB14 says    OP SP* x .
-                                               #    becomes    OP SP* x AL
-                                               #    becomes    OP SP* x CM+ AL_FOLLOW
-                                               #
-                                               # Further note:  the $AL in [$AL {eof}] is only to work around
-                                               #                a rule compiler bug which complains about
-                                               #                empty sets otherwise.
-
-
-# LB 4, 5, 6
-
-$LB4Breaks [$LB4NonBreaks-$CM];
-$LB4Breaks $CM+ $CAN_CM;
-$LF $CR;
-
-
-# LB 7         x SP
-#              x ZW
-[$SP $ZW] [$LB4NonBreaks-$CM];
-[$SP $ZW] $CM+ $CAN_CM;
-
-# LB 8 ZW SP* <break>
-#     TODO: to implement this, we need more than one look-ahead hard break in play at a time.
-#           Requires an engine enhancement.
-#   / $SP* $ZW
-
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)
-#
-($ID | $Extended_Pict | $EmojiNRK) $ZWJ $CM* $CAN_CM?;
-
-
-# LB 9,10  Combining marks.
-#    X   $CM needs to behave like X, where X is not $SP or controls.
-#    $CM not covered by the above needs to behave like $AL
-# Stick together any combining sequences that don't match other rules.
-^$CM+ $CAN_CM;
-
-
-# LB 11
-#
-$WJ $CM* $CAN_CM;
-$WJ      [$LB8NonBreaks-$CM];
-
-     $CANT_CM $CM* $WJ;
-$CAN_CM  $CM* $WJ;
-
-# LB 12a
-#      [^SP BA HY] x GL
-#
-$GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HY]];
-
-# LB 12
-#     GL  x
-#
-$CANT_CM $CM* $GL;
-$CAN_CM $CM* $GL;
-
-
-# LB 13
-$CL $CM+ $CAN_CM;
-$CP $CM+ $CAN_CM;
-$EX $CM+ $CAN_CM;
-$IS $CM+ $CAN_CM;
-$SY $CM+ $CAN_CM;
-
-$CL [$LB8NonBreaks-$CM];
-$CP [$LB8NonBreaks-$CM];
-$EX [$LB8NonBreaks-$CM];
-$IS [$LB8NonBreaks-$CM];
-$SY [$LB8NonBreaks-$CM];
-
-
-# LB 14    OP SP* x
-#
-.   $SP* $CM* $OP;
-$AL_FOLLOW? $CM+ $SP+ $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP+ $CM* $OP
-
-
-# LB 15
-$OP $SP* $CM* $QU;
-
-# LB 16
-$NS $SP* $CM* ($CL | $CP);
-
-# LB 17
-$B2 $SP* $CM* $B2;
-
-# LB 18  break after spaces
-#        Nothing explicit needed here.
-
-
-#
-# LB 19
-#
-$QU $CM* $CAN_CM;                                #   . x QU
-$QU      $LB18NonBreaks;
-
-
-$CAN_CM  $CM* $QU;                               #   QU x .
-     $CANT_CM $CM* $QU;
-
-#
-#  LB 20  Break before and after CB.
-#         nothing needed here.
-#
-
-# LB 21
-($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
-
-[$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
-[^$CB] $CM* $BB;                                      #
-
-# LB21a
-[^$CB] $CM* ($HY | $BA) $CM* $HL;
-
-# LB21b (reverse)
-$HL $CM* $SY;
-
-# LB 22
-$IN $CM* ($ALPlus | $HL);
-$IN $CM* $EX;
-$IN $CM* ($ID | $EB | $EM);
-$IN $CM* $IN;
-$IN $CM* $NU;
-
-# LB 23
-$NU $CM* ($ALPlus | $HL);
-($ALPlus | $HL) $CM* $NU;
-
-# LB23a
-($ID | $EB | $EM) $CM* $PR;
-$PO $CM* ($ID | $EB | $EM);
-
-# LB 24
-($ALPlus | $HL) $CM* ($PR | $PO);
-($PR | $PO) $CM* ($ALPlus | $HL);
-
-
-# LB 25
-($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;
-
-# LB 26
-($H3 | $H2 | $JV | $JL) $CM* $JL;
-($JT | $JV) $CM* ($H2 | $JV);
-$JT $CM* ($H3 | $JT);
-
-# LB 27
-$IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
-$PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
- ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
-
-# LB 28
-($ALPlus | $HL) $CM* ($ALPlus | $HL);
-
-
-# LB 29
-($ALPlus | $HL) $CM* $IS;
-
-# LB 30
-$OP $CM* ($ALPlus | $HL | $NU);
-($ALPlus | $HL | $NU) $CM* $CP;
-
-# LB 30a
-#    Pairs of Regional Indicators.
-#    The following two rules are nearly identical. The first matches only sequences with an odd number of adjacent RIs,
-#    the second with an even number. Stripping away the cruft they look like
-#         [^RI] RI / (RI RI)+ ^RI;
-#         [^RI] RI RI / (RI RI)+ ^RI;
-#
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-
-# In general, adjacent RIs stay together. The hard-break rules, above, overide this, forcing in the boundaries between pairs.
-$RI $CM* $RI;
-
-#    WJ, GL, QU, etc. are classes with rules like "WJ x "   which includes "WJ x RI".
-$RI $CM* ([$WJ $GL $QU $BB] |  (($HY | $BA)$CM* $HL));
-
-
-# LB 30b Do not break between an Emoji Base and an Emoji Modifier
-$EM $CM* $EB;
-
-
 ## -------------------------------------------------

 !!safe_reverse;

 # LB 9
 ^$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
-^$CM+ $SP / .;

 # LB 14
 $SP+ $CM* $OP;
@ -574,20 +370,3 @@ $CM* ($HY | $BA) $CM* $HL;

 # For dictionary-based break
 $dictionary $dictionary;
-
-## -------------------------------------------------
-
-!!safe_forward;
-
-# Skip forward over all character classes that are involved in
-#   rules containing patterns with possibly more than one char
-#   of context.
-#
-#  It might be slightly more efficient to have specific rules
-#  instead of one generic one, but only if we could
-#  turn off rule chaining.  We don't want to move more
-#  than necessary.
-#
-^[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $RI $ZWJ $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $RI $ZWJ $dictionary];
-$dictionary $dictionary;
-
--- a/icu4c/source/data/brkitr/rules/line_normal_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal_cj.txt
@ -1,4 +1,4 @@
-# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
 # Copyright (c) 2002-2016  International Business Machines Corporation and
 # others. All Rights Reserved.
@ -30,6 +30,7 @@
 #

 !!chain;
+!!quoted_literals_only;

 $AI = [:LineBreak =  Ambiguous:];
 $AL = [:LineBreak =  Alphabetic:];
@ -344,219 +345,12 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;

-#
-#  Reverse Rules.
-#
-## -------------------------------------------------
-
-!!reverse;
-
-#  LB 9 Combining Marks.
-#  Stick together any combining sequences that don't match other rules.
-
-^$CM+ $CAN_CM?;
-
-#
-#  Sequences of the form  (shown forwards)
-#      [CANT_CM]  <break>  [CM]  [whatever]
-#  The CM needs to behave as an AL
-#
-$AL_FOLLOW $CM+ / (
-          [$BK $CR $LF $NL $ZW {eof}] |
-          $SP+ $CM+ $SP |
-          $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to surpress this break.
-                                               #  LB14 says    OP SP* x .
-                                               #    becomes    OP SP* x AL
-                                               #    becomes    OP SP* x CM+ AL_FOLLOW
-                                               #
-                                               # Further note:  the $AL in [$AL {eof}] is only to work around
-                                               #                a rule compiler bug which complains about
-                                               #                empty sets otherwise.
-
-
-# LB 4, 5, 6
-
-$LB4Breaks [$LB4NonBreaks-$CM];
-$LB4Breaks $CM+ $CAN_CM;
-$LF $CR;
-
-
-# LB 7         x SP
-#              x ZW
-[$SP $ZW] [$LB4NonBreaks-$CM];
-[$SP $ZW] $CM+ $CAN_CM;
-
-# LB 8 ZW SP* <break>
-#     TODO: to implement this, we need more than one look-ahead hard break in play at a time.
-#           Requires an engine enhancement.
-#   / $SP* $ZW
-
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)
-#
-($ID | $Extended_Pict | $EmojiNRK) $ZWJ $CM* $CAN_CM?;
-
-
-# LB 9,10  Combining marks.
-#    X   $CM needs to behave like X, where X is not $SP or controls.
-#    $CM not covered by the above needs to behave like $AL
-# Stick together any combining sequences that don't match other rules.
-^$CM+ $CAN_CM;
-
-
-# LB 11
-#
-$WJ $CM* $CAN_CM;
-$WJ      [$LB8NonBreaks-$CM];
-
-     $CANT_CM $CM* $WJ;
-$CAN_CM  $CM* $WJ;
-
-# LB 12a
-#      [^SP BA HY] x GL
-#
-$GL $CM* [$LB8NonBreaks-[$CM $SP $BA $BAX $HY]];
-
-# LB 12
-#     GL  x
-#
-$CANT_CM $CM* $GL;
-$CAN_CM $CM* $GL;
-
-
-# LB 13
-$CL $CM+ $CAN_CM;
-$CP $CM+ $CAN_CM;
-$EX $CM+ $CAN_CM;
-$IS $CM+ $CAN_CM;
-$SY $CM+ $CAN_CM;
-
-$CL [$LB8NonBreaks-$CM];
-$CP [$LB8NonBreaks-$CM];
-$EX [$LB8NonBreaks-$CM];
-$IS [$LB8NonBreaks-$CM];
-$SY [$LB8NonBreaks-$CM];
-
-
-# LB 14    OP SP* x
-#
-.   $SP* $CM* $OP;
-$AL_FOLLOW? $CM+ $SP+ $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP+ $CM* $OP
-
-
-# LB 15
-$OP $SP* $CM* $QU;
-
-# LB 16
-# Don't include $NSX here
-$NS $SP* $CM* ($CL | $CP);
-
-# LB 17
-$B2 $SP* $CM* $B2;
-
-# LB 18  break after spaces
-#        Nothing explicit needed here.
-
-
-#
-# LB 19
-#
-$QU $CM* $CAN_CM;                                #   . x QU
-$QU      $LB18NonBreaks;
-
-
-$CAN_CM  $CM* $QU;                               #   QU x .
-     $CANT_CM $CM* $QU;
-
-#
-#  LB 20  Break before and after CB.
-#         nothing needed here.
-#
-
-# LB 21
-# Don't include $BAX or $NSX here
-($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
-
-[$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
-[^$CB] $CM* $BB;                                      #
-
-# LB21a  Don't break after Hebrew + Hyphen.
-([^$CB] $CM*)? ($HY | $BA | $BAX) $CM* $HL;
-
-# LB21b (reverse)
-$HL $CM* $SY;
-
-# LB 22
-$IN $CM* ($ALPlus | $HL);
-$IN $CM* $EX;
-$IN $CM* ($ID | $EB | $EM);
-$IN $CM* $IN;
-$IN $CM* $NU;
-
-# LB 23
-$NU $CM* ($ALPlus | $HL);
-($ALPlus | $HL) $CM* $NU;
-
-# LB23a
-($ID | $EB | $EM) $CM* $PR;
-$PO $CM* ($ID | $EB | $EM);
-
-# LB 24
-($ALPlus | $HL) $CM* ($PR | $PO);
-($PR | $PO) $CM* ($ALPlus | $HL);
-
-
-# LB 25
-($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;
-
-# LB 26
-($H3 | $H2 | $JV | $JL) $CM* $JL;
-($JT | $JV) $CM* ($H2 | $JV);
-$JT $CM* ($H3 | $JT);
-
-# LB 27
-$IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
-$PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
- ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
-
-# LB 28
-($ALPlus | $HL) $CM* ($ALPlus | $HL);
-
-
-# LB 29
-($ALPlus | $HL) $CM* $IS;
-
-# LB 30
-$OP $CM* ($ALPlus | $HL | $NU);
-($ALPlus | $HL | $NU) $CM* $CP;
-
-# LB 30a
-#    Pairs of Regional Indicators.
-#    The following two rules are nearly identical. The first matches only sequences with an odd number of adjacent RIs,
-#    the second with an even number. Stripping away the cruft they look like
-#         [^RI] RI / (RI RI)+ ^RI;
-#         [^RI] RI RI / (RI RI)+ ^RI;
-#
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-
-# In general, adjacent RIs stay together. The hard-break rules, above, overide this, forcing in the boundaries between pairs.
-$RI $CM* $RI;
-
-#    WJ, GL, QU, etc. are classes with rules like "WJ x "   which includes "WJ x RI".
-$RI $CM* ([$WJ $GL $QU $BB] |  (($HY | $BA)$CM* $HL));
-
-
-# LB 30b Do not break between an Emoji Base and an Emoji Modifier
-$EM $CM* $EB;
-
-
 ## -------------------------------------------------

 !!safe_reverse;

 # LB 9
 ^$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
-^$CM+ $SP / .;

 # LB 14
 $SP+ $CM* $OP;
@ -582,20 +376,3 @@ $CM* ($HY | $BA | $BAX) $CM* $HL;

 # For dictionary-based break
 $dictionary $dictionary;
-
-## -------------------------------------------------
-
-!!safe_forward;
-
-# Skip forward over all character classes that are involved in
-#   rules containing patterns with possibly more than one char
-#   of context.
-#
-#  It might be slightly more efficient to have specific rules
-#  instead of one generic one, but only if we could
-#  turn off rule chaining.  We don't want to move more
-#  than necessary.
-#
-^[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $BAX $SP $RI $ZWJ $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $BAX $RI $ZWJ $dictionary];
-$dictionary $dictionary;
-
--- a/icu4c/source/data/brkitr/rules/line_normal_fi.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal_fi.txt
@ -1,4 +1,4 @@
-# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
 # Copyright (c) 2002-2016  International Business Machines Corporation and
 # others. All Rights Reserved.
@ -28,6 +28,7 @@
 #

 !!chain;
+!!quoted_literals_only;

 $AI = [:LineBreak =  Ambiguous:];
 $AL = [:LineBreak =  Alphabetic:];
@ -341,213 +342,6 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;

-#
-#  Reverse Rules.
-#
-## -------------------------------------------------
-
-!!reverse;
-
-#  LB 9 Combining Marks.
-#  Stick together any combining sequences that don't match other rules.
-
-^$CM+ $CAN_CM?;
-
-#
-#  Sequences of the form  (shown forwards)
-#      [CANT_CM]  <break>  [CM]  [whatever]
-#  The CM needs to behave as an AL
-#
-$AL_FOLLOW $CM+ / (
-          [$BK $CR $LF $NL $ZW {eof}] |
-          $SP+ $CM+ $SP |
-          $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to surpress this break.
-                                               #  LB14 says    OP SP* x .
-                                               #    becomes    OP SP* x AL
-                                               #    becomes    OP SP* x CM+ AL_FOLLOW
-                                               #
-                                               # Further note:  the $AL in [$AL {eof}] is only to work around
-                                               #                a rule compiler bug which complains about
-                                               #                empty sets otherwise.
-
-
-# LB 4, 5, 6
-
-$LB4Breaks [$LB4NonBreaks-$CM];
-$LB4Breaks $CM+ $CAN_CM;
-$LF $CR;
-
-
-# LB 7         x SP
-#              x ZW
-[$SP $ZW] [$LB4NonBreaks-$CM];
-[$SP $ZW] $CM+ $CAN_CM;
-
-# LB 8 ZW SP* <break>
-#     TODO: to implement this, we need more than one look-ahead hard break in play at a time.
-#           Requires an engine enhancement.
-#   / $SP* $ZW
-
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)
-#
-($ID | $Extended_Pict | $EmojiNRK) $ZWJ $CM* $CAN_CM?;
-
-
-# LB 9,10  Combining marks.
-#    X   $CM needs to behave like X, where X is not $SP or controls.
-#    $CM not covered by the above needs to behave like $AL
-# Stick together any combining sequences that don't match other rules.
-^$CM+ $CAN_CM;
-
-
-# LB 11
-#
-$WJ $CM* $CAN_CM;
-$WJ      [$LB8NonBreaks-$CM];
-
-     $CANT_CM $CM* $WJ;
-$CAN_CM  $CM* $WJ;
-
-# LB 12a
-#      [^SP BA HY] x GL
-#
-$GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HH $HY]];
-
-# LB 12
-#     GL  x
-#
-$CANT_CM $CM* $GL;
-$CAN_CM $CM* $GL;
-
-
-# LB 13
-$CL $CM+ $CAN_CM;
-$CP $CM+ $CAN_CM;
-$EX $CM+ $CAN_CM;
-$IS $CM+ $CAN_CM;
-$SY $CM+ $CAN_CM;
-
-$CL [$LB8NonBreaks-$CM];
-$CP [$LB8NonBreaks-$CM];
-$EX [$LB8NonBreaks-$CM];
-$IS [$LB8NonBreaks-$CM];
-$SY [$LB8NonBreaks-$CM];
-
-
-# LB 14    OP SP* x
-#
-.   $SP* $CM* $OP;
-$AL_FOLLOW? $CM+ $SP+ $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP+ $CM* $OP
-
-
-# LB 15
-$OP $SP* $CM* $QU;
-
-# LB 16
-$NS $SP* $CM* ($CL | $CP);
-
-# LB 17
-$B2 $SP* $CM* $B2;
-
-# LB 18  break after spaces
-#        Nothing explicit needed here.
-
-
-#
-# LB 19
-#
-$QU $CM* $CAN_CM;                                #   . x QU
-$QU      $LB18NonBreaks;
-
-
-$CAN_CM  $CM* $QU;                               #   QU x .
-     $CANT_CM $CM* $QU;
-
-#
-#  LB 20  Break before and after CB.
-#         nothing needed here.
-#
-
-# LB 20.09 added rule for Finnish tailoring
-$AL ($HY | $HH) / $SP;
-
-# LB 21
-($BA | $HH | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
-
-[$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
-[^$CB] $CM* $BB;                                      #
-
-# LB21a
-[^$CB] $CM* ($HY | $BA | $HH) $CM* $HL;
-
-# LB21b (reverse)
-$HL $CM* $SY;
-
-# LB 22
-$IN $CM* ($ALPlus | $HL);
-$IN $CM* $EX;
-$IN $CM* ($ID | $EB | $EM);
-$IN $CM* $IN;
-$IN $CM* $NU;
-
-# LB 23
-$NU $CM* ($ALPlus | $HL);
-($ALPlus | $HL) $CM* $NU;
-
-# LB23a
-($ID | $EB | $EM) $CM* $PR;
-$PO $CM* ($ID | $EB | $EM);
-
-# LB 24
-($ALPlus | $HL) $CM* ($PR | $PO);
-($PR | $PO) $CM* ($ALPlus | $HL);
-
-
-# LB 25
-($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;
-
-# LB 26
-($H3 | $H2 | $JV | $JL) $CM* $JL;
-($JT | $JV) $CM* ($H2 | $JV);
-$JT $CM* ($H3 | $JT);
-
-# LB 27
-$IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
-$PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
- ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
-
-# LB 28
-($ALPlus | $HL) $CM* ($ALPlus | $HL);
-
-
-# LB 29
-($ALPlus | $HL) $CM* $IS;
-
-# LB 30
-$OP $CM* ($ALPlus | $HL | $NU);
-($ALPlus | $HL | $NU) $CM* $CP;
-
-# LB 30a
-#    Pairs of Regional Indicators.
-#    The following two rules are nearly identical. The first matches only sequences with an odd number of adjacent RIs,
-#    the second with an even number. Stripping away the cruft they look like
-#         [^RI] RI / (RI RI)+ ^RI;
-#         [^RI] RI RI / (RI RI)+ ^RI;
-#
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-[{bof} $NS $HY $BA $QU $CL $CP $EX $IS $SY $WJ $GL $ZW $SP $BK $CR $LF $NL $ZWJ] $CM* $RI $CM* $RI / ($CM* $RI $CM* $RI)+ $CM* [{eof}[^$RI $CM]];
-
-# In general, adjacent RIs stay together. The hard-break rules, above, overide this, forcing in the boundaries between pairs.
-$RI $CM* $RI;
-
-#    WJ, GL, QU, etc. are classes with rules like "WJ x "   which includes "WJ x RI".
-$RI $CM* ([$WJ $GL $QU $BB] |  (($HY | $BA)$CM* $HL));
-
-
-# LB 30b Do not break between an Emoji Base and an Emoji Modifier
-$EM $CM* $EB;
-
-
 ## -------------------------------------------------

 !!safe_reverse;
@ -580,20 +374,3 @@ $CM* ($HY | $BA | $HH) $CM* $HL;

 # For dictionary-based break
 $dictionary $dictionary;
-
-## -------------------------------------------------
-
-!!safe_forward;
-
-# Skip forward over all character classes that are involved in
-#   rules containing patterns with possibly more than one char
-#   of context.
-#
-#  It might be slightly more efficient to have specific rules
-#  instead of one generic one, but only if we could
-#  turn off rule chaining.  We don't want to move more
-#  than necessary.
-#
-^[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $RI $ZWJ $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $RI $ZWJ $dictionary];
-$dictionary $dictionary;
-
--- a/icu4c/source/data/brkitr/rules/sent.txt
+++ b/icu4c/source/data/brkitr/rules/sent.txt
@ -1,6 +1,5 @@
-#
-#   Copyright (C) 2016 and later: Unicode, Inc. and others.
-#   License & terms of use: http://www.unicode.org/copyright.html#License
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
 #
 #   Copyright (C) 2002-2015, International Business Machines Corporation and others.
 #       All Rights Reserved.
@ -12,6 +11,7 @@
 #      These rules are based on UAX #29 Revision 26 for Unicode Version 8.0
 #

+!!quoted_literals_only;

 #
 # Character categories as defined in TR 29
@ -85,22 +85,13 @@ $ATermEx $CloseEx* $SpEx* $NotLettersEx* $Lower;

 ## -------------------------------------------------

-!!reverse;
+!!safe_reverse;

 $SpEx_R       = ($Extend | $Format)* $Sp;
 $ATermEx_R    = ($Extend | $Format)* $ATerm;
 $STermEx_R    = ($Extend | $Format)* $STerm;
 $CloseEx_R    = ($Extend | $Format)* $Close;

-#
-#  Reverse rules.
-#     For now, use the old style inexact reverse rules, which are easier
-#     to write, but less efficient.
-#     TODO:  exact reverse rules.  It appears that exact reverse rules
-#            may require improving support for look-ahead breaks in the
-#            builder.  Needs more investigation.
-#
-
 [{bof}] (.? | $LF $CR) [^$Sep $CR $LF]* [$Sep $CR $LF {eof}] ($SpEx_R* $CloseEx_R* ($STermEx_R | $ATermEx_R))*;
 #.*;

@ -112,9 +103,9 @@ $CloseEx_R    = ($Extend | $Format)* $Close;
 #        The preceding $Sep, which will be the second one that the rule matches.
 #        Any immediately preceding STerm or ATerm sequences.  We need to see these
 #              to get the correct rule status when moving forwards again.
-#        
+#
 # [{bof}]           inhibit rule chaining.  Without this, rule would loop on itself and match
-#                   the entire string.
+#                   the entire string. TODO: can {bof} be replaced with ^ ?
 #
 # (.? | $LF $CR)    Match one $Sep instance.  Use .? rather than $Sep because position might be
 #                   at the beginning of the string at this point, and we don't want to fail.
--- a/icu4c/source/data/brkitr/rules/sent_el.txt
+++ b/icu4c/source/data/brkitr/rules/sent_el.txt
@ -1,6 +1,6 @@
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
 #
-#   Copyright (C) 2016 and later: Unicode, Inc. and others.
-#   License & terms of use: http://www.unicode.org/copyright.html#License
 #
 #   Copyright (C) 2002-2015, International Business Machines Corporation and others.
 #       All Rights Reserved.
@ -12,6 +12,7 @@
 #      These rules are based on UAX #29 Revision 26 for Unicode Version 8.0
 #

+!!quoted_literals_only;

 #
 # Character categories as defined in TR 29
@ -85,7 +86,7 @@ $ATermEx $CloseEx* $SpEx* $NotLettersEx* $Lower;

 ## -------------------------------------------------

-!!reverse;
+!!safe_reverse;

 $SpEx_R       = ($Extend | $Format)* $Sp;
 $ATermEx_R    = ($Extend | $Format)* $ATerm;
@ -102,7 +103,6 @@ $CloseEx_R    = ($Extend | $Format)* $Close;
 #

 [{bof}] (.? | $LF $CR) [^$Sep $CR $LF]* [$Sep $CR $LF {eof}] ($SpEx_R* $CloseEx_R* ($STermEx_R | $ATermEx_R))*;
-#.*;

 # Explanation for this rule:
 #
@ -112,7 +112,7 @@ $CloseEx_R    = ($Extend | $Format)* $Close;
 #        The preceding $Sep, which will be the second one that the rule matches.
 #        Any immediately preceding STerm or ATerm sequences.  We need to see these
 #              to get the correct rule status when moving forwards again.
-#        
+#
 # [{bof}]           inhibit rule chaining.  Without this, rule would loop on itself and match
 #                   the entire string.
 #
--- a/icu4c/source/data/brkitr/rules/title.txt
+++ b/icu4c/source/data/brkitr/rules/title.txt
@ -1,5 +1,5 @@
 # Copyright (C) 2016 and later: Unicode, Inc. and others.
-# License & terms of use: http://www.unicode.org/copyright.html#License
+# License & terms of use: http://www.unicode.org/copyright.html
 #
 # Copyright (c) 2002-2015, International Business Machines Corporation and
 # others. All Rights Reserved.
@ -7,6 +7,7 @@
 #  Title Casing Break Rules
 #

+!!quoted_literals_only;

 $CaseIgnorable   = [[:Mn:][:Me:][:Cf:][:Lm:][:Sk:] \u0027 \u00AD \u2019];
 $Cased           = [[:Upper_Case:][:Lower_Case:][:Lt:]  - $CaseIgnorable];
@ -27,19 +28,6 @@ $NotCased        = [[^ $Cased] - $CaseIgnorable];
 $Cased ($Cased | $CaseIgnorable)* ($NotCased | $CaseIgnorable)*;


-#  Reverse Rules
-!!reverse;
-
-#  Normal Rule, will work nearly universally, so long as there is a
-#    start-of-word preceding the current iteration position.
-
-($NotCased | $CaseIgnorable)* ($Cased | $CaseIgnorable)* $Cased;
-
-#  Short rule, will be effective only when moving to the start of text,
-#    with no word (cased character) preceding the current iteration position.
-
-($NotCased | $CaseIgnorable)*;
-
 !!safe_reverse;

 # Safe Reverse: the exact forward rule must not start in the middle
@ -47,10 +35,3 @@ $Cased ($Cased | $CaseIgnorable)* ($NotCased | $CaseIgnorable)*;
 #  leaving it just before the start of a word.

 ($Cased | $CaseIgnorable)*;
-
-!!safe_forward;
-
-# Safe Forward, nothing needs to be done, the exact Reverse rules will
-#   always find valid boundaries from any starting position.
-#   Still, some rule is needed, so '.', a one character movement.
-.;
--- a/icu4c/source/data/brkitr/rules/word.txt
+++ b/icu4c/source/data/brkitr/rules/word.txt
@ -1,7 +1,7 @@
-#
+#
 # Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
-# Copyright (C) 2002-2016, International Business Machines Corporation 
+# Copyright (C) 2002-2016, International Business Machines Corporation
 # and others. All Rights Reserved.
 #
 # file:  word.txt
@ -22,6 +22,7 @@
 ##############################################################################

 !!chain;
+!!quoted_literals_only;


 #
@ -194,95 +195,6 @@ $HangulSyllable $HangulSyllable {200};
 $KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found 


-## -------------------------------------------------
-
-!!reverse;
-
-$BackHebrew_LetterEx      = ($Format | $Extend | $ZWJ)* $Hebrew_Letter;
-$BackALetterEx            = ($Format | $Extend | $ZWJ)* $ALetterPlus;
-$BackSingle_QuoteEx       = ($Format | $Extend | $ZWJ)* $Single_Quote;
-$BackDouble_QuoteEx       = ($Format | $Extend | $ZWJ)* $Double_Quote;
-$BackMidNumLetEx          = ($Format | $Extend | $ZWJ)* $MidNumLet;
-$BackNumericEx            = ($Format | $Extend | $ZWJ)* $Numeric;
-$BackMidNumEx             = ($Format | $Extend | $ZWJ)* $MidNum;
-$BackMidLetterEx          = ($Format | $Extend | $ZWJ)* $MidLetter;
-$BackKatakanaEx           = ($Format | $Extend | $ZWJ)* $Katakana;
-$BackHiraganaEx           = ($Format | $Extend | $ZWJ)* $Hiragana;
-$BackExtendNumLetEx       = ($Format | $Extend | $ZWJ)* $ExtendNumLet;
-$BackRegional_IndicatorEx = ($Format | $Extend | $ZWJ)* $Regional_Indicator;
-
-# rule 3
-$LF $CR;
-
-# Rule 3c   ZWJ x (Extended_Pict | EmojiNRK).  Precedes WB4, so no intervening Extend chars allowed.
-#
-($Extended_Pict | $EmojiNRK) $ZWJ;
-
-# rule 4
-($Format | $Extend | $ZWJ)*  [^$CR $LF $Newline]?;
-
-# rule 5
-
-($BackALetterEx | $BackHebrew_LetterEx) ($BackALetterEx | $BackHebrew_LetterEx);
-
-# rule 6 and 7
-
-($BackALetterEx | $BackHebrew_LetterEx) ($BackMidLetterEx | $BackMidNumLetEx | $BackSingle_QuoteEx) ($BackALetterEx | $BackHebrew_LetterEx);
-
-# rule 7a
-$BackSingle_QuoteEx $BackHebrew_LetterEx;
-
-# Rule 7b and 7c
-$BackHebrew_LetterEx $BackDouble_QuoteEx $BackHebrew_LetterEx;
-
-# rule 8
-
-$BackNumericEx $BackNumericEx;
-
-# rule 9
-
-$BackNumericEx ($BackALetterEx | $BackHebrew_LetterEx);
-
-# rule 10
-
-($BackALetterEx | $BackHebrew_LetterEx) $BackNumericEx;
-
-# rule 11 and 12
-
-$BackNumericEx ($BackMidNumEx | $BackMidNumLetEx | $BackSingle_QuoteEx) $BackNumericEx;
-
-# rule 13
-
-$BackKatakanaEx $BackKatakanaEx;
-
-# rules 13 a/b
-#
-$BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
-($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx; 
-
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable;
-$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
-
-# rule 14
-
-$E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $EBG);
-
-# rule 15 - 17
-#    Pairs of Regional Indicators stay together.
-
-^$BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)* 
-        ($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
-^$BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)* 
-        ($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
-
-($Extended_Pict | $EmojiNRK) $ZWJ $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)* 
-        ($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
-($Extended_Pict | $EmojiNRK) $ZWJ $BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)* 
-        ($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
-
-
-
 ## -------------------------------------------------

 !!safe_reverse;
@ -291,39 +203,17 @@ $E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $EBG);
 ($Extend | $Format | $ZWJ)+ .?;

 # rule 6
-($MidLetter | $MidNumLet | $Single_Quote) ($BackALetterEx | $BackHebrew_LetterEx);
+($MidLetter | $MidNumLet | $Single_Quote) ($Format | $Extend | $ZWJ)* ($Hebrew_Letter | $ALetterPlus);

 # rule 7b
-$Double_Quote $BackHebrew_LetterEx;
+$Double_Quote ($Format | $Extend | $ZWJ)* $Hebrew_Letter;


 # rule 11
-($MidNum | $MidNumLet | $Single_Quote) $BackNumericEx;
+($MidNum | $MidNumLet | $Single_Quote) ($Format | $Extend | $ZWJ)* $Numeric;

 # rule 13c
-$BackRegional_IndicatorEx*;
-
-# For dictionary-based break
-$dictionary $dictionary;
-
-## -------------------------------------------------
-
-!!safe_forward;
-
-# rule 4
-($Extend | $Format | $ZWJ)+ .?;
-
-# rule 6
-($MidLetterEx | $MidNumLetEx | $Single_QuoteEx) ($ALetterEx | $Hebrew_LetterEx);
-
-# rule 7b
-$Double_QuoteEx $Hebrew_LetterEx;
-
-# rule 11
-($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx;
-
-# rule 13c
-$Regional_IndicatorEx*;
+$Regional_Indicator ($Format | $Extend | $ZWJ)* $Regional_Indicator;

 # For dictionary-based break
 $dictionary $dictionary;
--- a/icu4c/source/data/brkitr/rules/word_POSIX.txt
+++ b/icu4c/source/data/brkitr/rules/word_POSIX.txt
@ -1,7 +1,7 @@
-#
+#
 # Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
-# Copyright (C) 2002-2016, International Business Machines Corporation 
+# Copyright (C) 2002-2016, International Business Machines Corporation
 # and others. All Rights Reserved.
 #
 # file:  word_POSIX.txt
@ -22,6 +22,7 @@
 ##############################################################################

 !!chain;
+!!quoted_literals_only;


 #
@ -62,7 +63,7 @@ $Hiragana           = [:Hiragana:];
 #   5.0 or later as the definition of Complex_Context was corrected to include all
 #   characters requiring dictionary break.

-$Control        = [\p{Grapheme_Cluster_Break = Control}]; 
+$Control        = [\p{Grapheme_Cluster_Break = Control}];
 $HangulSyllable = [\uac00-\ud7a3];
 $ComplexContext = [:LineBreak = Complex_Context:];
 $KanaKanji      = [$Han $Hiragana $Katakana];
@ -74,7 +75,7 @@ $ALetterPlus  = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];


 #
-#  Rules 4    Ignore Format and Extend characters, 
+#  Rules 4    Ignore Format and Extend characters,
 #             except when they appear at the beginning of a region of text.
 #
 # TODO: check if handling of katakana in dictionary makes rules incorrect/void
@ -154,7 +155,7 @@ $NumericEx $NumericEx {100};

 $NumericEx ($ALetterEx | $Hebrew_LetterEx) {200};

-# rule 11 and 12 
+# rule 11 and 12

 $NumericEx ($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx {100};

@ -191,96 +192,7 @@ $ExtendNumLetEx  $KatakanaEx     {400};    #  (13b)

 # special handling for CJK characters: chain for later dictionary segmentation
 $HangulSyllable $HangulSyllable {200};
-$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found 
-
-
-## -------------------------------------------------
-
-!!reverse;
-
-$BackHebrew_LetterEx      = ($Format | $Extend | $ZWJ)* $Hebrew_Letter;
-$BackALetterEx            = ($Format | $Extend | $ZWJ)* $ALetterPlus;
-$BackSingle_QuoteEx       = ($Format | $Extend | $ZWJ)* $Single_Quote;
-$BackDouble_QuoteEx       = ($Format | $Extend | $ZWJ)* $Double_Quote;
-$BackMidNumLetEx          = ($Format | $Extend | $ZWJ)* $MidNumLet;
-$BackNumericEx            = ($Format | $Extend | $ZWJ)* $Numeric;
-$BackMidNumEx             = ($Format | $Extend | $ZWJ)* $MidNum;
-$BackMidLetterEx          = ($Format | $Extend | $ZWJ)* $MidLetter;
-$BackKatakanaEx           = ($Format | $Extend | $ZWJ)* $Katakana;
-$BackHiraganaEx           = ($Format | $Extend | $ZWJ)* $Hiragana;
-$BackExtendNumLetEx       = ($Format | $Extend | $ZWJ)* $ExtendNumLet;
-$BackRegional_IndicatorEx = ($Format | $Extend | $ZWJ)* $Regional_Indicator;
-
-# rule 3
-$LF $CR;
-
-# Rule 3c   ZWJ x (Extended_Pict | EmojiNRK).  Precedes WB4, so no intervening Extend chars allowed.
-#
-($Extended_Pict | $EmojiNRK) $ZWJ;
-
-# rule 4
-($Format | $Extend | $ZWJ)*  [^$CR $LF $Newline]?;
-
-# rule 5
-
-($BackALetterEx | $BackHebrew_LetterEx) ($BackALetterEx | $BackHebrew_LetterEx);
-
-# rule 6 and 7
-
-($BackALetterEx | $BackHebrew_LetterEx) ($BackMidLetterEx | $BackMidNumLetEx | $BackSingle_QuoteEx) ($BackALetterEx | $BackHebrew_LetterEx);
-
-# rule 7a
-$BackSingle_QuoteEx $BackHebrew_LetterEx;
-
-# Rule 7b and 7c
-$BackHebrew_LetterEx $BackDouble_QuoteEx $BackHebrew_LetterEx;
-
-# rule 8
-
-$BackNumericEx $BackNumericEx;
-
-# rule 9
-
-$BackNumericEx ($BackALetterEx | $BackHebrew_LetterEx);
-
-# rule 10
-
-($BackALetterEx | $BackHebrew_LetterEx) $BackNumericEx;
-
-# rule 11 and 12
-
-$BackNumericEx ($BackMidNumEx | $BackMidNumLetEx | $BackSingle_QuoteEx) $BackNumericEx;
-
-# rule 13
-
-$BackKatakanaEx $BackKatakanaEx;
-
-# rules 13 a/b
-#
-$BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
-($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx; 
-
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable;
-$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
-
-# rule 14
-
-$E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $EBG);
-
-# rule 15 - 17
-#    Pairs of Regional Indicators stay together.
-
-^$BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)* 
-        ($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
-^$BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)* 
-        ($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
-
-($Extended_Pict | $EmojiNRK) $ZWJ $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)* 
-        ($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
-($Extended_Pict | $EmojiNRK) $ZWJ $BackRegional_IndicatorEx $BackRegional_IndicatorEx / ($BackRegional_IndicatorEx $BackRegional_IndicatorEx)* 
-        ($Format | $Extend | $ZWJ)* [[^$Regional_Indicator $Format $Extend $ZWJ] {eof}];
-
+$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found


 ## -------------------------------------------------
@ -291,39 +203,17 @@ $E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $EBG);
 ($Extend | $Format | $ZWJ)+ .?;

 # rule 6
-($MidLetter | $MidNumLet | $Single_Quote) ($BackALetterEx | $BackHebrew_LetterEx);
+($MidLetter | $MidNumLet | $Single_Quote) ($Format | $Extend | $ZWJ)* ($Hebrew_Letter | $ALetterPlus);

 # rule 7b
-$Double_Quote $BackHebrew_LetterEx;
+$Double_Quote ($Format | $Extend | $ZWJ)* $Hebrew_Letter;


 # rule 11
-($MidNum | $MidNumLet | $Single_Quote) $BackNumericEx;
+($MidNum | $MidNumLet | $Single_Quote) ($Format | $Extend | $ZWJ)* $Numeric;

 # rule 13c
-$BackRegional_IndicatorEx*;
-
-# For dictionary-based break
-$dictionary $dictionary;
-
-## -------------------------------------------------
-
-!!safe_forward;
-
-# rule 4
-($Extend | $Format | $ZWJ)+ .?;
-
-# rule 6
-($MidLetterEx | $MidNumLetEx | $Single_QuoteEx) ($ALetterEx | $Hebrew_LetterEx);
-
-# rule 7b
-$Double_QuoteEx $Hebrew_LetterEx;
-
-# rule 11
-($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx;
-
-# rule 13c
-$Regional_IndicatorEx*;
+$Regional_Indicator ($Format | $Extend | $ZWJ)* $Regional_Indicator;

 # For dictionary-based break
 $dictionary $dictionary;
--- a/icu4c/source/data/in/nfc.nrm
+++ b/icu4c/source/data/in/nfc.nrm
--- a/icu4c/source/data/in/nfkc.nrm
+++ b/icu4c/source/data/in/nfkc.nrm
--- a/icu4c/source/data/in/nfkc_cf.nrm
+++ b/icu4c/source/data/in/nfkc_cf.nrm
--- a/icu4c/source/data/in/uts46.nrm
+++ b/icu4c/source/data/in/uts46.nrm
--- a/icu4c/source/data/zone/tzdbNames.txt
+++ b/icu4c/source/data/zone/tzdbNames.txt
@ -201,7 +201,7 @@ tzdbNames{
        "meta:China"{
            sd{"CDT"}
            ss{"CST"}
-            parseRegions{"CN", "MO", "TW"}
+            parseRegions{"CN", "MO"}
        }
        "meta:Choibalsan"{
            sd{"CHOST"}
@ -562,6 +562,10 @@ tzdbNames{
        "meta:Ponape"{
            ss{"PONT"}
        }
+        "meta:Pyongyang"{
+            ss{"KST"}
+            parseRegions{"KP"}
+        }
        "meta:Qyzylorda"{
            sd{"QYZST"}
            ss{"QYZT"}
@ -617,6 +621,7 @@ tzdbNames{
        "meta:Taipei"{
            sd{"CDT"}
            ss{"CST"}
+            parseRegions{"TW"}
        }
        "meta:Tajikistan"{
            ss{"TJT"}
--- a/icu4c/source/i18n/anytrans.cpp
+++ b/icu4c/source/i18n/anytrans.cpp
@ -31,9 +31,13 @@

 static const UChar TARGET_SEP = 45; // '-'
 static const UChar VARIANT_SEP = 47; // '/'
-static const UChar ANY[] = {65,110,121,0}; // "Any"
+static const UChar ANY[] = {0x41,0x6E,0x79,0}; // "Any"
 static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null"
-static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45,0}; // "-Latin;Latin-"
+static const UChar LATIN_PIVOT[] = {0x2D,0x4C,0x61,0x74,0x6E,0x3B,0x4C,0x61,0x74,0x6E,0x2D,0}; // "-Latn;Latn-"
+
+// initial size for an Any-XXXX transform's cache of script-XXXX transforms
+// (will grow as necessary, but we don't expect to have source text with more than 7 scripts)
+#define ANY_TRANS_CACHE_INIT_SIZE 7

 //------------------------------------------------------------

@ -186,7 +190,7 @@ AnyTransliterator::AnyTransliterator(const UnicodeString& id,
    Transliterator(id, NULL),
    targetScript(theTargetScript)
 {
-    cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
+    cache = uhash_openSize(uhash_hashLong, uhash_compareLong, NULL, ANY_TRANS_CACHE_INIT_SIZE, &ec);
    if (U_FAILURE(ec)) {
        return;
    }
@ -212,7 +216,7 @@ AnyTransliterator::AnyTransliterator(const AnyTransliterator& o) :
 {
    // Don't copy the cache contents
    UErrorCode ec = U_ZERO_ERROR;
-    cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
+    cache = uhash_openSize(uhash_hashLong, uhash_compareLong, NULL, ANY_TRANS_CACHE_INIT_SIZE, &ec);
    if (U_FAILURE(ec)) {
        return;
    }
@ -286,7 +290,7 @@ Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {
    }
    if (t == NULL) {
        UErrorCode ec = U_ZERO_ERROR;
-        UnicodeString sourceName(uscript_getName(source), -1, US_INV);
+        UnicodeString sourceName(uscript_getShortName(source), -1, US_INV);
        UnicodeString id(sourceName);
        id.append(TARGET_SEP).append(target);

--- a/icu4c/source/i18n/calendar.cpp
+++ b/icu4c/source/i18n/calendar.cpp
@ -8,12 +8,12 @@
 *
 * File CALENDAR.CPP
 *
-* Modification History: 
+* Modification History:
 *
 *   Date        Name        Description
 *   02/03/97    clhuang     Creation.
-*   04/22/97    aliu        Cleaned up, fixed memory leak, made 
-*                           setWeekCountData() more robust.  
+*   04/22/97    aliu        Cleaned up, fixed memory leak, made
+*                           setWeekCountData() more robust.
 *                           Moved platform code to TPlatformUtilities.
 *   05/01/97    aliu        Made equals(), before(), after() arguments const.
 *   05/20/97    aliu        Changed logic of when to compute fields and time
@ -26,7 +26,7 @@
 *******************************************************************************
 */

-#include "utypeinfo.h"  // for 'typeid' to work 
+#include "utypeinfo.h"  // for 'typeid' to work

 #include "unicode/utypes.h"

@ -66,10 +66,8 @@
 #if !UCONFIG_NO_SERVICE
 static icu::ICULocaleService* gService = NULL;
 static icu::UInitOnce gServiceInitOnce = U_INITONCE_INITIALIZER;
-#endif

 // INTERNAL - for cleanup
-
 U_CDECL_BEGIN
 static UBool calendar_cleanup(void) {
 #if !UCONFIG_NO_SERVICE
@ -82,6 +80,7 @@ static UBool calendar_cleanup(void) {
    return TRUE;
 }
 U_CDECL_END
+#endif

 // ------------------------------------------
 //
@ -93,9 +92,9 @@ U_CDECL_END

 #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)

-/** 
- * fldName was removed as a duplicate implementation. 
- * use  udbg_ services instead, 
+/**
+ * fldName was removed as a duplicate implementation.
+ * use  udbg_ services instead,
 * which depend on include files and library from ../tools/toolutil, the following circular link:
 *   CPPFLAGS+=-I$(top_srcdir)/tools/toolutil
 *   LIBS+=$(LIBICUTOOLUTIL)
@ -123,7 +122,7 @@ void ucal_dump(const Calendar &cal) {
 void Calendar::dump() const {
    int i;
    fprintf(stderr, "@calendar=%s, timeset=%c, fieldset=%c, allfields=%c, virtualset=%c, t=%.2f",
-        getType(), fIsTimeSet?'y':'n',  fAreFieldsSet?'y':'n',  fAreAllFieldsSet?'y':'n',  
+        getType(), fIsTimeSet?'y':'n',  fAreFieldsSet?'y':'n',  fAreAllFieldsSet?'y':'n',
        fAreFieldsVirtuallySet?'y':'n',
        fTime);

@ -135,9 +134,9 @@ void Calendar::dump() const {
        fprintf(stderr, "  %25s: %-11ld", f, fFields[i]);
        if(fStamp[i] == kUnset) {
            fprintf(stderr, " (unset) ");
-        } else if(fStamp[i] == kInternallySet) { 
+        } else if(fStamp[i] == kInternallySet) {
            fprintf(stderr, " (internally set) ");
-            //} else if(fStamp[i] == kInternalDefault) { 
+            //} else if(fStamp[i] == kInternalDefault) {
            //    fprintf(stderr, " (internal default) ");
        } else {
            fprintf(stderr, " %%%d ", fStamp[i]);
@ -213,7 +212,7 @@ const SharedCalendar *LocaleCacheKey<SharedCalendar>::createObject(
        const void * /*unusedCreationContext*/, UErrorCode &status) const {
    Calendar *calendar = Calendar::makeInstance(fLoc, status);
    if (U_FAILURE(status)) {
-        return NULL; 
+        return NULL;
    }
    SharedCalendar *shared = new SharedCalendar(calendar);
    if (shared == NULL) {
@ -234,7 +233,9 @@ static ECalType getCalendarType(const char *s) {
    return CALTYPE_UNKNOWN;
 }

-static UBool isStandardSupportedKeyword(const char *keyword, UErrorCode& status) { 
+#if !UCONFIG_NO_SERVICE
+// Only used with service registration.
+static UBool isStandardSupportedKeyword(const char *keyword, UErrorCode& status) {
    if(U_FAILURE(status)) {
        return FALSE;
    }
@ -242,6 +243,7 @@ static UBool isStandardSupportedKeyword(const char *keyword, UErrorCode& status)
    return (calType != CALTYPE_UNKNOWN);
 }

+// Only used with service registration.
 static void getCalendarKeyword(const UnicodeString &id, char *targetBuffer, int32_t targetBufferSize) {
    UnicodeString calendarKeyword = UNICODE_STRING_SIMPLE("calendar=");
    int32_t calKeyLen = calendarKeyword.length();
@ -255,6 +257,7 @@ static void getCalendarKeyword(const UnicodeString &id, char *targetBuffer, int3
    }
    targetBuffer[keyLen] = 0;
 }
+#endif

 static ECalType getCalendarTypeForLocale(const char *locid) {
    UErrorCode status = U_ZERO_ERROR;
@ -291,7 +294,7 @@ static ECalType getCalendarTypeForLocale(const char *locid) {
    if (U_FAILURE(status)) {
        return CALTYPE_GREGORIAN;
    }
-    
+
    // Read preferred calendar values from supplementalData calendarPreference
    UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", &status);
    ures_getByKey(rb, "calendarPreferenceData", rb, &status);
@ -394,7 +397,7 @@ static Calendar *createStandardCalendar(ECalType calType, const Locale &loc, UEr
 // -------------------------------------

 /**
-* a Calendar Factory which creates the "basic" calendar types, that is, those 
+* a Calendar Factory which creates the "basic" calendar types, that is, those
 * shipped with ICU.
 */
 class BasicCalendarFactory : public LocaleKeyFactory {
@ -408,7 +411,7 @@ public:
    virtual ~BasicCalendarFactory();

 protected:
-    //virtual UBool isSupportedID( const UnicodeString& id, UErrorCode& status) const { 
+    //virtual UBool isSupportedID( const UnicodeString& id, UErrorCode& status) const {
    //  if(U_FAILURE(status)) {
    //    return FALSE;
    //  }
@ -466,7 +469,7 @@ protected:

 BasicCalendarFactory::~BasicCalendarFactory() {}

-/** 
+/**
 * A factory which looks up the DefaultCalendar resource to determine which class of calendar to use
 */

@ -510,7 +513,7 @@ public:
    virtual UObject* cloneInstance(UObject* instance) const {
        UnicodeString *s = dynamic_cast<UnicodeString *>(instance);
        if(s != NULL) {
-            return s->clone(); 
+            return s->clone();
        } else {
 #ifdef U_DEBUG_CALSVC_F
            UErrorCode status2 = U_ZERO_ERROR;
@ -573,7 +576,7 @@ initCalendarService(UErrorCode &status)
        fprintf(stderr, "Registering classes..\n");
 #endif

-        // Register all basic instances. 
+        // Register all basic instances.
    gService->registerFactory(new BasicCalendarFactory(),status);

 #ifdef U_DEBUG_CALSVC
@ -589,7 +592,7 @@ initCalendarService(UErrorCode &status)
    }
        }

-static ICULocaleService* 
+static ICULocaleService*
 getCalendarService(UErrorCode &status)
 {
    umtx_initOnce(gServiceInitOnce, &initCalendarService, status);
@ -743,7 +746,7 @@ fSkippedWallTime(UCAL_WALLTIME_LAST)
        return;
    }

-    clear();    
+    clear();
    fZone = zone;
    setWeekData(aLocale, NULL, success);
 }
@ -850,7 +853,7 @@ Calendar::createInstance(const Locale& aLocale, UErrorCode& success)
    return createInstance(TimeZone::createDefault(), aLocale, success);
 }

-// ------------------------------------- Adopting 
+// ------------------------------------- Adopting

 // Note: this is the bottleneck that actually calls the service routines.

@ -903,7 +906,7 @@ Calendar::makeInstance(const Locale& aLocale, UErrorCode& success) {
        c = (Calendar*)getCalendarService(success)->get(l, LocaleKey::KIND_ANY, &actualLoc2, success);

        if(U_FAILURE(success) || !c) {
-            if(U_SUCCESS(success)) { 
+            if(U_SUCCESS(success)) {
                success = U_INTERNAL_PROGRAM_ERROR; // Propagate some err
            }
            return NULL;
@ -911,7 +914,7 @@ Calendar::makeInstance(const Locale& aLocale, UErrorCode& success) {

        str = dynamic_cast<const UnicodeString*>(c);
        if(str != NULL) {
-            // recursed! Second lookup returned a UnicodeString. 
+            // recursed! Second lookup returned a UnicodeString.
            // Perhaps DefaultCalendar{} was set to another locale.
 #ifdef U_DEBUG_CALSVC
            char tmp[200];
@ -985,7 +988,7 @@ Calendar::createInstance(const TimeZone& zone, const Locale& aLocale, UErrorCode
    if(U_SUCCESS(success) && c) {
        c->setTimeZone(zone);
    }
-    return c; 
+    return c;
 }

 // -------------------------------------
@ -1017,7 +1020,7 @@ Calendar::operator==(const Calendar& that) const
        U_SUCCESS(status);
 }

-UBool 
+UBool
 Calendar::isEquivalentTo(const Calendar& other) const
 {
    return typeid(*this) == typeid(other) &&
@ -1099,13 +1102,13 @@ Calendar::getNow()
 * Gets this Calendar's current time as a long.
 * @return the current time as UTC milliseconds from the epoch.
 */
-double 
+double
 Calendar::getTimeInMillis(UErrorCode& status) const
 {
-    if(U_FAILURE(status)) 
+    if(U_FAILURE(status))
        return 0.0;

-    if ( ! fIsTimeSet) 
+    if ( ! fIsTimeSet)
        ((Calendar*)this)->updateTime(status);

    /* Test for buffer overflows */
@ -1124,9 +1127,9 @@ Calendar::getTimeInMillis(UErrorCode& status) const
 * when in lenient mode the out of range values are pinned to their respective min/max.
 * @param date the new time in UTC milliseconds from the epoch.
 */
-void 
+void
 Calendar::setTimeInMillis( double millis, UErrorCode& status ) {
-    if(U_FAILURE(status)) 
+    if(U_FAILURE(status))
        return;

    if (millis > MAX_MILLIS) {
@ -1154,7 +1157,7 @@ Calendar::setTimeInMillis( double millis, UErrorCode& status ) {
        fStamp[i]     = kUnset;
        fIsSet[i]     = FALSE;
    }
-    
+

 }

@ -1479,7 +1482,7 @@ void Calendar::computeFields(UErrorCode &ec)
    double localMillis = internalGetTime();
    int32_t rawOffset, dstOffset;
    getTimeZone().getOffset(localMillis, FALSE, rawOffset, dstOffset, ec);
-    localMillis += (rawOffset + dstOffset); 
+    localMillis += (rawOffset + dstOffset);

    // Mark fields as set.  Do this before calling handleComputeFields().
    uint32_t mask =   //fInternalSetMask;
@ -1488,7 +1491,7 @@ void Calendar::computeFields(UErrorCode &ec)
        (1 << UCAL_MONTH) |
        (1 << UCAL_DAY_OF_MONTH) | // = UCAL_DATE
        (1 << UCAL_DAY_OF_YEAR) |
-        (1 << UCAL_EXTENDED_YEAR);  
+        (1 << UCAL_EXTENDED_YEAR);

    for (int32_t i=0; i<UCAL_FIELD_COUNT; ++i) {
        if ((mask & 1) == 0) {
@ -1517,7 +1520,7 @@ void Calendar::computeFields(UErrorCode &ec)
 #if defined (U_DEBUG_CAL)
    //fprintf(stderr, "%s:%d- Hmm! Jules @ %d, as per %.0lf millis\n",
    //__FILE__, __LINE__, fFields[UCAL_JULIAN_DAY], localMillis);
-#endif  
+#endif

    computeGregorianAndDOWFields(fFields[UCAL_JULIAN_DAY], ec);

@ -1615,7 +1618,7 @@ void Calendar::computeGregorianFields(int32_t julianDay, UErrorCode & /* ec */)
 * proleptic Gregorian calendar, which has no field larger than a year.
 */
 void Calendar::computeWeekFields(UErrorCode &ec) {
-    if(U_FAILURE(ec)) { 
+    if(U_FAILURE(ec)) {
        return;
    }
    int32_t eyear = fFields[UCAL_EXTENDED_YEAR];
@ -1678,7 +1681,7 @@ void Calendar::computeWeekFields(UErrorCode &ec) {
    fFields[UCAL_WEEK_OF_MONTH] = weekNumber(dayOfMonth, dayOfWeek);
    fFields[UCAL_DAY_OF_WEEK_IN_MONTH] = (dayOfMonth-1) / 7 + 1;
 #if defined (U_DEBUG_CAL)
-    if(fFields[UCAL_DAY_OF_WEEK_IN_MONTH]==0) fprintf(stderr, "%s:%d: DOWIM %d on %g\n", 
+    if(fFields[UCAL_DAY_OF_WEEK_IN_MONTH]==0) fprintf(stderr, "%s:%d: DOWIM %d on %g\n",
        __FILE__, __LINE__,fFields[UCAL_DAY_OF_WEEK_IN_MONTH], fTime);
 #endif
 }
@ -1723,7 +1726,7 @@ void Calendar::handleComputeFields(int32_t /* julianDay */, UErrorCode &/* statu
 // -------------------------------------


-void Calendar::roll(EDateFields field, int32_t amount, UErrorCode& status) 
+void Calendar::roll(EDateFields field, int32_t amount, UErrorCode& status)
 {
    roll((UCalendarDateFields)field, amount, status);
 }
@ -2061,7 +2064,7 @@ void Calendar::roll(UCalendarDateFields field, int32_t amount, UErrorCode& statu
    default:
        // Other fields cannot be rolled by this method
 #if defined (U_DEBUG_CAL)
-        fprintf(stderr, "%s:%d: ILLEGAL ARG because of roll on non-rollable field %s\n", 
+        fprintf(stderr, "%s:%d: ILLEGAL ARG because of roll on non-rollable field %s\n",
            __FILE__, __LINE__,fldName(field));
 #endif
        status = U_ILLEGAL_ARGUMENT_ERROR;
@ -2252,7 +2255,7 @@ void Calendar::add(UCalendarDateFields field, int32_t amount, UErrorCode& status
                }
            }
        }
-    } 
+    }
 }

 // -------------------------------------
@ -2617,7 +2620,7 @@ Calendar::isWeekend(void) const

 // ------------------------------------- limits

-int32_t 
+int32_t
 Calendar::getMinimum(EDateFields field) const {
    return getLimit((UCalendarDateFields) field,UCAL_LIMIT_MINIMUM);
 }
@ -2668,7 +2671,7 @@ Calendar::getLeastMaximum(UCalendarDateFields field) const
 }

 // -------------------------------------
-int32_t 
+int32_t
 Calendar::getActualMinimum(EDateFields field, UErrorCode& status) const
 {
    return getActualMinimum((UCalendarDateFields) field, status);
@ -2744,7 +2747,7 @@ Calendar::getActualMinimum(UCalendarDateFields field, UErrorCode& status) const
        work->set(field, fieldValue);
        if (work->get(field, status) != fieldValue) {
            break;
-        } 
+        }
        else {
            result = fieldValue;
            fieldValue--;
@ -2800,7 +2803,7 @@ void Calendar::validateField(UCalendarDateFields field, UErrorCode &status) {
    case UCAL_DAY_OF_WEEK_IN_MONTH:
        if (internalGet(field) == 0) {
 #if defined (U_DEBUG_CAL)
-            fprintf(stderr, "%s:%d: ILLEGAL ARG because DOW in month cannot be 0\n", 
+            fprintf(stderr, "%s:%d: ILLEGAL ARG because DOW in month cannot be 0\n",
                __FILE__, __LINE__);
 #endif
            status = U_ILLEGAL_ARGUMENT_ERROR; // "DAY_OF_WEEK_IN_MONTH cannot be zero"
@ -2826,7 +2829,7 @@ void Calendar::validateField(UCalendarDateFields field, int32_t min, int32_t max
    int32_t value = fFields[field];
    if (value < min || value > max) {
 #if defined (U_DEBUG_CAL)
-        fprintf(stderr, "%s:%d: ILLEGAL ARG because of field %s out of range %d..%d  at %d\n", 
+        fprintf(stderr, "%s:%d: ILLEGAL ARG because of field %s out of range %d..%d  at %d\n",
            __FILE__, __LINE__,fldName(field),min,max,value);
 #endif
        status = U_ILLEGAL_ARGUMENT_ERROR;
@ -2892,7 +2895,7 @@ linesInGroup:
 }

 const UFieldResolutionTable Calendar::kDatePrecedence[] =
-{ 
+{
    {
        { UCAL_DAY_OF_MONTH, kResolveSTOP },
        { UCAL_WEEK_OF_YEAR, UCAL_DAY_OF_WEEK, kResolveSTOP },
@ -2913,12 +2916,12 @@ const UFieldResolutionTable Calendar::kDatePrecedence[] =
        { kResolveRemap | UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_DAY_OF_WEEK, kResolveSTOP },
        { kResolveRemap | UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_DOW_LOCAL, kResolveSTOP },
        { kResolveSTOP }
-    }, 
+    },
    {{kResolveSTOP}}
 };


-const UFieldResolutionTable Calendar::kDOWPrecedence[] = 
+const UFieldResolutionTable Calendar::kDOWPrecedence[] =
 {
    {
        { UCAL_DAY_OF_WEEK,kResolveSTOP, kResolveSTOP },
@ -2929,7 +2932,7 @@ const UFieldResolutionTable Calendar::kDOWPrecedence[] =
 };

 // precedence for calculating a year
-const UFieldResolutionTable Calendar::kYearPrecedence[] = 
+const UFieldResolutionTable Calendar::kYearPrecedence[] =
 {
    {
        { UCAL_YEAR, kResolveSTOP },
@ -2966,7 +2969,7 @@ void Calendar::computeTime(UErrorCode& status) {
    //  }
 #endif

-    int32_t millisInDay;
+    double millisInDay;

    // We only use MILLISECONDS_IN_DAY if it has been set by the user.
    // This makes it possible for the caller to set the calendar to a
@ -3086,10 +3089,10 @@ UBool Calendar::getImmediatePreviousZoneTransition(UDate base, UDate *transition
 * reflects local zone wall time.
 * @stable ICU 2.0
 */
-int32_t Calendar::computeMillisInDay() {
+double Calendar::computeMillisInDay() {
  // Do the time portion of the conversion.

-    int32_t millisInDay = 0;
+    double millisInDay = 0;

    // Find the best set of fields specifying the time of day.  There
    // are only two possibilities here; the HOUR_OF_DAY or the
@ -3131,7 +3134,7 @@ int32_t Calendar::computeMillisInDay() {
 * or range.
 * @stable ICU 2.0
 */
-int32_t Calendar::computeZoneOffset(double millis, int32_t millisInDay, UErrorCode &ec) {
+int32_t Calendar::computeZoneOffset(double millis, double millisInDay, UErrorCode &ec) {
    int32_t rawOffset, dstOffset;
    UDate wall = millis + millisInDay;
    BasicTimeZone* btz = getBasicTimeZone();
@ -3178,7 +3181,7 @@ int32_t Calendar::computeZoneOffset(double millis, int32_t millisInDay, UErrorCo
    return rawOffset + dstOffset;
 }

-int32_t Calendar::computeJulianDay() 
+int32_t Calendar::computeJulianDay()
 {
    // We want to see if any of the date fields is newer than the
    // JULIAN_DAY.  If not, then we use JULIAN_DAY.  If so, then we do
@ -3220,9 +3223,9 @@ int32_t Calendar::handleComputeJulianDay(UCalendarDateFields bestField)  {
        internalSet(UCAL_EXTENDED_YEAR, year);
    }

-#if defined (U_DEBUG_CAL) 
+#if defined (U_DEBUG_CAL)
    fprintf(stderr, "%s:%d: bestField= %s - y=%d\n", __FILE__, __LINE__, fldName(bestField), year);
-#endif 
+#endif

    // Get the Julian day of the day BEFORE the start of this year.
    // If useMonth is true, get the day before the start of the month.
@ -3304,9 +3307,9 @@ int32_t Calendar::handleComputeJulianDay(UCalendarDateFields bestField)  {
            date += ((monthLength - date) / 7 + dim + 1) * 7;
        }
    } else {
-#if defined (U_DEBUG_CAL) 
+#if defined (U_DEBUG_CAL)
        fprintf(stderr, "%s:%d - bf= %s\n", __FILE__, __LINE__, fldName(bestField));
-#endif 
+#endif

        if(bestField == UCAL_WEEK_OF_YEAR) {  // ------------------------------------- WOY -------------
            if(!isSet(UCAL_YEAR_WOY) ||  // YWOY not set at all or
@ -3317,30 +3320,30 @@ int32_t Calendar::handleComputeJulianDay(UCalendarDateFields bestField)  {
                int32_t woy = internalGet(bestField);

                int32_t nextJulianDay = handleComputeMonthStart(year+1, 0, FALSE); // jd of day before jan 1
-                int32_t nextFirst = julianDayToDayOfWeek(nextJulianDay + 1) - firstDayOfWeek; 
+                int32_t nextFirst = julianDayToDayOfWeek(nextJulianDay + 1) - firstDayOfWeek;

                if (nextFirst < 0) { // 0..6 ldow of Jan 1
                    nextFirst += 7;
                }

                if(woy==1) {  // FIRST WEEK ---------------------------------
-#if defined (U_DEBUG_CAL) 
-                    fprintf(stderr, "%s:%d - woy=%d, yp=%d, nj(%d)=%d, nf=%d", __FILE__, __LINE__, 
-                        internalGet(bestField), resolveFields(kYearPrecedence), year+1, 
+#if defined (U_DEBUG_CAL)
+                    fprintf(stderr, "%s:%d - woy=%d, yp=%d, nj(%d)=%d, nf=%d", __FILE__, __LINE__,
+                        internalGet(bestField), resolveFields(kYearPrecedence), year+1,
                        nextJulianDay, nextFirst);

                    fprintf(stderr, " next: %d DFW,  min=%d   \n", (7-nextFirst), getMinimalDaysInFirstWeek() );
-#endif 
+#endif

                    // nextFirst is now the localized DOW of Jan 1  of y-woy+1
                    if((nextFirst > 0) &&   // Jan 1 starts on FDOW
                        (7-nextFirst) >= getMinimalDaysInFirstWeek()) // or enough days in the week
                    {
                        // Jan 1 of (yearWoy+1) is in yearWoy+1 - recalculate JD to next year
-#if defined (U_DEBUG_CAL) 
-                        fprintf(stderr, "%s:%d - was going to move JD from %d to %d [d%d]\n", __FILE__, __LINE__, 
+#if defined (U_DEBUG_CAL)
+                        fprintf(stderr, "%s:%d - was going to move JD from %d to %d [d%d]\n", __FILE__, __LINE__,
                            julianDay, nextJulianDay, (nextJulianDay-julianDay));
-#endif 
+#endif
                        julianDay = nextJulianDay;

                        // recalculate 'first' [0-based local dow of jan 1]
@ -3351,7 +3354,7 @@ int32_t Calendar::handleComputeJulianDay(UCalendarDateFields bestField)  {
                        // recalculate date.
                        date = 1 - first + dowLocal;
                    }
-                } else if(woy>=getLeastMaximum(bestField)) {          
+                } else if(woy>=getLeastMaximum(bestField)) {
                    // could be in the last week- find out if this JD would overstep
                    int32_t testDate = date;
                    if ((7 - first) < getMinimalDaysInFirstWeek()) {
@ -3361,7 +3364,7 @@ int32_t Calendar::handleComputeJulianDay(UCalendarDateFields bestField)  {
                    // Now adjust for the week number.
                    testDate += 7 * (woy - 1);

-#if defined (U_DEBUG_CAL) 
+#if defined (U_DEBUG_CAL)
                    fprintf(stderr, "%s:%d - y=%d, y-1=%d doy%d, njd%d (C.F. %d)\n",
                        __FILE__, __LINE__, year, year-1, testDate, julianDay+testDate, nextJulianDay);
 #endif
@ -3375,7 +3378,7 @@ int32_t Calendar::handleComputeJulianDay(UCalendarDateFields bestField)  {
                        }
                        date = 1 - first + dowLocal;

-#if defined (U_DEBUG_CAL) 
+#if defined (U_DEBUG_CAL)
                        fprintf(stderr, "%s:%d - date now %d, jd%d, ywoy%d\n",
                            __FILE__, __LINE__, date, julianDay, year-1);
 #endif
@ -3400,13 +3403,13 @@ int32_t Calendar::handleComputeJulianDay(UCalendarDateFields bestField)  {
 }

 int32_t
-Calendar::getDefaultMonthInYear(int32_t /*eyear*/) 
+Calendar::getDefaultMonthInYear(int32_t /*eyear*/)
 {
    return 0;
 }

 int32_t
-Calendar::getDefaultDayInMonth(int32_t /*eyear*/, int32_t /*month*/) 
+Calendar::getDefaultDayInMonth(int32_t /*eyear*/, int32_t /*month*/)
 {
    return 1;
 }
@ -3436,13 +3439,13 @@ int32_t Calendar::getLocalDOW()

 int32_t Calendar::handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t woy)
 {
-    // We have UCAL_YEAR_WOY and UCAL_WEEK_OF_YEAR - from those, determine 
+    // We have UCAL_YEAR_WOY and UCAL_WEEK_OF_YEAR - from those, determine
    // what year we fall in, so that other code can set it properly.
    // (code borrowed from computeWeekFields and handleComputeJulianDay)
    //return yearWoy;

    // First, we need a reliable DOW.
-    UCalendarDateFields bestField = resolveFields(kDatePrecedence); // !! Note: if subclasses have a different table, they should override handleGetExtendedYearFromWeekFields 
+    UCalendarDateFields bestField = resolveFields(kDatePrecedence); // !! Note: if subclasses have a different table, they should override handleGetExtendedYearFromWeekFields

    // Now, a local DOW
    int32_t dowLocal = getLocalDOW(); // 0..6
@ -3475,9 +3478,9 @@ int32_t Calendar::handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t w

    int32_t minDays = getMinimalDaysInFirstWeek();
    UBool jan1InPrevYear = FALSE;  // January 1st in the year of WOY is the 1st week?  (i.e. first week is < minimal )
-    //UBool nextJan1InPrevYear = FALSE; // January 1st of Year of WOY + 1 is in the first week? 
+    //UBool nextJan1InPrevYear = FALSE; // January 1st of Year of WOY + 1 is in the first week?

-    if((7 - first) < minDays) { 
+    if((7 - first) < minDays) {
        jan1InPrevYear = TRUE;
    }

@ -3500,8 +3503,8 @@ int32_t Calendar::handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t w
                    return yearWoy; // in this year
                }
            }
-        } else if(woy >= getLeastMaximum(bestField)) {  
-            // we _might_ be in the last week.. 
+        } else if(woy >= getLeastMaximum(bestField)) {
+            // we _might_ be in the last week..
            int32_t jd =  // Calculate JD of our target day:
                jan1Start +  // JD of Jan 1
                (7-first) + //  days in the first week (Jan 1.. )
@ -3538,7 +3541,7 @@ int32_t Calendar::handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t w
            }

            //(internalGet(UCAL_DATE) <= (7-first)) /* && in minDow  */ ) {
-            //within 1st week and in this month.. 
+            //within 1st week and in this month..
            //return yearWoy+1;
            return yearWoy;

@ -3671,7 +3674,7 @@ void Calendar::prepareGetActual(UCalendarDateFields field, UBool isMinimum, UErr
                    dow += 7;
                }
            }
-#if defined (U_DEBUG_CAL) 
+#if defined (U_DEBUG_CAL)
            fprintf(stderr, "prepareGetActualHelper(WOM/WOY) - dow=%d\n", dow);
 #endif
            set(UCAL_DAY_OF_WEEK, dow);
@ -3687,7 +3690,7 @@ void Calendar::prepareGetActual(UCalendarDateFields field, UBool isMinimum, UErr

 int32_t Calendar::getActualHelper(UCalendarDateFields field, int32_t startValue, int32_t endValue, UErrorCode &status) const
 {
-#if defined (U_DEBUG_CAL) 
+#if defined (U_DEBUG_CAL)
    fprintf(stderr, "getActualHelper(%d,%d .. %d, %s)\n", field, startValue, endValue, u_errorName(status));
 #endif
    if (startValue == endValue) {
@ -3723,7 +3726,7 @@ int32_t Calendar::getActualHelper(UCalendarDateFields field, int32_t startValue,
    int32_t result = startValue;
    if ((work->get(field, status) != startValue
         && field != UCAL_WEEK_OF_MONTH && delta > 0 ) || U_FAILURE(status)) {
-#if defined (U_DEBUG_CAL) 
+#if defined (U_DEBUG_CAL)
        fprintf(stderr, "getActualHelper(fld %d) - got  %d (not %d) - %s\n", field, work->get(field,status), startValue, u_errorName(status));
 #endif
    } else {
@ -3740,7 +3743,7 @@ int32_t Calendar::getActualHelper(UCalendarDateFields field, int32_t startValue,
        } while (startValue != endValue);
    }
    delete work;
-#if defined (U_DEBUG_CAL) 
+#if defined (U_DEBUG_CAL)
    fprintf(stderr, "getActualHelper(%d) = %d\n", field, result);
 #endif
    return result;
@ -3767,18 +3770,18 @@ Calendar::setWeekData(const Locale& desiredLocale, const char *type, UErrorCode&
    // Since week and weekend data is territory based instead of language based,
    // we may need to tweak the locale that we are using to try to get the appropriate
    // values, using the following logic:
-    // 1). If the locale has a language but no territory, use the territory as defined by 
+    // 1). If the locale has a language but no territory, use the territory as defined by
    //     the likely subtags.
    // 2). If the locale has a script designation then we ignore it,
    //     then remove it ( i.e. "en_Latn_US" becomes "en_US" )
- 
+
    char minLocaleID[ULOC_FULLNAME_CAPACITY] = { 0 };
    UErrorCode myStatus = U_ZERO_ERROR;

    uloc_minimizeSubtags(desiredLocale.getName(),minLocaleID,ULOC_FULLNAME_CAPACITY,&myStatus);
    Locale min = Locale::createFromName(minLocaleID);
    Locale useLocale;
-    if ( uprv_strlen(desiredLocale.getCountry()) == 0 || 
+    if ( uprv_strlen(desiredLocale.getCountry()) == 0 ||
         (uprv_strlen(desiredLocale.getScript()) > 0 && uprv_strlen(min.getScript()) == 0) ) {
        char maxLocaleID[ULOC_FULLNAME_CAPACITY] = { 0 };
        myStatus = U_ZERO_ERROR;
@ -3788,8 +3791,8 @@ Calendar::setWeekData(const Locale& desiredLocale, const char *type, UErrorCode&
    } else {
        useLocale = Locale(desiredLocale);
    }
- 
-    /* The code here is somewhat of a hack, since week data and weekend data aren't really tied to 
+
+    /* The code here is somewhat of a hack, since week data and weekend data aren't really tied to
       a specific calendar, they aren't truly locale data.  But this is the only place where valid and
       actual locale can be set, so we take a shot at it here by loading a representative resource
       from the calendar data.  The code used to use the dateTimeElements resource to get first day
@ -3865,8 +3868,8 @@ Calendar::setWeekData(const Locale& desiredLocale, const char *type, UErrorCode&
 * and areFieldsSet.  Callers should check isTimeSet and only
 * call this method if isTimeSet is false.
 */
-void 
-Calendar::updateTime(UErrorCode& status) 
+void
+Calendar::updateTime(UErrorCode& status)
 {
    computeTime(status);
    if(U_FAILURE(status))
@ -3875,14 +3878,14 @@ Calendar::updateTime(UErrorCode& status)
    // If we are lenient, we need to recompute the fields to normalize
    // the values.  Also, if we haven't set all the fields yet (i.e.,
    // in a newly-created object), we need to fill in the fields. [LIU]
-    if (isLenient() || ! fAreAllFieldsSet) 
+    if (isLenient() || ! fAreAllFieldsSet)
        fAreFieldsSet = FALSE;

    fIsTimeSet = TRUE;
    fAreFieldsVirtuallySet = FALSE;
 }

-Locale 
+Locale
 Calendar::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
    U_LOCALE_BASED(locBased, *this);
    return locBased.getLocale(type, status);
@ -3945,4 +3948,3 @@ U_NAMESPACE_END


 //eof
-
--- a/icu4c/source/i18n/coll.cpp
+++ b/icu4c/source/i18n/coll.cpp
@ -63,8 +63,10 @@

 static icu::Locale* availableLocaleList = NULL;
 static int32_t  availableLocaleListCount;
+#if !UCONFIG_NO_SERVICE
 static icu::ICULocaleService* gService = NULL;
 static icu::UInitOnce gServiceInitOnce = U_INITONCE_INITIALIZER;
+#endif
 static icu::UInitOnce gAvailableLocaleListInitOnce;

 /**
--- a/icu4c/source/i18n/collationdatawriter.cpp
+++ b/icu4c/source/i18n/collationdatawriter.cpp
@ -224,7 +224,7 @@ CollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion,
    int32_t totalSize = indexesLength * 4;

    if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) {
-        indexes[CollationDataReader::IX_JAMO_CE32S_START] = data.jamoCE32s - data.ce32s;
+        indexes[CollationDataReader::IX_JAMO_CE32S_START] = static_cast<int32_t>(data.jamoCE32s - data.ce32s);
    } else {
        indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1;
    }
--- a/icu4c/source/i18n/collationfastlatinbuilder.cpp
+++ b/icu4c/source/i18n/collationfastlatinbuilder.cpp
@ -607,7 +607,7 @@ CollationFastLatinBuilder::encodeContractions(UErrorCode &errorCode) {
        }
        UBool firstTriple = TRUE;
        for(int32_t index = (int32_t)ce & 0x7fffffff;; index += 3) {
-            int32_t x = contractionCEs.elementAti(index);
+            int32_t x = static_cast<int32_t>(contractionCEs.elementAti(index));
            if((uint32_t)x == CollationFastLatin::CONTR_CHAR_MASK && !firstTriple) { break; }
            int64_t cce0 = contractionCEs.elementAti(index + 1);
            int64_t cce1 = contractionCEs.elementAti(index + 2);
--- a/icu4c/source/i18n/datefmt.cpp
+++ b/icu4c/source/i18n/datefmt.cpp
@ -739,7 +739,7 @@ DateFormat::setBooleanAttribute(UDateFormatBooleanAttribute attr,
 UBool 
 DateFormat::getBooleanAttribute(UDateFormatBooleanAttribute attr, UErrorCode &/*status*/) const {

-    return fBoolFlags.get(attr);
+    return static_cast<UBool>(fBoolFlags.get(attr));
 }

 U_NAMESPACE_END
--- a/icu4c/source/i18n/decNumber.cpp
+++ b/icu4c/source/i18n/decNumber.cpp
@ -386,7 +386,7 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromUInt32(decNumber *dn, uInt uin) {
    *up=(Unit)(uin%(DECDPUNMAX+1));
    uin=uin/(DECDPUNMAX+1);
    }
-  dn->digits=decGetDigits(dn->lsu, up-dn->lsu);
+  dn->digits=decGetDigits(dn->lsu, static_cast<int32_t>(up - dn->lsu));
  return dn;
  } /* decNumberFromUInt32  */

@ -666,7 +666,7 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromString(decNumber *dn, const char

    /* Handle decimal point...  */
    if (dotchar!=NULL && dotchar<last)  /* non-trailing '.' found?  */
-      exponent-=(last-dotchar);         /* adjust exponent  */
+      exponent -= static_cast<int32_t>(last-dotchar);         /* adjust exponent  */
    /* [we can now ignore the .]  */

    /* OK, the digits string is good.  Assemble in the decNumber, or in  */
@ -866,7 +866,7 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberAnd(decNumber *res, const decNumber *
      } /* both OK  */
    } /* each unit  */
  /* [here uc-1 is the msu of the result]  */
-  res->digits=decGetDigits(res->lsu, uc-res->lsu);
+  res->digits=decGetDigits(res->lsu, static_cast<int32_t>(uc - res->lsu));
  res->exponent=0;                      /* integer  */
  res->bits=0;                          /* sign=0  */
  return res;  /* [no status to set]  */
@ -1253,7 +1253,7 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberInvert(decNumber *res, const decNumbe
      } /* each digit  */
    } /* each unit  */
  /* [here uc-1 is the msu of the result]  */
-  res->digits=decGetDigits(res->lsu, uc-res->lsu);
+  res->digits=decGetDigits(res->lsu, static_cast<int32_t>(uc - res->lsu));
  res->exponent=0;                      /* integer  */
  res->bits=0;                          /* sign=0  */
  return res;  /* [no status to set]  */
@ -1880,7 +1880,7 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberOr(decNumber *res, const decNumber *l
      } /* non-zero  */
    } /* each unit  */
  /* [here uc-1 is the msu of the result]  */
-  res->digits=decGetDigits(res->lsu, uc-res->lsu);
+  res->digits=decGetDigits(res->lsu, static_cast<int32_t>(uc-res->lsu));
  res->exponent=0;                      /* integer  */
  res->bits=0;                          /* sign=0  */
  return res;  /* [no status to set]  */
@ -2586,7 +2586,7 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberRotate(decNumber *res, const decNumbe
          } /* whole units to rotate  */
        /* the rotation may have left an undetermined number of zeros  */
        /* on the left, so true length needs to be calculated  */
-        res->digits=decGetDigits(res->lsu, msumax-res->lsu+1);
+        res->digits=decGetDigits(res->lsu, static_cast<int32_t>(msumax-res->lsu+1));
        } /* rotate needed  */
      } /* rhs OK  */
    } /* numerics  */
@ -3310,7 +3310,7 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberXor(decNumber *res, const decNumber *
      } /* non-zero  */
    } /* each unit  */
  /* [here uc-1 is the msu of the result]  */
-  res->digits=decGetDigits(res->lsu, uc-res->lsu);
+  res->digits=decGetDigits(res->lsu, static_cast<int32_t>(uc-res->lsu));
  res->exponent=0;                      /* integer  */
  res->bits=0;                          /* sign=0  */
  return res;  /* [no status to set]  */
@ -5101,7 +5101,7 @@ static decNumber * decMultiplyOp(decNumber *res, const decNumber *lhs,
          } /* p  */
        *up=(Unit)item; up++;                /* [final needs no division]  */
        } /* lp  */
-      accunits=up-acc;                       /* count of units  */
+      accunits = static_cast<int32_t>(up-acc);                       /* count of units  */
      }
     else { /* here to use units directly, without chunking ['old code']  */
    #endif
@ -6587,11 +6587,11 @@ static Int decUnitAddSub(const Unit *a, Int alength,

  /* OK, all A and B processed; might still have carry or borrow  */
  /* return number of Units in the result, negated if a borrow  */
-  if (carry==0) return c-clsu;     /* no carry, so no more to do  */
+  if (carry==0) return static_cast<int32_t>(c-clsu);     /* no carry, so no more to do  */
  if (carry>0) {                   /* positive carry  */
    *c=(Unit)carry;                /* place as new unit  */
    c++;                           /* ..  */
-    return c-clsu;
+    return static_cast<int32_t>(c-clsu);
    }
  /* -ve carry: it's a borrow; complement needed  */
  add=1;                           /* temporary carry...  */
@ -6614,7 +6614,7 @@ static Int decUnitAddSub(const Unit *a, Int alength,
    *c=(Unit)(add-carry-1);
    c++;                      /* interesting, include it  */
    }
-  return clsu-c;              /* -ve result indicates borrowed  */
+  return static_cast<int32_t>(clsu-c);              /* -ve result indicates borrowed  */
  } /* decUnitAddSub  */

 /* ------------------------------------------------------------------ */
@ -6798,7 +6798,7 @@ static Int decShiftToLeast(Unit *uar, Int units, Int shift) {
  if (cut==DECDPUN) {              /* unit-boundary case; easy  */
    up=uar+D2U(shift);
    for (; up<uar+units; target++, up++) *target=*up;
-    return target-uar;
+    return static_cast<int32_t>(target-uar);
    }

  /* messier  */
@ -6826,7 +6826,7 @@ static Int decShiftToLeast(Unit *uar, Int units, Int shift) {
    count-=cut;
    if (count<=0) break;
    }
-  return target-uar+1;
+  return static_cast<int32_t>(target-uar+1);
  } /* decShiftToLeast  */

 #if DECSUBSET
@ -7690,7 +7690,7 @@ static decNumber *decDecap(decNumber *dn, Int drop) {
  cut=MSUDIGITS(dn->digits-drop);       /* digits to be in use in msu  */
  if (cut!=DECDPUN) *msu%=powers[cut];  /* clear left digits  */
  /* that may have left leading zero digits, so do a proper count...  */
-  dn->digits=decGetDigits(dn->lsu, msu-dn->lsu+1);
+  dn->digits=decGetDigits(dn->lsu, static_cast<int32_t>(msu-dn->lsu+1));
  return dn;
  } /* decDecap  */

--- a/icu4c/source/i18n/decimfmt.cpp
+++ b/icu4c/source/i18n/decimfmt.cpp
@ -2543,7 +2543,7 @@ UnicodeString DecimalFormat::getPadCharacterString() const {
 }

 void DecimalFormat::setPadCharacter(const UnicodeString &padChar) {
-    UChar pad;
+    UChar32 pad;
    if (padChar.length() > 0) {
        pad = padChar.char32At(0);
    }
@ -2792,7 +2792,7 @@ DecimalFormat::setDecimalSeparatorAlwaysShown(UBool newValue)
 UBool 
 DecimalFormat::isDecimalPatternMatchRequired(void) const
 {
-    return fBoolFlags.contains(UNUM_PARSE_DECIMAL_MARK_REQUIRED);
+    return static_cast<UBool>(fBoolFlags.contains(UNUM_PARSE_DECIMAL_MARK_REQUIRED));
 }

 //------------------------------------------------------------------------------
--- a/icu4c/source/i18n/rbnf.cpp
+++ b/icu4c/source/i18n/rbnf.cpp
@ -1200,12 +1200,11 @@ RuleBasedNumberFormat::format(double number,
                              UnicodeString& toAppendTo,
                              FieldPosition& /* pos */) const
 {
-    int32_t startPos = toAppendTo.length();
    UErrorCode status = U_ZERO_ERROR;
    if (defaultRuleSet) {
        format(number, *defaultRuleSet, toAppendTo, status);
    }
-    return adjustForCapitalizationContext(startPos, toAppendTo, status);
+    return toAppendTo;
 }


--- a/icu4c/source/i18n/transreg.cpp
+++ b/icu4c/source/i18n/transreg.cpp
@ -46,11 +46,29 @@ static const UChar LOCALE_SEP  = 95; // '_'
 //static const UChar VARIANT_SEP = 0x002F; // '/'

 // String constants
-static const UChar ANY[] = { 65, 110, 121, 0 }; // Any
+static const UChar ANY[] = { 0x41, 0x6E, 0x79, 0 }; // Any
+static const UChar LAT[] = { 0x4C, 0x61, 0x74, 0 }; // Lat

 // empty string
 #define NO_VARIANT UnicodeString()

+// initial estimate for specDAG size
+// ICU 60 Transliterator::countAvailableSources()
+#define SPECDAG_INIT_SIZE 149
+
+// initial estimate for number of variant names
+#define VARIANT_LIST_INIT_SIZE 11
+#define VARIANT_LIST_MAX_SIZE 31
+
+// initial estimate for availableIDs count (default estimate is 8 => multiple reallocs)
+// ICU 60 Transliterator::countAvailableIDs()
+#define AVAILABLE_IDS_INIT_SIZE 641
+
+// initial estimate for number of targets for source "Any", "Lat"
+// ICU 60 Transliterator::countAvailableTargets("Any")/("Latn")
+#define ANY_TARGETS_INIT_SIZE 125
+#define LAT_TARGETS_INIT_SIZE 23
+
 /**
 * Resource bundle key for the RuleBasedTransliterator rule.
 */
@ -517,10 +535,17 @@ U_CDECL_END

 TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
    registry(TRUE, status),
-    specDAG(TRUE, status),
-    availableIDs(status)
+    specDAG(TRUE, SPECDAG_INIT_SIZE, status),
+    variantList(VARIANT_LIST_INIT_SIZE, status),
+    availableIDs(AVAILABLE_IDS_INIT_SIZE, status)
 {
    registry.setValueDeleter(deleteEntry);
+    variantList.setDeleter(uprv_deleteUObject);
+    variantList.setComparer(uhash_compareCaselessUnicodeString);
+    UnicodeString *emptyString = new UnicodeString();
+    if (emptyString != NULL) {
+        variantList.addElement(emptyString, status);
+    }
    availableIDs.setDeleter(uprv_deleteUObject);
    availableIDs.setComparer(uhash_compareCaselessUnicodeString);
    specDAG.setValueDeleter(uhash_deleteHashtable);
@ -781,9 +806,15 @@ int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& sour
    if (targets == 0) {
        return 0;
    }
-    UVector *variants = (UVector*) targets->get(target);
-    // variants may be 0 if the source/target are invalid
-    return (variants == 0) ? 0 : variants->size();
+    int32_t varMask = targets->geti(target);
+    int32_t varCount = 0;
+    while (varMask > 0) {
+        if (varMask & 1) {
+            varCount++;
+        }
+        varMask >>= 1;
+    }
+    return varCount;
 }

 UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
@ -795,17 +826,25 @@ UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
        result.truncate(0); // invalid source
        return result;
    }
-    UVector *variants = (UVector*) targets->get(target);
-    if (variants == 0) {
-        result.truncate(0); // invalid target
-        return result;
-    }
-    UnicodeString *v = (UnicodeString*) variants->elementAt(index);
-    if (v == 0) {
-        result.truncate(0); // invalid index
-    } else {
-        result = *v;
+    int32_t varMask = targets->geti(target);
+    int32_t varCount = 0;
+    int32_t varListIndex = 0;
+    while (varMask > 0) {
+        if (varMask & 1) {
+            if (varCount == index) {
+                UnicodeString *v = (UnicodeString*) variantList.elementAt(varListIndex);
+                if (v != NULL) {
+                    result = *v;
+                    return result;
+                }
+                break;
+            }
+            varCount++;
+        }
+        varMask >>= 1;
+        varListIndex++;
    }
+    result.truncate(0); // invalid target or index
    return result;
 }

@ -911,9 +950,9 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
            UnicodeString *newID = (UnicodeString *)ID.clone();
            // Check to make sure newID was created.
            if (newID != NULL) {
-	            // NUL-terminate the ID string
-	            newID->getTerminatedBuffer();
-	            availableIDs.addElement(newID, status);
+                // NUL-terminate the ID string
+                newID->getTerminatedBuffer();
+                availableIDs.addElement(newID, status);
            }
        }
    } else {
@ -924,9 +963,7 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID,

 /**
 * Register a source-target/variant in the specDAG.  Variant may be
- * empty, but source and target must not be.  If variant is empty then
- * the special variant NO_VARIANT is stored in slot zero of the
- * UVector of variants.
+ * empty, but source and target must not be.
 */
 void TransliteratorRegistry::registerSTV(const UnicodeString& source,
                                         const UnicodeString& target,
@ -936,39 +973,38 @@ void TransliteratorRegistry::registerSTV(const UnicodeString& source,
    UErrorCode status = U_ZERO_ERROR;
    Hashtable *targets = (Hashtable*) specDAG.get(source);
    if (targets == 0) {
-        targets = new Hashtable(TRUE, status);
-        if (U_FAILURE(status) || targets == 0) {
+        int32_t size = 3;
+        if (source.compare(ANY,3) == 0) {
+            size = ANY_TARGETS_INIT_SIZE;
+        } else if (source.compare(LAT,3) == 0) {
+            size = LAT_TARGETS_INIT_SIZE;
+        }
+        targets = new Hashtable(TRUE, size, status);
+        if (U_FAILURE(status) || targets == NULL) {
            return;
        }
-        targets->setValueDeleter(uprv_deleteUObject);
        specDAG.put(source, targets, status);
    }
-    UVector *variants = (UVector*) targets->get(target);
-    if (variants == 0) {
-        variants = new UVector(uprv_deleteUObject,
-                               uhash_compareCaselessUnicodeString, status);
-        if (variants == 0) {
+    int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
+    if (variantListIndex < 0) {
+        if (variantList.size() >= VARIANT_LIST_MAX_SIZE) {
+            // can't handle any more variants
            return;
        }
-        targets->put(target, variants, status);
-    }
-    // assert(NO_VARIANT == "");
-    // We add the variant string.  If it is the special "no variant"
-    // string, that is, the empty string, we add it at position zero.
-    if (!variants->contains((void*) &variant)) {
-    	UnicodeString *tempus; // Used for null pointer check.
-        if (variant.length() > 0) {
-        	tempus = new UnicodeString(variant);
-        	if (tempus != NULL) {
-        		variants->addElement(tempus, status);
-        	}
-        } else {
-        	tempus = new UnicodeString();  // = NO_VARIANT
-        	if (tempus != NULL) {
-        		variants->insertElementAt(tempus, 0, status);
-        	}
+        UnicodeString *variantEntry = new UnicodeString(variant);
+        if (variantEntry != NULL) {
+            variantList.addElement(variantEntry, status);
+            if (U_SUCCESS(status)) {
+                variantListIndex = variantList.size() - 1;
+            }
+        }
+        if (variantListIndex < 0) {
+            return;
        }
    }
+    int32_t addMask = 1 << variantListIndex;
+    int32_t varMask = targets->geti(target);
+    targets->puti(target, varMask | addMask, status);
 }

 /**
@ -979,17 +1015,24 @@ void TransliteratorRegistry::removeSTV(const UnicodeString& source,
                                       const UnicodeString& variant) {
    // assert(source.length() > 0);
    // assert(target.length() > 0);
-//    UErrorCode status = U_ZERO_ERROR;
+    UErrorCode status = U_ZERO_ERROR;
    Hashtable *targets = (Hashtable*) specDAG.get(source);
-    if (targets == 0) {
+    if (targets == NULL) {
        return; // should never happen for valid s-t/v
    }
-    UVector *variants = (UVector*) targets->get(target);
-    if (variants == 0) {
+    int32_t varMask = targets->geti(target);
+    if (varMask == 0) {
        return; // should never happen for valid s-t/v
    }
-    variants->removeElement((void*) &variant);
-    if (variants->size() == 0) {
+    int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
+    if (variantListIndex < 0) {
+        return; // should never happen for valid s-t/v
+    }
+    int32_t remMask = 1 << variantListIndex;
+    varMask &= (~remMask);
+    if (varMask != 0) {
+        targets->puti(target, varMask, status);
+    } else {
        targets->remove(target); // should delete variants
        if (targets->count() == 0) {
            specDAG.remove(source); // should delete targets
@ -1281,8 +1324,8 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
            UVector* rbts = new UVector(entry->u.dataVector->size(), status);
            // Check for null pointer
            if (rbts == NULL) {
-            	status = U_MEMORY_ALLOCATION_ERROR;
-            	return NULL;
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return NULL;
            }
            int32_t passNumber = 1;
            for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
--- a/icu4c/source/i18n/transreg.h
+++ b/icu4c/source/i18n/transreg.h
@ -440,13 +440,15 @@ class TransliteratorRegistry : public UMemory {

    /**
     * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
-     * target => (UVector: variant)) The UVector of variants is never
-     * empty.  For a source-target with no variant, the special
-     * variant NO_VARIANT (the empty string) is stored in slot zero of
-     * the UVector.
+     * target => variant bitmask)
     */
    Hashtable specDAG;

+    /**
+     * Vector of all variant names
+     */
+    UVector variantList;
+
    /**
     * Vector of public full IDs.
     */
--- a/icu4c/source/i18n/tznames_impl.cpp
+++ b/icu4c/source/i18n/tznames_impl.cpp
@ -2056,6 +2056,9 @@ static void U_CALLCONV prepareFind(UErrorCode &status) {
    if (U_SUCCESS(status)) {
        while ((mzID = mzIDs->snext(status)) && U_SUCCESS(status)) {
            const TZDBNames *names = TZDBTimeZoneNames::getMetaZoneNames(*mzID, status);
+            if (U_FAILURE(status)) {
+                break;
+            }
            if (names == NULL) {
                continue;
            }
@ -2187,9 +2190,11 @@ TZDBTimeZoneNames::getMetaZoneDisplayName(const UnicodeString& mzID,
    UErrorCode status = U_ZERO_ERROR;
    const TZDBNames *tzdbNames = TZDBTimeZoneNames::getMetaZoneNames(mzID, status);
    if (U_SUCCESS(status)) {
-        const UChar *s = tzdbNames->getName(type);
-        if (s != NULL) {
-            name.setTo(TRUE, s, -1);
+        if (tzdbNames != NULL) {
+            const UChar *s = tzdbNames->getName(type);
+            if (s != NULL) {
+                name.setTo(TRUE, s, -1);
+            }
        }
    }

--- a/Show more
+++ b/Show more