ICU-2150 simplify/improve UTF macros

X-SVN-Rev: 9930
2025-04-13 00:43:32 +00:00 · 2002-09-30 04:00:17 +00:00 · 2002-09-30 04:00:17 +00:00 · 6b1fa6036a
commit 6b1fa6036a
parent 22e1a4fe61
16 changed files with 2991 additions and 1199 deletions
--- a/icu4c/source/common/caniter.cpp
+++ b/icu4c/source/common/caniter.cpp
@ -582,7 +582,7 @@ Hashtable *CanonicalIterator::extract(UChar32 comp, const UChar *segment, int32_
    int32_t inputLen = 0;
    UChar decomp[decompSize];

-    UTF_APPEND_CHAR(temp, inputLen, bufSize, comp);
+    U16_APPEND_UNSAFE(temp, inputLen, comp);
    int32_t decompLen = unorm_getDecomposition(comp, FALSE, decomp, decompSize);
    if(decompLen < 0) {
        decompLen = -decompLen;
@ -597,7 +597,9 @@ Hashtable *CanonicalIterator::extract(UChar32 comp, const UChar *segment, int32_
    UChar32 decompCp;
    UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp);

-    int32_t i = 0;
+    int32_t i;
+    UBool overflow = FALSE;
+
    i = segmentPos;
    while(i < segLen) {
      UTF_NEXT_CHAR(segment, i, segLen, cp);
@ -620,7 +622,19 @@ Hashtable *CanonicalIterator::extract(UChar32 comp, const UChar *segment, int32_

            // brute force approach

-          UTF_APPEND_CHAR(buff, bufLen, bufSize, cp);
+            U16_APPEND(buff, bufLen, bufSize, cp, overflow);
+
+            if(overflow) {
+                /*
+                 * ### TODO handle buffer overflow
+                 * The buffer is large, but an overflow may still happen with
+                 * unusual input (many combining marks?).
+                 * Reallocate buffer and continue.
+                 * markus 20020929
+                 */
+
+                overflow = FALSE;
+            }

            /* TODO: optimize
            // since we know that the classes are monotonically increasing, after zero
--- a/icu4c/source/common/common.dsp
+++ b/icu4c/source/common/common.dsp
@ -3100,6 +3100,10 @@ InputPath=.\unicode\utf8.h
 # End Source File
 # Begin Source File

+SOURCE=.\unicode\utf_old.h
+# End Source File
+# Begin Source File
+
 SOURCE=.\util.h
 # End Source File
 # Begin Source File
--- a/icu4c/source/common/ucnv_cnv.c
+++ b/icu4c/source/common/ucnv_cnv.c
@ -141,7 +141,7 @@ ucnv_getUChar32KeepOverflow(UConverter *cnv, const UChar *buffer, int32_t length

    /* get the first code point in the buffer */
    i=0;
-    UTF_NEXT_CHAR_SAFE(buffer, i, length, c, FALSE);
+    UTF_NEXT_CHAR(buffer, i, length, c);
    if(i<length) {
        /* there are UChars left in the buffer that need to go into the overflow buffer */
        UChar *overflow=cnv->UCharErrorBuffer;
--- a/icu4c/source/common/unicode/ustring.h
+++ b/icu4c/source/common/unicode/ustring.h
@ -156,7 +156,7 @@ u_strstr(const UChar *s, const UChar *substring);
 * but u_strchr32() will find neither because they
 * combine to the code point U+10000.
 * Either function will find U+d800 in "a\ud800b".
- * This behavior ensures that UTF_GET_CHAR(u_strchr32(c))==c.
+ * This behavior ensures that U16_GET(u_strchr32(c))==c.
 *
 * @param s The string to search.
 * @param c The code point (0..0x10ffff) to find.
@ -628,7 +628,7 @@ u_memchr(const UChar *src, UChar ch, int32_t count);
 * but u_memchr32() will find neither because they
 * combine to the code point U+10000.
 * Either function will find U+d800 in "a\ud800b".
- * This behavior ensures that UTF_GET_CHAR(u_memchr32(c))==c.
+ * This behavior ensures that U16_GET(u_memchr32(c))==c.
 *
 * @param src string to search in
 * @param ch character to find
--- a/icu4c/source/common/unicode/utf.h
+++ b/icu4c/source/common/unicode/utf.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2001, International Business Machines
+*   Copyright (C) 1999-2002, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -15,77 +15,89 @@
 */

 /**
-* \file
-* \brief C API: UChar and UChar32 data types and UTF macros for C Unicode string handling
-*
-*   <p>This file defines the UChar and UChar32 data types for Unicode code units
-*   and code points, as well as macros for efficiently getting code points
-*   in and out of a string.</p>
-*
-*   <p>utf.h is included by utypes.h and itself includes the utfXX.h after some
-*   common definitions. Those files define the macros for each UTF-size.</p>
-*
-*   <p>The original concept for these files was for ICU to allow
-*   in principle to set which UTF (UTF-8/16/32) is used internally
-*   by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type
-*   accordingly. UTF-16 was the default.</p>
-*
-*   <p>This concept has been abandoned.
-*   A lot of the ICU source code &mdash; especially low-level code like
-*   conversion, normalization, and collation &mdash; assumes UTF-16,
-*   utf.h enforces the default of UTF-16.
-*   The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p>
-*
-*   <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
-*   UChar is defined to be exactly wchar_t, otherwise uint16_t.</p>
-*
-*   <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
-*   Unicode code point (Unicode scalar value, 0..0x10ffff).
-*   Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
-*   the definition of UChar. For details see the documentation for UChar32 itself.</p>
-*
-*   <p>utf.h also defines a number of C macros for handling single Unicode code points and
-*   for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual
-*   implementations of those macros and then aliases one set of them (for UTF-16) for general use.
-*   The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while
-*   the general alias macros always begin with UTF_...</p>
-*
-*   <p>Many string operations can be done with or without error checking.
-*   Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe"
-*   ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause
-*   program failures if the strings are not well-formed. The safe macros have an additional, boolean
-*   parameter "strict". If strict is FALSE, then only illegal sequences are detected.
-*   Otherwise, irregular sequences and non-characters are detected as well (like single surrogates).
-*   Safe macros return special error code points for illegal/irregular sequences:
-*   Typically, U+ffff, or values that would result in a code unit sequence of the same length
-*   as the erroneous input sequence.<br>
-*   Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and
-*   they do not have start/length parameters for boundary checking.</p>
-*
-*   <p>Here, the macros are aliased in two steps:
-*   In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are
-*   aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures.
-*   Then, in a second step, the default, general alias macros are set to use either the unsafe or
-*   the safe/not strict (default) or the safe/strict macro;
-*   these general macros do not have a strictness parameter.</p>
-*
-*   <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict.
-*   The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for
-*   Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p>
-*
-*   <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix.
-*   Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias.
-*   For example, if it can be assumed that a string is well-formed and the index will stay within the bounds,
-*   then the _UNSAFE version may be used.
-*   If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p>
-*   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
-*                  Compound statements (curly braces {}) must be used  for if-else-while... 
-*                  bodies and all macro statements should be terminated with semicolon.</p>
-*/
+ * \file
+ * \brief C API: UChar and UChar32 data types and code point macros
+ *
+ * This file defines the UChar and UChar32 data types for Unicode code units
+ * and code points, as well as macros for checking whether a code point is
+ * a surrogate or a non-character.
+ *
+ * utf.h is included by utypes.h and itself includes utf8.h and utf16.h after some
+ * common definitions. Those files define macros for efficiently getting code points
+ * in and out of UTF-8/16 strings.
+ * utf16.h macros have "U16_" prefixes.
+ * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
+ *
+ * ICU processes 16-bit Unicode strings.
+ * Most of the time, such strings are well-formed UTF-16.
+ * Single, unpaired surrogates must be handled as well, and are treated in ICU
+ * like regular code points where possible.
+ * (Pairs of surrogate code points are indistinguishable from supplementary
+ * code points encoded as pairs of surrogate code units.)
+ *
+ * In fact, almost all Unicode code points in normal text (>99%)
+ * are on the BMP (<=U+ffff) and even <=U+d7ff.
+ * ICU functions handle supplementary code points (U+10000..U+10ffff)
+ * but are optimized for the much more frequently occurring BMP code points.
+ *
+ * utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t.
+ *
+ * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was platform-dependent, much like
+ * the definition of UChar. For details see the documentation for UChar32 itself.
+ *
+ * utf.h also defines a small number of C macros for single Unicode code points.
+ * These are simple checks for surrogates and non-characters.
+ * For actual Unicode character properties see uchar.h.
+ *
+ * By default, string operations must be done with error checking in case
+ * a string is not well-formed UTF-16.
+ * The macros will detect if a surrogate code unit is unpaired
+ * (lead unit without trail unit or vice versa) and just return the unit itself
+ * as the code point.
+ * (It is an accidental property of Unicode and UTF-16 that all
+ * malformed sequences can be expressed unambiguously with a distinct subrange
+ * of Unicode code points.)
+ *
+ * When it is safe to assume that text is well-formed UTF-16
+ * (does not contain single, unpaired surrogates), then one can use
+ * U16_..._UNSAFE macros.
+ * These do not check for proper code unit sequences or truncated text and may
+ * yield wrong results or even cause a crash if they are used with "malformed"
+ * text.
+ * In practice, U16_..._UNSAFE macros will produce slightly less code but
+ * should not be faster because the processing is only different when a
+ * surrogate code unit is detected, which will be rare.
+ *
+ * Similarly for UTF-8, there are "safe" macros without a suffix,
+ * and U8_..._UNSAFE versions.
+ * The performance differences are much larger here because UTF-8 provides so
+ * many opportunities for malformed sequences.
+ * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
+ * and are fast, while the safe UTF-8 macros call functions for all but the
+ * trivial (ASCII) cases.
+ *
+ * Unlike with UTF-16, malformed UTF-8 sequences cannot be expressed with distinct
+ * code point values (0..U+10ffff). They are indicated with negative values instead.
+ *
+ * For more information see the ICU User Guide Strings chapter
+ * (http://oss.software.ibm.com/icu/userguide/).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for the bodies of
+ * if/else/while statements, and every macro statement should be terminated with a semicolon.
+ *
+ * @draft ICU 2.4
+ */

 #ifndef __UTF_H__
 #define __UTF_H__

+/* wchar_t-related definitions ---------------------------------------------- */
+
 /*
 * ANSI C headers:
 * stddef.h defines wchar_t
@ -94,18 +106,11 @@
 #include <stddef.h>
 /* include the utfXX.h after the following definitions */

-/* If there is no compiler option for the preferred UTF size, then default to UTF-16. */
-#ifndef UTF_SIZE
-    /** Number of bits in a Unicode string code unit, same as x in UTF-x (8, 16, or 32). */
-#   define UTF_SIZE 16
-#endif
-
-/** Number of bytes in a UChar (sizeof(UChar)). */
-#define U_SIZEOF_UCHAR (UTF_SIZE>>3)
-
 /*!
 * \def U_SIZEOF_WCHAR_T
 * U_SIZEOF_WCHAR_T==sizeof(wchar_t).
+ *
+ * @stable
 */
 #ifndef U_HAVE_WCHAR_H
 #   define U_HAVE_WCHAR_H 1
@ -120,10 +125,14 @@
 /*!
 * \def U_WCHAR_IS_UTF16
 * Defined if wchar_t uses UTF-16.
+ *
+ * @stable
 */
 /*!
 * \def U_WCHAR_IS_UTF32
 * Defined if wchar_t uses UTF-32.
+ *
+ * @stable
 */
 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
 #   ifdef __STDC_ISO_10646__ 
@ -145,139 +154,10 @@
 #   endif
 #endif

-/**
- * Define UChar32 as a type for single Unicode code points.
- * UChar32 is a signed 32-bit integer.
- *
- * The Unicode code point range is 0..0x10ffff.
- * All other values (negative or >=0x110000) are illegal as Unicode code points.
- * They may be used as sentinel values to indicate "done", "error"
- * or similar non-code point conditions.
- *
- * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
- * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
- * or else to be uint32_t.
- * That is, the definition of UChar32 was platform-dependent.
- *
- * @see UTF_SENTINEL
- * @draft ICU 2.4
- */
-typedef int32_t UChar32;
+/* UChar and UChar32 definitions -------------------------------------------- */

-/**
- * Unicode string and array offset and index type.
- * ICU always counts Unicode code units (UChars) for
- * string offsets, indexes, and lengths, not Unicode code points.
- *
- * @deprecated Use int32_t directly. UTextOffset to be removed after 2003-mar.
- */
-typedef int32_t UTextOffset;
-
-/* Specify which macro versions are the default ones - safe or fast. */
-#if !defined(UTF_SAFE) && !defined(UTF_STRICT) && !defined(UTF_UNSAFE)
-    /**
-     * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations
-     * with strict=FALSE. See the utf.h file description.
-     */
-#   define UTF_SAFE
-#endif
-
-/* internal definitions ----------------------------------------------------- */
-
-/**
- * <p>UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
- * which need 1 or 2 bytes in UTF-8:<br>
- * U+0015 = NAK = Negative Acknowledge, C0 control character<br>
- * U+009f = highest C1 control character</p>
- *
- * <p>These are used by ("safe") UTF-8 macros so that they can return an error value
- * that needs the same number of code units (bytes) as were seen by
- * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().</p>
- *
- * @internal
- */
-#define UTF8_ERROR_VALUE_1 0x15
-/**
- * See documentation on UTF8_ERROR_VALUE_1 for details.
- */
-#define UTF8_ERROR_VALUE_2 0x9f
-
-/**
- * Error value for all UTFs. This code point value will be set by macros with error
- * checking if an error is detected.
- */
-#define UTF_ERROR_VALUE 0xffff
-
-/* single-code point definitions -------------------------------------------- */
-
-/**
- * This value is intended for sentinel values for APIs that
- * (take or) return single code points (UChar32).
- * It is outside of the Unicode code point range 0..0x10ffff.
- * 
- * For example, a "done" or "error" value in a new API
- * could be indicated with UTF_SENTINEL.
- *
- * ICU APIs designed before ICU 2.4 usually define service-specific "done"
- * values, mostly 0xffff.
- * Those may need to be distinguished from
- * actual U+ffff text contents by calling functions like
- * CharacterIterator::hasNext() or UnicodeString::length().
- *
- * @see UChar32
- * @draft ICU 2.4
- */
-#define UTF_SENTINEL (-1)
-
-/** Is this code unit or code point a surrogate (U+d800..U+dfff)? */
-#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
-
-/**
- * Is a given 32-bit code point a Unicode noncharacter?
- */
-#define UTF_IS_UNICODE_NONCHAR(c) \
-    ((c)>=0xfdd0 && \
-     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
-     (uint32_t)(c)<=0x10ffff)
-
-/**
- * Is a given 32-bit code point/Unicode scalar value
- * actually a valid Unicode (abstract) character?
- *
- * Code points that are not characters include:
- * - single surrogate code points (U+d800..U+dfff, 2048 code points)
- * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
- * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
- * - the highest Unicode code point value is U+10ffff
- *
- * This means that all code points below U+d800 are character code points,
- * and that boundary is tested first for performance.
- */
-#define UTF_IS_UNICODE_CHAR(c) \
-    ((uint32_t)(c)<0xd800 || \
-        ((uint32_t)(c)>0xdfff && \
-         (uint32_t)(c)<=0x10ffff && \
-         !UTF_IS_UNICODE_NONCHAR(c)))
-
-/**
- * Is a given 32-bit code an error value
- * as returned by one of the macros for any UTF?
- */
-#define UTF_IS_ERROR(c) \
-    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
-
-/** This is a combined macro: Is c a valid Unicode value _and_ not an error code? */
-#define UTF_IS_VALID(c) \
-    (UTF_IS_UNICODE_CHAR(c) && \
-     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
-
-/* include the utfXX.h ------------------------------------------------------ */
-
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-#include "unicode/utf32.h"
-
-/* Define types and macros according to the selected UTF size. -------------- */
+/** Number of bytes in a UChar. @stable */
+#define U_SIZEOF_UCHAR 2

 /*!
 * \var UChar
@ -290,262 +170,127 @@ typedef int32_t UTextOffset;
 * @stable
 */

-#if UTF_SIZE==8
-
-#   error UTF-8 is not implemented, undefine UTF_SIZE or define it to 16
-
-/*
- * ANSI C header:
- * limits.h defines CHAR_MAX
- */
-#   include <limits.h>
-
-    /* Define UChar to be compatible with char if possible. */
-#   if CHAR_MAX>=255
-        typedef char UChar;
-#   else
-        typedef uint8_t UChar;
-#   endif
-
-#elif UTF_SIZE==16
-
-    /* Define UChar to be compatible with wchar_t if possible. */
-#   if U_SIZEOF_WCHAR_T==2
-        typedef wchar_t UChar;
-#   else
-        typedef uint16_t UChar;
-#   endif
-
-    /** Does this code unit alone encode a code point? */
-#   define UTF_IS_SINGLE(uchar)                         UTF16_IS_SINGLE(uchar)
-    /** Is this code unit the first one of several? */
-#   define UTF_IS_LEAD(uchar)                           UTF16_IS_LEAD(uchar)
-    /** Is this code unit one of several but not the first one? */
-#   define UTF_IS_TRAIL(uchar)                          UTF16_IS_TRAIL(uchar)
-
-    /** Does this code point require multiple code units? */
-#   define UTF_NEED_MULTIPLE_UCHAR(c)                   UTF16_NEED_MULTIPLE_UCHAR(c)
-    /** How many code units are used to encode this code point? */
-#   define UTF_CHAR_LENGTH(c)                           UTF16_CHAR_LENGTH(c)
-    /** How many code units are used at most for any Unicode code point? */
-#   define UTF_MAX_CHAR_LENGTH                          UTF16_MAX_CHAR_LENGTH
-    /** Estimate the number of code units for a string based on the number of UTF-16 code units. */
-#   define UTF_ARRAY_SIZE(size)                         UTF16_ARRAY_SIZE(size)
-
-    /** See file documentation and UTF_GET_CHAR. */
-#   define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
-    /** See file documentation and UTF_GET_CHAR. */
-#   define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
-
-    /** See file documentation and UTF_NEXT_CHAR. */
-#   define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
-    /** See file documentation and UTF_NEXT_CHAR. */
-#   define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
-
-    /** See file documentation and UTF_APPEND_CHAR. */
-#   define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
-    /** See file documentation and UTF_APPEND_CHAR. */
-#   define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
-
-    /** See file documentation and UTF_FWD_1. */
-#   define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
-    /** See file documentation and UTF_FWD_1. */
-#   define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)
-
-    /** See file documentation and UTF_FWD_N. */
-#   define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
-    /** See file documentation and UTF_FWD_N. */
-#   define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)
-
-    /** See file documentation and UTF_SET_CHAR_START. */
-#   define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
-    /** See file documentation and UTF_SET_CHAR_START. */
-#   define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)
-
-    /** See file documentation and UTF_PREV_CHAR. */
-#   define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
-    /** See file documentation and UTF_PREV_CHAR. */
-#   define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
-
-    /** See file documentation and UTF_BACK_1. */
-#   define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
-    /** See file documentation and UTF_BACK_1. */
-#   define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
-
-    /** See file documentation and UTF_BACK_N. */
-#   define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
-    /** See file documentation and UTF_BACK_N. */
-#   define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
-
-    /** See file documentation and UTF_SET_CHAR_LIMIT. */
-#   define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
-    /** See file documentation and UTF_SET_CHAR_LIMIT. */
-#   define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
-
-#elif UTF_SIZE==32
-
-#   error UTF-32 is not implemented, undefine UTF_SIZE or define it to 16
-
-    typedef UChar32 UChar;
-
+/* Define UChar to be compatible with wchar_t if possible. */
+#if U_SIZEOF_WCHAR_T==2
+    typedef wchar_t UChar;
 #else
-#   error UTF_SIZE must be undefined or one of { 8, 16, 32 } - only 16 is implemented
+    typedef uint16_t UChar;
 #endif

-/* Define the default macros for handling UTF characters. ------------------- */
+/**
+ * Define UChar32 as a type for single Unicode code points.
+ * UChar32 is a signed 32-bit integer (same as int32_t).
+ *
+ * The Unicode code point range is 0..0x10ffff.
+ * All other values (negative or >=0x110000) are illegal as Unicode code points.
+ * They may be used as sentinel values to indicate "done", "error"
+ * or similar non-code point conditions.
+ *
+ * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
+ * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
+ * or else to be uint32_t.
+ * That is, the definition of UChar32 was platform-dependent.
+ *
+ * @see U_SENTINEL
+ * @draft ICU 2.4
+ */
+typedef int32_t UChar32;
+
+/* single-code point definitions -------------------------------------------- */

 /**
- * \def UTF_GET_CHAR(s, start, i, length, c)
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ * 
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with U_SENTINEL.
 *
- * Set c to the code point that contains the code unit i.
- * i could point to the first, the last, or an intermediate code unit.
- * i is not modified.
- * \pre 0<=i<length
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * @return -1
+ * @see UChar32
+ * @draft ICU 2.4
 */
+#define U_SENTINEL (-1)

 /**
- * \def UTF_NEXT_CHAR(s, i, length, c)
- *
- * Set c to the code point that starts at code unit i
- * and advance i to beyond the code units of this code point (post-increment).
- * i must point to the first code unit of a code point.
- * \pre 0<=i<length
- * \post 0<i<=length
+ * Is this code point a Unicode noncharacter?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
 */
+#define U_IS_UNICODE_NONCHAR(c) \
+    ((c)>=0xfdd0 && \
+     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+     (uint32_t)(c)<=0x10ffff)

 /**
- * \def UTF_APPEND_CHAR(s, i, length, c)
+ * Is c a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
 *
- * Append the code units of code point c to the string at index i
- * and advance i to beyond the new code units (post-increment).
- * The code units beginning at index i will be overwritten.
- * \pre 0<=c<=0x10ffff
- * \pre 0<=i<length
- * \post 0<i<=length
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - all values above U+10ffff, the highest Unicode code point value
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
 */
+#define U_IS_UNICODE_CHAR(c) \
+    ((uint32_t)(c)<0xd800 || \
+        ((uint32_t)(c)>0xdfff && \
+         (uint32_t)(c)<=0x10ffff && \
+         !U_IS_UNICODE_NONCHAR(c)))

 /**
- * \def UTF_FWD_1(s, i, length)
- *
- * Advance i to beyond the code units of the code point that begins at i.
- * I.e., advance i by one code point.
- * i must point to the first code unit of a code point.
- * \pre 0<=i<length
- * \post 0<i<=length
+ * Is this code point a lead surrogate (U+d800..U+dbff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
 */
+#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)

 /**
- * \def UTF_FWD_N(s, i, length, n)
- *
- * Advance i to beyond the code units of the n code points where the first one begins at i.
- * I.e., advance i by n code points.
- * i must point to the first code unit of a code point.
- * \pre 0<=i<length
- * \post 0<i<=length
+ * Is this code point a trail surrogate (U+dc00..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
 */
+#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)

 /**
- * \def UTF_SET_CHAR_START(s, start, i)
- *
- * Take the random-access index i and adjust it so that it points to the beginning
- * of a code point.
- * The input index points to any code unit of a code point and is moved to point to
- * the first code unit of the same code point. i is never incremented.
- * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index.
- * \pre start<=i<length
- * \post start<=i<length
+ * Is this code point a surrogate (U+d800..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
 */
+#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)

 /**
- * \def UTF_PREV_CHAR(s, start, i, c)
- *
- * Set c to the code point that has code units before i
- * and move i backward (towards the beginning of the string)
- * to the first code unit of this code point (pre-increment).
- * i must point to the first code unit after the last unit of a code point (i==length is allowed).
- * \pre start<i<=length
- * \post start<=i<length
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
 */
+#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)

-/**
- * \def UTF_BACK_1(s, start, i)
- *
- * Move i backward (towards the beginning of the string)
- * to the first code unit of the code point that has code units before i.
- * I.e., move i backward by one code point.
- * i must point to the first code unit after the last unit of a code point (i==length is allowed).
- * \pre start<i<=length
- * \post start<=i<length
- */
+/* include the utfXX.h ------------------------------------------------------ */

-/**
- * \def UTF_BACK_N(s, start, i, n)
- *
- * Move i backward (towards the beginning of the string)
- * to the first code unit of the n code points that have code units before i.
- * I.e., move i backward by n code points.
- * i must point to the first code unit after the last unit of a code point (i==length is allowed).
- * \pre start<i<=length
- * \post start<=i<length
- */
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"

-/**
- * \def UTF_SET_CHAR_LIMIT(s, start, i, length)
- *
- * Take the random-access index i and adjust it so that it points beyond
- * a code point. The input index points beyond any code unit
- * of a code point and is moved to point beyond the last code unit of the same
- * code point. i is never decremented.
- * This can be used to start an iteration with UTF_PREV_CHAR() from a random index.
- * \pre start<i<=length
- * \post start<i<=length
- */
-
-#ifdef UTF_SAFE
-
-#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)
-
-#   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
-#   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_SAFE(s, i, length, c)
-#   define UTF_FWD_1(s, i, length)              UTF_FWD_1_SAFE(s, i, length)
-#   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_SAFE(s, i, length, n)
-#   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_SAFE(s, start, i)
-
-#   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
-#   define UTF_BACK_1(s, start, i)              UTF_BACK_1_SAFE(s, start, i)
-#   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_SAFE(s, start, i, n)
-#   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
-
-#elif defined(UTF_STRICT)
-
-#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE)
-
-#   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE)
-#   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_SAFE(s, i, length, c)
-#   define UTF_FWD_1(s, i, length)              UTF_FWD_1_SAFE(s, i, length)
-#   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_SAFE(s, i, length, n)
-#   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_SAFE(s, start, i)
-
-#   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE)
-#   define UTF_BACK_1(s, start, i)              UTF_BACK_1_SAFE(s, start, i)
-#   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_SAFE(s, start, i, n)
-#   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
-
-#else /* UTF_UNSAFE */
-
-#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_UNSAFE(s, i, c)
-
-#   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_UNSAFE(s, i, c)
-#   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_UNSAFE(s, i, c)
-#   define UTF_FWD_1(s, i, length)              UTF_FWD_1_UNSAFE(s, i)
-#   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_UNSAFE(s, i, n)
-#   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_UNSAFE(s, i)
-
-#   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_UNSAFE(s, i, c)
-#   define UTF_BACK_1(s, start, i)              UTF_BACK_1_UNSAFE(s, i)
-#   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_UNSAFE(s, i, n)
-#   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_UNSAFE(s, i)
-
-#endif
+/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
+#include "unicode/utf_old.h"

 #endif
--- a/icu4c/source/common/unicode/utf16.h
+++ b/icu4c/source/common/unicode/utf16.h
@ -15,141 +15,265 @@
 */

 /**
-* \file
-* \brief C API: UTF-16 macros
-* 
-*   This file defines macros to deal with UTF-16 code units and code points.
-*   "Safe" macros check for length overruns and illegal sequences, and
-*   also for irregular sequences when the strict option is set.
-*   "Unsafe" macros are designed for maximum speed.
-*   utf16.h is included by utf.h after unicode/umachine.h
-*   and some common definitions.</p>
-*   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
-*                  Compound statements (curly braces {}) must be used  for if-else-while...
-*                  bodies and all macro statements should be terminated with semicolon.</p>
-*/
+ * \file
+ * \brief C API: 16-bit Unicode handling macros
+ * 
+ * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
+ * utf16.h is included by utf.h after unicode/umachine.h
+ * and some common definitions.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://oss.software.ibm.com/icu/userguide/).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used  for if-else-while... 
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+/* utf.h must be included first. */
+#ifndef __UTF_H__
+#   include "unicode/utf.h"
+#endif

 #ifndef __UTF16_H__
 #define __UTF16_H__

 /* single-code point definitions -------------------------------------------- */

-/* handle surrogate pairs */
-#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
-#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
-
-#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
-
-/** Get the UTF-32 value directly from the surrogate pseudo-characters */
-#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
-
-#define UTF16_GET_PAIR_VALUE(first, second) \
-    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
-
-/* get the first and second surrogates for a supplementary code point */
 /**
- * Takes a supplementary code point (0x10000..0x10ffff)
- * and computes the first surrogate (0xd800..0xdbff)
- * for UTF-16 encoding.
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
 */
-#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)

 /**
- * Takes a supplementary code point (0x10000..0x10ffff)
- * and computes the second surrogate (0xdc00..0xdfff)
- * for UTF-16 encoding.
+ * Is this code unit a lead surrogate (U+d800..U+dbff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
 */
-#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
-
-/** alias for UTF_FIRST_SURROGATE */
-#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
-
-/** alias for UTF_SECOND_SURROGATE */
-#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
-
-/* classes of code unit values */
-#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar)
-#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
-#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
-
-/* number of code units per code point */
-#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
-#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
-#define UTF16_MAX_CHAR_LENGTH 2
-
-/* average number of code units compared to UTF-16 */
-#define UTF16_ARRAY_SIZE(size) (size)
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)

 /**
- * Get a single code point from an offset that points to any
- * of the code units that belong to that code point.
- * Assume 0<=i<length.
+ * Is this code unit a trail surrogate (U+dc00..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
+ */
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code unit a surrogate (U+d800..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
+ */
+#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
+ */
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Helper constant for U16_GET_SUPPLEMENTARY.
+ * @internal
+ */
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/**
+ * Get a supplementary code point value (U+10000..U+10ffff)
+ * from its lead and trail surrogates.
+ * The result is undefined if the input values are not
+ * lead and trail surrogates.
 *
- * This could be used for iteration together with
- * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(),
- * but the use of UTF16_NEXT_CHAR_[UN]SAFE() and
- * UTF16_PREV_CHAR_[UN]SAFE() is more efficient for that.
+ * @param lead lead surrogate (U+d800..U+dbff)
+ * @param trail trail surrogate (U+dc00..U+dfff)
+ * @return supplementary code point (U+10000..U+10ffff)
+ * @draft ICU 2.4
 */
-#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
+#define U16_GET_SUPPLEMENTARY(lead, trail) \
+    (((lead)<<10UL)+(trail)-U16_SURROGATE_OFFSET)
+
+
+/**
+ * Get the lead surrogate (0xd800..0xdbff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return lead surrogate (U+d800..U+dbff) for supplementary
+ * @draft ICU 2.4
+ */
+#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/**
+ * Get the trail surrogate (0xdc00..0xdfff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return trail surrogate (U+dc00..U+dfff) for supplementary
+ * @draft ICU 2.4
+ */
+#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/**
+ * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
+ * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
+ * @param c 32-bit code point
+ * @return 1 or 2
+ * @draft ICU 2.4
+ */
+#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/**
+ * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+ * @return 2
+ * @draft ICU 2.4
+ */
+#define U16_MAX_LENGTH 2
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ * The result is undefined if the offset points to a single, unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_GET
+ * @draft ICU 2.4
+ */
+#define U16_GET_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
-    if(UTF_IS_SURROGATE(c)) { \
-        if(UTF_IS_SURROGATE_FIRST(c)) { \
-            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
+    if(U16_IS_SURROGATE(c)) { \
+        if(U16_IS_SURROGATE_LEAD(c)) { \
+            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
        } else { \
-            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
+            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
        } \
    } \
 }

-#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ * If the offset points to a single, unpaired surrogate, then that itself
+ * will be returned as the code point.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U16_GET(s, start, i, length, c) { \
    (c)=(s)[i]; \
-    if(UTF_IS_SURROGATE(c)) { \
+    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
-        if(UTF_IS_SURROGATE_FIRST(c)) { \
-            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
-                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
-                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
-            } else if(strict) {\
-                /* unmatched first surrogate */ \
-                (c)=UTF_ERROR_VALUE; \
+        if(U16_IS_SURROGATE_LEAD(c)) { \
+            if((i)+1<(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } \
        } else { \
-            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
-                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
-                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
-            } else if(strict) {\
-                /* unmatched second surrogate */ \
-                (c)=UTF_ERROR_VALUE; \
+            if((i)-1>=(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } \
-    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
-        (c)=UTF_ERROR_VALUE; \
    } \
 }

 /* definitions with forward iteration --------------------------------------- */

-/*
- * all the macros that go forward assume that
- * the initial offset is 0<=i<length;
- * they update the offset
- */
-
-/* fast versions, no error-checking */
-
 /**
- * Get a single code point from an offset that points to the first
- * of the code units that belong to that code point.
- * Assume 0<=i<length.
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset points to a single, unpaired lead surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_NEXT
+ * @draft ICU 2.4
 */
-#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
+#define U16_NEXT_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
-    if(UTF_IS_FIRST_SURROGATE(c)) { \
-        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
+    if(U16_IS_LEAD(c)) { \
+        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
 }

-#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U16_NEXT(s, i, length, c) { \
+    (c)=(s)[(i)++]; \
+    if(U16_IS_LEAD(c)) { \
+        uint16_t __c2; \
+        if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+            ++(i); \
+            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+        } \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s UChar * string buffer (written to, so not const)
+ * @param i string offset
+ * @param c code point to append
+ * @see U16_APPEND
+ * @draft ICU 2.4
+ */
+#define U16_APPEND_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
@ -158,178 +282,323 @@
    } \
 }

-#define UTF16_FWD_1_UNSAFE(s, i) { \
-    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a surrogate pair is written, checks for sufficient space in the string.
+ * If the code point is not valid or a trail surrogate does not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s UChar * string buffer (written to, so not const)
+ * @param i string offset, i<capacity
+ * @param capacity size of the string buffer
+ * @param c code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U16_APPEND_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U16_APPEND(s, i, capacity, c, isError) { \
+    if((uint32_t)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
+        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+    } else /* c>0x10ffff or not enough space */ { \
+        (isError)=TRUE; \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_FWD_1
+ * @draft ICU 2.4
+ */
+#define U16_FWD_1_UNSAFE(s, i) { \
+    if(U16_IS_LEAD((s)[(i)++])) { \
        ++(i); \
    } \
 }

-#define UTF16_FWD_N_UNSAFE(s, i, n) { \
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @see U16_FWD_1_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U16_FWD_1(s, i, length) { \
+    if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[i])) { \
+        ++(i); \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_FWD_N
+ * @draft ICU 2.4
+ */
+#define U16_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
-        UTF16_FWD_1_UNSAFE(s, i); \
+        U16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
 }

 /**
- * Set a random-access offset and adjust it so that
- * it points to the beginning of a Unicode character.
- * The offset that is passed in points to
- * any code unit of a code point
- * and will point to the first code unit after
- * the macro invocation.
- * Never increments the offset.
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @param n number of code points to skip
+ * @see U16_FWD_N_UNSAFE
+ * @draft ICU 2.4
 */
-#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
-    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
-        --(i); \
-    } \
-}
-
-/* safe versions with error-checking and optional regularity-checking */
-
-#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
-    (c)=(s)[(i)++]; \
-    if(UTF_IS_FIRST_SURROGATE(c)) { \
-        uint16_t __c2; \
-        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
-            ++(i); \
-            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
-            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
-        } else if(strict) {\
-            /* unmatched first surrogate */ \
-            (c)=UTF_ERROR_VALUE; \
-        } \
-    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
-        /* unmatched second surrogate or other non-character */ \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
-    if((uint32_t)(c)<=0xffff) { \
-        (s)[(i)++]=(uint16_t)(c); \
-    } else if((uint32_t)(c)<=0x10ffff) { \
-        if((i)+1<(length)) { \
-            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
-            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
-        } else /* not enough space */ { \
-            (s)[(i)++]=UTF_ERROR_VALUE; \
-        } \
-    } else /* c>0x10ffff, write error value */ { \
-        (s)[(i)++]=UTF_ERROR_VALUE; \
-    } \
-}
-
-#define UTF16_FWD_1_SAFE(s, i, length) { \
-    if(UTF_IS_FIRST_SURROGATE((s)[(i)++]) && (i)<(length) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
-        ++(i); \
-    } \
-}
-
-#define UTF16_FWD_N_SAFE(s, i, length, n) { \
+#define U16_FWD_N(s, i, length, n) { \
    int32_t __N=(n); \
    while(__N>0 && (i)<(length)) { \
-        UTF16_FWD_1_SAFE(s, i, length); \
+        U16_FWD_1(s, i, length); \
        --__N; \
    } \
 }

-#define UTF16_SET_CHAR_START_SAFE(s, start, i) { \
-    if(UTF_IS_SECOND_SURROGATE((s)[i]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_START
+ * @draft ICU 2.4
+ */
+#define U16_SET_CP_START_UNSAFE(s, i) { \
+    if(U16_IS_TRAIL((s)[i])) { \
+        --(i); \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @see U16_SET_CP_START_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U16_SET_CP_START(s, start, i) { \
+    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
 }

 /* definitions with backward iteration -------------------------------------- */

-/*
- * all the macros that go backward assume that
- * the valid buffer range starts at offset 0
- * and that the initial offset is 0<i<=length;
- * they update the offset
- */
-
-/* fast versions, no error-checking */
-
 /**
- * Get a single code point from an offset that points behind the last
- * of the code units that belong to that code point.
- * Assume 0<=i<length.
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind a single, unpaired trail surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_PREV
+ * @draft ICU 2.4
 */
-#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
+#define U16_PREV_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
-    if(UTF_IS_SECOND_SURROGATE(c)) { \
-        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
+    if(U16_IS_TRAIL(c)) { \
+        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
 }

-#define UTF16_BACK_1_UNSAFE(s, i) { \
-    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<i (the code unit at i-1 is read);
+ *          may be the same as the string length
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U16_PREV(s, start, i, c) { \
+    (c)=(s)[--(i)]; \
+    if(U16_IS_TRAIL(c)) { \
+        uint16_t __c2; \
+        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+            --(i); \
+            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+        } \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_BACK_1
+ * @draft ICU 2.4
+ */
+#define U16_BACK_1_UNSAFE(s, i) { \
+    if(U16_IS_TRAIL((s)[--(i)])) { \
        --(i); \
    } \
 }

-#define UTF16_BACK_N_UNSAFE(s, i, n) { \
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<i (the code unit at i-1 is read)
+ * @see U16_BACK_1_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U16_BACK_1(s, start, i) { \
+    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+        --(i); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_BACK_N
+ * @draft ICU 2.4
+ */
+#define U16_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
-        UTF16_BACK_1_UNSAFE(s, i); \
+        U16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
 }

 /**
- * Set a random-access offset and adjust it so that
- * it points after the end of a Unicode character.
- * The offset that is passed in points behind
- * any code unit of a code point
- * and will point behind the last code unit after
- * the macro invocation.
- * Never decrements the offset.
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @param n number of code points to skip
+ * @see U16_BACK_N_UNSAFE
+ * @draft ICU 2.4
 */
-#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
-    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
+#define U16_BACK_N(s, start, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && (i)>(start)) { \
+        U16_BACK_1(s, start, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_LIMIT
+ * @draft ICU 2.4
+ */
+#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \
+    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); \
    } \
 }

-/* safe versions with error-checking and optional regularity-checking */
-
-#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
-    (c)=(s)[--(i)]; \
-    if(UTF_IS_SECOND_SURROGATE(c)) { \
-        uint16_t __c2; \
-        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
-            --(i); \
-            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
-            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
-        } else if(strict) {\
-            /* unmatched second surrogate */ \
-            (c)=UTF_ERROR_VALUE; \
-        } \
-    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
-        /* unmatched first surrogate or other non-character */ \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-#define UTF16_BACK_1_SAFE(s, start, i) { \
-    if(UTF_IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
-        --(i); \
-    } \
-}
-
-#define UTF16_BACK_N_SAFE(s, start, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0 && (i)>(start)) { \
-        UTF16_BACK_1_SAFE(s, start, i); \
-        --__N; \
-    } \
-}
-
-#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) { \
-    if((start)<(i) && (i)<(length) && UTF_IS_FIRST_SURROGATE((s)[(i)-1]) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i<=length
+ * @param length string length
+ * @see U16_SET_CP_LIMIT_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U16_SET_CP_LIMIT(s, start, i, length) { \
+    if((start)<(i) && (i)<(length) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
 }
--- a/icu4c/source/common/unicode/utf32.h
+++ b/icu4c/source/common/unicode/utf32.h
@ -14,146 +14,10 @@
 *   created by: Markus W. Scherer
 */
 /**
-* \file
-* \brief C API: UTF-32 macros
-*
-*   This file defines macros to deal with UTF-32 code units and code points.
-*   Signatures and semantics are the same as for the similarly named macros
-*   in utf16.h.
-*   utf32.h is included by utf.h after unicode/umachine.h</p>
-*   and some common definitions.
-*   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
-*                  Compound statements (curly braces {}) must be used  for if-else-while...
-*                  bodies and all macro statements should be terminated with semicolon.</p>
-*/
-
-#ifndef __UTF32_H__
-#define __UTF32_H__
-
-/* internal definitions ----------------------------------------------------- */
-
-#define UTF32_IS_SAFE(c, strict) \
-    (!(strict) ? \
-        (uint32_t)(c)<=0x10ffff : \
-        UTF_IS_UNICODE_CHAR(c))
-
-/*
- * For the semantics of all of these macros, see utf16.h.
- * The UTF-32 versions are trivial because any code point is
- * encoded using exactly one code unit.
+ * \file
+ * \brief C API: UTF-32 macros
+ *
+ * This file is deprecated and its contents moved to utf_old.h.
+ * See utf_old.h and Jitterbug 2150 and its discussion on the ICU mailing list
+ * in September 2002.
 */
-
-/* single-code point definitions -------------------------------------------- */
-
-/* classes of code unit values */
-#define UTF32_IS_SINGLE(uchar) 1
-#define UTF32_IS_LEAD(uchar) 0
-#define UTF32_IS_TRAIL(uchar) 0
-
-/* number of code units per code point */
-#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
-#define UTF32_CHAR_LENGTH(c) 1
-#define UTF32_MAX_CHAR_LENGTH 1
-
-/* average number of code units compared to UTF-16 */
-#define UTF32_ARRAY_SIZE(size) (size)
-
-#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[i]; \
-}
-
-#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
-    (c)=(s)[i]; \
-    if(!UTF32_IS_SAFE(c, strict)) { \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-/* definitions with forward iteration --------------------------------------- */
-
-#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[(i)++]; \
-}
-
-#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
-    (s)[(i)++]=(c); \
-}
-
-#define UTF32_FWD_1_UNSAFE(s, i) { \
-    ++(i); \
-}
-
-#define UTF32_FWD_N_UNSAFE(s, i, n) { \
-    (i)+=(n); \
-}
-
-#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
-}
-
-#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
-    (c)=(s)[(i)++]; \
-    if(!UTF32_IS_SAFE(c, strict)) { \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
-    if((uint32_t)(c)<=0x10ffff) { \
-        (s)[(i)++]=(c); \
-    } else /* c>0x10ffff, write 0xfffd */ { \
-        (s)[(i)++]=0xfffd; \
-    } \
-}
-
-#define UTF32_FWD_1_SAFE(s, i, length) { \
-    ++(i); \
-}
-
-#define UTF32_FWD_N_SAFE(s, i, length, n) { \
-    if(((i)+=(n))>(length)) { \
-        (i)=(length); \
-    } \
-}
-
-#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
-}
-
-/* definitions with backward iteration -------------------------------------- */
-
-#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[--(i)]; \
-}
-
-#define UTF32_BACK_1_UNSAFE(s, i) { \
-    --(i); \
-}
-
-#define UTF32_BACK_N_UNSAFE(s, i, n) { \
-    (i)-=(n); \
-}
-
-#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
-}
-
-#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
-    (c)=(s)[--(i)]; \
-    if(!UTF32_IS_SAFE(c, strict)) { \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-#define UTF32_BACK_1_SAFE(s, start, i) { \
-    --(i); \
-}
-
-#define UTF32_BACK_N_SAFE(s, start, i, n) { \
-    (i)-=(n); \
-    if((i)<(start)) { \
-        (i)=(start); \
-    } \
-}
-
-#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
-}
-
-#endif
--- a/icu4c/source/common/unicode/utf8.h
+++ b/icu4c/source/common/unicode/utf8.h
@ -15,23 +15,25 @@
 */

 /**
-* \file 
-* \brief C API: UTF-8 macros
-* 
-*   This file defines macros to deal with UTF-8 code units and code points.
-*   Signatures and semantics are the same as for the similarly named macros
-*   in utf16.h.
-*   utf8.h is included by utf.h after unicode/umachine.h
-*   and some common definitions.</p>
-*   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
-*                  Compound statements (curly braces {}) must be used  for if-else-while...
-*                  bodies and all macro statements should be terminated with semicolon.</p>
-*/
-
+ * \file
+ * \brief C API: 8-bit Unicode handling macros
+ * 
+ * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
+ * utf8.h is included by utf.h after unicode/umachine.h
+ * and some common definitions.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://oss.software.ibm.com/icu/userguide/).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */

 /* utf.h must be included first. */
 #ifndef __UTF_H__
-# include "unicode/utf.h"
+#   include "unicode/utf.h"
 #endif

 #ifndef __UTF8_H__
@ -39,6 +41,12 @@

 /* internal definitions ----------------------------------------------------- */

+/**
+ * \var utf8_countTrailBytes
+ * Internal array with numbers of trail bytes for any given byte used in
+ * lead byte position.
+ * @internal
+ */
 #ifdef U_UTF8_IMPL
 U_CAPI const uint8_t 
 utf8_countTrailBytes[256];
@ -48,114 +56,166 @@ utf8_countTrailBytes[256];
 #endif

 /**
- * Count the trail bytes for a lead byte -
- * this macro should be used so that the assembler code
- * that is mentioned in utf_impl.c could be used here.
+ * Count the trail bytes for a UTF-8 lead byte.
+ * @internal
 */
-#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
+#define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])

-/* use a macro here, too - there may be a simpler way with some machines */
-#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @internal
+ */
+#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

+/**
+ * Function for handling "next code point" with error-checking.
+ * @internal
+ */
 U_CAPI UChar32 U_EXPORT2
-utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict, UBool *pIsError);
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);

+/**
+ * Function for handling "append code point" with error-checking.
+ * @internal
+ */
 U_CAPI int32_t U_EXPORT2
-utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c);
+utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);

+/**
+ * Function for handling "previous code point" with error-checking.
+ * @internal
+ */
 U_CAPI UChar32 U_EXPORT2
 utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);

+/**
+ * Function for handling "skip backward one code point" with error-checking.
+ * @internal
+ */
 U_CAPI int32_t U_EXPORT2
 utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);

-/*
- * For the semantics of all of these macros, see utf16.h.
- * The UTF-8 macros favor sequences more the shorter they are.
- * Sometimes, only the single-byte case is covered by a macro,
- * while longer sequences are handled by a function call.
- */
-
 /* single-code point definitions -------------------------------------------- */

-/** Is this this code point a single code unit (byte)? */
-#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
-/** Is this this code unit the lead code unit (byte) of a code point? */
-#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
-/** Is this this code unit a trailing code unit (byte) of a code point? */
-#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
-
-/** Does this scalar Unicode value need multiple code units for storage? */
-#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
+/**
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
+ */
+#define U8_IS_SINGLE(c) (((c)&0x80)==0)

 /**
- * Given the lead character, how many bytes are taken by this code point.
- * ICU does not deal with code points >0x10ffff
- * unless necessary for advancing in the byte stream.
- *
- * These length macros take into account that for values >0x10ffff
- * the "safe" append macros would write the error code point 0xffff
- * with 3 bytes.
- * Code point comparisons need to be in uint32_t because UChar32
- * may be a signed type, and negative values must be recognized.
+ * Is this code unit (byte) a UTF-8 lead byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
 */
-#if 1
-#   define UTF8_CHAR_LENGTH(c) \
-        ((uint32_t)(c)<=0x7f ? 1 : \
-            ((uint32_t)(c)<=0x7ff ? 2 : \
-                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
-            ) \
-        )
-#else
-#   define UTF8_CHAR_LENGTH(c) \
-        ((uint32_t)(c)<=0x7f ? 1 : \
-            ((uint32_t)(c)<=0x7ff ? 2 : \
-                ((uint32_t)(c)<=0xffff ? 3 : \
-                    ((uint32_t)(c)<=0x10ffff ? 4 : \
-                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
-                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
-                        ) \
-                    ) \
+#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
+
+/**
+ * Is this code unit (byte) a UTF-8 trail byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @draft ICU 2.4
+ */
+#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
+
+/**
+ * How many code units (bytes) are used for the UTF-8 encoding
+ * of this Unicode code point?
+ * @param c 32-bit code point
+ * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+ * @draft ICU 2.4
+ */
+#define U8_LENGTH(c) \
+    ((uint32_t)(c)<=0x7f ? 1 : \
+        ((uint32_t)(c)<=0x7ff ? 2 : \
+            ((uint32_t)(c)<=0xd7ff ? 3 : \
+                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
+                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
                ) \
            ) \
-        )
-#endif
+        ) \
+    )

-/** The maximum number of bytes per code point */
-#define UTF8_MAX_CHAR_LENGTH 4
+/**
+ * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
+ * @return 4
+ * @draft ICU 2.4
+ */
+#define U8_MAX_LENGTH 4

-/** Average number of code units compared to UTF-16 */
-#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
-
-#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * The result is undefined if the offset points to an illegal UTF-8
+ * byte sequence.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_GET
+ * @draft ICU 2.4
+ */
+#define U8_GET_UNSAFE(s, i, c) { \
    int32_t __I=(int32_t)(i); \
-    UTF8_SET_CHAR_START_UNSAFE(s, __I); \
-    UTF8_NEXT_CHAR_UNSAFE(s, __I, c); \
+    U8_SET_CP_START_UNSAFE(s, __I); \
+    U8_NEXT_UNSAFE(s, __I, c); \
 }

-#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to a negative value.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset
+ * @param i string offset, start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_GET_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U8_GET(s, start, i, length, c) { \
    int32_t __I=(int32_t)(i); \
-    UTF8_SET_CHAR_START_SAFE(s, start, __I); \
-    UTF8_NEXT_CHAR_SAFE(s, __I, length, c, strict); \
+    U8_SET_CP_START(s, start, __I); \
+    U8_NEXT(s, __I, length, c); \
 }

 /* definitions with forward iteration --------------------------------------- */

 /**
- * Read a Unicode scalar value from an array of UTF-8 bytes.
- * Only values <=0x10ffff are accepted, and if an error occurs,
- * then c will be set such that UTF_IS_ERROR(c).
- * The _UNSAFE macro is fast and does not check for errors.
- * The _SAFE macro checks for errors and optionally for
- * irregular sequences, too, i.e., for sequences that
- * are longer than necessary, such as <c0 80> instead of <0>.
- * The strict checks also check for non-characters.
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * The result is undefined if the offset points to a trail byte
+ * or an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_NEXT
+ * @draft ICU 2.4
 */
-#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
+#define U8_NEXT_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
-        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
-        UTF8_MASK_LEAD_BYTE(c, __count); \
+        uint8_t __count=U8_COUNT_TRAIL_BYTES(c); \
+        U8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
@ -170,7 +230,49 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
    } \
 }

-#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_NEXT_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U8_NEXT(s, i, length, c) { \
+    (c)=(s)[(i)++]; \
+    if((c)>=0x80) { \
+        if(U8_IS_LEAD(c)) { \
+            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, -1); \
+        } else { \
+            (c)=U_SENTINEL; \
+        } \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s uint8_t * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U8_APPEND
+ * @draft ICU 2.4
+ */
+#define U8_APPEND_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
@ -189,74 +291,172 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
    } \
 }

-#define UTF8_FWD_1_UNSAFE(s, i) { \
-    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
-}
-
-#define UTF8_FWD_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        UTF8_FWD_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
-    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
-}
-
-#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
-    (c)=(s)[(i)++]; \
-    if((c)>=0x80) { \
-        if(UTF8_IS_LEAD(c)) { \
-            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict, NULL); \
-        } else { \
-            (c)=UTF8_ERROR_VALUE_1; \
-        } \
-    } \
-}
-
-#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a non-ASCII code point is written, checks for sufficient space in the string.
+ * If the code point is not valid or trail bytes do not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s uint8_t * string buffer
+ * @param i string offset, i<length
+ * @param length size of the string buffer
+ * @param c code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U8_APPEND_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U8_APPEND(s, i, length, c, isError) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
-        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c); \
+        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, &(isError)); \
    } \
 }

-#define UTF8_FWD_1_SAFE(s, i, length) { \
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_FWD_1
+ * @draft ICU 2.4
+ */
+#define U8_FWD_1_UNSAFE(s, i) { \
+    (i)+=1+U8_COUNT_TRAIL_BYTES((s)[i]); \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @see U8_FWD_1_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U8_FWD_1(s, i, length) { \
    uint8_t __b=(s)[(i)++]; \
-    if(UTF8_IS_LEAD(__b)) { \
-        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(__b); \
+    if(U8_IS_LEAD(__b)) { \
+        uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
        if((i)+__count>(length)) { \
            __count=(uint8_t)((length)-(i)); \
        } \
-        while(__count>0 && UTF8_IS_TRAIL((s)[i])) { \
+        while(__count>0 && U8_IS_TRAIL((s)[i])) { \
            ++(i); \
            --__count; \
        } \
    } \
 }

-#define UTF8_FWD_N_SAFE(s, i, length, n) { \
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_FWD_N
+ * @draft ICU 2.4
+ */
+#define U8_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
-    while(__N>0 && (i)<(length)) { \
-        UTF8_FWD_1_SAFE(s, i, length); \
+    while(__N>0) { \
+        U8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
 }

-#define UTF8_SET_CHAR_START_SAFE(s, start, i) { \
-    if(UTF8_IS_TRAIL((s)[(i)])) { \
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @param n number of code points to skip
+ * @see U8_FWD_N_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U8_FWD_N(s, i, length, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && (i)<(length)) { \
+        U8_FWD_1(s, i, length); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_START
+ * @draft ICU 2.4
+ */
+#define U8_SET_CP_START_UNSAFE(s, i) { \
+    while(U8_IS_TRAIL((s)[i])) { --(i); } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @see U8_SET_CP_START_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U8_SET_CP_START(s, start, i) { \
+    if(U8_IS_TRAIL((s)[(i)])) { \
        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
    } \
 }

 /* definitions with backward iteration -------------------------------------- */

-#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_PREV
+ * @draft ICU 2.4
+ */
+#define U8_PREV_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
-    if(UTF8_IS_TRAIL(c)) { \
+    if(U8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
 \
        /* c is a trail byte */ \
@ -264,7 +464,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
-                UTF8_MASK_LEAD_BYTE(__b, __count); \
+                U8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
@ -276,57 +476,151 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
    } \
 }

-#define UTF8_BACK_1_UNSAFE(s, i) { \
-    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
-}
-
-#define UTF8_BACK_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        UTF8_BACK_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
-    UTF8_BACK_1_UNSAFE(s, i); \
-    UTF8_FWD_1_UNSAFE(s, i); \
-}
-
-#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is directly behind a lead byte, then c is set to a
+ * negative value because a lead byte alone is not a complete sequence.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i because the macro
+ *          decrements i before it reads a byte
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_PREV_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U8_PREV(s, start, i, c) { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
-            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
+            (c)=utf8_prevCharSafeBody(s, start, &(i), c, -1); \
        } else { \
-            (c)=UTF8_ERROR_VALUE_1; \
+            (c)=U_SENTINEL; \
        } \
    } \
 }

-#define UTF8_BACK_1_SAFE(s, start, i) { \
-    if(UTF8_IS_TRAIL((s)[--(i)])) { \
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_BACK_1
+ * @draft ICU 2.4
+ */
+#define U8_BACK_1_UNSAFE(s, i) { \
+    while(U8_IS_TRAIL((s)[--(i)])) {} \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @see U8_BACK_1_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U8_BACK_1(s, start, i) { \
+    if(U8_IS_TRAIL((s)[--(i)])) { \
        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
    } \
 }

-#define UTF8_BACK_N_SAFE(s, start, i, n) { \
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_BACK_N
+ * @draft ICU 2.4
+ */
+#define U8_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
-    while(__N>0 && (i)>(start)) { \
-        UTF8_BACK_1_SAFE(s, start, i); \
+    while(__N>0) { \
+        U8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
 }

-/*
- * Need to use UTF8_FWD_1_SAFE() because UTF8_BACK_1_SAFE()
- * may have started from the middle of the sequence and not checked
- * all trail bytes.
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @param n number of code points to skip
+ * @see U8_BACK_N_UNSAFE
+ * @draft ICU 2.4
 */
-#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) { \
+#define U8_BACK_N(s, start, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && (i)>(start)) { \
+        U8_BACK_1(s, start, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_LIMIT
+ * @draft ICU 2.4
+ */
+#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
+    U8_BACK_1_UNSAFE(s, i); \
+    U8_FWD_1_UNSAFE(s, i); \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i<=length
+ * @param length string length
+ * @see U8_SET_CP_LIMIT_UNSAFE
+ * @draft ICU 2.4
+ */
+#define U8_SET_CP_LIMIT(s, start, i, length) { \
    if((start)<(i) && (i)<(length)) { \
-        UTF8_BACK_1_SAFE(s, start, i); \
-        UTF8_FWD_1_SAFE(s, i, length); \
+        U8_BACK_1(s, start, i); \
+        U8_FWD_1(s, i, length); \
    } \
 }

--- a/icu4c/source/common/unicode/utf_old.h
+++ b/icu4c/source/common/unicode/utf_old.h
--- a/icu4c/source/common/unistr.cpp
+++ b/icu4c/source/common/unistr.cpp
@ -198,7 +198,8 @@ UnicodeString::UnicodeString(UChar32 ch)
    fFlags(kShortString)
 {
  int32_t i = 0;
-  UTF_APPEND_CHAR(fStackBuffer, i, US_STACKBUF_SIZE, ch);
+  UBool isError = FALSE;
+  U16_APPEND(fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
  fLength = i;
 }

--- a/icu4c/source/common/ustring.c
+++ b/icu4c/source/common/ustring.c
@ -157,140 +157,118 @@ u_strchr32(const UChar *s, UChar32 c) {
  }
 }

+/*
+ * Match each code point in a string against each code point in the matchSet.
+ * Return the index of the first string code point that
+ * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
+ * Return -(string length)-1 if there is no such code point.
+ */
+static int32_t
+_matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
+    int32_t matchLen, matchBMPLen, strItr, matchItr;
+    UChar32 stringCh, matchCh;
+    UChar c, c2;
+
+    /* first part of matchSet contains only BMP code points */
+    matchBMPLen = 0;
+    while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
+        ++matchBMPLen;
+    }
+
+    /* second part of matchSet contains BMP and supplementary code points */
+    matchLen = matchBMPLen;
+    while(matchSet[matchLen] != 0) {
+        ++matchLen;
+    }
+
+    for(strItr = 0; (c = string[strItr]) != 0;) {
+        ++strItr;
+        if(U16_IS_SINGLE(c)) {
+            if(polarity) {
+                for(matchItr = 0; matchItr < matchLen; ++matchItr) {
+                    if(c == matchSet[matchItr]) {
+                        return strItr - 1; /* one matches */
+                    }
+                }
+            } else {
+                for(matchItr = 0; matchItr < matchLen; ++matchItr) {
+                    if(c == matchSet[matchItr]) {
+                        goto endloop;
+                    }
+                }
+                return strItr - 1; /* none matches */
+            }
+        } else {
+            /*
+             * No need to check for string length before U16_IS_TRAIL
+             * because c2 could at worst be the terminating NUL.
+             */
+            if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
+                ++strItr;
+                stringCh = U16_GET_SUPPLEMENTARY(c, c2);
+            } else {
+                stringCh = c; /* unpaired lead or trail surrogate */
+            }
+
+            if(polarity) {
+                for(matchItr = matchBMPLen; matchItr < matchLen;) {
+                    U16_NEXT(matchSet, matchItr, matchLen, matchCh);
+                    if(stringCh == matchCh) {
+                        return strItr - U16_LENGTH(stringCh); /* one matches */
+                    }
+                }
+            } else {
+                for(matchItr = matchBMPLen; matchItr < matchLen;) {
+                    U16_NEXT(matchSet, matchItr, matchLen, matchCh);
+                    if(stringCh == matchCh) {
+                        goto endloop;
+                    }
+                }
+                return strItr - U16_LENGTH(stringCh); /* none matches */
+            }
+        }
+endloop:
+        /* wish C had continue with labels like Java... */;
+    }
+
+    /* Didn't find it. */
+    return -strItr-1;
+}
+
 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */
 U_CAPI UChar * U_EXPORT2
 u_strpbrk(const UChar *string, const UChar *matchSet)
 {
-    int32_t matchLen;
-    UBool single = TRUE;
-
-    for (matchLen = 0; matchSet[matchLen]; matchLen++)
-    {
-        if (!UTF_IS_SINGLE(matchSet[matchLen]))
-        {
-            single = FALSE;
-        }
+    int32_t index = _matchFromSet(string, matchSet, TRUE);
+    if(index >= 0) {
+        return (UChar *)string + index;
+    } else {
+        return NULL;
    }
-
-    if (single)
-    {
-        const UChar *matchItr;
-        const UChar *strItr;
-
-        for (strItr = string; *strItr; strItr++)
-        {
-            for (matchItr = matchSet; *matchItr; matchItr++)
-            {
-                if (*matchItr == *strItr)
-                {
-                    return (UChar *)strItr;
-                }
-            }
-        }
-    }
-    else
-    {
-        int32_t matchItr;
-        int32_t strItr;
-        UChar32 stringCh, matchSetCh;
-        int32_t stringLen = u_strlen(string);
-
-        for (strItr = 0; strItr < stringLen; strItr++)
-        {
-            UTF_GET_CHAR_SAFE(string, 0, strItr, stringLen, stringCh, TRUE);
-            for (matchItr = 0; matchItr < matchLen; matchItr++)
-            {
-                UTF_GET_CHAR_SAFE(matchSet, 0, matchItr, matchLen, matchSetCh, TRUE);
-                if (stringCh == matchSetCh && (stringCh != UTF_ERROR_VALUE
-                    || string[strItr] == UTF_ERROR_VALUE
-                    || (matchSetCh == UTF_ERROR_VALUE && !UTF_IS_SINGLE(matchSet[matchItr]))))
-                {
-                    return (UChar *)string + strItr;
-                }
-            }
-        }
-    }
-
-    /* Didn't find it. */
-    return NULL;
 }

 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */
 U_CAPI int32_t U_EXPORT2
 u_strcspn(const UChar *string, const UChar *matchSet)
 {
-    const UChar *foundStr = u_strpbrk(string, matchSet);
-    if (foundStr == NULL)
-    {
-        return u_strlen(string);
+    int32_t index = _matchFromSet(string, matchSet, TRUE);
+    if(index >= 0) {
+        return index;
+    } else {
+        return -index - 1; /* == u_strlen(string) */
    }
-    return foundStr - string;
 }

 /* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
 U_CAPI int32_t U_EXPORT2
 u_strspn(const UChar *string, const UChar *matchSet)
 {
-    UBool single = TRUE;
-    UBool match = TRUE;
-    int32_t matchLen;
-    int32_t retValue;
-
-    for (matchLen = 0; matchSet[matchLen]; matchLen++)
-    {
-        if (!UTF_IS_SINGLE(matchSet[matchLen]))
-        {
-            single = FALSE;
-        }
+    int32_t index = _matchFromSet(string, matchSet, FALSE);
+    if(index >= 0) {
+        return index;
+    } else {
+        return -index - 1; /* == u_strlen(string) */
    }
-
-    if (single)
-    {
-        const UChar *matchItr;
-        const UChar *strItr;
-
-        for (strItr = string; *strItr && match; strItr++)
-        {
-            match = FALSE;
-            for (matchItr = matchSet; *matchItr; matchItr++)
-            {
-                if (*matchItr == *strItr)
-                {
-                    match = TRUE;
-                    break;
-                }
-            }
-        }
-        retValue = strItr - string - (match == FALSE);
-    }
-    else
-    {
-        int32_t matchItr;
-        int32_t strItr;
-        UChar32 stringCh, matchSetCh;
-        int32_t stringLen = u_strlen(string);
-
-        for (strItr = 0; strItr < stringLen && match; strItr++)
-        {
-            match = FALSE;
-            UTF_GET_CHAR_SAFE(string, 0, strItr, stringLen, stringCh, TRUE);
-            for (matchItr = 0; matchItr < matchLen; matchItr++)
-            {
-                UTF_GET_CHAR_SAFE(matchSet, 0, matchItr, matchLen, matchSetCh, TRUE);
-                if (stringCh == matchSetCh && (stringCh != UTF_ERROR_VALUE
-                    || string[strItr] == UTF_ERROR_VALUE
-                    || (matchSetCh == UTF_ERROR_VALUE && !UTF_IS_SINGLE(matchSet[matchItr]))))
-                {
-                    match = TRUE;
-                    break;
-                }
-            }
-        }
-        retValue = strItr - (match == FALSE);
-    }
-
-    /* Found a mismatch or didn't find it. */
-    return retValue;
 }

 /* ----- Text manipulation functions --- */
--- a/icu4c/source/common/ustrtrns.c
+++ b/icu4c/source/common/ustrtrns.c
@ -228,7 +228,6 @@ u_strFromUTF8(UChar *dest,
    int32_t index = 0;
    int32_t reqLength = 0;
    uint8_t* pSrc = (uint8_t*) src;
-    UBool isError;

    /* args check */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
@ -249,8 +248,8 @@ u_strFromUTF8(UChar *dest,
        if(ch <=0x7f){
            *pDest++=(UChar)ch;
        }else{
-            ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE, &isError);
-            if(isError){
+            ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1);
+            if(ch<0){
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }else if(ch<=0xFFFF){
@ -272,8 +271,8 @@ u_strFromUTF8(UChar *dest,
        if(ch <= 0x7f){
            reqLength++;
        }else{
-            ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, FALSE, &isError);
-            if(isError){
+            ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1);
+            if(ch<0){
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
--- a/icu4c/source/common/utf_impl.c
+++ b/icu4c/source/common/utf_impl.c
@ -83,7 +83,7 @@ utf8_errorValue[6]={
 };

 U_CAPI UChar32 U_EXPORT2
-utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict, UBool *pIsError) {
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) {
    int32_t i=*pi;
    uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
    if((i)+count<=(length)) {
@ -118,10 +118,11 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
            illegal|=(trail&0xc0)^0x80;
            break;
        case 0:
-            if(pIsError!=NULL) {
-                *pIsError=TRUE;
+            if(strict>=0) {
+                return UTF8_ERROR_VALUE_1;
+            } else {
+                return U_SENTINEL;
            }
-            return UTF8_ERROR_VALUE_1;
        /* no default branch to optimize switch()  - all values are covered */
        }

@ -132,6 +133,8 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
         * Starting with Unicode 3.0.1, non-shortest forms are illegal.
         * Starting with Unicode 3.2, surrogate code points must not be
         * encoded in UTF-8, and there are no irregular sequences any more.
+         *
+         * U8_ macros (new in ICU 2.4) return negative values for error conditions.
         */

        /* correct sequence - all trail bytes have (b7..b6)==(10)? */
@ -145,21 +148,14 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
                ++(i);
                --count;
            }
-            c=utf8_errorValue[errorCount-count];
-            if(pIsError!=NULL) {
-                *pIsError=TRUE;
+            if(strict>=0) {
+                c=utf8_errorValue[errorCount-count];
+            } else {
+                c=U_SENTINEL;
            }
-        } else if((strict) && UTF_IS_UNICODE_NONCHAR(c)) {
+        } else if((strict)>0 && UTF_IS_UNICODE_NONCHAR(c)) {
            /* strict: forbid non-characters like U+fffe */
            c=utf8_errorValue[count];
-            if(pIsError!=NULL) {
-                *pIsError=TRUE;
-            }
-        } else {
-            /* good result */
-            if(pIsError!=NULL) {
-                *pIsError=FALSE;
-            }
        }
    } else /* too few bytes left */ {
        /* error handling */
@ -168,9 +164,10 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
        while((i)<(length) && UTF8_IS_TRAIL(s[i])) {
            ++(i);
        }
-        c=utf8_errorValue[i-i0];
-        if(pIsError!=NULL) {
-            *pIsError=TRUE;
+        if(strict>=0) {
+            c=utf8_errorValue[i-i0];
+        } else {
+            c=U_SENTINEL;
        }
    }
    *pi=i;
@ -178,8 +175,8 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
 }

 U_CAPI int32_t U_EXPORT2
-utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c) {
-    if((c)<=0x7ff) {
+utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError) {
+    if((uint32_t)(c)<=0x7ff) {
        if((i)+1<(length)) {
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0);
            (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80);
@ -187,7 +184,7 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c) {
        }
    } else if((uint32_t)(c)<=0xffff) {
        /* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. */
-        if((i)+2<(length) && !UTF_IS_SURROGATE(c)) {
+        if((i)+2<(length) && !U_IS_SURROGATE(c)) {
            (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0);
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80);
            (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80);
@ -203,18 +200,22 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c) {
        }
    }
    /* c>0x10ffff or not enough space, write an error value */
-    length-=i;
-    if(length>0) {
-        int32_t offset;
-        if(length>3) {
-            length=3;
+    if(pIsError!=NULL) {
+        *pIsError=TRUE;
+    } else {
+        length-=i;
+        if(length>0) {
+            int32_t offset;
+            if(length>3) {
+                length=3;
+            }
+            s+=i;
+            offset=0;
+            c=utf8_errorValue[length-1];
+            UTF8_APPEND_CHAR_UNSAFE(s, offset, c);
+            i=i+offset;
        }
-        s+=i;
-        offset=0;
-        c=utf8_errorValue[length-1];
-        UTF8_APPEND_CHAR_UNSAFE(s, offset, c);
-        i=i+offset;
-     }
+    }
    return i;
 }

@ -229,7 +230,11 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
    for(;;) {
        if(i<=start) {
            /* no lead byte at all */
-            c=UTF8_ERROR_VALUE_1;
+            if(strict>=0) {
+                return UTF8_ERROR_VALUE_1;
+            } else {
+                return U_SENTINEL;
+            }
            break;
        }

@ -250,7 +255,11 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
                        if(count>=4) {
                            count=3;
                        }
-                        c=utf8_errorValue[count];
+                        if(strict>=0) {
+                            c=utf8_errorValue[count];
+                        } else {
+                            c=U_SENTINEL;
+                        }
                    } else {
                        /* exit with correct c */
                    }
@ -260,9 +269,17 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
                       include the trail byte that we started with */
                    if(count<shouldCount) {
                        *pi=i;
-                        c=utf8_errorValue[count];
+                        if(strict>=0) {
+                            c=utf8_errorValue[count];
+                        } else {
+                            c=U_SENTINEL;
+                        }
                    } else {
-                        c=UTF8_ERROR_VALUE_1;
+                        if(strict>=0) {
+                            c=UTF8_ERROR_VALUE_1;
+                        } else {
+                            c=U_SENTINEL;
+                        }
                    }
                }
                break;
@ -273,12 +290,20 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
                shift+=6;
            } else {
                /* more than 5 trail bytes is illegal */
-                c=UTF8_ERROR_VALUE_1;
+                if(strict>=0) {
+                    c=UTF8_ERROR_VALUE_1;
+                } else {
+                    c=U_SENTINEL;
+                }
                break;
            }
        } else {
            /* single-byte character precedes trailing bytes */
-            c=UTF8_ERROR_VALUE_1;
+            if(strict>=0) {
+                c=UTF8_ERROR_VALUE_1;
+            } else {
+                c=U_SENTINEL;
+            }
            break;
        }
    }
--- a/icu4c/source/test/cintltst/cucdtst.c
+++ b/icu4c/source/test/cintltst/cucdtst.c
@ -942,39 +942,39 @@ static void TestCodePoint(){
        UChar32 c=codePoint[i];
        log_verbose("Testing code unit value of \\u%4X\n", c);
        if(i<6){
-            if(!UTF_IS_SURROGATE(c)){
+            if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
                log_err("ERROR: isSurrogate() failed for \\u%4X\n", c);
            }
            if(UTF_IS_VALID(c)){
                log_err("ERROR: isValid() failed for \\u%4X\n", c);
            }
-            if(UTF_IS_UNICODE_CHAR(c)){
+            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
                log_err("ERROR: isUnicodeChar() failed for \\u%4X\n", c);
            }
            if(UTF_IS_ERROR(c)){
                log_err("ERROR: isError() failed for \\u%4X\n", c);
            }
        }else if(i >=6 && i<18){
-            if(UTF_IS_SURROGATE(c)){
+            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
                log_err("ERROR: isSurrogate() failed for \\u%4X\n", c);
            }
            if(!UTF_IS_VALID(c)){
                log_err("ERROR: isValid() failed for \\u%4X\n", c);
            }
-            if(!UTF_IS_UNICODE_CHAR(c)){
+            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
                log_err("ERROR: isUnicodeChar() failed for \\u%4X\n", c);
            }
            if(UTF_IS_ERROR(c)){
                log_err("ERROR: isError() failed for \\u%4X\n", c);
            }
        }else if(i >=18 && i<20){
-            if(UTF_IS_SURROGATE(c)){
+            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
                log_err("ERROR: isSurrogate() failed for \\u%4X\n", c);
            }
            if(UTF_IS_VALID(c)){
                log_err("ERROR: isValid() failed for \\u%4X\n", c);
            }
-            if(!UTF_IS_UNICODE_CHAR(c)){
+            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
                log_err("ERROR: isUnicodeChar() failed for \\u%4X\n", c);
            }
            if(!UTF_IS_ERROR(c)){
@ -982,13 +982,13 @@ static void TestCodePoint(){
            }
        }
        else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
-            if(UTF_IS_SURROGATE(c)){
+            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
                log_err("ERROR: isSurrogate() failed for \\u%4X\n", c);
            }
            if(UTF_IS_VALID(c)){
                log_err("ERROR: isValid() failed for \\u%4X\n", c);
            }
-            if(UTF_IS_UNICODE_CHAR(c)){
+            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
                log_err("ERROR: isUnicodeChar() failed for \\u%4X\n", c);
            }
            if(!UTF_IS_ERROR(c)){
@ -1018,7 +1018,7 @@ static void TestCharLength()
    UBool multiple;
    for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
        UChar32 c=codepoint[i+1];
-        if(UTF_CHAR_LENGTH(c) != codepoint[i]){
+        if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
            log_err("The no: of code units for \\u%4X:- Expected: %d Got: %d", c, codepoint[i], UTF_CHAR_LENGTH(c));
        }else{
            log_verbose("The no: of code units for \\u%4X is %d", c, UTF_CHAR_LENGTH(c));
@ -1457,7 +1457,6 @@ static void TestStringFunctions()

 static void TestStringSearching()
 {
-    UChar ucharBuf[255];
    const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
    const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
    const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
@ -1467,55 +1466,67 @@ static void TestStringSearching()
    const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
    const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
    const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0};   /* has partial surrogate */
+    const UChar
+        empty[] = { 0 },
+        a[] = { 0x61, 0 },
+        ab[] = { 0x61, 0x62, 0 },
+        ba[] = { 0x62, 0x61, 0 },
+        abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
+        cd[] = { 0x63, 0x64, 0 },
+        dc[] = { 0x64, 0x63, 0 },
+        cdh[] = { 0x63, 0x64, 0x68, 0 },
+        f[] = { 0x66, 0 },
+        fg[] = { 0x66, 0x67, 0 },
+        gf[] = { 0x67, 0x66, 0 };

    log_verbose("Testing u_strpbrk()");

-    if (u_strpbrk(testString, u_uastrcpy(ucharBuf, "a")) != &testString[0]) {
+    if (u_strpbrk(testString, a) != &testString[0]) {
        log_err("u_strpbrk couldn't find first letter a.\n");
    }
-    if (u_strpbrk(testString, u_uastrcpy(ucharBuf, "dc")) != &testString[2]) {
+    if (u_strpbrk(testString, dc) != &testString[2]) {
        log_err("u_strpbrk couldn't find d or c.\n");
    }
-    if (u_strpbrk(testString, u_uastrcpy(ucharBuf, "cd")) != &testString[2]) {
+    if (u_strpbrk(testString, cd) != &testString[2]) {
        log_err("u_strpbrk couldn't find c or d.\n");
    }
-    if (u_strpbrk(testString, u_uastrcpy(ucharBuf, "cdh")) != &testString[2]) {
+    if (u_strpbrk(testString, cdh) != &testString[2]) {
        log_err("u_strpbrk couldn't find c, d or h.\n");
    }
-    if (u_strpbrk(testString, u_uastrcpy(ucharBuf, "f")) != NULL) {
+    if (u_strpbrk(testString, f) != NULL) {
        log_err("u_strpbrk didn't return NULL for \"f\".\n");
    }
-    if (u_strpbrk(testString, u_uastrcpy(ucharBuf, "fg")) != NULL) {
+    if (u_strpbrk(testString, fg) != NULL) {
        log_err("u_strpbrk didn't return NULL for \"fg\".\n");
    }
-    if (u_strpbrk(testString, u_uastrcpy(ucharBuf, "gf")) != NULL) {
+    if (u_strpbrk(testString, gf) != NULL) {
        log_err("u_strpbrk didn't return NULL for \"gf\".\n");
    }
-    if (u_strpbrk(testString, u_uastrcpy(ucharBuf, "")) != NULL) {
+    if (u_strpbrk(testString, empty) != NULL) {
        log_err("u_strpbrk didn't return NULL for \"\".\n");
    }

    log_verbose("Testing u_strpbrk() with surrogates");

-    if (u_strpbrk(testSurrogateString, u_uastrcpy(ucharBuf, "a")) != &testSurrogateString[1]) {
+    if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
        log_err("u_strpbrk couldn't find first letter a.\n");
    }
-    if (u_strpbrk(testSurrogateString, u_uastrcpy(ucharBuf, "dc")) != &testSurrogateString[5]) {
+    if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
        log_err("u_strpbrk couldn't find d or c.\n");
    }
-    if (u_strpbrk(testSurrogateString, u_uastrcpy(ucharBuf, "cd")) != &testSurrogateString[5]) {
+    if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
        log_err("u_strpbrk couldn't find c or d.\n");
    }
-    if (u_strpbrk(testSurrogateString, u_uastrcpy(ucharBuf, "cdh")) != &testSurrogateString[5]) {
+    if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
        log_err("u_strpbrk couldn't find c, d or h.\n");
    }
-    if (u_strpbrk(testSurrogateString, u_uastrcpy(ucharBuf, "f")) != NULL) {
+    if (u_strpbrk(testSurrogateString, f) != NULL) {
        log_err("u_strpbrk didn't return NULL for \"f\".\n");
    }
-    if (u_strpbrk(testSurrogateString, u_uastrcpy(ucharBuf, "fg")) != NULL) {
+    if (u_strpbrk(testSurrogateString, fg) != NULL) {
        log_err("u_strpbrk didn't return NULL for \"fg\".\n");
    }
-    if (u_strpbrk(testSurrogateString, u_uastrcpy(ucharBuf, "gf")) != NULL) {
+    if (u_strpbrk(testSurrogateString, gf) != NULL) {
        log_err("u_strpbrk didn't return NULL for \"gf\".\n");
    }
    if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
@ -1536,49 +1547,49 @@ static void TestStringSearching()

    log_verbose("Testing u_strcspn()");

-    if (u_strcspn(testString, u_uastrcpy(ucharBuf, "a")) != 0) {
+    if (u_strcspn(testString, a) != 0) {
        log_err("u_strcspn couldn't find first letter a.\n");
    }
-    if (u_strcspn(testString, u_uastrcpy(ucharBuf, "dc")) != 2) {
+    if (u_strcspn(testString, dc) != 2) {
        log_err("u_strcspn couldn't find d or c.\n");
    }
-    if (u_strcspn(testString, u_uastrcpy(ucharBuf, "cd")) != 2) {
+    if (u_strcspn(testString, cd) != 2) {
        log_err("u_strcspn couldn't find c or d.\n");
    }
-    if (u_strcspn(testString, u_uastrcpy(ucharBuf, "cdh")) != 2) {
+    if (u_strcspn(testString, cdh) != 2) {
        log_err("u_strcspn couldn't find c, d or h.\n");
    }
-    if (u_strcspn(testString, u_uastrcpy(ucharBuf, "f")) != u_strlen(testString)) {
+    if (u_strcspn(testString, f) != u_strlen(testString)) {
        log_err("u_strcspn didn't return NULL for \"f\".\n");
    }
-    if (u_strcspn(testString, u_uastrcpy(ucharBuf, "fg")) != u_strlen(testString)) {
+    if (u_strcspn(testString, fg) != u_strlen(testString)) {
        log_err("u_strcspn didn't return NULL for \"fg\".\n");
    }
-    if (u_strcspn(testString, u_uastrcpy(ucharBuf, "gf")) != u_strlen(testString)) {
+    if (u_strcspn(testString, gf) != u_strlen(testString)) {
        log_err("u_strcspn didn't return NULL for \"gf\".\n");
    }

    log_verbose("Testing u_strcspn() with surrogates");

-    if (u_strcspn(testSurrogateString, u_uastrcpy(ucharBuf, "a")) != 1) {
+    if (u_strcspn(testSurrogateString, a) != 1) {
        log_err("u_strcspn couldn't find first letter a.\n");
    }
-    if (u_strcspn(testSurrogateString, u_uastrcpy(ucharBuf, "dc")) != 5) {
+    if (u_strcspn(testSurrogateString, dc) != 5) {
        log_err("u_strcspn couldn't find d or c.\n");
    }
-    if (u_strcspn(testSurrogateString, u_uastrcpy(ucharBuf, "cd")) != 5) {
+    if (u_strcspn(testSurrogateString, cd) != 5) {
        log_err("u_strcspn couldn't find c or d.\n");
    }
-    if (u_strcspn(testSurrogateString, u_uastrcpy(ucharBuf, "cdh")) != 5) {
+    if (u_strcspn(testSurrogateString, cdh) != 5) {
        log_err("u_strcspn couldn't find c, d or h.\n");
    }
-    if (u_strcspn(testSurrogateString, u_uastrcpy(ucharBuf, "f")) != u_strlen(testSurrogateString)) {
+    if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
        log_err("u_strcspn didn't return NULL for \"f\".\n");
    }
-    if (u_strcspn(testSurrogateString, u_uastrcpy(ucharBuf, "fg")) != u_strlen(testSurrogateString)) {
+    if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
        log_err("u_strcspn didn't return NULL for \"fg\".\n");
    }
-    if (u_strcspn(testSurrogateString, u_uastrcpy(ucharBuf, "gf")) != u_strlen(testSurrogateString)) {
+    if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
        log_err("u_strcspn didn't return NULL for \"gf\".\n");
    }
    if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
@ -1597,25 +1608,25 @@ static void TestStringSearching()

    log_verbose("Testing u_strspn()");

-    if (u_strspn(testString, u_uastrcpy(ucharBuf, "a")) != 1) {
+    if (u_strspn(testString, a) != 1) {
        log_err("u_strspn couldn't skip first letter a.\n");
    }
-    if (u_strspn(testString, u_uastrcpy(ucharBuf, "ab")) != 2) {
+    if (u_strspn(testString, ab) != 2) {
        log_err("u_strspn couldn't skip a or b.\n");
    }
-    if (u_strspn(testString, u_uastrcpy(ucharBuf, "ba")) != 2) {
+    if (u_strspn(testString, ba) != 2) {
        log_err("u_strspn couldn't skip a or b.\n");
    }
-    if (u_strspn(testString, u_uastrcpy(ucharBuf, "f")) != 0) {
+    if (u_strspn(testString, f) != 0) {
        log_err("u_strspn didn't return 0 for \"f\".\n");
    }
-    if (u_strspn(testString, u_uastrcpy(ucharBuf, "dc")) != 0) {
+    if (u_strspn(testString, dc) != 0) {
        log_err("u_strspn couldn't find first letter a (skip d or c).\n");
    }
-    if (u_strspn(testString, u_uastrcpy(ucharBuf, "abcd")) != u_strlen(testString)) {
+    if (u_strspn(testString, abcd) != u_strlen(testString)) {
        log_err("u_strspn couldn't skip over the whole string.\n");
    }
-    if (u_strspn(testString, u_uastrcpy(ucharBuf, "")) != 0) {
+    if (u_strspn(testString, empty) != 0) {
        log_err("u_strspn should have returned 0 for empty string.\n");
    }

@ -1626,13 +1637,13 @@ static void TestStringSearching()
    if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
        log_err("u_strspn couldn't skip 0xdbff or a.\n");
    }
-    if (u_strspn(testSurrogateString, u_uastrcpy(ucharBuf, "f")) != 0) {
+    if (u_strspn(testSurrogateString, f) != 0) {
        log_err("u_strspn couldn't skip d or c (skip first letter).\n");
    }
-    if (u_strspn(testSurrogateString, u_uastrcpy(ucharBuf, "dc")) != 0) {
+    if (u_strspn(testSurrogateString, dc) != 0) {
        log_err("u_strspn couldn't skip d or c (skip first letter).\n");
    }
-    if (u_strspn(testSurrogateString, u_uastrcpy(ucharBuf, "cd")) != 0) {
+    if (u_strspn(testSurrogateString, cd) != 0) {
        log_err("u_strspn couldn't skip d or c (skip first letter).\n");
    }
    if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
--- a/icu4c/source/test/cintltst/utf16tst.c
+++ b/icu4c/source/test/cintltst/utf16tst.c
@ -21,6 +21,7 @@
 #include "cintltst.h"
 #include <stdio.h>

+#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))

 static void printUChars(const UChar *uchars);

@ -31,6 +32,7 @@ static void TestNextPrevChar(void);
 static void TestFwdBack(void);
 static void TestSetChar(void);
 static void TestAppendChar(void);
+static void TestAppend(void);
 static void TestSurrogate(void);

 void addUTF16Test(TestNode** root);
@ -45,6 +47,7 @@ addUTF16Test(TestNode** root)
  addTest(root, &TestFwdBack,           "utf16tst/TestFwdBack"       );
  addTest(root, &TestSetChar,           "utf16tst/TestSetChar"       );
  addTest(root, &TestAppendChar,        "utf16tst/TestAppendChar"    );
+  addTest(root, &TestAppend,            "utf16tst/TestAppend"        );
  addTest(root, &TestSurrogate,         "utf16tst/TestSurrogate"     );
 }

@ -57,17 +60,17 @@ static void TestCodeUnitValues()
        UChar c=codeunit[i];
        log_verbose("Testing code unit value of %x\n", c);
        if(i<4){
-            if(!UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) || UTF16_IS_TRAIL(c)){
+            if(!UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) || UTF16_IS_TRAIL(c) || !U16_IS_SINGLE(c) || U16_IS_LEAD(c) || U16_IS_TRAIL(c)){
                log_err("ERROR: %x is a single character\n", c);
            }
        }
        if(i >= 4 && i< 8){
-            if(!UTF16_IS_LEAD(c) || UTF16_IS_SINGLE(c) || UTF16_IS_TRAIL(c)){
+            if(!UTF16_IS_LEAD(c) || UTF16_IS_SINGLE(c) || UTF16_IS_TRAIL(c) || !U16_IS_LEAD(c) || U16_IS_SINGLE(c) || U16_IS_TRAIL(c)){
                log_err("ERROR: %x is a first surrogate\n", c);
            }
        }
        if(i >= 8 && i< 12){
-            if(!UTF16_IS_TRAIL(c) || UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c)){
+            if(!UTF16_IS_TRAIL(c) || UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) || !U16_IS_TRAIL(c) || U16_IS_SINGLE(c) || U16_IS_LEAD(c)){
                log_err("ERROR: %x is a second surrogate\n", c);
            }
        }
@ -93,7 +96,7 @@ static void TestCharLength()
    UBool multiple;
    for(i=0; i<sizeof(codepoint)/sizeof(codepoint[0]); i=(int16_t)(i+2)){
        UChar32 c=codepoint[i+1];
-        if(UTF16_CHAR_LENGTH(c) != (uint16_t)codepoint[i]){
+        if(UTF16_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U16_LENGTH(c) != (uint16_t)codepoint[i]){
              log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF16_CHAR_LENGTH(c));
        }else{
              log_verbose("The no: of code units for %lx is %d\n",c, UTF16_CHAR_LENGTH(c) ); 
@ -150,11 +153,23 @@ static void TestGetChar()
            if(c != result[i]){
                log_err("ERROR: UTF16_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
            }
+
+            U16_GET_UNSAFE(input, offset, c);
+            if(c != result[i]){
+                log_err("ERROR: U16_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
+            }
        }
+
        UTF16_GET_CHAR_SAFE(input, 0, offset, sizeof(input)/U_SIZEOF_UCHAR, c, FALSE);
        if(c != result[i+1]){
            log_err("ERROR: UTF16_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
        }
+
+        U16_GET(input, 0, offset, sizeof(input)/U_SIZEOF_UCHAR, c);
+        if(c != result[i+1]){
+            log_err("ERROR: U16_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
+        }
+
        UTF16_GET_CHAR_SAFE(input, 0, offset, sizeof(input)/U_SIZEOF_UCHAR, c, TRUE);
        if(c != result[i+2]){
            log_err("ERROR: UTF16_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
@ -213,6 +228,16 @@ static void TestNextPrevChar(){
             log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
         }

+         setOffset=offset;
+         U16_NEXT_UNSAFE(input, setOffset, c);
+         if(setOffset != movedOffset[i]){
+             log_err("ERROR: U16_NEXT_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
+                 offset, movedOffset[i], setOffset);
+         }
+         if(c != result[i]){
+             log_err("ERROR: U16_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
+         }
+
         setOffset=offset;
         UTF16_NEXT_CHAR_SAFE(input, setOffset, sizeof(input)/U_SIZEOF_UCHAR, c, FALSE);
         if(setOffset != movedOffset[i+1]){
@ -223,6 +248,16 @@ static void TestNextPrevChar(){
             log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
         }

+         setOffset=offset;
+         U16_NEXT(input, setOffset, sizeof(input)/U_SIZEOF_UCHAR, c);
+         if(setOffset != movedOffset[i+1]){
+             log_err("ERROR: U16_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
+                 offset, movedOffset[i+1], setOffset);
+         }
+         if(c != result[i+1]){
+             log_err("ERROR: U16_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
+         }
+
         setOffset=offset;
         UTF16_NEXT_CHAR_SAFE(input, setOffset, sizeof(input)/U_SIZEOF_UCHAR, c, TRUE);
         if(setOffset != movedOffset[i+1]){
@ -247,6 +282,16 @@ static void TestNextPrevChar(){
             log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
         }

+         setOffset=offset;
+         U16_PREV_UNSAFE(input, setOffset, c);
+         if(setOffset != movedOffset[i+3]){
+             log_err("ERROR: U16_PREV_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
+                 offset, movedOffset[i+3], setOffset);
+         }
+         if(c != result[i+3]){
+             log_err("ERROR: U16_PREV_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
+         }
+
         setOffset=offset;
         UTF16_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
         if(setOffset != movedOffset[i+4]){
@ -257,6 +302,16 @@ static void TestNextPrevChar(){
             log_err("ERROR: UTF16_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
         }

+         setOffset=offset;
+         U16_PREV(input, 0, setOffset, c);
+         if(setOffset != movedOffset[i+4]){
+             log_err("ERROR: U16_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
+                 offset, movedOffset[i+4], setOffset);
+         }
+         if(c != result[i+4]){
+             log_err("ERROR: U16_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
+         }
+
         setOffset=offset;
         UTF16_PREV_CHAR_SAFE(input, 0,  setOffset, c, TRUE);
         if(setOffset != movedOffset[i+5]){
@ -285,7 +340,6 @@ static void TestFwdBack(){
    static uint16_t back_N_unsafe[]={12, 11, 8, 5, 3};
    static uint16_t back_N_safe[]  ={12, 11, 8, 5, 3, 0, 0};   

-
    uint16_t offunsafe=0, offsafe=0;
    uint16_t i=0;
    while(offunsafe < sizeof(input)/U_SIZEOF_UCHAR){
@ -295,6 +349,17 @@ static void TestFwdBack(){
        }
        i++;
    }
+
+    offunsafe=0, offsafe=0;
+    i=0;
+    while(offunsafe < sizeof(input)/U_SIZEOF_UCHAR){
+        U16_FWD_1_UNSAFE(input, offunsafe);
+        if(offunsafe != fwd_unsafe[i]){
+            log_err("ERROR: U16_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
+        }
+        i++;
+    }
+
    i=0;
    while(offsafe < sizeof(input)/U_SIZEOF_UCHAR){
        UTF16_FWD_1_SAFE(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR);
@ -303,6 +368,16 @@ static void TestFwdBack(){
        }
        i++;
    }
+
+    offsafe=0; i=0; /* rewind: the UTF16_FWD_1_SAFE loop above left offsafe at the end of input */
+    while(offsafe < sizeof(input)/U_SIZEOF_UCHAR){
+        U16_FWD_1(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR);
+        if(offsafe != fwd_safe[i]){
+            log_err("ERROR: U16_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
+        }
+        i++;
+    }
+
    offunsafe=sizeof(input)/U_SIZEOF_UCHAR;
    offsafe=sizeof(input)/U_SIZEOF_UCHAR;
    i=0;
@ -313,6 +388,18 @@ static void TestFwdBack(){
        }
        i++;
    }
+
+    offunsafe=sizeof(input)/U_SIZEOF_UCHAR;
+    offsafe=sizeof(input)/U_SIZEOF_UCHAR;
+    i=0;
+    while(offunsafe > 0){
+        U16_BACK_1_UNSAFE(input, offunsafe);
+        if(offunsafe != back_unsafe[i]){
+            log_err("ERROR: U16_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
+        }
+        i++;
+    }
+
    i=0;
    while(offsafe > 0){
        UTF16_BACK_1_SAFE(input,0,  offsafe);
@ -321,6 +408,16 @@ static void TestFwdBack(){
        }
        i++;
    }
+
+    offsafe=sizeof(input)/U_SIZEOF_UCHAR; i=0; /* rewind: the UTF16_BACK_1_SAFE loop above left offsafe at 0 */
+    while(offsafe > 0){
+        U16_BACK_1(input,0,  offsafe);
+        if(offsafe != back_safe[i]){
+            log_err("ERROR: U16_BACK_1 offset expected:%d, Got:%d\n", back_safe[i], offsafe);
+        }
+        i++;
+    }
+
    offunsafe=0;
    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){  /*didn't want it to fail(we assume 0<i<length)*/
@ -329,6 +426,16 @@ static void TestFwdBack(){
            log_err("ERROR: Forward_N_unsafe offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe);
        }
    }
+
+    offunsafe=0;
+    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){  /*didn't want it to fail(we assume 0<i<length)*/
+        U16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
+        if(offunsafe != fwd_N_unsafe[i]){
+            log_err("ERROR: U16_FWD_N_UNSAFE offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe);
+        }
+    }
+
+    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        UTF16_FWD_N_SAFE(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR, Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
@ -336,20 +443,47 @@ static void TestFwdBack(){
        }
    
    }
+
+    offsafe=0;
+    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
+        U16_FWD_N(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR, Nvalue[i]);
+        if(offsafe != fwd_N_safe[i]){
+            log_err("ERROR: U16_FWD_N offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe);
+        }
+    
+    }
+
    offunsafe=sizeof(input)/U_SIZEOF_UCHAR;
-    offsafe=sizeof(input)/U_SIZEOF_UCHAR;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
        UTF16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != back_N_unsafe[i]){
            log_err("ERROR: backward_N_unsafe offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe);
        }
    }
+
+    offunsafe=sizeof(input)/U_SIZEOF_UCHAR;
+    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
+        U16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
+        if(offunsafe != back_N_unsafe[i]){
+            log_err("ERROR: U16_BACK_N_UNSAFE offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe);
+        }
+    }
+
+    offsafe=sizeof(input)/U_SIZEOF_UCHAR;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        UTF16_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: backward_N_safe offset expected:%d, Got:%d\n", back_N_safe[i], offsafe);
        }
    }
+
+    offsafe=sizeof(input)/U_SIZEOF_UCHAR;
+    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
+        U16_BACK_N(input, 0, offsafe, Nvalue[i]);
+        if(offsafe != back_N_safe[i]){
+            log_err("ERROR: U16_BACK_N offset expected:%d, Got:%d\n", back_N_safe[i], offsafe);
+        }
+    }
 }

 static void TestSetChar(){
@ -367,23 +501,45 @@ static void TestSetChar(){
         if(setOffset != start_unsafe[i]){
             log_err("ERROR: UTF16_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_unsafe[i], setOffset);
         }
+         /* Same check for the new macro; it shares the start_unsafe expectations used above. */
+         setOffset=offset;
+         U16_SET_CP_START_UNSAFE(input, setOffset);
+         if(setOffset != start_unsafe[i]){
+             log_err("ERROR: U16_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_unsafe[i], setOffset);
+         }
+
         setOffset=offset;
         UTF16_SET_CHAR_START_SAFE(input, 0, setOffset);
         if(setOffset != start_safe[i]){
             log_err("ERROR: UTF16_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_safe[i], setOffset);
         }
+         /* Same check for the new macro; it shares the start_safe expectations used above. */
+         setOffset=offset;
+         U16_SET_CP_START(input, 0, setOffset);
+         if(setOffset != start_safe[i]){
+             log_err("ERROR: U16_SET_CP_START failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_safe[i], setOffset);
+         }
+
         if (offset > 0) {
             setOffset=offset;
             UTF16_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
             if(setOffset != limit_unsafe[i]){
                 log_err("ERROR: UTF16_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_unsafe[i], setOffset);
             }
+             /* Same check for the new macro; it shares the limit_unsafe expectations used above. */
+             setOffset=offset;
+             U16_SET_CP_LIMIT_UNSAFE(input, setOffset);
+             if(setOffset != limit_unsafe[i]){
+                 log_err("ERROR: U16_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_unsafe[i], setOffset);
+             }
         }
+
         setOffset=offset; 
-         UTF16_SET_CHAR_LIMIT_SAFE(input,0, setOffset, sizeof(input)/U_SIZEOF_UCHAR);
+         U16_SET_CP_LIMIT(input,0, setOffset, sizeof(input)/U_SIZEOF_UCHAR);
         if(setOffset != limit_safe[i]){
-             log_err("ERROR: UTF16_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_safe[i], setOffset);
+             log_err("ERROR: U16_SET_CP_LIMIT failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_safe[i], setOffset);
         }
+
         i++;
    }
 }
@ -487,6 +643,67 @@ static void TestAppendChar(){

 }

+static void TestAppend() {
+    static const UChar32 codePoints[]={
+        0x61, 0xdf, 0x901, 0x3040,
+        0xac00, 0xd800, 0xdbff, 0xdcde,
+        0xdffd, 0xe000, 0xffff, 0x10000,
+        0x12345, 0xe0021, 0x10ffff, 0x110000,
+        0x234567, 0x7fffffff, -1, -1000,
+        0, 0x400
+    };
+    static const UChar expectUnsafe[]={
+        0x61, 0xdf, 0x901, 0x3040,
+        0xac00, 0xd800, 0xdbff, 0xdcde,
+        0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
+        0xd848, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
+        /* none from this line */
+        0, 0x400
+    }, expectSafe[]={
+        0x61, 0xdf, 0x901, 0x3040,
+        0xac00, 0xd800, 0xdbff, 0xdcde,
+        0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
+        0xd848, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
+        /* none from this line */
+        0, 0x400
+    };
+    /* Feed codePoints to U16_APPEND_UNSAFE and U16_APPEND and compare the buffer with the tables above. */
+    UChar buffer[100];
+    UChar32 c;
+    int32_t i, length;
+    UBool isError, expectIsError, wrongIsError;
+    /* Pass 1: U16_APPEND_UNSAFE, which is only given values in 0..0x10ffff. */
+    length=0;
+    for(i=0; i<LENGTHOF(codePoints); ++i) {
+        c=codePoints[i];
+        if(c<0 || 0x10ffff<c) {
+            continue; /* skip non-code points for U16_APPEND_UNSAFE */
+        }
+
+        U16_APPEND_UNSAFE(buffer, length, c);
+    }
+    if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length*U_SIZEOF_UCHAR)) {
+        log_err("U16_APPEND_UNSAFE did not generate the expected output\n");
+    }
+    /* Pass 2: U16_APPEND with the buffer capacity; isError is reset and compared for every input. */
+    length=0;
+    wrongIsError=FALSE;
+    for(i=0; i<LENGTHOF(codePoints); ++i) {
+        c=codePoints[i];
+        expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
+        isError=FALSE;
+        /* NOTE(review): expectSafe lists the surrogates, yet they are expected to set isError - confirm. */
+        U16_APPEND(buffer, length, LENGTHOF(buffer), c, isError);
+        wrongIsError|= isError!=expectIsError;
+    }
+    if(wrongIsError) {
+        log_err("U16_APPEND did not set isError correctly\n");
+    }
+    if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length*U_SIZEOF_UCHAR)) {
+        log_err("U16_APPEND did not generate the expected output\n");
+    }
+}
+
 static void TestSurrogate(){
    static UChar32 s[] = {0x10000, 0x10ffff, 0x50000, 0x100000, 0x1abcd};
    int i = 0;
@ -497,11 +714,11 @@ static void TestSurrogate(){
        UChar firstresult  = (UChar)(((s[i] - 0x10000) / 0x400) + 0xD800);
        UChar secondresult = (UChar)(((s[i] - 0x10000) % 0x400) + 0xDC00);

-        if (first != UTF16_LEAD(s[i]) || first != firstresult) {
+        if (first != UTF16_LEAD(s[i]) || first != U16_LEAD(s[i]) || first != firstresult) {
            log_err("Failure in first surrogate in 0x%x expected to be 0x%x\n",
                    s[i], firstresult);
        }
-        if (second != UTF16_TRAIL(s[i]) || second != secondresult) {
+        if (second != UTF16_TRAIL(s[i]) || second != U16_TRAIL(s[i]) || second != secondresult) {
            log_err("Failure in second surrogate in 0x%x expected to be 0x%x\n",
                    s[i], secondresult);
        }
@ -515,6 +732,3 @@ static void printUChars(const UChar *uchars){
        printf("%x ", *(uchars+i));
    }
 }
-
-
-
--- a/icu4c/source/test/cintltst/utf8tst.c
+++ b/icu4c/source/test/cintltst/utf8tst.c
@ -18,6 +18,7 @@
 #include "cmemory.h"
 #include "cintltst.h"

+#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))

 static void printUChars(const uint8_t *uchars, int16_t len);

@ -28,6 +29,7 @@ static void TestNextPrevChar(void);
 static void TestFwdBack(void);
 static void TestSetChar(void);
 static void TestAppendChar(void);
+static void TestAppend(void);

 void addUTF8Test(TestNode** root);

@ -41,6 +43,7 @@ addUTF8Test(TestNode** root)
  addTest(root, &TestFwdBack,           "utf8tst/TestFwdBack"       );
  addTest(root, &TestSetChar,           "utf8tst/TestSetChar"       );
  addTest(root, &TestAppendChar,        "utf8tst/TestAppendChar"    );
+  addTest(root, &TestAppend,            "utf8tst/TestAppend"        );
 }

 static void TestCodeUnitValues()
@ -52,17 +55,17 @@ static void TestCodeUnitValues()
        uint8_t c=codeunit[i];
        log_verbose("Testing code unit value of %x\n", c);
        if(i<4){
-            if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c)){
+            if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){
                log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",
                    c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
            }
        } else if(i< 8){
-            if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c)){
+            if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){
                log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",
                    c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
            }
        } else if(i< 12){
-            if(!UTF8_IS_TRAIL(c) || UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c)){
+            if(!UTF8_IS_TRAIL(c) || UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || !U8_IS_TRAIL(c) || U8_IS_SINGLE(c) || U8_IS_LEAD(c)){
                log_err("ERROR: 0x%02x is a trail byte but results in single: %c lead: %c trail: %c\n",
                    c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
            }
@ -93,7 +96,7 @@ static void TestCharLength()
    UBool multiple;
    for(i=0; i<sizeof(codepoint)/sizeof(codepoint[0]); i=(int16_t)(i+2)){
        UChar32 c=codepoint[i+1];
-        if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i]){
+        if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uint16_t)codepoint[i]){
              log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c));
        }else{
              log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_LENGTH(c) ); 
@ -152,11 +155,24 @@ static void TestGetChar()
                log_err("ERROR: UTF8_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
           
            }
+
+            U8_GET_UNSAFE(input, offset, c);
+            if(c != result[i]){
+                log_err("ERROR: U8_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
+           
+            }
        }
+        /* Where the old safe macro is expected to yield an error value, U8_GET only has to yield a negative c. */
+        U8_GET(input, 0, offset, sizeof(input), c);
+        if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){
+            log_err("ERROR: U8_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
+        }
+
        UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, FALSE);
        if(c != result[i+1]){
            log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
        }
+
        UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, TRUE);
        if(c != result[i+2]){
            log_err("ERROR: UTF8_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
@ -225,7 +241,18 @@ static void TestNextPrevChar(){
             if(c != result[i]){
                 log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
             }
+
+             setOffset=offset;
+             U8_NEXT_UNSAFE(input, setOffset, c);
+             if(setOffset != movedOffset[i]){
+                 log_err("ERROR: U8_NEXT_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
+                     offset, movedOffset[i], setOffset);
+             }
+             if(c != result[i]){
+                 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
+             }
         }
+
         setOffset=offset;
         UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, FALSE);
         if(setOffset != movedOffset[i+1]){
@ -235,6 +262,17 @@ static void TestNextPrevChar(){
         if(c != result[i+1]){
             log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
         }
+
+         setOffset=offset;
+         U8_NEXT(input, setOffset, sizeof(input), c);
+         if(setOffset != movedOffset[i+1]){
+             log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
+                 offset, movedOffset[i+1], setOffset);
+         }
+         if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){
+             log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
+         }
+
         setOffset=offset;
         UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, TRUE);
         if(setOffset != movedOffset[i+1]){
@ -244,8 +282,10 @@ static void TestNextPrevChar(){
         if(c != result[i+2]){
             log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
         }
+
         i=i+6;
    }
+
    i=0;
    for(offset=sizeof(input); offset > 0; --offset){
         setOffset=offset;
@ -257,6 +297,7 @@ static void TestNextPrevChar(){
         if(c != result[i+3]){
             log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
         }
+
         setOffset=offset;
         UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
         if(setOffset != movedOffset[i+4]){
@ -266,6 +307,17 @@ static void TestNextPrevChar(){
         if(c != result[i+4]){
             log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
         }
+
+         setOffset=offset;
+         U8_PREV(input, 0, setOffset, c);
+         if(setOffset != movedOffset[i+4]){
+             log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
+                 offset, movedOffset[i+4], setOffset);
+         }
+         if(UTF_IS_ERROR(result[i+4]) ? c >= 0 : c != result[i+4]){
+             log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
+         }
+
         setOffset=offset;
         UTF8_PREV_CHAR_SAFE(input, 0,  setOffset, c, TRUE);
         if(setOffset != movedOffset[i+5]){
@ -275,6 +327,7 @@ static void TestNextPrevChar(){
         if(c != result[i+5]){
             log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
         }
+
         i=i+6;
    }

@ -295,6 +348,7 @@ static void TestFwdBack(){


    uint32_t offunsafe=0, offsafe=0;
+
    uint32_t i=0;
    while(offunsafe < sizeof(input)){
        UTF8_FWD_1_UNSAFE(input, offunsafe);
@ -303,6 +357,16 @@ static void TestFwdBack(){
        }
        i++;
    }
+
+    /* The UTF8_FWD_1_UNSAFE loop above leaves offunsafe at or past the end of input; start again from 0. */
+    offunsafe=0;
+    for(i=0; offunsafe < sizeof(input); i++){
+        U8_FWD_1_UNSAFE(input, offunsafe);
+        if(offunsafe != fwd_unsafe[i]){
+            log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
+        }
+    }
+
    i=0;
    while(offsafe < sizeof(input)){
        UTF8_FWD_1_SAFE(input, offsafe, sizeof(input));
@ -311,6 +375,16 @@ static void TestFwdBack(){
        }
        i++;
    }
+
+    /* The UTF8_FWD_1_SAFE loop above leaves offsafe no longer below sizeof(input); start again from 0. */
+    offsafe=0;
+    for(i=0; offsafe < sizeof(input); i++){
+        U8_FWD_1(input, offsafe, sizeof(input));
+        if(offsafe != fwd_safe[i]){
+            log_err("ERROR: U8_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
+        }
+    }
+
    offunsafe=sizeof(input);
    i=0;
    while(offunsafe > 0){
@ -320,6 +394,17 @@ static void TestFwdBack(){
        }
        i++;
    }
+
+    offunsafe=sizeof(input);
+    i=0;
+    while(offunsafe > 0){
+        U8_BACK_1_UNSAFE(input, offunsafe);
+        if(offunsafe != back_unsafe[i]){
+            log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
+        }
+        i++;
+    }
+
    i=0;
    offsafe=sizeof(input);
    while(offsafe > 0){
@ -329,14 +414,34 @@ static void TestFwdBack(){
        }
        i++;
    }
+    /* Walk backwards from the end of input with U8_BACK_1; each offset must match the back_safe table. */
+    i=0;
+    offsafe=sizeof(input);
+    while(offsafe > 0){
+        U8_BACK_1(input, 0,  offsafe);
+        if(offsafe != back_safe[i]){
+            log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i], offsafe);
+        }
+        i++;
+    }
+
    offunsafe=0;
-    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){  
        UTF8_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != fwd_N_unsafe[i]){
            log_err("ERROR: Forward_N_unsafe offset=%d expected:%d, Got:%d\n", i, fwd_N_unsafe[i], offunsafe);
        }
    }
+
+    offunsafe=0;
+    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){  
+        U8_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
+        if(offunsafe != fwd_N_unsafe[i]){
+            log_err("ERROR: U8_FWD_N_UNSAFE offset=%d expected:%d, Got:%d\n", i, fwd_N_unsafe[i], offunsafe);
+        }
+    }
+
+    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
@ -344,20 +449,47 @@ static void TestFwdBack(){
        }
    
    }
+
+    offsafe=0;
+    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
+        U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]);
+        if(offsafe != fwd_N_safe[i]){
+            log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
+        }
+    
+    }
+
    offunsafe=sizeof(input);
-    offsafe=sizeof(input);
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
        UTF8_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != back_N_unsafe[i]){
            log_err("ERROR: backward_N_unsafe offset=%d expected:%d, Got:%d\n", i, back_N_unsafe[i], offunsafe);
        }
    }
+
+    offunsafe=sizeof(input);
+    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
+        U8_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
+        if(offunsafe != back_N_unsafe[i]){
+            log_err("ERROR: U8_BACK_N_UNSAFE offset=%d expected:%d, Got:%d\n", i, back_N_unsafe[i], offunsafe);
+        }
+    }
+
+    offsafe=sizeof(input);
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);
        }
    }
+
+    offsafe=sizeof(input);
+    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
+        U8_BACK_N(input, 0, offsafe, Nvalue[i]);
+        if(offsafe != back_N_safe[i]){
+            log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);
+        }
+    }
 }

 static void TestSetChar(){
@ -380,23 +512,51 @@ static void TestSetChar(){
         if(setOffset != start_unsafe[i]){
             log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);
         }
+
+         setOffset=offset;
+         U8_SET_CP_START_UNSAFE(input, setOffset);
+         if(setOffset != start_unsafe[i]){
+             log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);
+         }
+
         setOffset=offset;
         UTF8_SET_CHAR_START_SAFE(input, 0, setOffset);
         if(setOffset != start_safe[i]){
             log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
         }
+
+         setOffset=offset;
+         U8_SET_CP_START(input, 0, setOffset);
+         if(setOffset != start_safe[i]){
+             log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
+         }
+
         if (offset != 0) { /* Can't have it go off the end of the array */
             setOffset=offset; 
             UTF8_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
             if(setOffset != limit_unsafe[i]){
                 log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[i], setOffset);
             }
+
+             setOffset=offset;
+             U8_SET_CP_LIMIT_UNSAFE(input, setOffset);
+             if(setOffset != limit_unsafe[i]){
+                 log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[i], setOffset);
+             }
         }
+
         setOffset=offset; 
         UTF8_SET_CHAR_LIMIT_SAFE(input,0, setOffset, sizeof(input));
         if(setOffset != limit_safe[i]){
             log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
         }
+
+         setOffset=offset; 
+         U8_SET_CP_LIMIT(input,0, setOffset, sizeof(input));
+         if(setOffset != limit_safe[i]){
+             log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
+         }
+
         i++;
    }
 }
@ -451,7 +611,7 @@ static void TestAppendChar(){
          8,
          9,

-          /*offse-moved-to(safe)*/
+          /*offset-moved-to(safe)*/
          4,              /*for append-pos: 0, CHAR  0x10401*/
          3,
          4,
@ -570,6 +730,67 @@ static void TestAppendChar(){

 }

+static void TestAppend() {
+    static const UChar32 codePoints[]={
+        0x61, 0xdf, 0x901, 0x3040,
+        0xac00, 0xd800, 0xdbff, 0xdcde,
+        0xdffd, 0xe000, 0xffff, 0x10000,
+        0x12345, 0xe0021, 0x10ffff, 0x110000,
+        0x234567, 0x7fffffff, -1, -1000,
+        0, 0x400
+    };
+    static const uint8_t expectUnsafe[]={
+        0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
+        0xea, 0xb0, 0x80,  0xed, 0xa0, 0x80,  0xed, 0xaf, 0xbf,  0xed, 0xb3, 0x9e,
+        0xed, 0xbf, 0xbd,  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
+        0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
+        /* none from this line */
+        0,  0xd0, 0x80
+    }, expectSafe[]={
+        0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
+        0xea, 0xb0, 0x80,  /* no surrogates */
+        /* no surrogates */  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
+        0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
+        /* none from this line */
+        0,  0xd0, 0x80
+    };
+    /* Feed codePoints to U8_APPEND_UNSAFE and U8_APPEND and compare the buffer with the tables above. */
+    uint8_t buffer[100];
+    UChar32 c;
+    int32_t i, length;
+    UBool isError, expectIsError, wrongIsError;
+    /* Pass 1: U8_APPEND_UNSAFE, which is only given values in 0..0x10ffff (surrogates included). */
+    length=0;
+    for(i=0; i<LENGTHOF(codePoints); ++i) {
+        c=codePoints[i];
+        if(c<0 || 0x10ffff<c) {
+            continue; /* skip non-code points for U8_APPEND_UNSAFE */
+        }
+
+        U8_APPEND_UNSAFE(buffer, length, c);
+    }
+    if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)) {
+        log_err("U8_APPEND_UNSAFE did not generate the expected output\n");
+    }
+    /* Pass 2: U8_APPEND with the buffer capacity; isError is reset and compared for every input. */
+    length=0;
+    wrongIsError=FALSE;
+    for(i=0; i<LENGTHOF(codePoints); ++i) {
+        c=codePoints[i];
+        expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
+        isError=FALSE;
+        /* expectSafe leaves out the surrogates, in line with expectIsError flagging them. */
+        U8_APPEND(buffer, length, LENGTHOF(buffer), c, isError);
+        wrongIsError|= isError!=expectIsError;
+    }
+    if(wrongIsError) {
+        log_err("U8_APPEND did not set isError correctly\n");
+    }
+    if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) {
+        log_err("U8_APPEND did not generate the expected output\n");
+    }
+}
+
 static void printUChars(const uint8_t *uchars, int16_t len){
    int16_t i=0;
    for(i=0; i<len; i++){