ICU-2145 consistent behavior of binary string searches

X-SVN-Rev: 10055
2025-04-09 15:27:38 +00:00 · 2002-10-24 01:49:58 +00:00 · 2002-10-24 01:49:58 +00:00 · b1246ef900
commit b1246ef900
parent 8d1a83e3d5
5 changed files with 680 additions and 444 deletions
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@ -890,7 +890,7 @@ public:
              int32_t length) const;

  /**
-   * Locate in this the first occurrence of the code unit <TT>c</TT>, 
+   * Locate in this the first occurrence of the BMP code point <code>c</code>,
   * using bitwise comparison.
   * @param c The code unit to search for.
   * @return The offset into this of <TT>c</TT>, or -1 if not found.
@ -902,19 +902,6 @@ public:
   * Locate in this the first occurrence of the code point <TT>c</TT>, 
   * using bitwise comparison.
   *
-   * This function finds code points, which differs for BMP code points
-   * from indexOf(UChar c, ...) only for surrogates:
-   * While indexOf(UChar c, ...) finds any surrogate code units in a string,
-   * indexOf(UChar32 c, ...) finds only unmatched surrogate code points,
-   * i.e., only those that do not combine with an adjacent surrogate
-   * to form a supplementary code point.
-   * For example, in a string "\ud800\udc00" indexOf(UChar c, ...)
-   * will find code units U+d800 at 0 and U+dc00 at 1,
-   * but indexOf(UChar32 c, ...) will find neither because they
-   * combine to the code point U+10000.
-   * Either function will find U+d800 in "a\ud800b".
-   * This behavior ensures that char32At(indexOf(UChar32 c))==c.
-   *
   * @param c The code point to search for.
   * @return The offset into this of <TT>c</TT>, or -1 if not found.
   * @stable
@ -922,7 +909,7 @@ public:
  inline int32_t indexOf(UChar32 c) const;

  /**
-   * Locate in this the first occurrence of the code unit <TT>c</TT>
+   * Locate in this the first occurrence of the BMP code point <code>c</code>,
   * starting at offset <TT>start</TT>, using bitwise comparison.
   * @param c The code unit to search for.
   * @param start The offset at which searching will start.
@ -936,19 +923,6 @@ public:
   * Locate in this the first occurrence of the code point <TT>c</TT>
   * starting at offset <TT>start</TT>, using bitwise comparison.
   *
-   * This function finds code points, which differs for BMP code points
-   * from indexOf(UChar c, ...) only for surrogates:
-   * While indexOf(UChar c, ...) finds any surrogate code units in a string,
-   * indexOf(UChar32 c, ...) finds only unmatched surrogate code points,
-   * i.e., only those that do not combine with an adjacent surrogate
-   * to form a supplementary code point.
-   * For example, in a string "\ud800\udc00" indexOf(UChar c, ...)
-   * will find code units U+d800 at 0 and U+dc00 at 1,
-   * but indexOf(UChar32 c, ...) will find neither because they
-   * combine to the code point U+10000.
-   * Either function will find U+d800 in "a\ud800b".
-   * This behavior ensures that char32At(indexOf(UChar32 c))==c.
-   *
   * @param c The code point to search for.
   * @param start The offset at which searching will start.
   * @return The offset into this of <TT>c</TT>, or -1 if not found.
@ -958,7 +932,7 @@ public:
              int32_t start) const;

  /**
-   * Locate in this the first occurrence of the code unit <TT>c</TT> 
+   * Locate in this the first occurrence of the BMP code point <code>c</code>
   * in the range [<TT>start</TT>, <TT>start + length</TT>), 
   * using bitwise comparison.   
   * @param c The code unit to search for.
@ -976,19 +950,6 @@ public:
   * in the range [<TT>start</TT>, <TT>start + length</TT>), 
   * using bitwise comparison.   
   *
-   * This function finds code points, which differs for BMP code points
-   * from indexOf(UChar c, ...) only for surrogates:
-   * While indexOf(UChar c, ...) finds any surrogate code units in a string,
-   * indexOf(UChar32 c, ...) finds only unmatched surrogate code points,
-   * i.e., only those that do not combine with an adjacent surrogate
-   * to form a supplementary code point.
-   * For example, in a string "\ud800\udc00" indexOf(UChar c, ...)
-   * will find code units U+d800 at 0 and U+dc00 at 1,
-   * but indexOf(UChar32 c, ...) will find neither because they
-   * combine to the code point U+10000.
-   * Either function will find U+d800 in "a\ud800b".
-   * This behavior ensures that char32At(indexOf(UChar32 c))==c.
-   *
   * @param c The code point to search for.
   * @param start the offset into this at which to start matching
   * @param length the number of characters in this to search
@ -1112,7 +1073,7 @@ public:
              int32_t length) const;

  /**
-   * Locate in this the last occurrence of the code unit <TT>c</TT>, 
+   * Locate in this the last occurrence of the BMP code point <code>c</code>,
   * using bitwise comparison.
   * @param c The code unit to search for.
   * @return The offset into this of <TT>c</TT>, or -1 if not found.
@ -1124,19 +1085,6 @@ public:
   * Locate in this the last occurrence of the code point <TT>c</TT>, 
   * using bitwise comparison.
   *
-   * This function finds code points, which differs for BMP code points
-   * from lastIndexOf(UChar c, ...) only for surrogates:
-   * While lastIndexOf(UChar c, ...) finds any surrogate code units in a string,
-   * lastIndexOf(UChar32 c, ...) finds only unmatched surrogate code points,
-   * i.e., only those that do not combine with an adjacent surrogate
-   * to form a supplementary code point.
-   * For example, in a string "\ud800\udc00" lastIndexOf(UChar c, ...)
-   * will find code units U+d800 at 0 and U+dc00 at 1,
-   * but lastIndexOf(UChar32 c, ...) will find neither because they
-   * combine to the code point U+10000.
-   * Either function will find U+d800 in "a\ud800b".
-   * This behavior ensures that char32At(lastIndexOf(UChar32 c))==c.
-   *
   * @param c The code point to search for.
   * @return The offset into this of <TT>c</TT>, or -1 if not found.
   * @stable
@ -1144,7 +1092,7 @@ public:
  inline int32_t lastIndexOf(UChar32 c) const;

  /**
-   * Locate in this the last occurrence of the code unit <TT>c</TT>
+   * Locate in this the last occurrence of the BMP code point <code>c</code>
   * starting at offset <TT>start</TT>, using bitwise comparison.
   * @param c The code unit to search for.
   * @param start The offset at which searching will start.
@ -1158,19 +1106,6 @@ public:
   * Locate in this the last occurrence of the code point <TT>c</TT>
   * starting at offset <TT>start</TT>, using bitwise comparison.
   *
-   * This function finds code points, which differs for BMP code points
-   * from lastIndexOf(UChar c, ...) only for surrogates:
-   * While lastIndexOf(UChar c, ...) finds any surrogate code units in a string,
-   * lastIndexOf(UChar32 c, ...) finds only unmatched surrogate code points,
-   * i.e., only those that do not combine with an adjacent surrogate
-   * to form a supplementary code point.
-   * For example, in a string "\ud800\udc00" lastIndexOf(UChar c, ...)
-   * will find code units U+d800 at 0 and U+dc00 at 1,
-   * but lastIndexOf(UChar32 c, ...) will find neither because they
-   * combine to the code point U+10000.
-   * Either function will find U+d800 in "a\ud800b".
-   * This behavior ensures that char32At(lastIndexOf(UChar32 c))==c.
-   *
   * @param c The code point to search for.
   * @param start The offset at which searching will start.
   * @return The offset into this of <TT>c</TT>, or -1 if not found.
@ -1180,7 +1115,7 @@ public:
              int32_t start) const;

  /**
-   * Locate in this the last occurrence of the code unit <TT>c</TT> 
+   * Locate in this the last occurrence of the BMP code point <code>c</code>
   * in the range [<TT>start</TT>, <TT>start + length</TT>), 
   * using bitwise comparison.   
   * @param c The code unit to search for.
@ -1198,19 +1133,6 @@ public:
   * in the range [<TT>start</TT>, <TT>start + length</TT>), 
   * using bitwise comparison.   
   *
-   * This function finds code points, which differs for BMP code points
-   * from lastIndexOf(UChar c, ...) only for surrogates:
-   * While lastIndexOf(UChar c, ...) finds any surrogate code units in a string,
-   * lastIndexOf(UChar32 c, ...) finds only unmatched surrogate code points,
-   * i.e., only those that do not combine with an adjacent surrogate
-   * to form a supplementary code point.
-   * For example, in a string "\ud800\udc00" lastIndexOf(UChar c, ...)
-   * will find code units U+d800 at 0 and U+dc00 at 1,
-   * but lastIndexOf(UChar32 c, ...) will find neither because they
-   * combine to the code point U+10000.
-   * Either function will find U+d800 in "a\ud800b".
-   * This behavior ensures that char32At(lastIndexOf(UChar32 c))==c.
-   *
   * @param c The code point to search for.
   * @param start the offset into this at which to start matching
   * @param length the number of characters in this to search
@ -2981,7 +2903,6 @@ private:
            int32_t start,
            int32_t length) const;

-  // only for c>=0xd800
  int32_t doIndexOf(UChar32 c,
                        int32_t start,
                        int32_t length) const;
@ -2990,7 +2911,6 @@ private:
                int32_t start,
                int32_t length) const;

-  // only for c>=0xd800
  int32_t doLastIndexOf(UChar32 c,
                            int32_t start,
                            int32_t length) const;
@ -3490,11 +3410,7 @@ inline int32_t
 UnicodeString::indexOf(UChar32 c,
               int32_t start,
               int32_t length) const {
-  if((uint32_t)c<0xd800) {
-    return doIndexOf((UChar)c, start, length);
-  } else {
-    return doIndexOf(c, start, length);
-  }
+  return doIndexOf(c, start, length);
 }

 inline int32_t 
@ -3571,11 +3487,7 @@ inline int32_t
 UnicodeString::lastIndexOf(UChar32 c,
               int32_t start,
               int32_t length) const {
-  if((uint32_t)c<0xd800) {
-    return doLastIndexOf((UChar)c, start, length);
-  } else {
-    return doLastIndexOf(c, start, length);
-  }
+  return doLastIndexOf(c, start, length);
 }

 inline UBool 
--- a/icu4c/source/common/unicode/ustring.h
+++ b/icu4c/source/common/unicode/ustring.h
@ -27,7 +27,7 @@
 * \file
 * \brief C API: Unicode string handling functions
 *
- * These C API functions provide Unicode string handling.
+ * These C API functions provide general Unicode string handling.
 *
 * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
 * functions. (For example, they do not check for bad arguments like NULL string pointers.)
@ -39,25 +39,32 @@
 *
 * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
 * UTF-16 encodes each Unicode code point with either one or two UChar code units.
- * Some APIs accept a 32-bit UChar32 value for a single code point.
 * (This is the default form of Unicode, and a forward-compatible extension of the original,
 * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
 * in 1996.)
 *
+ * Some APIs accept a 32-bit UChar32 value for a single code point.
+ *
+ * ICU also handles 16-bit Unicode text with unpaired surrogates.
+ * Such text is not well-formed UTF-16.
+ * Code-point-related functions treat unpaired surrogates as surrogate code points,
+ * i.e., as separate units.
+ *
 * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
 * it is much more efficient even for random access because the code unit values
 * for single-unit characters vs. lead units vs. trail units are completely disjoint.
 * This means that it is easy to determine character (code point) boundaries from
 * random offsets in the string.
- * (It also means, e.g., that u_strstr() does not need to verify that a match was
- * found on actual character boundaries; with some legacy encodings, strstr() may need to
- * scan back to the start of the text to verify this.)
 *
 * Unicode (UTF-16) string processing is optimized for the single-unit case.
 * Although it is important to support supplementary characters
 * (which use pairs of lead/trail code units called "surrogates"),
 * their occurrence is rare. Almost all characters in modern use require only
 * a single UChar code unit (i.e., their code point values are <=0xffff).
+ *
+ * For more details see the User Guide Strings chapter (http://oss.software.ibm.com/icu/userguide/strings.html).
+ * For a discussion of the handling of unpaired surrogates see also
+ * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
 */

 /**
@ -137,59 +144,180 @@ u_strncat(UChar     *dst,
     const UChar     *src, 
     int32_t     n);

-/**
- * Find the first occurrence of a specified character in a ustring.
- *
- * @param s The string to search.
- * @param c The character to find.
- * @return A pointer to the first occurrence of <TT>c</TT> in <TT>s</TT>,
- * or a null pointer if <TT>s</TT> does not contain <TT>c</TT>.
- * @stable
- */
-U_CAPI UChar*  U_EXPORT2
-u_strchr(const UChar     *s, 
-    UChar     c);
-
 /**
 * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
 *
- * @param s The string to search.
- * @param substring The substring to find
- * @return A pointer to the first occurrence of <TT>substring</TT> in 
- * <TT>s</TT>, or a null pointer if <TT>substring</TT>
- * is not in <TT>s</TT>.
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
 * @stable
+ *
+ * @see u_strrstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
 */
 U_CAPI UChar * U_EXPORT2
 u_strstr(const UChar *s, const UChar *substring);

 /**
- * Find the first occurence of a specified code point in a string.
- *
- * This function finds code points, which differs for BMP code points
- * from u_strchr() only for surrogates:
- * While u_strchr() finds any surrogate code units in a string,
- * u_strchr32() finds only unmatched surrogate code points,
- * i.e., only those that do not combine with an adjacent surrogate
- * to form a supplementary code point.
- * For example, in a string "\ud800\udc00" u_strchr()
- * will find code units U+d800 at 0 and U+dc00 at 1,
- * but u_strchr32() will find neither because they
- * combine to the code point U+10000.
- * Either function will find U+d800 in "a\ud800b".
- * This behavior ensures that U16_GET(u_strchr32(c))==c.
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
 *
 * @param s The string to search.
- * @param c The code point (0..0x10ffff) to find.
- * @return A pointer to the first occurrence of <TT>c</TT> in <TT>s</TT>,
- * or a null pointer if there is no such character.
- * If <TT>c</TT> is represented with several UChars, then the returned
- * pointer will point to the first of them.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (must be NUL-terminated if subLength is -1).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
 * @stable
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_CAPI UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strstr
+ * @see u_strFindFirst
 */
 U_CAPI UChar * U_EXPORT2
 u_strchr32(const UChar *s, UChar32 c);

+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable
+ *
+ * @see u_strstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (must be NUL-terminated if subLength is -1).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable
+ *
+ * @see u_strrstr
+ * @see u_strFindFirst
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c);
+
 /**
 * Locates the first occurrence in the string str of any of the characters
 * in the string accept.
@ -621,46 +749,84 @@ U_CAPI int32_t U_EXPORT2
 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);

 /**
- * Search for a UChar within a Unicode string until <TT>count</TT>
- * is reached.
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
 *
- * @param src string to search in
- * @param ch character to find
- * @param count maximum number of UChars in <TT>src</TT>to search for
- *      <TT>ch</TT>.
- * @return A pointer within src, pointing to <TT>ch</TT>, or NULL if it
- *      was not found.
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
 * @stable
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strFindFirst
 */
 U_CAPI UChar* U_EXPORT2
-u_memchr(const UChar *src, UChar ch, int32_t count);
+u_memchr(const UChar *s, UChar c, int32_t count);

 /**
- * Find the first occurence of a specified code point in a string.
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
 *
- * This function finds code points, which differs for BMP code points
- * from u_memchr() only for surrogates:
- * While u_memchr() finds any surrogate code units in a string,
- * u_memchr32() finds only unmatched surrogate code points,
- * i.e., only those that do not combine with an adjacent surrogate
- * to form a supplementary code point.
- * For example, in a string "\ud800\udc00" u_memchr()
- * will find code units U+d800 at 0 and U+dc00 at 1,
- * but u_memchr32() will find neither because they
- * combine to the code point U+10000.
- * Either function will find U+d800 in "a\ud800b".
- * This behavior ensures that U16_GET(u_memchr32(c))==c.
- *
- * @param src string to search in
- * @param ch character to find
- * @param count maximum number of UChars in <TT>src</TT>to search for
- *      <TT>ch</TT>.
- * @return A pointer within src, pointing to <TT>ch</TT>, or NULL if it
- *      was not found.
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
 * @stable
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strFindFirst
 */
 U_CAPI UChar* U_EXPORT2
-u_memchr32(const UChar *src, UChar32 ch, int32_t count);
+u_memchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strFindLast
+ */
+U_CAPI UChar* U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strFindLast
+ */
+U_CAPI UChar* U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count);

 /**
 * Unicode String literals in C.
--- a/icu4c/source/common/unistr.cpp
+++ b/icu4c/source/common/unistr.cpp
@ -814,42 +814,21 @@ UnicodeString::indexOf(const UChar *srcChars,
    return -1;
  }

-  // get the srcLength if necessary
-  if(srcLength < 0) {
-    srcLength = u_strlen(srcChars + srcStart);
-    if(srcLength == 0) {
-      return -1;
-    }
+  // UnicodeString does not find empty substrings
+  if(srcLength < 0 && srcChars[srcStart] == 0) {
+    return -1;
  }

-  // now we will only work with srcLength-1
-  --srcLength;
-
  // get the indices within bounds
  pinIndices(start, length);

-  // set length for the last possible match start position
-  // note the --srcLength above
-  length -= srcLength;
-
-  if(length <= 0) {
+  // find the first occurrence of the substring
+  const UChar *match = u_strFindFirst(fArray + start, length, srcChars + srcStart, srcLength);
+  if(match == NULL) {
    return -1;
+  } else {
+    return match - fArray;
  }
-
-  const UChar *array = getArrayStart();
-  int32_t limit = start + length;
-
-  // search for the first char, then compare the rest of the string
-  // increment srcStart here for that, matching the --srcLength above
-  UChar ch = srcChars[srcStart++];
-
-  do {
-    if(array[start] == ch && (srcLength == 0 || compare(start + 1, srcLength, srcChars, srcStart, srcLength) == 0)) {
-      return start;
-    }
-  } while(++start < limit);
-
-  return -1;
 }

 int32_t
@ -859,21 +838,14 @@ UnicodeString::doIndexOf(UChar c,
 {
  // pin indices
  pinIndices(start, length);
-  if(length == 0) {
-    return -1;
-  }

  // find the first occurrence of c
-  const UChar *begin = getArrayStart() + start;
-  const UChar *limit = begin + length;
-
-  do {
-    if(*begin == c) {
-      return (int32_t)(begin - getArrayStart());
-    }
-  } while(++begin < limit);
-
-  return -1;
+  const UChar *match = u_memchr(fArray + start, c, length);
+  if(match == NULL) {
+    return -1;
+  } else {
+    return match - fArray;
+  }
 }

 int32_t
@ -882,26 +854,13 @@ UnicodeString::doIndexOf(UChar32 c,
                         int32_t length) const {
  // pin indices
  pinIndices(start, length);
-  if(length == 0) {
-    return -1;
-  }

-  // c<0xd800 handled by inline function indexOf(UChar32 c, start, length)
-  if(c<=0xdfff) {
-    // surrogate code point
-    const UChar *t = uprv_strFindSurrogate(fArray + start, length, (UChar)c);
-    if(t != 0) {
-      return (int32_t)(t - fArray);
-    } else {
-      return -1;
-    }
-  } else if(c<=0xffff) {
-    // non-surrogate BMP code point
-    return doIndexOf((UChar)c, start, length);
+  // find the first occurrence of c
+  const UChar *match = u_memchr32(fArray + start, c, length);
+  if(match == NULL) {
+    return -1;
  } else {
-    // supplementary code point, search for string
-    UChar buffer[2] = { UTF16_LEAD(c), UTF16_TRAIL(c) };
-    return indexOf(buffer, 2, start, length);
+    return match - fArray;
  }
 }

@ -916,43 +875,21 @@ UnicodeString::lastIndexOf(const UChar *srcChars,
    return -1;
  }

-  // get the srcLength if necessary
-  if(srcLength < 0) {
-    srcLength = u_strlen(srcChars + srcStart);
-    if(srcLength == 0) {
-      return -1;
-    }
+  // UnicodeString does not find empty substrings
+  if(srcLength < 0 && srcChars[srcStart] == 0) {
+    return -1;
  }

-  // now we will only work with srcLength-1
-  --srcLength;
-
  // get the indices within bounds
  pinIndices(start, length);

-  // set length for the last possible match start position
-  // note the --srcLength above
-  length -= srcLength;
-
-  if(length <= 0) {
+  // find the last occurrence of the substring
+  const UChar *match = u_strFindLast(fArray + start, length, srcChars + srcStart, srcLength);
+  if(match == NULL) {
    return -1;
+  } else {
+    return match - fArray;
  }
-
-  const UChar *array = getArrayStart();
-  int32_t pos;
-
-  // search for the first char, then compare the rest of the string
-  // increment srcStart here for that, matching the --srcLength above
-  UChar ch = srcChars[srcStart++];
-
-  pos = start + length;
-  do {
-    if(array[--pos] == ch && (srcLength == 0 || compare(pos + 1, srcLength, srcChars, srcStart, srcLength) == 0)) {
-      return pos;
-    }
-  } while(pos > start);
-
-  return -1;
 }

 int32_t
@ -966,20 +903,14 @@ UnicodeString::doLastIndexOf(UChar c,

  // pin indices
  pinIndices(start, length);
-  if(length == 0) {
+
+  // find the last occurrence of c
+  const UChar *match = u_memrchr(fArray + start, c, length);
+  if(match == NULL) {
    return -1;
+  } else {
+    return match - fArray;
  }
-
-  const UChar *begin = getArrayStart() + start;
-  const UChar *limit = begin + length;
-
-  do {
-    if(*--limit == c) {
-      return (int32_t)(limit - getArrayStart());
-    }
-  } while(limit > begin);
-
-  return -1;
 }

 int32_t
@ -988,26 +919,13 @@ UnicodeString::doLastIndexOf(UChar32 c,
                             int32_t length) const {
  // pin indices
  pinIndices(start, length);
-  if(length == 0) {
-    return -1;
-  }

-  // c<0xd800 handled by inline function lastIndexOf(UChar32 c, start, length)
-  if(c<=0xdfff) {
-    // surrogate code point
-    const UChar *t = uprv_strFindLastSurrogate(fArray + start, length, (UChar)c);
-    if(t != 0) {
-      return (int32_t)(t - fArray);
-    } else {
-      return -1;
-    }
-  } else if(c<=0xffff) {
-    // non-surrogate BMP code point
-    return doLastIndexOf((UChar)c, start, length);
+  // find the last occurrence of c
+  const UChar *match = u_memrchr32(fArray + start, c, length);
+  if(match == NULL) {
+    return -1;
  } else {
-    // supplementary code point, search for string
-    UChar buffer[2] = { UTF16_LEAD(c), UTF16_TRAIL(c) };
-    return lastIndexOf(buffer, 2, start, length);
+    return match - fArray;
  }
 }

--- a/icu4c/source/common/ustr_imp.h
+++ b/icu4c/source/common/ustr_imp.h
@ -241,20 +241,4 @@ u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCod

 #define u_getMaxCaseExpansion() 10

-/**
- * Find a single (unmatched) surrogate code point in the string s[0..length[ .
- * Find the first such surrogate.
- * @internal
- */
-U_CFUNC const UChar *
-uprv_strFindSurrogate(const UChar *s, int32_t length, UChar surrogate);
-
-/**
- * Find a single (unmatched) surrogate code point in the string s[0..length[ .
- * Find the last such surrogate.
- * @internal
- */
-U_CFUNC const UChar *
-uprv_strFindLastSurrogate(const UChar *s, int32_t length, UChar surrogate);
-
 #endif
--- a/icu4c/source/common/ustring.c
+++ b/icu4c/source/common/ustring.c
@ -34,129 +34,427 @@ static UConverter *gDefaultConverter = NULL;

 #define MAX_STRLEN 0x0FFFFFFF

-/* ---- String searching functions ---- */
+/* Forward binary string search functions ----------------------------------- */

-U_CAPI UChar* U_EXPORT2
-u_strchr(const UChar *s, UChar c) 
-{
-  while (*s && *s != c) {
-    ++s;
-  }
-  if (*s == c)
-    return (UChar *)s;
-  return NULL;
-}
-
-/* A Boyer-Moore algorithm would be better, but that would require a hashtable
-   because UChar is so big. This algorithm doesn't use a lot of extra memory.
- */
-U_CAPI UChar * U_EXPORT2
-u_strstr(const UChar *s, const UChar *substring) {
-
-  UChar *strItr, *subItr;
-
-  if (*substring == 0) {
-    return (UChar *)s;
-  }
-
-  do {
-    strItr = (UChar *)s;
-    subItr = (UChar *)substring;
-
-    /* Only one string iterator needs checking for null terminator */
-    while ((*strItr != 0) && (*strItr == *subItr)) {
-      strItr++;
-      subItr++;
-    }
-
-    if (*subItr == 0) {             /* Was the end of the substring reached? */
-      return (UChar *)s;
-    }
-
-    s++;
-  } while (*strItr != 0);           /* Was the end of the string reached? */
-
-  return NULL;                      /* No match */
-}
-
-/**
- * Check if there is an unmatched surrogate c in a string [start..limit[ at s.
- * start<=s<limit or limit==NULL
- * @return TRUE if *s is unmatched
+/*
+ * Test if the match [match..matchLimit[ inside the string [start..limit[ lies on code point boundaries.
+ * All pointers refer to the same buffer, and the match must not be empty (matchLimit[-1] is read).
+ * The limit pointer may be NULL (NUL-terminated string), all others must be real pointers.
  */
 static U_INLINE UBool
-uprv_isSingleSurrogate(const UChar *start, const UChar *s, UChar c, const UChar *limit) {
-    if(UTF_IS_SURROGATE_FIRST(c)) {
-        ++s;
-        return (UBool)(s==limit || !UTF_IS_TRAIL(*s));
-    } else {
-        return (UBool)(s==start || !UTF_IS_LEAD(*(s-1)));
+isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
+    if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
+        /* the leading edge of the match is in the middle of a surrogate pair */
+        return FALSE;
     }
+    if(U16_IS_LEAD(*(matchLimit-1)) && matchLimit!=limit && U16_IS_TRAIL(*matchLimit)) {
+        /* the trailing edge of the match is in the middle of a surrogate pair; *matchLimit is read only when it is before limit */
+        return FALSE;
+    }
+    return TRUE;
 }

-U_CFUNC const UChar *
-uprv_strFindSurrogate(const UChar *s, int32_t length, UChar surrogate) {
-    const UChar *limit, *t;
-    UChar c;
+U_CAPI UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length,
+               const UChar *sub, int32_t subLength) {
+    const UChar *start, *p, *q, *subLimit;
+    UChar c, cs, cq;

-    if(length>=0) {
-        limit=s+length;
-    } else {
-        limit=NULL;
+    if(sub==NULL || subLength<-1) {
+        return (UChar *)s; /* a missing or invalid substring is treated like an empty one: match at s */
+    }
+    if(s==NULL || length<-1) {
+        return NULL; /* no valid string to search in */
     }

-    for(t=s; t!=limit && ((c=*t)!=0 || limit!=NULL); ++t) {
-        if(c==surrogate && uprv_isSingleSurrogate(s, t, c, limit)) {
-            return t;
+    start=s;
+
+    if(length<0 && subLength<0) {
+        /* both strings are NUL-terminated; search without computing either length */
+        if((cs=*sub++)==0) {
+            return (UChar *)s;
+        }
+        if(*sub==0 && !U16_IS_SURROGATE(cs)) {
+            /* the substring consists of a single, non-surrogate BMP code point */
+            return u_strchr(s, cs);
+        }
+
+        while((c=*s++)!=0) {
+            if(c==cs) {
+                /* found first substring UChar, compare rest (s already points past the candidate start) */
+                p=s;
+                q=sub;
+                for(;;) {
+                    if((cq=*q)==0) {
+                        if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
+                            return (UChar *)(s-1); /* well-formed match */
+                        } else {
+                            break; /* no match because surrogate pair is split */
+                        }
+                    }
+                    if((c=*p)==0) {
+                        return NULL; /* s ended before sub did: no match here, and none possible after s */
+                    }
+                    if(c!=cq) {
+                        break; /* no match */
+                    }
+                    ++p;
+                    ++q;
+                }
+            }
+        }
+
+        /* not found */
+        return NULL;
+    }
+
+    if(subLength<0) {
+        subLength=u_strlen(sub);
+    }
+    if(subLength==0) {
+        return (UChar *)s;
+    }
+
+    /* take sub[0] as the search key; from here on sub/subLength describe only the rest of the substring */
+    cs=*sub++;
+    --subLength;
+    subLimit=sub+subLength;
+
+    if(subLength==0 && !U16_IS_SURROGATE(cs)) {
+        /* the substring consists of a single, non-surrogate BMP code point */
+        return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
+    }
+
+    if(length<0) {
+        /* s is NUL-terminated */
+        while((c=*s++)!=0) {
+            if(c==cs) {
+                /* found first substring UChar, compare rest (s already points past the candidate start) */
+                p=s;
+                q=sub;
+                for(;;) {
+                    if(q==subLimit) {
+                        if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
+                            return (UChar *)(s-1); /* well-formed match */
+                        } else {
+                            break; /* no match because surrogate pair is split */
+                        }
+                    }
+                    if((c=*p)==0) {
+                        return NULL; /* s ended before sub did: no match here, and none possible after s */
+                    }
+                    if(c!=*q) {
+                        break; /* no match */
+                    }
+                    ++p;
+                    ++q;
+                }
+            }
+        }
+    } else {
+        const UChar *limit, *preLimit;
+
+        /* subLength is now one less than the full substring length, so <= is the "s too short" test */
+        if(length<=subLength) {
+            return NULL; /* s is shorter than sub */
+        }
+
+        limit=s+length;
+
+        /* a match must start before preLimit so that the rest of sub still fits before limit */
+        preLimit=limit-subLength;
+
+        while(s!=preLimit) {
+            c=*s++;
+            if(c==cs) {
+                /* found first substring UChar, compare rest; p cannot pass limit because s started before preLimit */
+                p=s;
+                q=sub;
+                for(;;) {
+                    if(q==subLimit) {
+                        if(isMatchAtCPBoundary(start, s-1, p, limit)) {
+                            return (UChar *)(s-1); /* well-formed match */
+                        } else {
+                            break; /* no match because surrogate pair is split */
+                        }
+                    }
+                    if(*p!=*q) {
+                        break; /* no match */
+                    }
+                    ++p;
+                    ++q;
+                }
+            }
         }
     }

+    /* not found */
     return NULL;
 }

-U_CFUNC const UChar *
-uprv_strFindLastSurrogate(const UChar *s, int32_t length, UChar surrogate) {
-    const UChar *limit, *t;
-    UChar c;
+U_CAPI UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring) {
+    return u_strFindFirst(s, -1, substring, -1); /* -1 lengths: both strings are NUL-terminated */
+}

-    if(length>=0) {
-        limit=s+length;
+U_CAPI UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c) {
+    if(U16_IS_SURROGATE(c)) {
+        /* search a surrogate as a one-unit substring, to not find half of a surrogate pair */
+        return u_strFindFirst(s, -1, &c, 1);
     } else {
-        limit=s+u_strlen(s);
-    }
+        UChar cs;

-    for(t=limit; t!=s;) {
-        c=*--t;
-        if(c==surrogate && uprv_isSingleSurrogate(s, t, c, limit)) {
-            return t;
+        /* plain forward scan; equality is tested before the NUL check, so c==0 finds the terminator */
+        for(;;) {
+            if((cs=*s)==c) {
+                return (UChar *)s;
+            }
+            if(cs==0) {
+                return NULL;
+            }
+            ++s;
         }
     }
-
-    return NULL;
 }

 U_CAPI UChar * U_EXPORT2
 u_strchr32(const UChar *s, UChar32 c) {
-  if(c < 0xd800) {
-    /* non-surrogate BMP code point */
-    return u_strchr(s, (UChar)c);
-  } else if(c <= 0xdfff) {
-    /* surrogate code point */
-    return (UChar *)uprv_strFindSurrogate(s, -1, (UChar)c);
-  } else if(c <= 0xffff) {
-    /* non-surrogate BMP code point */
-    return u_strchr(s, (UChar)c);
-  } else {
-    /* supplementary code point, search for string */
-    UChar buffer[3];
+    if((uint32_t)c<=0xffff) {
+        /* BMP code point (surrogates included): a single code unit, handled by u_strchr() */
+        return u_strchr(s, (UChar)c);
+    } else if((uint32_t)c<=0x10ffff) {
+        /* supplementary code point: look for its lead unit immediately followed by its trail unit */
+        UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);

-    buffer[0] = UTF16_LEAD(c);
-    buffer[1] = UTF16_TRAIL(c);
-    buffer[2] = 0;
-    return u_strstr(s, buffer);
-  }
+        while((cs=*s++)!=0) {
+            if(cs==lead && *s==trail) {
+                return (UChar *)(s-1);
+            }
+        }
+        return NULL;
+    } else {
+        /* above U+10ffff, or negative (large after the uint32_t cast): not a Unicode code point, not findable */
+        return NULL;
+    }
 }

+U_CAPI UChar * U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count) {
+    if(count<=0) {
+        return NULL; /* empty or negative-length string: nothing to find */
+    } else if(U16_IS_SURROGATE(c)) {
+        /* search a surrogate as a one-unit substring, to not find half of a surrogate pair */
+        return u_strFindFirst(s, count, &c, 1);
+    } else {
+        /* forward scan of exactly count code units; count>0 here, so the do-while never reads past limit */
+        const UChar *limit=s+count;
+        do {
+            if(*s==c) {
+                return (UChar *)s;
+            }
+        } while(++s!=limit);
+        return NULL;
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count) {
+    if((uint32_t)c<=0xffff) {
+        /* BMP code point (surrogates included): a single code unit, handled by u_memchr() */
+        return u_memchr(s, (UChar)c, count);
+    } else if(count<2) {
+        /* too short for a surrogate pair (also covers count<=0) */
+        return NULL;
+    } else if((uint32_t)c<=0x10ffff) {
+        /* supplementary code point: look for its lead unit immediately followed by its trail unit */
+        const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
+        UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+        do {
+            if(*s==lead && *(s+1)==trail) {
+                return (UChar *)s;
+            }
+        } while(++s!=limit);
+        return NULL;
+    } else {
+        /* above U+10ffff, or negative (large after the uint32_t cast): not a Unicode code point, not findable */
+        return NULL;
+    }
+}
+
+/* Backward binary string search functions ---------------------------------- */
+
+U_CAPI UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length,
+              const UChar *sub, int32_t subLength) {
+    const UChar *start, *limit, *p, *q, *subLimit;
+    UChar c, cs;
+
+    if(sub==NULL || subLength<-1) {
+        return (UChar *)s; /* a missing or invalid substring is treated like an empty one: match at s */
+    }
+    if(s==NULL || length<-1) {
+        return NULL; /* no valid string to search in */
+    }
+
+    /*
+     * This implementation is more lazy than the one for u_strFindFirst():
+     * There is no special search code for NUL-terminated strings.
+     * It does not seem to be worth it for searching substrings to
+     * search forward and find all matches like in u_strrchr() and similar.
+     * Therefore, we simply get both string lengths and search backward.
+     *
+     * markus 2002oct23
+     */
+
+    if(subLength<0) {
+        subLength=u_strlen(sub);
+    }
+    if(subLength==0) {
+        return (UChar *)s;
+    }
+
+    /* take sub[subLength-1] as the search key; subLimit then points at it and subLength counts the units before it */
+    subLimit=sub+subLength;
+    cs=*(--subLimit);
+    --subLength;
+
+    if(subLength==0 && !U16_IS_SURROGATE(cs)) {
+        /* the substring consists of a single, non-surrogate BMP code point */
+        return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
+    }
+
+    if(length<0) {
+        length=u_strlen(s);
+    }
+
+    /* subLength is now one less than the full substring length, so <= is the "s too short" test */
+    if(length<=subLength) {
+        return NULL; /* s is shorter than sub */
+    }
+
+    start=s;
+    limit=s+length;
+
+    /* the last unit of a match cannot lie before start+subLength; s becomes the backward-scan stop */
+    s+=subLength;
+
+    while(s!=limit) {
+        c=*(--limit);
+        if(c==cs) {
+            /* found last substring UChar, compare rest */
+            p=limit;
+            q=subLimit;
+            for(;;) {
+                if(q==sub) {
+                    if(isMatchAtCPBoundary(start, p, limit+1, start+length)) { /* string limit is start+length; s was advanced above */
+                        return (UChar *)p; /* well-formed match */
+                    } else {
+                        break; /* no match because surrogate pair is split */
+                    }
+                }
+                if(*(--p)!=*(--q)) {
+                    break; /* no match */
+                }
+            }
+        }
+    }
+
+    /* not found */
+    return NULL;
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c) {
+    if(U16_IS_SURROGATE(c)) {
+        /* search a surrogate as a one-unit substring, to not find half of a surrogate pair */
+        return u_strFindLast(s, -1, &c, 1);
+    } else {
+        const UChar *result=NULL;
+        UChar cs;
+
+        /* forward scan that remembers the most recent hit; c==0 yields the terminating NUL */
+        for(;;) {
+            if((cs=*s)==c) {
+                result=s;
+            }
+            if(cs==0) {
+                return (UChar *)result;
+            }
+            ++s;
+        }
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c) {
+    if((uint32_t)c<=0xffff) {
+        /* BMP code point (surrogates included): a single code unit, handled by u_strrchr() */
+        return u_strrchr(s, (UChar)c);
+    } else if((uint32_t)c<=0x10ffff) {
+        /* supplementary code point: scan forward and remember the last lead+trail pair seen */
+        const UChar *result=NULL;
+        UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+        while((cs=*s++)!=0) {
+            if(cs==lead && *s==trail) {
+                result=s-1;
+            }
+        }
+        return (UChar *)result;
+    } else {
+        /* above U+10ffff, or negative (large after the uint32_t cast): not a Unicode code point, not findable */
+        return NULL;
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count) {
+    if(count<=0) {
+        return NULL; /* empty or negative-length string: nothing to find */
+    } else if(U16_IS_SURROGATE(c)) {
+        /* search a surrogate as a one-unit substring, to not find half of a surrogate pair */
+        return u_strFindLast(s, count, &c, 1);
+    } else {
+        /* backward scan from the last unit down to s; count>0 here, so the first --limit stays inside the string */
+        const UChar *limit=s+count;
+        do {
+            if(*(--limit)==c) {
+                return (UChar *)limit;
+            }
+        } while(s!=limit);
+        return NULL;
+    }
+}
+
+U_CAPI UChar * U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
+    if((uint32_t)c<=0xffff) {
+        /* BMP code point (surrogates included): a single code unit, handled by u_memrchr() */
+        return u_memrchr(s, (UChar)c, count);
+    } else if(count<2) {
+        /* too short for a surrogate pair (also covers count<=0) */
+        return NULL;
+    } else if((uint32_t)c<=0x10ffff) {
+        /* supplementary code point: scan backward for its trail unit immediately preceded by its lead unit */
+        const UChar *limit=s+count-1; /* points at the last code unit, the first trail candidate */
+        UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
+
+        do {
+            if(*limit==trail && *(limit-1)==lead) {
+                return (UChar *)(limit-1);
+            }
+        } while(s!=--limit); /* stop before limit==s: a trail unit at s has no room for a lead before it */
+        return NULL;
+    } else {
+        /* above U+10ffff, or negative (large after the uint32_t cast): not a Unicode code point, not findable */
+        return NULL;
+    }
+}
+
+/* Tokenization functions --------------------------------------------------- */
+
 /*
 * Match each code point in a string against each code point in the matchSet.
 * Return the index of the first string code point that
@ -321,6 +619,8 @@ u_strtok_r(UChar    *src,
    return NULL;
 }

+/* Miscellaneous functions -------------------------------------------------- */
+
 U_CAPI UChar* U_EXPORT2
 u_strcat(UChar     *dst, 
    const UChar     *src)
@ -754,50 +1054,6 @@ u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
    return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
 }

-U_CAPI UChar * U_EXPORT2
-u_memchr(const UChar *src, UChar ch, int32_t count) {
-    if(count > 0) {
-        const UChar *ptr = src;
-        const UChar *limit = src + count;
-
-        do {
-            if (*ptr == ch) {
-                return (UChar *)ptr;
-            }
-        } while (++ptr < limit);
-    }
-    return NULL;
-}
-
-U_CAPI UChar * U_EXPORT2
-u_memchr32(const UChar *src, UChar32 ch, int32_t count) {
-    if(count<=0 || (uint32_t)ch>0x10ffff) {
-        return NULL; /* no string, or illegal arguments */
-    }
-
-    if(ch<0xd800) {
-        /* non-surrogate BMP code point */
-        return u_memchr(src, (UChar)ch, count); /* BMP, single UChar */
-    } else if(ch<=0xdfff) {
-        /* surrogate code point */
-        return (UChar *)uprv_strFindSurrogate(src, count, (UChar)ch);
-    } else if(ch<=0xffff) {
-        return u_memchr(src, (UChar)ch, count); /* BMP, single UChar */
-    } else if(count<2) {
-        return NULL; /* too short for a surrogate pair */
-    } else {
-        const UChar *limit=src+count-1; /* -1 so that we do not need a separate check for the trail unit */
-        UChar lead=UTF16_LEAD(ch), trail=UTF16_TRAIL(ch);
-
-        do {
-            if(*src==lead && *(src+1)==trail) {
-                return (UChar *)src;
-            }
-        } while(++src<limit);
-        return NULL;
-    }
-}
-
 /* conversions between char* and UChar* ------------------------------------- */

 /*