ICU-5869 Properly NULL terminate strings that are not byte based, like UTF-16 or UTF-32.

X-SVN-Rev: 22476
2025-04-08 06:53:45 +00:00 · 2007-08-22 23:39:11 +00:00 · 2007-08-22 23:39:11 +00:00 · 8620a4a530
commit 8620a4a530
parent 762a820053
15 changed files with 167 additions and 66 deletions
--- a/icu4c/source/common/ucasemap.c
+++ b/icu4c/source/common/ucasemap.c
@ -509,7 +509,7 @@ caseMap(const UCaseMap *csm,
        }
    }

-    return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
+    return u_terminateChars((char *)dest, destCapacity, destLength, 1, pErrorCode);
 }

 /* public API functions */
--- a/icu4c/source/common/ucnv.c
+++ b/icu4c/source/common/ucnv.c
@ -1757,7 +1757,7 @@ ucnv_fromUChars(UConverter *cnv,
        destLength=0;
    }

-    return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
+    return u_terminateChars(originalDest, destCapacity, destLength, ucnv_getMinCharSize(cnv), pErrorCode);
 }

 U_CAPI int32_t U_EXPORT2
@ -2410,7 +2410,7 @@ ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,

    /* if there is no input data, we're done */
    if(source==sourceLimit) {
-        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
+        return u_terminateChars(target, targetCapacity, 0, ucnv_getMinCharSize(outConverter), pErrorCode);
    }

    pivot=pivot2=pivotBuffer;
@ -2454,7 +2454,7 @@ ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);

        /* done with preflighting, set warnings and errors as appropriate */
-        return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
+        return u_terminateChars(target, targetCapacity, targetLength, ucnv_getMinCharSize(outConverter), pErrorCode);
    }

    /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
@ -2465,10 +2465,11 @@ U_CAPI int32_t U_EXPORT2
 ucnv_convert(const char *toConverterName, const char *fromConverterName,
             char *target, int32_t targetCapacity,
             const char *source, int32_t sourceLength,
-             UErrorCode *pErrorCode) {
+             UErrorCode *pErrorCode)
+{
    UConverter in, out; /* stack-allocated */
-    UConverter *inConverter, *outConverter;
-    int32_t targetLength;
+    UConverter *inConverter = NULL, *outConverter = NULL;
+    int32_t targetLength = 0;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
@ -2481,21 +2482,21 @@ ucnv_convert(const char *toConverterName, const char *fromConverterName,
        return 0;
    }

+    outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        goto cleanup;
+    }
+
    /* if there is no input data, we're done */
    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
-        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
+        targetLength = u_terminateChars(target, targetCapacity, 0, ucnv_getMinCharSize(outConverter), pErrorCode);
+        goto cleanup;
    }

    /* create the converters */
    inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-
-    outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
-    if(U_FAILURE(*pErrorCode)) {
-        ucnv_close(inConverter);
-        return 0;
+        goto cleanup;
    }

    targetLength=ucnv_internalConvert(outConverter, inConverter,
@ -2503,6 +2504,7 @@ ucnv_convert(const char *toConverterName, const char *fromConverterName,
                                      source, sourceLength,
                                      pErrorCode);

+cleanup:
    ucnv_close(inConverter);
    ucnv_close(outConverter);

@ -2534,7 +2536,7 @@ ucnv_convertAlgorithmic(UBool convertToAlgorithmic,

    /* if there is no input data, we're done */
    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
-        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
+        return u_terminateChars(target, targetCapacity, 0, ucnv_getMinCharSize(cnv), pErrorCode);
    }

    /* create the algorithmic converter */
--- a/icu4c/source/common/uloc.c
+++ b/icu4c/source/common/uloc.c
@ -774,7 +774,7 @@ _getKeywords(const char *localeID,
                *valLen = valuesLen;
            }
        }
-        return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);   
+        return u_terminateChars(keywords, keywordCapacity, keywordsLen, 1, status);
    } else {
        return 0;
    }
@ -856,14 +856,14 @@ uloc_getKeywordValue(const char* localeID,
                      startSearchHere--;
                  }
                  uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
-                  result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
+                  result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), 1, status);
              } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
                  i = (int32_t)uprv_strlen(nextSeparator);
                  while(nextSeparator[i - 1] == ' ') {
                      i--;
                  }
                  uprv_strncpy(buffer, nextSeparator, i);
-                  result = u_terminateChars(buffer, bufferCapacity, i, status);
+                  result = u_terminateChars(buffer, bufferCapacity, i, 1, status);
              } else {
                  /* give a bigger buffer, please */
                  *status = U_BUFFER_OVERFLOW_ERROR;
@ -1766,7 +1766,7 @@ _canonicalize(const char* localeID,
        uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
    }

-    return u_terminateChars(result, resultCapacity, len, err);
+    return u_terminateChars(result, resultCapacity, len, 1, err);
 }

 /* ### ID parsing API **************************************************/
@ -1796,7 +1796,7 @@ uloc_getParent(const char*    localeID,
    if(i>0 && parent != localeID) {
        uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
    }
-    return u_terminateChars(parent, parentCapacity, i, err);
+    return u_terminateChars(parent, parentCapacity, i, 1, err);
 }

 U_CAPI int32_t U_EXPORT2
@ -1817,7 +1817,7 @@ uloc_getLanguage(const char*    localeID,
    }

    i=_getLanguage(localeID, language, languageCapacity, NULL);
-    return u_terminateChars(language, languageCapacity, i, err);
+    return u_terminateChars(language, languageCapacity, i, 1, err);
 }

 U_CAPI int32_t U_EXPORT2
@ -1841,7 +1841,7 @@ uloc_getScript(const char*    localeID,
    if(_isIDSeparator(*localeID)) {
        i=_getScript(localeID+1, script, scriptCapacity, NULL);
    }
-    return u_terminateChars(script, scriptCapacity, i, err);
+    return u_terminateChars(script, scriptCapacity, i, 1, err);
 }

 U_CAPI int32_t  U_EXPORT2
@ -1874,7 +1874,7 @@ uloc_getCountry(const char* localeID,
            i=_getCountry(localeID+1, country, countryCapacity, NULL);
        }
    }
-    return u_terminateChars(country, countryCapacity, i, err);
+    return u_terminateChars(country, countryCapacity, i, 1, err);
 }

 U_CAPI int32_t  U_EXPORT2
@ -1919,7 +1919,7 @@ uloc_getVariant(const char* localeID,
        i=_getVariant(localeID+1, '@', variant, variantCapacity);
    }
 */
-    return u_terminateChars(variant, variantCapacity, i, err);
+    return u_terminateChars(variant, variantCapacity, i, 1, err);
 }

 U_CAPI int32_t  U_EXPORT2
@ -2978,7 +2978,7 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
                    uprv_free(fallbackList[j]);
                }
                uprv_free(fallbackList);
-                return u_terminateChars(result, resultAvailable, len, status);   
+                return u_terminateChars(result, resultAvailable, len, 1, status);
            }
            if(len>maxLen) {
                maxLen = len;
@ -3018,7 +3018,7 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
                            uprv_free(fallbackList[j]);
                        }
                        uprv_free(fallbackList);
-                        return u_terminateChars(result, resultAvailable, len, status);
+                        return u_terminateChars(result, resultAvailable, len, 1, status);
                    }
                }
                uenum_reset(availableLocales, status);    
--- a/icu4c/source/common/unames.c
+++ b/icu4c/source/common/unames.c
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 1999-2006, International Business Machines
+*   Copyright (C) 1999-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -1441,7 +1441,7 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
    }

    if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
-        return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
+        return u_terminateChars(buffer, bufferLength, 0, 1, pErrorCode);
    }

    length=0;
@ -1472,7 +1472,7 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
        }
    }

-    return u_terminateChars(buffer, bufferLength, length, pErrorCode);
+    return u_terminateChars(buffer, bufferLength, length, 1, pErrorCode);
 }

 U_CAPI int32_t U_EXPORT2
@ -1490,12 +1490,12 @@ u_getISOComment(UChar32 c,
    }

    if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
-        return u_terminateChars(dest, destCapacity, 0, pErrorCode);
+        return u_terminateChars(dest, destCapacity, 0, 1, pErrorCode);
    }

    /* the ISO comment is stored like a normal character name */
    length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity);
-    return u_terminateChars(dest, destCapacity, length, pErrorCode);
+    return u_terminateChars(dest, destCapacity, length, 1, pErrorCode);
 }

 U_CAPI UChar32 U_EXPORT2
--- a/icu4c/source/common/unicode/ucnv.h
+++ b/icu4c/source/common/unicode/ucnv.h
@ -1149,9 +1149,14 @@ ucnv_toUnicode(UConverter *converter,
 *                  common error codes that may be set by this function include
 *                  U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
 *                  U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
- * @return the length of the output string, not counting the terminating NUL;
- *         if the length is greater than destCapacity, then the string will not fit
- *         and a buffer of the indicated length would need to be passed in
+ * @return The length of the output string, not counting the terminating NUL.
+ *        The terminating NUL may be larger than one byte for the encodings of
+ *        some codepages, like UTF-32, where the terminating NUL is 4 bytes.
+ *        The terminating NUL is written when there is room in the dest buffer.
+ *        If the length is greater than destCapacity, then the string will not fit
+ *        and a buffer of the indicated length would need to be passed in.
+ *        This return value plus the value of ucnv_getMinCharSize() is the buffer
+ *        size, in bytes, that holds the output string and its terminating NUL.
 * @see ucnv_fromUnicode
 * @see ucnv_convert
 * @see UCNV_GET_MAX_BYTES_FOR_STRING
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@ -1468,7 +1468,10 @@ public:
   * If <TT>target</TT> is NULL, then the number of bytes required for
   * <TT>target</TT> is returned. It is assumed that the target is big enough
   * to fit all of the characters.
-   * @return the output string length, not including the terminating NUL
+   * @return the output string length, not including the terminating NUL.
+   *        The terminating NUL may be larger than one byte for the encodings of
+   *        some codepages, like UTF-32, where the terminating NUL is 4 bytes.
+   *        The terminating NUL is written when there is room in the target buffer.
   * @stable ICU 2.0
   */
  inline int32_t extract(int32_t start,
@ -1503,6 +1506,9 @@ public:
   * If <TT>target</TT> is NULL, then the number of bytes required for
   * <TT>target</TT> is returned.
   * @return the output string length, not including the terminating NUL
+   *        The terminating NUL may be larger than one byte for the encodings of
+   *        some codepages, like UTF-32, where the terminating NUL is 4 bytes.
+   *        The terminating NUL is written when there is room in the target buffer.
   * @stable ICU 2.0
   */
  int32_t extract(int32_t start,
@ -1523,9 +1529,12 @@ public:
   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
   *        or NULL for the default converter
   * @param errorCode normal ICU error code
-   * @return the length of the output string, not counting the terminating NUL;
-   *         if the length is greater than destCapacity, then the string will not fit
-   *         and a buffer of the indicated length would need to be passed in
+   * @return the length of the output string, not counting the terminating NUL.
+   *        The terminating NUL may be larger than one byte for the encodings of
+   *        some codepages, like UTF-32, where the terminating NUL is 4 bytes.
+   *        The terminating NUL is written when there is room in the dest buffer.
+   *        If the length is greater than destCapacity, then the string will not fit
+   *        and a buffer of the indicated length would need to be passed in.
   * @stable ICU 2.0
   */
  int32_t extract(char *dest, int32_t destCapacity,
--- a/icu4c/source/common/unistr.cpp
+++ b/icu4c/source/common/unistr.cpp
@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-* Copyright (C) 1999-2006, International Business Machines Corporation and   *
+* Copyright (C) 1999-2007, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 *
@ -723,7 +723,7 @@ UnicodeString::extract(int32_t start,
    u_UCharsToChars(getArrayStart() + start, target, length);
  }
  UErrorCode status = U_ZERO_ERROR;
-  return u_terminateChars(target, targetCapacity, length, &status);
+  return u_terminateChars(target, targetCapacity, length, 1, &status);
 }

 void 
--- a/icu4c/source/common/unistr_cnv.cpp
+++ b/icu4c/source/common/unistr_cnv.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2006, International Business Machines
+*   Copyright (C) 1999-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -123,6 +123,8 @@ UnicodeString::extract(int32_t start,
    UConverter *converter;
    UErrorCode status = U_ZERO_ERROR;

+    // We don't NUL-terminate here because the terminating NUL may need 1 to 4 bytes, depending on the codepage.
+    /*
    // just write the NUL if the string length is 0
    if(length == 0) {
        if(dstSize >= 0x80000000) {  
@ -131,7 +133,7 @@ UnicodeString::extract(int32_t start,
            dstSize=0x7fffffff;
        }
        return u_terminateChars(target, dstSize, 0, &status);
-    }
+    }*/

    // if the codepage is the default, use our cache
    // if it is an empty string, then use the "invariant character" conversion
@ -151,7 +153,7 @@ UnicodeString::extract(int32_t start,
            destLength = (int32_t)dstSize;
        }
        u_UCharsToChars(getArrayStart() + start, target, destLength);
-        return u_terminateChars(target, (int32_t)dstSize, length, &status);
+        return u_terminateChars(target, (int32_t)dstSize, length, 1, &status);
    } else {
        converter = ucnv_open(codepage, &status);
    }
@ -184,7 +186,7 @@ UnicodeString::extract(char *dest, int32_t destCapacity,

    // nothing to do?
    if(fLength<=0) {
-        return u_terminateChars(dest, destCapacity, 0, &errorCode);
+        return u_terminateChars(dest, destCapacity, 0, ucnv_getMinCharSize(cnv), &errorCode);
    }

    // get the converter
@ -256,7 +258,7 @@ UnicodeString::doExtract(int32_t start, int32_t length,
        } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
    }

-    return u_terminateChars(originalDest, destCapacity, length, &errorCode);
+    return u_terminateChars(originalDest, destCapacity, length, ucnv_getMinCharSize(cnv), &errorCode);
 }

 void
--- a/icu4c/source/common/uresbund.c
+++ b/icu4c/source/common/uresbund.c
@ -1032,7 +1032,7 @@ ures_toUTF8String(const UChar *s16, int32_t length16,
            *pLength = 0;
        }
        if (forceCopy) {
-            u_terminateChars(dest, capacity, 0, status);
+            u_terminateChars(dest, capacity, 0, 1, status);
            return dest;
        } else {
            return "";
@ -2457,7 +2457,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
        length = 0;
        result[0]=0;
    }
-    return u_terminateChars(result, resultCapacity, length, status);
+    return u_terminateChars(result, resultCapacity, length, 1, status);
 }

 U_CAPI UEnumeration* U_EXPORT2
--- a/icu4c/source/common/ustr_imp.h
+++ b/icu4c/source/common/ustr_imp.h
@ -190,9 +190,16 @@ u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode
 /**
 * NUL-terminate a char * string if possible.
 * Same as u_terminateUChars() but for a different string type.
+ * @param dest The buffer to NUL-terminate.
+ * @param destCapacity The size in bytes of the dest buffer.
+ * @param length The number of bytes used in dest.
+ * @param sizeOfNULL When an encoding that is not ASCII-compatible is used, like
+ *      UTF-32, UTF-16 or some DBCS encoding, this specifies the number of bytes
+ *      used by the terminating NUL. This should be 1 for the invariant codepage.
+ * @return length
 */
 U_CAPI int32_t U_EXPORT2
-u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
+u_terminateChars(char *dest, int32_t destCapacity, int32_t length, int32_t sizeOfNULL, UErrorCode *pErrorCode);

 /**
 * NUL-terminate a UChar32 * string if possible.
--- a/icu4c/source/common/ustring.c
+++ b/icu4c/source/common/ustring.c
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 1998-2004, International Business Machines
+*   Copyright (C) 1998-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -1443,15 +1443,16 @@ u_growBufferFromStatic(void *context,
 * NUL-terminate a string no matter what its type.
 * Set warning and error codes accordingly.
 */
-#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode)      \
+#define __TERMINATE_STRING(dest, destCapacity, length, sizeOfNull, pErrorCode) \
    if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {                    \
        /* not a public function, so no complete argument checking */   \
-                                                                        \
        if(length<0) {                                                  \
            /* assume that the caller handles this */                   \
-        } else if(length<destCapacity) {                                \
+        } else if(sizeOfNull > 0 && (length+sizeOfNull)<=destCapacity) {\
            /* NUL-terminate the string, the NUL fits */                \
-            dest[length]=0;                                             \
+            do {                                                        \
+                dest[length+(--sizeOfNull)]=0;                          \
+            } while (sizeOfNull > 0);                                   \
            /* unset the not-terminated warning but leave all others */ \
            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {          \
                *pErrorCode=U_ZERO_ERROR;                               \
@ -1467,24 +1468,27 @@ u_growBufferFromStatic(void *context,

 U_CAPI int32_t U_EXPORT2
 u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
-    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+    int32_t sizeOfNull = 1;
+    __TERMINATE_STRING(dest, destCapacity, length, sizeOfNull, pErrorCode);
    return length;
 }

 U_CAPI int32_t U_EXPORT2
-u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
-    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+u_terminateChars(char *dest, int32_t destCapacity, int32_t length, int32_t sizeOfNull, UErrorCode *pErrorCode) {
+    __TERMINATE_STRING(dest, destCapacity, length, sizeOfNull, pErrorCode);
    return length;
 }

 U_CAPI int32_t U_EXPORT2
 u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
-    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+    int32_t sizeOfNull = 1;
+    __TERMINATE_STRING(dest, destCapacity, length, sizeOfNull, pErrorCode);
    return length;
 }

 U_CAPI int32_t U_EXPORT2
 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
-    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
+    int32_t sizeOfNull = 1;
+    __TERMINATE_STRING(dest, destCapacity, length, sizeOfNull, pErrorCode);
    return length;
 }
--- a/icu4c/source/common/ustrtrns.c
+++ b/icu4c/source/common/ustrtrns.c
@ -1161,7 +1161,7 @@ u_strToUTF8WithSub(char *dest,
    }

    /* Terminate the buffer */
-    u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode);
+    u_terminateChars((char*)dest,destCapacity,reqLength,1,pErrorCode);

    return (char*)dest;
 }
--- a/icu4c/source/test/cintltst/nfsprep.c
+++ b/icu4c/source/test/cintltst/nfsprep.c
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
- *   Copyright (C) 2003-2006, International Business Machines
+ *   Copyright (C) 2003-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -129,7 +129,7 @@ CLEANUP:
        free(b3);
    }

-    return u_terminateChars(dest, destCapacity, reqLength, status);
+    return u_terminateChars(dest, destCapacity, reqLength, 1, status);
 }

 /* sorted array for binary search*/
@ -273,7 +273,7 @@ CLEANUP:
        free(s);
    }
    
-    return u_terminateChars(dest, destCapacity, reqLen, status);
+    return u_terminateChars(dest, destCapacity, reqLen, 1, status);
 }

 int32_t
--- a/icu4c/source/test/intltest/ustrtest.cpp
+++ b/icu4c/source/test/intltest/ustrtest.cpp
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2006, International Business Machines Corporation and
+ * Copyright (c) 1997-2007, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

@ -617,6 +617,78 @@ UnicodeStringTest::TestExtract()
            ucnv_close(cnv);
        }
    }
+
+    {
+        // test UConverter extract() and constructor to properly NULL terminate
+        UnicodeString s=UNICODE_STRING_SIMPLE("hello");
+        char buffer[24];
+        static const char expect[]={
+            0,0,0,0x68,
+            0,0,0,0x65,
+            0,0,0,0x6C,
+            0,0,0,0x6C,
+            0,0,0,0x6F
+        };
+        UErrorCode errorCode=U_ZERO_ERROR;
+        UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
+        int32_t length;
+
+        if(U_SUCCESS(errorCode)) {
+            // test preflighting
+            if( (length=s.extract(NULL, 0, cnv, errorCode))!=20 ||
+                errorCode!=U_BUFFER_OVERFLOW_ERROR
+            ) {
+                errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
+                      length, u_errorName(errorCode));
+            }
+            errorCode=U_ZERO_ERROR;
+            if( (length=s.extract(buffer, 2, cnv, errorCode))!=20 ||
+                errorCode!=U_BUFFER_OVERFLOW_ERROR
+            ) {
+                errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
+                      length, u_errorName(errorCode));
+            }
+
+            // try error cases
+            errorCode=U_ZERO_ERROR;
+            if( s.extract(NULL, 2, cnv, errorCode)==20 || U_SUCCESS(errorCode)) {
+                errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
+            }
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            if( s.extract(NULL, 0, cnv, errorCode)==20 || U_SUCCESS(errorCode)) {
+                errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
+            }
+            errorCode=U_ZERO_ERROR;
+
+            // extract for real
+            if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=20 ||
+                uprv_memcmp(buffer, expect, 20)!=0 ||
+                buffer[20]!=0 || buffer[21]!=0 || buffer[22]!=0 || buffer[23]!=0 ||
+                U_FAILURE(errorCode)
+            ) {
+                errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
+                      length, u_errorName(errorCode));
+            }
+            // Test again with just the converter name.
+            if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-32BE"))!=20 ||
+                uprv_memcmp(buffer, expect, 20)!=0 ||
+                buffer[20]!=0 || buffer[21]!=0 || buffer[22]!=0 || buffer[23]!=0 ||
+                U_FAILURE(errorCode)
+            ) {
+                errln("UnicodeString::extract(\"UTF-32BE\") conversion failed (length=%ld, %s)",
+                      length, u_errorName(errorCode));
+            }
+
+            // try the constructor
+            UnicodeString t((const char *)expect, sizeof(expect), cnv, errorCode);
+            if(U_FAILURE(errorCode) || s!=t) {
+                errln("UnicodeString(UConverter) conversion failed (%s)",
+                      u_errorName(errorCode));
+            }
+
+            ucnv_close(cnv);
+        }
+    }
 }

 void
--- a/icu4c/source/tools/toolutil/uparse.c
+++ b/icu4c/source/tools/toolutil/uparse.c
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2000-2006, International Business Machines
+*   Copyright (C) 2000-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -366,5 +366,5 @@ u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity,
        i++;
        read += 2;
    }
-    return u_terminateChars(dest, destCapacity, i, status);
+    return u_terminateChars(dest, destCapacity, i, 1, status);
 }