ICU-6677 add u_strToUTF32WithSub() and u_strFromUTF32WithSub()

X-SVN-Rev: 25444
This commit is contained in:
Markus Scherer 2009-02-19 20:02:40 +00:00
parent a679ccf60c
commit c4e1d3e0be
3 changed files with 309 additions and 71 deletions

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1998-2008, International Business Machines
* Copyright (C) 1998-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -1183,7 +1183,10 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
/**
* Converts a sequence of UChars to wchar_t units.
* Convert a UTF-16 string to a wchar_t string.
* If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
* this function simply calls the fast, dedicated function for that.
* Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
@ -1209,7 +1212,10 @@ u_strToWCS(wchar_t *dest,
int32_t srcLength,
UErrorCode *pErrorCode);
/**
* Converts a sequence of wchar_t units to UChars
* Convert a wchar_t string to UTF-16.
* If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
* this function simply calls the fast, dedicated function for that.
* Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
@ -1237,7 +1243,8 @@ u_strFromWCS(UChar *dest,
#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
/**
* Converts a sequence of UChars (UTF-16) to UTF-8 bytes
* Convert a UTF-16 string to UTF-8.
* If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
@ -1266,7 +1273,8 @@ u_strToUTF8(char *dest,
UErrorCode *pErrorCode);
/**
* Converts a sequence of UTF-8 bytes to UChars (UTF-16).
* Convert a UTF-8 string to UTF-16.
* If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
@ -1295,7 +1303,9 @@ u_strFromUTF8(UChar *dest,
UErrorCode *pErrorCode);
/**
* Converts a sequence of UChars (UTF-16) to UTF-8 bytes.
* Convert a UTF-16 string to UTF-8.
* If the input string is not well-formed and subchar is U_SENTINEL,
* then the U_INVALID_CHAR_FOUND error code is set.
*
* Same as u_strToUTF8() except for the additional subchar which is output for
* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
* With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
@ -1338,7 +1348,9 @@ u_strToUTF8WithSub(char *dest,
UErrorCode *pErrorCode);
/**
* Converts a sequence of UTF-8 bytes to UChars (UTF-16).
* Convert a UTF-8 string to UTF-16.
* If the input string is not well-formed and subchar is U_SENTINEL,
* then the U_INVALID_CHAR_FOUND error code is set.
*
* Same as u_strFromUTF8() except for the additional subchar which is output for
* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
* With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
@ -1382,7 +1394,8 @@ u_strFromUTF8WithSub(UChar *dest,
UErrorCode *pErrorCode);
/**
* Converts a sequence of UTF-8 bytes to UChars (UTF-16).
* Convert a UTF-8 string to UTF-16.
*
* Same as u_strFromUTF8() except that this function is designed to be very fast,
* which it achieves by being lenient about malformed UTF-8 sequences.
* This function is intended for use in environments where UTF-8 text is
@ -1401,6 +1414,9 @@ u_strFromUTF8WithSub(UChar *dest,
* For further performance improvement, if srcLength is given (>=0),
* then destCapacity must be at least srcLength (destCapacity>=srcLength).
*
* There is no inverse u_strToUTF8Lenient() function because there is practically
* no performance gain from not checking that a UTF-16 string is well-formed.
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
@ -1437,7 +1453,8 @@ u_strFromUTF8Lenient(UChar *dest,
UErrorCode *pErrorCode);
/**
* Converts a sequence of UChars (UTF-16) to UTF32 units.
* Convert a UTF-16 string to UTF-32.
* If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
@ -1453,6 +1470,8 @@ u_strFromUTF8Lenient(UChar *dest,
* @param pErrorCode Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
* @return The pointer to destination buffer.
* @see u_strToUTF32WithSub
* @see u_strFromUTF32
* @stable ICU 2.0
*/
U_STABLE UChar32* U_EXPORT2
@ -1464,7 +1483,8 @@ u_strToUTF32(UChar32 *dest,
UErrorCode *pErrorCode);
/**
* Converts a sequence of UTF32 units to UChars (UTF-16)
* Convert a UTF-32 string to UTF-16.
* If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
@ -1480,6 +1500,8 @@ u_strToUTF32(UChar32 *dest,
* @param pErrorCode Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
* @return The pointer to destination buffer.
* @see u_strFromUTF32WithSub
* @see u_strToUTF32
* @stable ICU 2.0
*/
U_STABLE UChar* U_EXPORT2
@ -1490,4 +1512,94 @@ u_strFromUTF32(UChar *dest,
int32_t srcLength,
UErrorCode *pErrorCode);
/**
* Convert a UTF-16 string to UTF-32.
* If the input string is not well-formed and subchar is U_SENTINEL,
* then the U_INVALID_CHAR_FOUND error code is set.
* Otherwise each illegal input sequence (an unpaired surrogate) is replaced
* by subchar and the conversion continues.
*
* Same as u_strToUTF32() except for the additional subchar which is output for
* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
* With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
* @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
* dest may be NULL and the function will only return the length of the
* result without writing any of the result string (pre-flighting).
* @param pDestLength A pointer to receive the number of units written to the destination. If
* pDestLength!=NULL then *pDestLength is always set to the
* number of output units corresponding to the transformation of
* all the input units, even in case of a buffer overflow.
* @param src The original source string. Must not be NULL.
* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
* @param subchar The substitution character to use in place of an illegal input sequence,
* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
* A substitution character can be any valid Unicode code point (up to U+10FFFF)
* except for surrogate code points (U+D800..U+DFFF).
* A value above U+10FFFF or a surrogate code point sets U_ILLEGAL_ARGUMENT_ERROR.
* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
* Set to 0 if no substitutions occur or subchar<0.
* pNumSubstitutions can be NULL.
* @param pErrorCode Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return The pointer to destination buffer, or NULL if *pErrorCode indicated a failure
* on input or if the function sets U_ILLEGAL_ARGUMENT_ERROR or U_INVALID_CHAR_FOUND.
* @see u_strToUTF32
* @see u_strFromUTF32WithSub
* @draft ICU 4.2
*/
U_DRAFT UChar32* U_EXPORT2
u_strToUTF32WithSub(UChar32 *dest,
int32_t destCapacity,
int32_t *pDestLength,
const UChar *src,
int32_t srcLength,
UChar32 subchar, int32_t *pNumSubstitutions,
UErrorCode *pErrorCode);
/**
* Convert a UTF-32 string to UTF-16.
* If the input string is not well-formed and subchar is U_SENTINEL,
* then the U_INVALID_CHAR_FOUND error code is set.
* Otherwise each illegal input unit (a surrogate code point, or a value that is
* not a Unicode code point at all) is replaced by subchar and the conversion continues.
*
* Same as u_strFromUTF32() except for the additional subchar which is output for
* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
* With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the
* result without writing any of the result string (pre-flighting).
* @param pDestLength A pointer to receive the number of units written to the destination. If
* pDestLength!=NULL then *pDestLength is always set to the
* number of output units corresponding to the transformation of
* all the input units, even in case of a buffer overflow.
* A supplementary code point (including a supplementary subchar)
* counts as two output units.
* @param src The original source string. Must not be NULL.
* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
* @param subchar The substitution character to use in place of an illegal input sequence,
* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
* A substitution character can be any valid Unicode code point (up to U+10FFFF)
* except for surrogate code points (U+D800..U+DFFF).
* A value above U+10FFFF or a surrogate code point sets U_ILLEGAL_ARGUMENT_ERROR.
* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
* Set to 0 if no substitutions occur or subchar<0.
* pNumSubstitutions can be NULL.
* @param pErrorCode Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return The pointer to destination buffer, or NULL if *pErrorCode indicated a failure
* on input or if the function sets U_ILLEGAL_ARGUMENT_ERROR or U_INVALID_CHAR_FOUND.
* @see u_strFromUTF32
* @see u_strToUTF32WithSub
* @draft ICU 4.2
*/
U_DRAFT UChar* U_EXPORT2
u_strFromUTF32WithSub(UChar *dest,
int32_t destCapacity,
int32_t *pDestLength,
const UChar32 *src,
int32_t srcLength,
UChar32 subchar, int32_t *pNumSubstitutions,
UErrorCode *pErrorCode);
#endif

View file

@ -31,38 +31,50 @@
#include "ustr_imp.h"
U_CAPI UChar* U_EXPORT2
u_strFromUTF32(UChar *dest,
int32_t destCapacity,
u_strFromUTF32WithSub(UChar *dest,
int32_t destCapacity,
int32_t *pDestLength,
const UChar32 *src,
int32_t srcLength,
UChar32 subchar, int32_t *pNumSubstitutions,
UErrorCode *pErrorCode) {
const UChar32 *srcLimit;
UChar32 ch;
UChar *destLimit;
UChar *pDest;
int32_t reqLength;
int32_t numSubstitutions;
/* args check */
if(U_FAILURE(*pErrorCode)){
return NULL;
}
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)) {
if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0) ||
subchar > 0x10ffff || U_IS_SURROGATE(subchar)
) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
if(pNumSubstitutions != NULL) {
*pNumSubstitutions = 0;
}
pDest = dest;
destLimit = dest + destCapacity;
reqLength = 0;
numSubstitutions = 0;
if(srcLength < 0) {
/* simple loop for conversion of a NUL-terminated BMP string */
while((ch=*src) != 0 &&
((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) &&
pDest < destLimit) {
((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) {
++src;
*pDest++ = (UChar)ch;
if(pDest < destLimit) {
*pDest++ = (UChar)ch;
} else {
++reqLength;
}
}
srcLimit = src;
if(ch != 0) {
@ -74,43 +86,42 @@ u_strFromUTF32(UChar *dest,
}
/* convert with length */
while(src < srcLimit && pDest < destLimit) {
ch = *src++;
if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
*pDest++ = (UChar)ch;
} else if(0x10000 <= ch && ch <= 0x10ffff) {
*pDest++ = U16_LEAD(ch);
if(pDest < destLimit) {
*pDest++ = U16_TRAIL(ch);
} else {
reqLength = 1;
break;
}
} else {
/* surrogate code point, or not a Unicode code point at all */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
}
/* preflight the remaining string */
while(src < srcLimit) {
ch = *src++;
if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
++reqLength;
} else if(0x10000 <= ch && ch <= 0x10ffff) {
reqLength += 2;
} else {
/* surrogate code point, or not a Unicode code point at all */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
do {
/* usually "loops" once; twice only for writing subchar */
if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
if(pDest < destLimit) {
*pDest++ = (UChar)ch;
} else {
++reqLength;
}
break;
} else if(0x10000 <= ch && ch <= 0x10ffff) {
if((pDest + 2) <= destLimit) {
*pDest++ = U16_LEAD(ch);
*pDest++ = U16_TRAIL(ch);
} else {
reqLength += 2;
}
break;
} else if((ch = subchar) < 0) {
/* surrogate code point, or not a Unicode code point at all */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
} else {
++numSubstitutions;
}
} while(TRUE);
}
reqLength += (int32_t)(pDest - dest);
if(pDestLength) {
*pDestLength = reqLength;
}
if(pNumSubstitutions != NULL) {
*pNumSubstitutions = numSubstitutions;
}
/* Terminate the buffer */
u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
@ -118,13 +129,27 @@ u_strFromUTF32(UChar *dest,
return dest;
}
/*
 * Strict UTF-32 -> UTF-16 conversion.
 * Delegates to u_strFromUTF32WithSub() with subchar=U_SENTINEL, so that an
 * illegal input unit stops the conversion with U_INVALID_CHAR_FOUND instead of
 * being replaced, and with pNumSubstitutions=NULL because no substitution
 * count is wanted.
 */
U_CAPI UChar* U_EXPORT2
u_strFromUTF32(UChar *dest,
               int32_t destCapacity,
               int32_t *pDestLength,
               const UChar32 *src,
               int32_t srcLength,
               UErrorCode *pErrorCode) {
    UChar *result;

    result = u_strFromUTF32WithSub(dest, destCapacity, pDestLength,
                                   src, srcLength,
                                   U_SENTINEL, NULL,
                                   pErrorCode);
    return result;
}
U_CAPI UChar32* U_EXPORT2
u_strToUTF32(UChar32 *dest,
u_strToUTF32WithSub(UChar32 *dest,
int32_t destCapacity,
int32_t *pDestLength,
const UChar *src,
const UChar *src,
int32_t srcLength,
UChar32 subchar, int32_t *pNumSubstitutions,
UErrorCode *pErrorCode) {
const UChar *srcLimit;
UChar32 ch;
@ -132,25 +157,37 @@ u_strToUTF32(UChar32 *dest,
UChar32 *destLimit;
UChar32 *pDest;
int32_t reqLength;
int32_t numSubstitutions;
/* args check */
if(U_FAILURE(*pErrorCode)){
return NULL;
}
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)) {
if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0) ||
subchar > 0x10ffff || U_IS_SURROGATE(subchar)
) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
if(pNumSubstitutions != NULL) {
*pNumSubstitutions = 0;
}
pDest = dest;
destLimit = dest + destCapacity;
reqLength = 0;
numSubstitutions = 0;
if(srcLength < 0) {
/* simple loop for conversion of a NUL-terminated BMP string */
while((ch=*src) != 0 && !U16_IS_SURROGATE(ch) && pDest < destLimit) {
while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) {
++src;
*pDest++ = ch;
if(pDest < destLimit) {
*pDest++ = ch;
} else {
++reqLength;
}
}
srcLimit = src;
if(ch != 0) {
@ -162,47 +199,55 @@ u_strToUTF32(UChar32 *dest,
}
/* convert with length */
while(src < srcLimit && pDest < destLimit) {
ch = *src++;
if(!U16_IS_SURROGATE(ch)) {
/* write ch below */
} else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
++src;
ch = U16_GET_SUPPLEMENTARY(ch, ch2);
} else {
/* unpaired surrogate */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
}
*pDest++ = ch;
}
/* preflight the remaining string */
while(src < srcLimit) {
ch = *src++;
if(!U16_IS_SURROGATE(ch)) {
/* ++reqLength below */
} else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(*src)) {
/* write or count ch below */
} else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
++src;
} else {
ch = U16_GET_SUPPLEMENTARY(ch, ch2);
} else if((ch = subchar) < 0) {
/* unpaired surrogate */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
} else {
++numSubstitutions;
}
if(pDest < destLimit) {
*pDest++ = ch;
} else {
++reqLength;
}
++reqLength;
}
reqLength += (int32_t)(pDest - dest);
if(pDestLength) {
*pDestLength = reqLength;
}
if(pNumSubstitutions != NULL) {
*pNumSubstitutions = numSubstitutions;
}
/* Terminate the buffer */
u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);
return dest;
}
/*
 * Strict UTF-16 -> UTF-32 conversion.
 * Delegates to u_strToUTF32WithSub() with subchar=U_SENTINEL, so that an
 * unpaired surrogate stops the conversion with U_INVALID_CHAR_FOUND instead of
 * being replaced, and with pNumSubstitutions=NULL because no substitution
 * count is wanted.
 */
U_CAPI UChar32* U_EXPORT2
u_strToUTF32(UChar32 *dest,
             int32_t destCapacity,
             int32_t *pDestLength,
             const UChar *src,
             int32_t srcLength,
             UErrorCode *pErrorCode) {
    UChar32 *result;

    result = u_strToUTF32WithSub(dest, destCapacity, pDestLength,
                                 src, srcLength,
                                 U_SENTINEL, NULL,
                                 pErrorCode);
    return result;
}
/* for utf8_nextCharSafeBodyTerminated() */
static const UChar32
utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
@ -372,6 +417,9 @@ u_strFromUTF8WithSub(UChar *dest,
return NULL;
}
if(pNumSubstitutions!=NULL) {
*pNumSubstitutions=0;
}
numSubstitutions=0;
/*
@ -948,6 +996,9 @@ u_strToUTF8WithSub(char *dest,
return NULL;
}
if(pNumSubstitutions!=NULL) {
*pNumSubstitutions=0;
}
numSubstitutions=0;
if(srcLength==-1) {

View file

@ -202,10 +202,13 @@ static void Test_strToUTF32_surrogates() {
UErrorCode err = U_ZERO_ERROR;
UChar32 u32Target[400];
int32_t len16, u32DestLen;
int32_t numSubstitutions;
int i;
static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 };
static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 };
static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 };
len16 = LENGTHOF(surr16);
for(i = 0; i < 4; ++i) {
err = U_ZERO_ERROR;
@ -272,6 +275,40 @@ static void Test_strToUTF32_surrogates() {
u_errorName(err));
return;
}
/* with substitution character */
numSubstitutions = -1;
err = U_ZERO_ERROR;
u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
u_errorName(err));
return;
}
err = U_ZERO_ERROR;
u_strToUTF32WithSub(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) {
log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
u_errorName(err));
return;
}
err = U_ZERO_ERROR;
u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
u_errorName(err));
return;
}
err = U_ZERO_ERROR;
u_strToUTF32WithSub(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) {
log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
u_errorName(err));
return;
}
}
static void Test_strFromUTF32(void){
@ -345,10 +382,14 @@ static void Test_strFromUTF32_surrogates() {
UErrorCode err = U_ZERO_ERROR;
UChar uTarget[400];
int32_t len32, uDestLen;
int32_t numSubstitutions;
int i;
static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 };
static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45,
0x5a, 0xd900, 0xdc00, 0x7a, 0 };
len32 = LENGTHOF(surr32);
for(i = 0; i < 6; ++i) {
err = U_ZERO_ERROR;
@ -415,6 +456,40 @@ static void Test_strFromUTF32_surrogates() {
u_errorName(err));
return;
}
/* with substitution character */
numSubstitutions = -1;
err = U_ZERO_ERROR;
u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) {
log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
u_errorName(err));
return;
}
err = U_ZERO_ERROR;
u_strFromUTF32WithSub(uTarget, LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) {
log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
u_errorName(err));
return;
}
err = U_ZERO_ERROR;
u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) {
log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
u_errorName(err));
return;
}
err = U_ZERO_ERROR;
u_strFromUTF32WithSub(uTarget, LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) {
log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
u_errorName(err));
return;
}
}
static void Test_UChar_UTF8_API(void){