ICU-5869 Properly NULL terminate strings that are not byte based, like UTF-16 or UTF-32.

X-SVN-Rev: 22476
This commit is contained in:
George Rhoten 2007-08-22 23:39:11 +00:00
parent 762a820053
commit 8620a4a530
15 changed files with 167 additions and 66 deletions

View file

@ -509,7 +509,7 @@ caseMap(const UCaseMap *csm,
}
}
return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
return u_terminateChars((char *)dest, destCapacity, destLength, 1, pErrorCode);
}
/* public API functions */

View file

@ -1757,7 +1757,7 @@ ucnv_fromUChars(UConverter *cnv,
destLength=0;
}
return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
return u_terminateChars(originalDest, destCapacity, destLength, ucnv_getMinCharSize(cnv), pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@ -2410,7 +2410,7 @@ ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
/* if there is no input data, we're done */
if(source==sourceLimit) {
return u_terminateChars(target, targetCapacity, 0, pErrorCode);
return u_terminateChars(target, targetCapacity, 0, ucnv_getMinCharSize(outConverter), pErrorCode);
}
pivot=pivot2=pivotBuffer;
@ -2454,7 +2454,7 @@ ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
} while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
/* done with preflighting, set warnings and errors as appropriate */
return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
return u_terminateChars(target, targetCapacity, targetLength, ucnv_getMinCharSize(outConverter), pErrorCode);
}
/* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
@ -2465,10 +2465,11 @@ U_CAPI int32_t U_EXPORT2
ucnv_convert(const char *toConverterName, const char *fromConverterName,
char *target, int32_t targetCapacity,
const char *source, int32_t sourceLength,
UErrorCode *pErrorCode) {
UErrorCode *pErrorCode)
{
UConverter in, out; /* stack-allocated */
UConverter *inConverter, *outConverter;
int32_t targetLength;
UConverter *inConverter = NULL, *outConverter = NULL;
int32_t targetLength = 0;
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
@ -2481,21 +2482,21 @@ ucnv_convert(const char *toConverterName, const char *fromConverterName,
return 0;
}
outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
goto cleanup;
}
/* if there is no input data, we're done */
if(sourceLength==0 || (sourceLength<0 && *source==0)) {
return u_terminateChars(target, targetCapacity, 0, pErrorCode);
targetLength = u_terminateChars(target, targetCapacity, 0, ucnv_getMinCharSize(outConverter), pErrorCode);
goto cleanup;
}
/* create the converters */
inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0;
}
outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
ucnv_close(inConverter);
return 0;
goto cleanup;
}
targetLength=ucnv_internalConvert(outConverter, inConverter,
@ -2503,6 +2504,7 @@ ucnv_convert(const char *toConverterName, const char *fromConverterName,
source, sourceLength,
pErrorCode);
cleanup:
ucnv_close(inConverter);
ucnv_close(outConverter);
@ -2534,7 +2536,7 @@ ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
/* if there is no input data, we're done */
if(sourceLength==0 || (sourceLength<0 && *source==0)) {
return u_terminateChars(target, targetCapacity, 0, pErrorCode);
return u_terminateChars(target, targetCapacity, 0, ucnv_getMinCharSize(cnv), pErrorCode);
}
/* create the algorithmic converter */

View file

@ -774,7 +774,7 @@ _getKeywords(const char *localeID,
*valLen = valuesLen;
}
}
return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
return u_terminateChars(keywords, keywordCapacity, keywordsLen, 1, status);
} else {
return 0;
}
@ -856,14 +856,14 @@ uloc_getKeywordValue(const char* localeID,
startSearchHere--;
}
uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), 1, status);
} else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
i = (int32_t)uprv_strlen(nextSeparator);
while(nextSeparator[i - 1] == ' ') {
i--;
}
uprv_strncpy(buffer, nextSeparator, i);
result = u_terminateChars(buffer, bufferCapacity, i, status);
result = u_terminateChars(buffer, bufferCapacity, i, 1, status);
} else {
/* give a bigger buffer, please */
*status = U_BUFFER_OVERFLOW_ERROR;
@ -1766,7 +1766,7 @@ _canonicalize(const char* localeID,
uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
}
return u_terminateChars(result, resultCapacity, len, err);
return u_terminateChars(result, resultCapacity, len, 1, err);
}
/* ### ID parsing API **************************************************/
@ -1796,7 +1796,7 @@ uloc_getParent(const char* localeID,
if(i>0 && parent != localeID) {
uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
}
return u_terminateChars(parent, parentCapacity, i, err);
return u_terminateChars(parent, parentCapacity, i, 1, err);
}
U_CAPI int32_t U_EXPORT2
@ -1817,7 +1817,7 @@ uloc_getLanguage(const char* localeID,
}
i=_getLanguage(localeID, language, languageCapacity, NULL);
return u_terminateChars(language, languageCapacity, i, err);
return u_terminateChars(language, languageCapacity, i, 1, err);
}
U_CAPI int32_t U_EXPORT2
@ -1841,7 +1841,7 @@ uloc_getScript(const char* localeID,
if(_isIDSeparator(*localeID)) {
i=_getScript(localeID+1, script, scriptCapacity, NULL);
}
return u_terminateChars(script, scriptCapacity, i, err);
return u_terminateChars(script, scriptCapacity, i, 1, err);
}
U_CAPI int32_t U_EXPORT2
@ -1874,7 +1874,7 @@ uloc_getCountry(const char* localeID,
i=_getCountry(localeID+1, country, countryCapacity, NULL);
}
}
return u_terminateChars(country, countryCapacity, i, err);
return u_terminateChars(country, countryCapacity, i, 1, err);
}
U_CAPI int32_t U_EXPORT2
@ -1919,7 +1919,7 @@ uloc_getVariant(const char* localeID,
i=_getVariant(localeID+1, '@', variant, variantCapacity);
}
*/
return u_terminateChars(variant, variantCapacity, i, err);
return u_terminateChars(variant, variantCapacity, i, 1, err);
}
U_CAPI int32_t U_EXPORT2
@ -2978,7 +2978,7 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
uprv_free(fallbackList[j]);
}
uprv_free(fallbackList);
return u_terminateChars(result, resultAvailable, len, status);
return u_terminateChars(result, resultAvailable, len, 1, status);
}
if(len>maxLen) {
maxLen = len;
@ -3018,7 +3018,7 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
uprv_free(fallbackList[j]);
}
uprv_free(fallbackList);
return u_terminateChars(result, resultAvailable, len, status);
return u_terminateChars(result, resultAvailable, len, 1, status);
}
}
uenum_reset(availableLocales, status);

View file

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2006, International Business Machines
* Copyright (C) 1999-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -1441,7 +1441,7 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
}
if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
return u_terminateChars(buffer, bufferLength, 0, 1, pErrorCode);
}
length=0;
@ -1472,7 +1472,7 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
}
}
return u_terminateChars(buffer, bufferLength, length, pErrorCode);
return u_terminateChars(buffer, bufferLength, length, 1, pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@ -1490,12 +1490,12 @@ u_getISOComment(UChar32 c,
}
if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
return u_terminateChars(dest, destCapacity, 0, pErrorCode);
return u_terminateChars(dest, destCapacity, 0, 1, pErrorCode);
}
/* the ISO comment is stored like a normal character name */
length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity);
return u_terminateChars(dest, destCapacity, length, pErrorCode);
return u_terminateChars(dest, destCapacity, length, 1, pErrorCode);
}
U_CAPI UChar32 U_EXPORT2

View file

@ -1149,9 +1149,14 @@ ucnv_toUnicode(UConverter *converter,
* common error codes that may be set by this function include
* U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
* U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
* @return the length of the output string, not counting the terminating NUL;
* if the length is greater than destCapacity, then the string will not fit
* and a buffer of the indicated length would need to be passed in
* @return The length of the output string, not counting the terminating NUL.
* The terminating NUL may be larger than one byte for the encodings of
* some codepages, like UTF-32, where the terminating NUL is 4 bytes.
* The terminating NUL is written when there is room in the dest buffer.
* If the length is greater than destCapacity, then the string will not fit
* and a buffer of the indicated length would need to be passed in.
* The sum of this return value and the value of ucnv_getMinCharSize() is a
* buffer size large enough for the converted string and its terminating NUL.
* @see ucnv_fromUnicode
* @see ucnv_convert
* @see UCNV_GET_MAX_BYTES_FOR_STRING

View file

@ -1468,7 +1468,10 @@ public:
* If <TT>target</TT> is NULL, then the number of bytes required for
* <TT>target</TT> is returned. It is assumed that the target is big enough
* to fit all of the characters.
* @return the output string length, not including the terminating NUL
* @return the output string length, not including the terminating NUL.
* The terminating NUL may be larger than one byte for the encodings of
* some codepages, like UTF-32, where the terminating NUL is 4 bytes.
* The terminating NUL is written when there is room in the target buffer.
* @stable ICU 2.0
*/
inline int32_t extract(int32_t start,
@ -1503,6 +1506,9 @@ public:
* If <TT>target</TT> is NULL, then the number of bytes required for
* <TT>target</TT> is returned.
* @return the output string length, not including the terminating NUL.
* The terminating NUL may be larger than one byte for the encodings of
* some codepages, like UTF-32, where the terminating NUL is 4 bytes.
* The terminating NUL is written when there is room in the target buffer.
* @stable ICU 2.0
*/
int32_t extract(int32_t start,
@ -1523,9 +1529,12 @@ public:
* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
* or NULL for the default converter
* @param errorCode normal ICU error code
* @return the length of the output string, not counting the terminating NUL;
* if the length is greater than destCapacity, then the string will not fit
* and a buffer of the indicated length would need to be passed in
* @return the length of the output string, not counting the terminating NUL.
* The terminating NUL may be larger than one byte for the encodings of
* some codepages, like UTF-32, where the terminating NUL is 4 bytes.
* The terminating NUL is written when there is room in the dest buffer.
* If the length is greater than destCapacity, then the string will not fit
* and a buffer of the indicated length would need to be passed in.
* @stable ICU 2.0
*/
int32_t extract(char *dest, int32_t destCapacity,

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1999-2006, International Business Machines Corporation and *
* Copyright (C) 1999-2007, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*
@ -723,7 +723,7 @@ UnicodeString::extract(int32_t start,
u_UCharsToChars(getArrayStart() + start, target, length);
}
UErrorCode status = U_ZERO_ERROR;
return u_terminateChars(target, targetCapacity, length, &status);
return u_terminateChars(target, targetCapacity, length, 1, &status);
}
void

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2006, International Business Machines
* Copyright (C) 1999-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -123,6 +123,8 @@ UnicodeString::extract(int32_t start,
UConverter *converter;
UErrorCode status = U_ZERO_ERROR;
// We don't NUL-terminate here because the terminator might need 1 to 4 bytes, depending on the codepage.
/*
// just write the NUL if the string length is 0
if(length == 0) {
if(dstSize >= 0x80000000) {
@ -131,7 +133,7 @@ UnicodeString::extract(int32_t start,
dstSize=0x7fffffff;
}
return u_terminateChars(target, dstSize, 0, &status);
}
}*/
// if the codepage is the default, use our cache
// if it is an empty string, then use the "invariant character" conversion
@ -151,7 +153,7 @@ UnicodeString::extract(int32_t start,
destLength = (int32_t)dstSize;
}
u_UCharsToChars(getArrayStart() + start, target, destLength);
return u_terminateChars(target, (int32_t)dstSize, length, &status);
return u_terminateChars(target, (int32_t)dstSize, length, 1, &status);
} else {
converter = ucnv_open(codepage, &status);
}
@ -184,7 +186,7 @@ UnicodeString::extract(char *dest, int32_t destCapacity,
// nothing to do?
if(fLength<=0) {
return u_terminateChars(dest, destCapacity, 0, &errorCode);
return u_terminateChars(dest, destCapacity, 0, ucnv_getMinCharSize(cnv), &errorCode);
}
// get the converter
@ -256,7 +258,7 @@ UnicodeString::doExtract(int32_t start, int32_t length,
} while(errorCode==U_BUFFER_OVERFLOW_ERROR);
}
return u_terminateChars(originalDest, destCapacity, length, &errorCode);
return u_terminateChars(originalDest, destCapacity, length, ucnv_getMinCharSize(cnv), &errorCode);
}
void

View file

@ -1032,7 +1032,7 @@ ures_toUTF8String(const UChar *s16, int32_t length16,
*pLength = 0;
}
if (forceCopy) {
u_terminateChars(dest, capacity, 0, status);
u_terminateChars(dest, capacity, 0, 1, status);
return dest;
} else {
return "";
@ -2457,7 +2457,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
length = 0;
result[0]=0;
}
return u_terminateChars(result, resultCapacity, length, status);
return u_terminateChars(result, resultCapacity, length, 1, status);
}
U_CAPI UEnumeration* U_EXPORT2

View file

@ -190,9 +190,16 @@ u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode
/**
* NUL-terminate a char * string if possible.
* Same as u_terminateUChars() but for a different string type.
* @param dest The buffer to NUL-terminate.
* @param destCapacity The size in bytes of the dest buffer.
* @param length The number of bytes used in dest.
* @param sizeOfNULL The number of bytes used by the NUL termination. This is
* more than 1 when a non-ASCII compatible encoding is used, like UTF-32,
* UTF-16 or some DBCS encoding. This should be 1 for the invariant codepage.
* @param pErrorCode The ICU error code. The string is only terminated when
* this is not NULL and indicates success on entry.
* @return length
*/
U_CAPI int32_t U_EXPORT2
u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
u_terminateChars(char *dest, int32_t destCapacity, int32_t length, int32_t sizeOfNULL, UErrorCode *pErrorCode);
/**
* NUL-terminate a UChar32 * string if possible.

View file

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1998-2004, International Business Machines
* Copyright (C) 1998-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -1443,15 +1443,16 @@ u_growBufferFromStatic(void *context,
* NUL-terminate a string no matter what its type.
* Set warning and error codes accordingly.
*/
#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) \
#define __TERMINATE_STRING(dest, destCapacity, length, sizeOfNull, pErrorCode) \
if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \
/* not a public function, so no complete argument checking */ \
\
if(length<0) { \
/* assume that the caller handles this */ \
} else if(length<destCapacity) { \
} else if(sizeOfNull > 0 && (length+sizeOfNull)<=destCapacity) {\
/* NUL-terminate the string, the NUL fits */ \
dest[length]=0; \
do { \
dest[length+(--sizeOfNull)]=0; \
} while (sizeOfNull > 0); \
/* unset the not-terminated warning but leave all others */ \
if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { \
*pErrorCode=U_ZERO_ERROR; \
@ -1467,24 +1468,27 @@ u_growBufferFromStatic(void *context,
U_CAPI int32_t U_EXPORT2
u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
int32_t sizeOfNull = 1;
__TERMINATE_STRING(dest, destCapacity, length, sizeOfNull, pErrorCode);
return length;
}
U_CAPI int32_t U_EXPORT2
u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
u_terminateChars(char *dest, int32_t destCapacity, int32_t length, int32_t sizeOfNull, UErrorCode *pErrorCode) {
__TERMINATE_STRING(dest, destCapacity, length, sizeOfNull, pErrorCode);
return length;
}
U_CAPI int32_t U_EXPORT2
u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
int32_t sizeOfNull = 1;
__TERMINATE_STRING(dest, destCapacity, length, sizeOfNull, pErrorCode);
return length;
}
U_CAPI int32_t U_EXPORT2
u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
int32_t sizeOfNull = 1;
__TERMINATE_STRING(dest, destCapacity, length, sizeOfNull, pErrorCode);
return length;
}

View file

@ -1161,7 +1161,7 @@ u_strToUTF8WithSub(char *dest,
}
/* Terminate the buffer */
u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode);
u_terminateChars((char*)dest,destCapacity,reqLength,1,pErrorCode);
return (char*)dest;
}

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2003-2006, International Business Machines
* Copyright (C) 2003-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -129,7 +129,7 @@ CLEANUP:
free(b3);
}
return u_terminateChars(dest, destCapacity, reqLength, status);
return u_terminateChars(dest, destCapacity, reqLength, 1, status);
}
/* sorted array for binary search*/
@ -273,7 +273,7 @@ CLEANUP:
free(s);
}
return u_terminateChars(dest, destCapacity, reqLen, status);
return u_terminateChars(dest, destCapacity, reqLen, 1, status);
}
int32_t

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2006, International Business Machines Corporation and
* Copyright (c) 1997-2007, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -617,6 +617,78 @@ UnicodeStringTest::TestExtract()
ucnv_close(cnv);
}
}
{
// test UConverter extract() and constructor to properly NULL terminate
UnicodeString s=UNICODE_STRING_SIMPLE("hello");
char buffer[24];
static const char expect[]={
0,0,0,0x68,
0,0,0,0x65,
0,0,0,0x6C,
0,0,0,0x6C,
0,0,0,0x6F
};
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
int32_t length;
if(U_SUCCESS(errorCode)) {
// test preflighting
if( (length=s.extract(NULL, 0, cnv, errorCode))!=20 ||
errorCode!=U_BUFFER_OVERFLOW_ERROR
) {
errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
length, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
if( (length=s.extract(buffer, 2, cnv, errorCode))!=20 ||
errorCode!=U_BUFFER_OVERFLOW_ERROR
) {
errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
length, u_errorName(errorCode));
}
// try error cases
errorCode=U_ZERO_ERROR;
if( s.extract(NULL, 2, cnv, errorCode)==20 || U_SUCCESS(errorCode)) {
errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
}
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
if( s.extract(NULL, 0, cnv, errorCode)==20 || U_SUCCESS(errorCode)) {
errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
}
errorCode=U_ZERO_ERROR;
// extract for real
if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=20 ||
uprv_memcmp(buffer, expect, 20)!=0 ||
buffer[20]!=0 || buffer[21]!=0 || buffer[22]!=0 || buffer[23]!=0 ||
U_FAILURE(errorCode)
) {
errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
length, u_errorName(errorCode));
}
// Test again with just the converter name.
if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-32BE"))!=20 ||
uprv_memcmp(buffer, expect, 20)!=0 ||
buffer[20]!=0 || buffer[21]!=0 || buffer[22]!=0 || buffer[23]!=0 ||
U_FAILURE(errorCode)
) {
errln("UnicodeString::extract(\"UTF-32BE\") conversion failed (length=%ld, %s)",
length, u_errorName(errorCode));
}
// try the constructor
UnicodeString t((const char *)expect, sizeof(expect), cnv, errorCode);
if(U_FAILURE(errorCode) || s!=t) {
errln("UnicodeString(UConverter) conversion failed (%s)",
u_errorName(errorCode));
}
ucnv_close(cnv);
}
}
}
void

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2000-2006, International Business Machines
* Copyright (C) 2000-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -366,5 +366,5 @@ u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity,
i++;
read += 2;
}
return u_terminateChars(dest, destCapacity, i, status);
return u_terminateChars(dest, destCapacity, i, 1, status);
}